diff --git a/.github/prehooks/pre-commit b/.github/prehooks/pre-commit index d35cdff5a..ddbce453d 100755 --- a/.github/prehooks/pre-commit +++ b/.github/prehooks/pre-commit @@ -1,5 +1,34 @@ #!/bin/sh +# ==================================================================== # +# TITLE # +# AMR: An R Package for Working with Antimicrobial Resistance Data # +# # +# SOURCE # +# https://github.com/msberends/AMR # +# # +# CITE AS # +# Berends MS, Luz CF, Friedrich AW, Sinha BNM, Albers CJ, Glasner C # +# (2022). AMR: An R Package for Working with Antimicrobial Resistance # +# Data. Journal of Statistical Software, 104(3), 1-31. # +# doi:10.18637/jss.v104.i03 # +# # +# Developed at the University of Groningen, the Netherlands, in # +# collaboration with non-profit organisations Certe Medical # +# Diagnostics & Advice, and University Medical Center Groningen. # +# # +# This R package is free software; you can freely use and distribute # +# it for both personal and commercial purposes under the terms of the # +# GNU General Public License version 2.0 (GNU GPL-2), as published by # +# the Free Software Foundation. # +# We created this package for both routine data analysis and academic # +# research and it was publicly released in the hope that it will be # +# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. # +# # +# Visit our website for the full manual and a complete tutorial about # +# how to conduct AMR data analysis: https://msberends.github.io/AMR/ # +# ==================================================================== # + echo "Running pre-commit hook..." # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/DESCRIPTION b/DESCRIPTION index beeeb61be..e653f002a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: AMR -Version: 1.8.2.9026 +Version: 1.8.2.9027 Date: 2022-10-04 Title: Antimicrobial Resistance Data Analysis Description: Functions to simplify and standardise antimicrobial resistance (AMR) diff --git a/NEWS.md b/NEWS.md index 6f4994e57..0cc3e3291 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# AMR 1.8.2.9026 +# AMR 1.8.2.9027 This version will eventually become v2.0! We're happy to reach a new major milestone soon! diff --git a/R/aa_helper_functions.R b/R/aa_helper_functions.R index c6856b8f3..a3abbb248 100755 --- a/R/aa_helper_functions.R +++ b/R/aa_helper_functions.R @@ -409,7 +409,7 @@ word_wrap <- function(..., msg <- paste0(c(...), collapse = "") if (isTRUE(as_note)) { - msg <- paste0(pkg_env$info_icon, " ", gsub("^note:? ?", "", msg, ignore.case = TRUE)) + msg <- paste0(AMR_env$info_icon, " ", gsub("^note:? ?", "", msg, ignore.case = TRUE)) } if (msg %like% "\n") { @@ -742,14 +742,14 @@ meet_criteria <- function(object, # if object is missing, or another error: tryCatch(invisible(object), - error = function(e) pkg_env$meet_criteria_error_txt <- e$message + error = function(e) AMR_env$meet_criteria_error_txt <- e$message ) - if (!is.null(pkg_env$meet_criteria_error_txt)) { - error_txt <- pkg_env$meet_criteria_error_txt - pkg_env$meet_criteria_error_txt <- NULL + if (!is.null(AMR_env$meet_criteria_error_txt)) { + error_txt <- AMR_env$meet_criteria_error_txt + AMR_env$meet_criteria_error_txt <- NULL stop(error_txt, call. = FALSE) # don't use stop_() here, our pkg may not be loaded yet } - pkg_env$meet_criteria_error_txt <- NULL + AMR_env$meet_criteria_error_txt <- NULL if (is.null(object)) { stop_if(allow_NULL == FALSE, "argument `", obj_name, "` must not be NULL", call = call_depth) @@ -990,9 +990,9 @@ message_not_thrown_before <- function(fn, ..., entire_session = FALSE) { # this is to prevent that messages/notes will be printed for every dplyr group or more than once per session # e.g. this would show a msg 4 times: example_isolates %>% group_by(ward) %>% filter(mo_is_gram_negative()) salt <- gsub("[^a-zA-Z0-9|_-]", "?", substr(paste(c(...), sep = "|", collapse = "|"), 1, 512), perl = TRUE) - not_thrown_before <- is.null(pkg_env[[paste0("thrown_msg.", fn, ".", salt)]]) || + not_thrown_before <- is.null(AMR_env[[paste0("thrown_msg.", fn, ".", salt)]]) || !identical( - pkg_env[[paste0("thrown_msg.", fn, ".", salt)]], + AMR_env[[paste0("thrown_msg.", fn, ".", salt)]], unique_call_id( entire_session = entire_session, match_fn = fn @@ -1003,7 +1003,7 @@ message_not_thrown_before <- function(fn, ..., entire_session = FALSE) { assign( x = paste0("thrown_msg.", fn, ".", salt), value = unique_call_id(entire_session = entire_session, match_fn = fn), - envir = pkg_env + envir = AMR_env ) } not_thrown_before @@ -1354,11 +1354,11 @@ percentage <- function(x, digits = NULL, ...) { } time_start_tracking <- function() { - pkg_env$time_start <- round(as.double(Sys.time()) * 1000) + AMR_env$time_start <- round(as.double(Sys.time()) * 1000) } time_track <- function(name = NULL) { - paste("(until now:", trimws(round(as.double(Sys.time()) * 1000) - pkg_env$time_start), "ms)") + paste("(until now:", trimws(round(as.double(Sys.time()) * 1000) - AMR_env$time_start), "ms)") } trimws2 <- function(..., whitespace = "[\u0009\u000A\u000B\u000C\u000D\u0020\u0085\u00A0\u1680\u180E\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u200B\u200C\u200D\u2028\u2029\u202F\u205F\u2060\u3000\uFEFF]") { @@ -1370,19 +1370,19 @@ trimws2 <- function(..., whitespace = "[\u0009\u000A\u000B\u000C\u000D\u0020\u00 # Faster data.table implementations ---- match <- function(x, ...) { - if (isTRUE(pkg_env$has_data.table) && is.character(x)) { + if (isTRUE(AMR_env$has_data.table) && is.character(x)) { # data.table::chmatch() is 35% faster than base::match() for character getExportedValue(name = "chmatch", ns = asNamespace("data.table"))(x, ...) } else { base::match(x, ...) } } -`%in%` <- function(x, ...) { - if (isTRUE(pkg_env$has_data.table) && is.character(x)) { - # data.table::`%chin%`() is 20% faster than base::`%in%`() for character - getExportedValue(name = "%chin%", ns = asNamespace("data.table"))(x, ...) +`%in%` <- function(x, table) { + if (isTRUE(AMR_env$has_data.table) && is.character(x) && is.character(table)) { + # data.table::`%chin%`() is 20-50% faster than base::`%in%`() for character + getExportedValue(name = "%chin%", ns = asNamespace("data.table"))(x, table) } else { - base::`%in%`(x, ...) + base::`%in%`(x, table) } } diff --git a/R/eucast_rules.R b/R/eucast_rules.R index 93d01b91c..6b25a9973 100755 --- a/R/eucast_rules.R +++ b/R/eucast_rules.R @@ -1178,7 +1178,7 @@ eucast_dosage <- function(ab, administration = "iv", version_breakpoints = 11.0) meet_criteria(administration, allow_class = "character", is_in = dosage$administration[!is.na(dosage$administration)], has_length = 1) meet_criteria(version_breakpoints, allow_class = c("numeric", "integer"), has_length = 1, is_in = as.double(names(EUCAST_VERSION_BREAKPOINTS))) - # show used version_breakpoints number once per session (pkg_env will reload every session) + # show used version_breakpoints number once per session (AMR_env will reload every session) if (message_not_thrown_before("eucast_dosage", "v", gsub("[^0-9]", "", version_breakpoints), entire_session = TRUE)) { message_( "Dosages for antimicrobial drugs, as meant for ", diff --git a/R/guess_ab_col.R b/R/guess_ab_col.R index 9185496a9..a7890bfd8 100755 --- a/R/guess_ab_col.R +++ b/R/guess_ab_col.R @@ -112,17 +112,17 @@ get_column_abx <- function(x, entire_session = FALSE, match_fn = fn ), - pkg_env$get_column_abx.call + AMR_env$get_column_abx.call )) { # so within the same call, within the same environment, we got here again. # but we could've come from another function within the same call, so now only check the columns that changed # first remove the columns that are not existing anymore - previous <- pkg_env$get_column_abx.out + previous <- AMR_env$get_column_abx.out current <- previous[previous %in% colnames(x)] # then compare columns in current call with columns in original call - new_cols <- colnames(x)[!colnames(x) %in% pkg_env$get_column_abx.checked_cols] + new_cols <- colnames(x)[!colnames(x) %in% AMR_env$get_column_abx.checked_cols] if (length(new_cols) > 0) { # these columns did not exist in the last call, so add them new_cols_rsi <- get_column_abx(x[, new_cols, drop = FALSE], reuse_previous_result = FALSE, info = FALSE, sort = FALSE) @@ -132,11 +132,11 @@ get_column_abx <- function(x, } # update pkg environment to improve speed on next run - pkg_env$get_column_abx.out <- current - pkg_env$get_column_abx.checked_cols <- colnames(x) + AMR_env$get_column_abx.out <- current + AMR_env$get_column_abx.checked_cols <- colnames(x) # and return right values - return(pkg_env$get_column_abx.out) + return(AMR_env$get_column_abx.out) } meet_criteria(x, allow_class = "data.frame") @@ -243,9 +243,9 @@ get_column_abx <- function(x, if (info == TRUE && all_okay == TRUE) { message_("No columns found.") } - pkg_env$get_column_abx.call <- unique_call_id(entire_session = FALSE, match_fn = fn) - pkg_env$get_column_abx.checked_cols <- colnames(x.bak) - pkg_env$get_column_abx.out <- out + AMR_env$get_column_abx.call <- unique_call_id(entire_session = FALSE, match_fn = fn) + AMR_env$get_column_abx.checked_cols <- colnames(x.bak) + AMR_env$get_column_abx.out <- out return(out) } @@ -320,9 +320,9 @@ get_column_abx <- function(x, } } - pkg_env$get_column_abx.call <- unique_call_id(entire_session = FALSE, match_fn = fn) - pkg_env$get_column_abx.checked_cols <- colnames(x.bak) - pkg_env$get_column_abx.out <- out + AMR_env$get_column_abx.call <- unique_call_id(entire_session = FALSE, match_fn = fn) + AMR_env$get_column_abx.checked_cols <- colnames(x.bak) + AMR_env$get_column_abx.out <- out out } diff --git a/R/mdro.R b/R/mdro.R index 96e630b92..47dffbce5 100755 --- a/R/mdro.R +++ b/R/mdro.R @@ -1955,14 +1955,14 @@ run_custom_mdro_guideline <- function(df, guideline, info) { for (i in seq_len(n_dots)) { qry <- tryCatch(eval(parse(text = guideline[[i]]$query), envir = df, enclos = parent.frame()), error = function(e) { - pkg_env$err_msg <- e$message + AMR_env$err_msg <- e$message return("error") } ) if (identical(qry, "error")) { warning_("in `custom_mdro_guideline()`: rule ", i, " (`", as.character(guideline[[i]]$query), "`) was ignored because of this error message: ", - pkg_env$err_msg, + AMR_env$err_msg, call = FALSE, add_fn = font_red ) diff --git a/R/mo.R b/R/mo.R index f85d948a7..3afd84406 100755 --- a/R/mo.R +++ b/R/mo.R @@ -179,12 +179,14 @@ as.mo <- function(x, x <- replace_old_mo_codes(x, property = "mo") # ignore cases that match the ignore pattern x <- replace_ignore_pattern(x, ignore_pattern) - + + x_lower <- tolower(x) + # WHONET: xxx = no growth - x[tolower(x) %in% c("", "xxx", "na", "nan")] <- NA_character_ + x[x_lower %in% c("", "xxx", "na", "nan")] <- NA_character_ out <- rep(NA_character_, length(x)) - + # below we use base R's match(), known for powering '%in%', and incredibly fast! # From reference_df ---- @@ -195,9 +197,11 @@ as.mo <- function(x, # From MO code ---- out[is.na(out) & x %in% MO_lookup$mo] <- x[is.na(out) & x %in% MO_lookup$mo] # From full name ---- - out[is.na(out) & x %in% MO_lookup$fullname] <- MO_lookup$mo[match(x[is.na(out) & x %in% MO_lookup$fullname], MO_lookup$fullname)] + out[is.na(out) & x_lower %in% MO_lookup$fullname_lower] <- MO_lookup$mo[match(x_lower[is.na(out) & x_lower %in% MO_lookup$fullname_lower], MO_lookup$fullname_lower)] + # one exception: "Fungi" matches the kingdom, but instead it should return the 'unknown' code for fungi + out[out == "F_[KNG]_FUNGI"] <- "F_FUNGUS" # From known codes ---- - out[is.na(out) & x %in% AMR::microorganisms.codes$code] <- AMR::microorganisms.codes$mo[match(x[is.na(out) & x %in% AMR::microorganisms.codes$code], AMR::microorganisms.codes$code)] + out[is.na(out) & toupper(x) %in% AMR::microorganisms.codes$code] <- AMR::microorganisms.codes$mo[match(toupper(x)[is.na(out) & toupper(x) %in% AMR::microorganisms.codes$code], AMR::microorganisms.codes$code)] # From SNOMED ---- if (any(is.na(out) & !is.na(x)) && any(is.na(out) & x %in% unlist(microorganisms$snomed), na.rm = TRUE)) { # found this extremely fast gem here: https://stackoverflow.com/a/11002456/4575331 @@ -208,7 +212,7 @@ as.mo <- function(x, out[is.na(out)] <- convert_colloquial_input(x[is.na(out)]) # From previous hits in this session ---- old <- out - out[is.na(out) & paste(x, minimum_matching_score) %in% pkg_env$mo_previously_coerced$x] <- pkg_env$mo_previously_coerced$mo[match(paste(x, minimum_matching_score)[is.na(out) & paste(x, minimum_matching_score) %in% pkg_env$mo_previously_coerced$x], pkg_env$mo_previously_coerced$x)] + out[is.na(out) & paste(x, minimum_matching_score) %in% AMR_env$mo_previously_coerced$x] <- AMR_env$mo_previously_coerced$mo[match(paste(x, minimum_matching_score)[is.na(out) & paste(x, minimum_matching_score) %in% AMR_env$mo_previously_coerced$x], AMR_env$mo_previously_coerced$x)] new <- out if (isTRUE(info) && message_not_thrown_before("as.mo", old, new, entire_session = TRUE) && any(is.na(old) & !is.na(new), na.rm = TRUE)) { message_( @@ -220,8 +224,8 @@ as.mo <- function(x, # For all other input ---- if (any(is.na(out) & !is.na(x))) { # reset uncertainties - pkg_env$mo_uncertainties <- pkg_env$mo_uncertainties[0, ] - pkg_env$mo_failures <- NULL + AMR_env$mo_uncertainties <- AMR_env$mo_uncertainties[0, ] + AMR_env$mo_failures <- NULL # Laboratory systems: remove (translated) entries like "no growth", "not E. coli", etc. x[trimws2(x) %like% translate_into_language("no .*growth", language = language)] <- NA_character_ @@ -238,8 +242,6 @@ as.mo <- function(x, x_coerced <- vapply(FUN.VALUE = character(1), x_unique, function(x_search) { progress$tick() - print(x_search) - # some required cleaning steps x_out <- trimws2(x_search) # this applies the `remove_from_input` argument, which defaults to mo_cleaning_regex() @@ -248,7 +250,11 @@ as.mo <- function(x, x_search_cleaned <- x_out x_out <- tolower(x_out) - print(x_out) + # input must not be too short + if (nchar(x_out) < 3) { + return("UNKNOWN") + } + # take out the parts, split by space x_parts <- strsplit(gsub("-", " ", x_out, fixed = TRUE), " ", fixed = TRUE)[[1]] @@ -282,7 +288,7 @@ as.mo <- function(x, } else { mo_to_search <- MO_lookup$fullname[filtr] } - pkg_env$mo_to_search <- mo_to_search + AMR_env$mo_to_search <- mo_to_search # determine the matching score on the original search value m <- mo_matching_score(x = x_search_cleaned, n = mo_to_search) if (is.null(minimum_matching_score)) { @@ -302,20 +308,21 @@ as.mo <- function(x, result_mo <- NA_character_ } else { result_mo <- MO_lookup$mo[match(top_hits[1], MO_lookup$fullname)] - pkg_env$mo_uncertainties <- rbind(pkg_env$mo_uncertainties, + AMR_env$mo_uncertainties <- rbind(AMR_env$mo_uncertainties, data.frame( - minimum_matching_score = ifelse(is.null(minimum_matching_score), "NULL", minimum_matching_score), original_input = x_search, input = x_search_cleaned, fullname = top_hits[1], mo = result_mo, candidates = ifelse(length(top_hits) > 1, paste(top_hits[2:min(26, length(top_hits))], collapse = ", "), ""), + minimum_matching_score = ifelse(is.null(minimum_matching_score), "NULL", minimum_matching_score), + keep_synonyms = keep_synonyms, stringsAsFactors = FALSE ), stringsAsFactors = FALSE ) # save to package env to save time for next time - pkg_env$mo_previously_coerced <- unique(rbind(pkg_env$mo_previously_coerced, + AMR_env$mo_previously_coerced <- unique(rbind(AMR_env$mo_previously_coerced, data.frame( x = paste(x_search, minimum_matching_score), mo = result_mo, @@ -334,21 +341,21 @@ as.mo <- function(x, out[is.na(out)] <- x_coerced[match(x[is.na(out)], x_unique)] # Throw note about uncertainties ---- - if (isTRUE(info) && NROW(pkg_env$mo_uncertainties) > 0) { - if (message_not_thrown_before("as.mo", "uncertainties", pkg_env$mo_uncertainties$original_input)) { + if (isTRUE(info) && NROW(AMR_env$mo_uncertainties) > 0) { + if (message_not_thrown_before("as.mo", "uncertainties", AMR_env$mo_uncertainties$original_input)) { plural <- c("", "this") - if (length(pkg_env$mo_uncertainties$original_input) > 1) { + if (length(AMR_env$mo_uncertainties$original_input) > 1) { plural <- c("s", "these uncertainties") } - if (length(pkg_env$mo_uncertainties$original_input) <= 3) { + if (length(AMR_env$mo_uncertainties$original_input) <= 3) { examples <- vector_and(paste0( - '"', pkg_env$mo_uncertainties$original_input, - '" (assumed ', font_italic(pkg_env$mo_uncertainties$fullname, collapse = NULL), ")" + '"', AMR_env$mo_uncertainties$original_input, + '" (assumed ', font_italic(AMR_env$mo_uncertainties$fullname, collapse = NULL), ")" ), quotes = FALSE ) } else { - examples <- paste0(nr2char(length(pkg_env$mo_uncertainties$original_input)), " microorganism", plural[1]) + examples <- paste0(nr2char(length(AMR_env$mo_uncertainties$original_input)), " microorganism", plural[1]) } msg <- paste0( "Microorganism translation was uncertain for ", examples, @@ -364,18 +371,18 @@ as.mo <- function(x, gbif_matches[!gbif_matches %in% AMR::microorganisms$gbif] <- NA lpsn_matches <- AMR::microorganisms$lpsn_renamed_to[match(out, AMR::microorganisms$mo)] lpsn_matches[!lpsn_matches %in% AMR::microorganisms$lpsn] <- NA - pkg_env$mo_renamed <- list(old = out[!is.na(gbif_matches) | !is.na(lpsn_matches)], + AMR_env$mo_renamed <- list(old = out[!is.na(gbif_matches) | !is.na(lpsn_matches)], gbif_matches = gbif_matches[!is.na(gbif_matches) | !is.na(lpsn_matches)], lpsn_matches = lpsn_matches[!is.na(gbif_matches) | !is.na(lpsn_matches)]) if (isFALSE(keep_synonyms)) { out[which(!is.na(gbif_matches))] <- AMR::microorganisms$mo[match(gbif_matches[which(!is.na(gbif_matches))], AMR::microorganisms$gbif)] out[which(!is.na(lpsn_matches))] <- AMR::microorganisms$mo[match(lpsn_matches[which(!is.na(lpsn_matches))], AMR::microorganisms$lpsn)] - if (isTRUE(info) && length(pkg_env$mo_renamed$old) > 0) { + if (isTRUE(info) && length(AMR_env$mo_renamed$old) > 0) { print(mo_renamed(), extra_txt = " (use `keep_synonyms = TRUE` to leave uncorrected)") } - } else if (is.null(getOption("AMR_keep_synonyms")) && length(pkg_env$mo_renamed$old) > 0 && message_not_thrown_before("as.mo", "keep_synonyms_warning", entire_session = TRUE)) { + } else if (is.null(getOption("AMR_keep_synonyms")) && length(AMR_env$mo_renamed$old) > 0 && message_not_thrown_before("as.mo", "keep_synonyms_warning", entire_session = TRUE)) { # keep synonyms is TRUE, so check if any do have synonyms - warning_("Function `as.mo()` returned some old taxonomic names. Use `as.mo(..., keep_synonyms = FALSE)` to clean the input to currently accepted taxonomic names, or set the R option `AMR_keep_synonyms` to `FALSE`. This warning will be shown once per session.") + warning_("Function `as.mo()` returned ", nr2char(length(unique(AMR_env$mo_renamed$old))), " old taxonomic name", ifelse(length(unique(AMR_env$mo_renamed$old)) > 1, "s", ""), ". Use `as.mo(..., keep_synonyms = FALSE)` to clean the input to currently accepted taxonomic names, or set the R option `AMR_keep_synonyms` to `FALSE`. This warning will be shown once per session.") } # Apply Becker ---- @@ -432,7 +439,10 @@ as.mo <- function(x, # All unknowns ---- out[is.na(out) & !is.na(x)] <- "UNKNOWN" - pkg_env$mo_failures <- unique(x[out == "UNKNOWN" & x != "UNKNOWN" & !is.na(x)]) + AMR_env$mo_failures <- unique(x[out == "UNKNOWN" & x != "UNKNOWN" & !is.na(x)]) + if (length(AMR_env$mo_failures) > 0) { + warning_("The following input could not be coerced and was returned as \"UNKNOWN\": ", vector_and(AMR_env$mo_failures, quotes = TRUE), ".\nYou can retrieve this list with `mo_failures()`.") + } # Return class ---- set_clean_class(out, @@ -440,12 +450,73 @@ as.mo <- function(x, ) } +# OTHER DOCUMENTED FUNCTIONS ---------------------------------------------- + #' @rdname as.mo #' @export is.mo <- function(x) { inherits(x, "mo") } +#' @rdname as.mo +#' @export +mo_uncertainties <- function() { + set_clean_class(AMR_env$mo_uncertainties, new_class = c("mo_uncertainties", "data.frame")) +} + +#' @rdname as.mo +#' @export +mo_renamed <- function() { + x <- AMR_env$mo_renamed + + x$new <- synonym_mo_to_accepted_mo(x$old) + mo_old <- AMR::microorganisms$fullname[match(x$old, AMR::microorganisms$mo)] + mo_new <- AMR::microorganisms$fullname[match(x$new, AMR::microorganisms$mo)] + ref_old <- AMR::microorganisms$ref[match(x$old, AMR::microorganisms$mo)] + ref_new <- AMR::microorganisms$ref[match(x$new, AMR::microorganisms$mo)] + + df_renamed <- data.frame(old = mo_old, + new = mo_new, + ref_old = ref_old, + ref_new = ref_new, + stringsAsFactors = FALSE) + df_renamed <- unique(df_renamed) + df_renamed <- df_renamed[order(df_renamed$old), , drop = FALSE] + set_clean_class(df_renamed, new_class = c("mo_renamed", "data.frame")) +} + +#' @rdname as.mo +#' @export +mo_failures <- function() { + AMR_env$mo_failures +} + +#' @rdname as.mo +#' @export +mo_reset_session <- function() { + if (NROW(AMR_env$mo_previously_coerced) > 0) { + message_("Reset ", nr2char(NROW(AMR_env$mo_previously_coerced)), " previously matched input value", ifelse(NROW(AMR_env$mo_previously_coerced) > 1, "s", ""), ".") + AMR_env$mo_previously_coerced <- AMR_env$mo_previously_coerced[0, , drop = FALSE] + AMR_env$mo_uncertainties <- AMR_env$mo_uncertainties[0, , drop = FALSE] + } else { + message_("No previously matched input values to reset.") + } +} + +#' @rdname as.mo +#' @export +mo_cleaning_regex <- function() { + paste0( + "(", + "[^A-Za-z- \\(\\)\\[\\]{}]+", + "|", + "([({]|\\[).+([})]|\\])", + "|", + "(^| )(e?spp|e?ssp|e?ss|e?sp|e?subsp|sube?species|biovar|biotype|serovar|e?species)[.]*( |$))") +} + +# UNDOCUMENTED METHODS ---------------------------------------------------- + # will be exported using s3_register() in R/zzz.R pillar_shaft.mo <- function(x, ...) { out <- format(x) @@ -675,18 +746,6 @@ rep.mo <- function(x, ...) { y } -#' @rdname as.mo -#' @export -mo_failures <- function() { - pkg_env$mo_failures -} - -#' @rdname as.mo -#' @export -mo_uncertainties <- function() { - set_clean_class(pkg_env$mo_uncertainties, new_class = c("mo_uncertainties", "data.frame")) -} - #' @method print mo_uncertainties #' @export #' @noRd @@ -768,7 +827,13 @@ print.mo_uncertainties <- function(x, ...) { ), collapse = "\n" ), + # Add "Based on {input}" text if it differs from the original input ifelse(x[i, ]$original_input != x[i, ]$input, paste0(strrep(" ", nchar(x[i, ]$original_input) + 6), "Based on input \"", x[i, ]$input, "\""), ""), + # Add note if result was coerced to accepted taxonomic name + ifelse(x[i, ]$keep_synonyms == FALSE & x[i, ]$mo %in% AMR::microorganisms$mo[which(AMR::microorganisms$status == "synonym")], + paste0(strrep(" ", nchar(x[i, ]$original_input) + 6), + font_red(paste0("This old taxonomic name was converted to ", font_italic(AMR::microorganisms$fullname[match(synonym_mo_to_accepted_mo(x[i, ]$mo), AMR::microorganisms$mo)], collapse = NULL), " (", synonym_mo_to_accepted_mo(x[i, ]$mo), ")."), collapse = NULL)), + ""), candidates, sep = "\n" ) @@ -777,30 +842,6 @@ print.mo_uncertainties <- function(x, ...) { cat(txt) } - -#' @rdname as.mo -#' @export -mo_renamed <- function() { - x <- pkg_env$mo_renamed - - x$new <- ifelse(is.na(x$lpsn_matches), - AMR::microorganisms$mo[match(x$gbif_matches, AMR::microorganisms$gbif)], - AMR::microorganisms$mo[match(x$lpsn_matches, AMR::microorganisms$lpsn)]) - mo_old <- AMR::microorganisms$fullname[match(x$old, AMR::microorganisms$mo)] - mo_new <- AMR::microorganisms$fullname[match(x$new, AMR::microorganisms$mo)] - ref_old <- AMR::microorganisms$ref[match(x$old, AMR::microorganisms$mo)] - ref_new <- AMR::microorganisms$ref[match(x$new, AMR::microorganisms$mo)] - - df_renamed <- data.frame(old = mo_old, - new = mo_new, - ref_old = ref_old, - ref_new = ref_new, - stringsAsFactors = FALSE) - df_renamed <- unique(df_renamed) - df_renamed <- df_renamed[order(df_renamed$old), , drop = FALSE] - set_clean_class(df_renamed, new_class = c("mo_renamed", "data.frame")) -} - #' @method print mo_renamed #' @export #' @noRd @@ -812,6 +853,8 @@ print.mo_renamed <- function(x, extra_txt = "", n = 25, ...) { x$ref_old[!is.na(x$ref_old)] <- paste0(" (", gsub("et al.", font_italic("et al."), x$ref_old[!is.na(x$ref_old)], fixed = TRUE), ")") x$ref_new[!is.na(x$ref_new)] <- paste0(" (", gsub("et al.", font_italic("et al."), x$ref_new[!is.na(x$ref_new)], fixed = TRUE), ")") + x$ref_old[is.na(x$ref_old)] <- " (author unknown)" + x$ref_new[is.na(x$ref_new)] <- " (author unknown)" rows <- seq_len(min(NROW(x), n)) @@ -825,28 +868,57 @@ print.mo_renamed <- function(x, extra_txt = "", n = 25, ...) { ) } -#' @rdname as.mo -#' @export -mo_reset_session <- function() { - if (NROW(pkg_env$mo_previously_coerced) > 0) { - message_("Reset ", NROW(pkg_env$mo_previously_coerced), " previously matched input values.") - pkg_env$mo_previously_coerced <- pkg_env$mo_previously_coerced[0, , drop = FALSE] - pkg_env$mo_uncertainties <- pkg_env$mo_uncertainties[0, , drop = FALSE] - } else { - message_("No previously matched input values to reset.") - } -} +# UNDOCUMENTED HELPER FUNCTIONS ------------------------------------------- -#' @rdname as.mo -#' @export -mo_cleaning_regex <- function() { - paste0( - "(", - "[^A-Za-z- \\(\\)\\[\\]{}]+", - "|", - "([({]|\\[).+([})]|\\])", - "|", - "(^| )(e?spp|e?ssp|e?ss|e?sp|e?subsp|sube?species|biovar|biotype|serovar|e?species)( |$))") +convert_colloquial_input <- function(x) { + x.bak <- trimws2(x) + x <- trimws2(tolower(x)) + out <- rep(NA_character_, length(x)) + + # Streptococci, like GBS = Group B Streptococci (B_STRPT_GRPB) + out[x %like_case% "^g[abcdfghkl]s$"] <- gsub("g([abcdfghkl])s", + "B_STRPT_GRP\\U\\1", + x[x %like_case% "^g[abcdfghkl]s$"], + perl = TRUE) + # Streptococci in different languages, like "estreptococos grupo B" + out[x %like_case% "strepto[ck]o[ck].* [abcdfghkl]$"] <- gsub(".*e?strepto[ck]o[ck].* ([abcdfghkl])$", + "B_STRPT_GRP\\U\\1", + x[x %like_case% "strepto[ck]o[ck].* [abcdfghkl]$"], + perl = TRUE) + out[x %like_case% "group [abcdfghkl] strepto[ck]o[ck]"] <- gsub(".*group ([abcdfghkl]) strepto[ck]o[ck].*", + "B_STRPT_GRP\\U\\1", + x[x %like_case% "group [abcdfghkl] strepto[ck]o[ck]"], + perl = TRUE) + out[x %like_case% "ha?emoly.*strep"] <- "B_STRPT_HAEM" + out[x %like_case% "(strepto.* mil+er+i|^mgs[^a-z]*$)"] <- "B_STRPT_MILL" + out[x %like_case% "mil+er+i gr"] <- "B_STRPT_MILL" + out[x %like_case% "((strepto|^s).* viridans|^vgs[^a-z]*$)"] <- "B_STRPT_VIRI" + + # CoNS/CoPS in different languages (support for German, Dutch, Spanish, Portuguese) + out[x %like_case% "([ck]oagulas[ea].negatie?[vf]|^[ck]o?ns[^a-z]*$)"] <- "B_STPHY_CONS" + out[x %like_case% "([ck]oagulas[ea].positie?[vf]|^[ck]o?ps[^a-z]*$)"] <- "B_STPHY_COPS" + + # Gram stains + out[x %like_case% "gram[ -]?neg.*|negatie?[vf]"] <- "B_GRAMN" + out[x %like_case% "gram[ -]?pos.*|positie?[vf]"] <- "B_GRAMP" + + # yeasts and fungi + out[x %like_case% "^yeast?"] <- "F_YEAST" + out[x %like_case% "^fung(us|i)"] <- "F_FUNGUS" + + # Salmonella city names, starting with capital species name - they are all S. enterica + out[x.bak %like_case% "[sS]almonella [A-Z][a-z]+ ?.*" & x %unlike% "typhi"] <- "B_SLMNL_ENTR" + out[x %like_case% "salmonella group"] <- "B_SLMNL" + + # trivial names known to the field + out[x %like_case% "meningo[ck]o[ck]"] <- "B_NESSR_MNNG" + out[x %like_case% "gono[ck]o[ck]"] <- "B_NESSR_GNRR" + out[x %like_case% "pneumo[ck]o[ck]"] <- "B_STRPT_PNMN" + + # unexisting names (xxx and con are WHONET codes) + out[x %in% c("con", "other", "none", "unknown") | x %like_case% "virus"] <- "UNKNOWN" + + out } nr2char <- function(x) { @@ -861,17 +933,6 @@ nr2char <- function(x) { } } -get_mo_uncertainties <- function() { - remember <- list(uncertainties = pkg_env$mo_uncertainties) - # empty them, otherwise e.g. mo_shortname("Chlamydophila psittaci") will give 3 notes - pkg_env$mo_uncertainties <- NULL - remember -} - -load_mo_uncertainties <- function(metadata) { - pkg_env$mo_uncertainties <- metadata$uncertainties -} - parse_and_convert <- function(x) { if (tryCatch(is.character(x) && all(Encoding(x) == "unknown", na.rm = TRUE), error = function(e) FALSE)) { return(trimws2(x)) @@ -1008,51 +1069,24 @@ repair_reference_df <- function(reference_df) { reference_df } -convert_colloquial_input <- function(x) { - x.bak <- trimws2(x) - x <- trimws2(tolower(x)) - out <- rep(NA_character_, length(x)) - - # Streptococci, like GBS = Group B Streptococci (B_STRPT_GRPB) - out[x %like_case% "^g[abcdfghkl]s$"] <- gsub("g([abcdfghkl])s", - "B_STRPT_GRP\\U\\1", - x[x %like_case% "^g[abcdfghkl]s$"], - perl = TRUE) - # Streptococci in different languages, like "estreptococos grupo B" - out[x %like_case% "strepto[ck]o[ck].* [abcdfghkl]$"] <- gsub(".*e?strepto[ck]o[ck].* ([abcdfghkl])$", - "B_STRPT_GRP\\U\\1", - x[x %like_case% "strepto[ck]o[ck].* [abcdfghkl]$"], - perl = TRUE) - out[x %like_case% "group [abcdfghkl] strepto[ck]o[ck]"] <- gsub(".*group ([abcdfghkl]) strepto[ck]o[ck].*", - "B_STRPT_GRP\\U\\1", - x[x %like_case% "group [abcdfghkl] strepto[ck]o[ck]"], - perl = TRUE) - out[x %like_case% "ha?emoly.*strep"] <- "B_STRPT_HAEM" - out[x %like_case% "(strepto.* mil+er+i|^mgs[^a-z]*$)"] <- "B_STRPT_MILL" - out[x %like_case% "((strepto|^s).* viridans|^vgs[^a-z]*$)"] <- "B_STRPT_VIRI" - - # CoNS/CoPS in different languages (support for German, Dutch, Spanish, Portuguese) - out[x %like_case% "([ck]oagulas[ea].negatie?[vf]|^[ck]o?ns[^a-z]*$)"] <- "B_STPHY_CONS" - out[x %like_case% "([ck]oagulas[ea].positie?[vf]|^[ck]o?ps[^a-z]*$)"] <- "B_STPHY_COPS" - - # Gram stains - out[x %like_case% "gram[ -]?neg.*|negatie?[vf]"] <- "B_GRAMN" - out[x %like_case% "gram[ -]?pos.*|positie?[vf]"] <- "B_GRAMP" - - # yeasts and fungi - out[x %like_case% "^yeast?"] <- "F_YEAST" - out[x %like_case% "^fung(us|i)"] <- "F_FUNGUS" - - # Salmonella city names, starting with capital species name - they are all S. enterica - out[x.bak %like_case% "[sS]almonella [A-Z][a-z]+ ?.*" & x %unlike% "typhi"] <- "B_SLMNL_ENTR" - - # trivial names known to the field - out[x %like_case% "meningo[ck]o[ck]"] <- "B_NESSR_MNNG" - out[x %like_case% "gono[ck]o[ck]"] <- "B_NESSR_GNRR" - out[x %like_case% "pneumo[ck]o[ck]"] <- "B_STRPT_PNMN" - - # unexisting names (xxx and con are WHONET codes) - out[x %in% c("con", "other", "none", "unknown") | x %like_case% "virus"] <- "UNKNOWN" - - out +get_mo_uncertainties <- function() { + remember <- list(uncertainties = AMR_env$mo_uncertainties) + # empty them, otherwise e.g. mo_shortname("Chlamydophila psittaci") will give 3 notes + AMR_env$mo_uncertainties <- NULL + remember +} + +load_mo_uncertainties <- function(metadata) { + AMR_env$mo_uncertainties <- metadata$uncertainties +} + +synonym_mo_to_accepted_mo <- function(x) { + x_gbif <- AMR::microorganisms$gbif_renamed_to[match(x, AMR::microorganisms$mo)] + x_lpsn <- AMR::microorganisms$lpsn_renamed_to[match(x, AMR::microorganisms$mo)] + x_gbif[!x_gbif %in% AMR::microorganisms$gbif] <- NA + x_lpsn[!x_lpsn %in% AMR::microorganisms$lpsn] <- NA + + ifelse(is.na(x_lpsn), + AMR::microorganisms$mo[match(x_gbif, AMR::microorganisms$gbif)], + AMR::microorganisms$mo[match(x_lpsn, AMR::microorganisms$lpsn)]) } diff --git a/R/mo_matching_score.R b/R/mo_matching_score.R index ba339c1ee..f0b8671d6 100755 --- a/R/mo_matching_score.R +++ b/R/mo_matching_score.R @@ -79,7 +79,10 @@ mo_matching_score <- function(x, n) { # only keep one space x <- gsub(" +", " ", x) - + + # start with a capital letter + substr(x, 1, 1) <- toupper(substr(x, 1, 1)) + # n is always a taxonomically valid full name if (length(n) == 1) { n <- rep(n, length(x)) diff --git a/R/mo_property.R b/R/mo_property.R index f57371a69..4346b91bd 100755 --- a/R/mo_property.R +++ b/R/mo_property.R @@ -33,13 +33,13 @@ #' @param x any [character] (vector) that can be coerced to a valid microorganism code with [as.mo()]. Can be left blank for auto-guessing the column containing microorganism codes if used in a data set, see *Examples*. #' @param property one of the column names of the [microorganisms] data set: `r vector_or(colnames(microorganisms), sort = FALSE, quotes = TRUE)`, or must be `"shortname"` #' @inheritParams as.mo -#' @param ... other arguments passed on to [as.mo()], such as 'allow_uncertain' and 'ignore_pattern' +#' @param ... other arguments passed on to [as.mo()], such as 'minimum_matching_score', 'ignore_pattern', and 'remove_from_input' #' @param ab any (vector of) text that can be coerced to a valid antibiotic code with [as.ab()] #' @param open browse the URL using [`browseURL()`][utils::browseURL()] #' @details All functions will, at default, keep old taxonomic properties. Please refer to this example, knowing that *Escherichia blattae* was renamed to *Shimwellia blattae* in 2010: #' - `mo_name("Escherichia blattae")` will return `"Shimwellia blattae"` (with a message about the renaming) -#' - `mo_ref("Escherichia blattae")` will return `"Burgess et al., 1973"` (with a message about the renaming) -#' - `mo_ref("Shimwellia blattae")` will return `"Priest et al., 2010"` (without a message) +#' - `mo_ref("Escherichia blattae", keep_synonyms = TRUE)` will return `"Burgess et al., 1973"` (with a warning about the renaming) +#' - `mo_ref("Shimwellia blattae", keep_synonyms = FALSE)` will return `"Priest et al., 2010"` (without a message) #' #' The short name - [mo_shortname()] - almost always returns the first character of the genus and the full species, like `"E. coli"`. Exceptions are abbreviations of staphylococci (such as *"CoNS"*, Coagulase-Negative Staphylococci) and beta-haemolytic streptococci (such as *"GBS"*, Group B Streptococci). Please bear in mind that e.g. *E. coli* could mean *Escherichia coli* (kingdom of Bacteria) as well as *Entamoeba coli* (kingdom of Protozoa). Returning to the full name will be done using [as.mo()] internally, giving priority to bacteria and human pathogens, i.e. `"E. coli"` will be considered *Escherichia coli*. In other words, `mo_fullname(mo_shortname("Entamoeba coli"))` returns `"Escherichia coli"`. #' @@ -504,7 +504,7 @@ mo_is_intrinsic_resistant <- function(x, ab, language = get_AMR_locale(), keep_s stop_("length of `x` and `ab` must be equal, or one of them must be of length 1.") } - # show used version number once per session (pkg_env will reload every session) + # show used version number once per session (AMR_env will reload every session) if (message_not_thrown_before("mo_is_intrinsic_resistant", "version.mo", entire_session = TRUE)) { message_( "Determining intrinsic resistance based on ", diff --git a/R/mo_source.R b/R/mo_source.R index 1b223ee13..609e97312 100644 --- a/R/mo_source.R +++ b/R/mo_source.R @@ -134,7 +134,7 @@ set_mo_source <- function(path, destination = getOption("AMR_mo_source", "~/mo_s stop_ifnot(interactive(), "this function can only be used in interactive mode, since it must ask for the user's permission to write a file to their file system.") if (is.null(path) || path %in% c(FALSE, "")) { - pkg_env$mo_source <- NULL + AMR_env$mo_source <- NULL if (file.exists(mo_source_destination)) { unlink(mo_source_destination) message_("Removed mo_source file '", font_bold(mo_source_destination), "'", @@ -227,7 +227,7 @@ set_mo_source <- function(path, destination = getOption("AMR_mo_source", "~/mo_s attr(df, "mo_source_destination") <- mo_source_destination attr(df, "mo_source_timestamp") <- file.mtime(path) saveRDS(df, mo_source_destination) - pkg_env$mo_source <- df + AMR_env$mo_source <- df message_( action, " mo_source file '", font_bold(mo_source_destination), "' (", formatted_filesize(mo_source_destination), @@ -247,24 +247,24 @@ get_mo_source <- function(destination = getOption("AMR_mo_source", "~/mo_source. } return(NULL) } - if (is.null(pkg_env$mo_source)) { - pkg_env$mo_source <- readRDS(path.expand(destination)) + if (is.null(AMR_env$mo_source)) { + AMR_env$mo_source <- readRDS(path.expand(destination)) } - old_time <- attributes(pkg_env$mo_source)$mo_source_timestamp - new_time <- file.mtime(attributes(pkg_env$mo_source)$mo_source_location) + old_time <- attributes(AMR_env$mo_source)$mo_source_timestamp + new_time <- file.mtime(attributes(AMR_env$mo_source)$mo_source_location) if (interactive() && !identical(old_time, new_time)) { # source file was updated, also update reference - set_mo_source(attributes(pkg_env$mo_source)$mo_source_location) + set_mo_source(attributes(AMR_env$mo_source)$mo_source_location) } - pkg_env$mo_source + AMR_env$mo_source } check_validity_mo_source <- function(x, refer_to_name = "`reference_df`", stop_on_error = TRUE) { if (paste(deparse(substitute(x)), collapse = "") == "get_mo_source()") { return(TRUE) } - if (is.null(pkg_env$mo_source) && (identical(x, get_mo_source()))) { + if (is.null(AMR_env$mo_source) && (identical(x, get_mo_source()))) { return(TRUE) } if (is.null(x)) { diff --git a/R/rsi.R b/R/rsi.R index bb60ea9a0..074671338 100755 --- a/R/rsi.R +++ b/R/rsi.R @@ -906,8 +906,8 @@ as_rsi_method <- function(method_short, } # write to verbose output - pkg_env$rsi_interpretation_history <- rbind( - pkg_env$rsi_interpretation_history, + AMR_env$rsi_interpretation_history <- rbind( + AMR_env$rsi_interpretation_history, data.frame( datetime = Sys.time(), index = i, @@ -964,7 +964,7 @@ as_rsi_method <- function(method_short, rsi_interpretation_history <- function(clean = FALSE) { meet_criteria(clean, allow_class = "logical", has_length = 1) - out.bak <- pkg_env$rsi_interpretation_history + out.bak <- AMR_env$rsi_interpretation_history out <- out.bak if (NROW(out) == 0) { message_("No results to return. Run `as.rsi()` on MIC values or disk diffusion zones first to see a 'logbook' data set here.") @@ -975,9 +975,9 @@ rsi_interpretation_history <- function(clean = FALSE) { out$interpretation <- as.rsi(out$interpretation) # keep stored for next use if (isTRUE(clean)) { - pkg_env$rsi_interpretation_history <- pkg_env$rsi_interpretation_history[0, , drop = FALSE] + AMR_env$rsi_interpretation_history <- AMR_env$rsi_interpretation_history[0, , drop = FALSE] } else { - pkg_env$rsi_interpretation_history <- out.bak + AMR_env$rsi_interpretation_history <- out.bak } if (pkg_is_available("tibble", also_load = FALSE)) { diff --git a/R/zzz.R b/R/zzz.R index 6014c6ca3..d821db8ee 100755 --- a/R/zzz.R +++ b/R/zzz.R @@ -28,23 +28,24 @@ # ==================================================================== # # set up package environment, used by numerous AMR functions -pkg_env <- new.env(hash = FALSE) -pkg_env$mo_uncertainties <- data.frame( - uncertainty = integer(0), +AMR_env <- new.env(hash = FALSE) +AMR_env$mo_uncertainties <- data.frame( original_input = character(0), input = character(0), fullname = character(0), mo = character(0), candidates = character(0), + minimum_matching_score = integer(0), + keep_synonyms = logical(0), stringsAsFactors = FALSE ) -pkg_env$mo_renamed <- list() -pkg_env$mo_previously_coerced <- data.frame( +AMR_env$mo_renamed <- list() +AMR_env$mo_previously_coerced <- data.frame( x = character(0), mo = character(0), stringsAsFactors = FALSE ) -pkg_env$rsi_interpretation_history <- data.frame( +AMR_env$rsi_interpretation_history <- data.frame( datetime = Sys.time()[0], index = integer(0), ab_input = character(0), @@ -60,7 +61,7 @@ pkg_env$rsi_interpretation_history <- data.frame( interpretation = character(0), stringsAsFactors = FALSE ) -pkg_env$has_data.table <- pkg_is_available("data.table", also_load = FALSE) +AMR_env$has_data.table <- pkg_is_available("data.table", also_load = FALSE) # determine info icon for messages utf8_supported <- isTRUE(base::l10n_info()$`UTF-8`) @@ -69,9 +70,9 @@ is_latex <- tryCatch(import_fn("is_latex_output", "knitr", error_on_fail = FALSE ) if (utf8_supported && !is_latex) { # \u2139 is a symbol officially named 'information source' - pkg_env$info_icon <- "\u2139" + AMR_env$info_icon <- "\u2139" } else { - pkg_env$info_icon <- "i" + AMR_env$info_icon <- "i" } .onLoad <- function(lib, pkg) { diff --git a/data-raw/read_EUCAST.R b/data-raw/read_EUCAST.R index 70dee5f3a..1ec667263 100644 --- a/data-raw/read_EUCAST.R +++ b/data-raw/read_EUCAST.R @@ -109,7 +109,7 @@ read_EUCAST <- function(sheet, file, guideline_name) { for (i in seq_len(length(x))) { y <- trimws2(unlist(strsplit(x[i], "(,|and)"))) y <- trimws2(gsub("[(].*[)]", "", y)) - y <- suppressWarnings(suppressMessages(as.mo(y, allow_uncertain = FALSE))) + y <- suppressWarnings(suppressMessages(as.mo(y))) if (!is.null(mo_uncertainties())) uncertainties <<- add_uncertainties(uncertainties, mo_uncertainties()) y <- y[!is.na(y) & y != "UNKNOWN"] x[i] <- paste(y, collapse = "|") diff --git a/data/microorganisms.codes.rda b/data/microorganisms.codes.rda index e8435c653..4164a79ac 100644 Binary files a/data/microorganisms.codes.rda and b/data/microorganisms.codes.rda differ diff --git a/inst/tinytest/test-mo.R b/inst/tinytest/test-mo.R index 7106ada4f..7cb766f87 100644 --- a/inst/tinytest/test-mo.R +++ b/inst/tinytest/test-mo.R @@ -40,7 +40,7 @@ expect_equal(as.character(as.mo("Escherichia coli")), "B_ESCHR_COLI") expect_equal(as.character(as.mo(112283007)), "B_ESCHR_COLI") expect_equal(as.character(as.mo("Escherichia species")), "B_ESCHR") expect_equal(as.character(as.mo("Escherichia")), "B_ESCHR") -expect_equal(as.character(as.mo("Esch spp.")), "B_ESCHR") +expect_equal(as.character(as.mo("Eschr spp.")), "B_ESCHR") expect_equal(as.character(as.mo(" B_ESCHR_COLI ")), "B_ESCHR_COLI") expect_equal(as.character(as.mo("e coli")), "B_ESCHR_COLI") # not Campylobacter expect_equal(as.character(as.mo("klpn")), "B_KLBSL_PNMN") @@ -53,9 +53,8 @@ expect_equal(as.character(as.mo("Strepto")), "B_STRPT") expect_equal(as.character(as.mo("Streptococcus")), "B_STRPT") # not Peptostreptoccus expect_equal(as.character(as.mo("Estreptococos grupo B")), "B_STRPT_GRPB") expect_equal(as.character(as.mo("Group B Streptococci")), "B_STRPT_GRPB") -expect_equal(as.character(as.mo(c("mycobacterie", "mycobakterium"))), c("B_MYCBC", "B_MYCBC")) -expect_equal(as.character(as.mo(c("GAS", "GBS", "a MGS", "haemoly strep"))), c("B_STRPT_GRPA", "B_STRPT_GRPB", "B_STRPT_MILL", "B_STRPT_HAEM")) +expect_equal(as.character(as.mo(c("GAS", "GBS", "haemoly strep"))), c("B_STRPT_GRPA", "B_STRPT_GRPB", "B_STRPT_HAEM")) expect_equal(as.character(as.mo("S. pyo")), "B_STRPT_PYGN") # not Actinomyces pyogenes @@ -90,14 +89,13 @@ expect_identical( "staaur", "S. aureus", "S aureus", - "Sthafilokkockus aureeuzz", + "Sthafilokkockus aureus", "Staphylococcus aureus", "MRSA", - "VISA", - "meth.-resis. S. aureus (MRSA)" - )) + "VISA" + ), minimum_matching_score = 0) )), - rep("B_STPHY_AURS", 10) + rep("B_STPHY_AURS", 9) ) expect_identical( as.character( @@ -148,8 +146,8 @@ expect_identical(as.character(as.mo("STCPYO", Lancefield = TRUE)), "B_STRPT_GRPA expect_identical(as.character(as.mo("S. agalactiae", Lancefield = FALSE)), "B_STRPT_AGLC") expect_identical(as.character(as.mo("S. agalactiae", Lancefield = TRUE)), "B_STRPT_GRPB") # group B expect_identical(as.character(suppressWarnings(as.mo("estreptococos grupo B"))), "B_STRPT_GRPB") -expect_identical(as.character(as.mo("S. equisimilis", Lancefield = FALSE)), "B_STRPT_DYSG_EQSM") -expect_identical(as.character(as.mo("S. equisimilis", Lancefield = TRUE)), "B_STRPT_GRPC") # group C +expect_identical(as.character(as.mo("S. equi", Lancefield = FALSE)), "B_STRPT_EQUI") +expect_identical(as.character(as.mo("S. equi", Lancefield = TRUE)), "B_STRPT_GRPC") # group C # Enterococci must only be influenced if Lancefield = "all" expect_identical(as.character(as.mo("E. faecium", Lancefield = FALSE)), "B_ENTRC_FACM") expect_identical(as.character(as.mo("E. faecium", Lancefield = TRUE)), "B_ENTRC_FACM") @@ -213,19 +211,17 @@ expect_equal( # check empty values expect_equal( - as.character(suppressWarnings(as.mo(""))), + as.character(as.mo("")), NA_character_ ) # check less prevalent MOs -expect_equal(as.character(as.mo("Gomphosphaeria aponina delicatula")), "B_GMPHS_APNN_DLCT") -expect_equal(as.character(as.mo("Gomphosphaeria apo del")), "B_GMPHS_APNN_DLCT") -expect_equal(as.character(as.mo("G apo deli")), "B_GMPHS_APNN_DLCT") -expect_equal(as.character(as.mo("Gomphosphaeria aponina")), "B_GMPHS_APNN") -expect_equal(as.character(as.mo("Gomphosphaeria species")), "B_GMPHS") -expect_equal(as.character(as.mo("Gomphosphaeria")), "B_GMPHS") -expect_equal(as.character(as.mo(" B_GMPHS_APNN ")), "B_GMPHS_APNN") -expect_equal(as.character(as.mo("g aponina")), "B_GMPHS_APNN") +expect_equal(as.character(as.mo("Actinosynnema pretiosum auranticum")), "B_ANNMA_PRTS_ARNT") +expect_equal(as.character(as.mo("Actinosynnema preti aura")), "B_ANNMA_PRTS_ARNT") +expect_equal(as.character(as.mo("A pre aur")), "B_ANNMA_PRTS_ARNT") +expect_equal(as.character(as.mo("Actinosynnema pretiosum")), "B_ANNMA_PRTS") +expect_equal(as.character(as.mo("Actinosynnema")), "B_ANNMA") +expect_equal(as.character(as.mo(" B_ANNMA_PRTS ")), "B_ANNMA_PRTS") # check old names expect_equal(suppressMessages(as.character(as.mo("Escherichia blattae"))), "B_SHMWL_BLTT") @@ -250,7 +246,7 @@ expect_error(as.mo("E. coli", reference_df = data.frame(mycol = "TestingOwnID")) # combination of existing mo and other code expect_identical( - as.character(as.mo(c("B_ESCHR_COL", "ESCCOL"))), + suppressWarnings(as.character(as.mo(c("B_ESCHR_COL", "ESCCOL")))), c("B_ESCHR_COLI", "B_ESCHR_COLI") ) @@ -274,7 +270,7 @@ expect_equal( c("B_MCRBC_PRXY", "B_STRPT_SUIS", "B_RLTLL_TRRG") ) expect_stdout(print(mo_uncertainties())) -x <- as.mo("S. aur") +x <- as.mo("Sta. aur") # many hits expect_stdout(print(mo_uncertainties())) diff --git a/man/as.mo.Rd b/man/as.mo.Rd index 4e8f4b1e1..2c9d65360 100644 --- a/man/as.mo.Rd +++ b/man/as.mo.Rd @@ -4,9 +4,9 @@ \alias{as.mo} \alias{mo} \alias{is.mo} -\alias{mo_failures} \alias{mo_uncertainties} \alias{mo_renamed} +\alias{mo_failures} \alias{mo_reset_session} \alias{mo_cleaning_regex} \title{Transform Input to a Microorganism Code} @@ -27,12 +27,12 @@ as.mo( is.mo(x) -mo_failures() - mo_uncertainties() mo_renamed() +mo_failures() + mo_reset_session() mo_cleaning_regex() diff --git a/man/microorganisms.codes.Rd b/man/microorganisms.codes.Rd index 5643c9b33..c9c460a77 100644 --- a/man/microorganisms.codes.Rd +++ b/man/microorganisms.codes.Rd @@ -3,9 +3,9 @@ \docType{data} \name{microorganisms.codes} \alias{microorganisms.codes} -\title{Data Set with 5,508 Common Microorganism Codes} +\title{Data Set with 5,411 Common Microorganism Codes} \format{ -A \link[tibble:tibble]{tibble} with 5,508 observations and 2 variables: +A \link[tibble:tibble]{tibble} with 5,411 observations and 2 variables: \itemize{ \item \code{code}\cr Commonly used code of a microorganism \item \code{mo}\cr ID of the microorganism in the \link{microorganisms} data set diff --git a/man/mo_property.Rd b/man/mo_property.Rd index 8adba27b1..0e107686f 100644 --- a/man/mo_property.Rd +++ b/man/mo_property.Rd @@ -261,7 +261,7 @@ mo_property( \item{keep_synonyms}{a \link{logical} to indicate if old, previously valid taxonomic names must be preserved and not be corrected to currently accepted names. The default is \code{FALSE}, which will return a note if old taxonomic names were processed. The default can be set with \code{options(AMR_keep_synonyms = TRUE)} or \code{options(AMR_keep_synonyms = FALSE)}.} -\item{...}{other arguments passed on to \code{\link[=as.mo]{as.mo()}}, such as 'allow_uncertain' and 'ignore_pattern'} +\item{...}{other arguments passed on to \code{\link[=as.mo]{as.mo()}}, such as 'minimum_matching_score', 'ignore_pattern', and 'remove_from_input'} \item{ab}{any (vector of) text that can be coerced to a valid antibiotic code with \code{\link[=as.ab]{as.ab()}}} @@ -285,8 +285,8 @@ Use these functions to return a specific property of a microorganism based on th All functions will, at default, keep old taxonomic properties. Please refer to this example, knowing that \emph{Escherichia blattae} was renamed to \emph{Shimwellia blattae} in 2010: \itemize{ \item \code{mo_name("Escherichia blattae")} will return \code{"Shimwellia blattae"} (with a message about the renaming) -\item \code{mo_ref("Escherichia blattae")} will return \code{"Burgess et al., 1973"} (with a message about the renaming) -\item \code{mo_ref("Shimwellia blattae")} will return \code{"Priest et al., 2010"} (without a message) +\item \code{mo_ref("Escherichia blattae", keep_synonyms = TRUE)} will return \code{"Burgess et al., 1973"} (with a warning about the renaming) +\item \code{mo_ref("Shimwellia blattae", keep_synonyms = FALSE)} will return \code{"Priest et al., 2010"} (without a message) } The short name - \code{\link[=mo_shortname]{mo_shortname()}} - almost always returns the first character of the genus and the full species, like \code{"E. coli"}. Exceptions are abbreviations of staphylococci (such as \emph{"CoNS"}, Coagulase-Negative Staphylococci) and beta-haemolytic streptococci (such as \emph{"GBS"}, Group B Streptococci). Please bear in mind that e.g. \emph{E. coli} could mean \emph{Escherichia coli} (kingdom of Bacteria) as well as \emph{Entamoeba coli} (kingdom of Protozoa). Returning to the full name will be done using \code{\link[=as.mo]{as.mo()}} internally, giving priority to bacteria and human pathogens, i.e. \code{"E. coli"} will be considered \emph{Escherichia coli}. In other words, \code{mo_fullname(mo_shortname("Entamoeba coli"))} returns \code{"Escherichia coli"}.