diff --git a/DESCRIPTION b/DESCRIPTION index 1a1dc9b2e..fafe048e6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR -Version: 1.8.2.9023 -Date: 2022-10-02 +Version: 1.8.2.9024 +Date: 2022-10-03 Title: Antimicrobial Resistance Data Analysis Description: Functions to simplify and standardise antimicrobial resistance (AMR) data analysis and to work with microbial and antimicrobial properties by @@ -35,6 +35,7 @@ Enhances: tidyselect Suggests: curl, + data.table, dplyr, knitr, progress, diff --git a/NEWS.md b/NEWS.md index 35b32b55b..c3e764866 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# AMR 1.8.2.9023 +# AMR 1.8.2.9024 This version will eventually become v2.0! We're happy to reach a new major milestone soon! diff --git a/R/aa_globals.R b/R/aa_globals.R index b857bfbaf..750622d82 100755 --- a/R/aa_globals.R +++ b/R/aa_globals.R @@ -72,7 +72,7 @@ TAXONOMY_VERSION <- list( ), LPSN = list( accessed_date = as.Date("2022-09-12"), - citation = "Parte, A.C. *et al.* (2020). **List of Prokaryotic names with Standing in Nomenclature (LPSN) moves to the DSMZ.** International Journal of Systematic and Evolutionary Microbiology, 70, 5607-5612; \\doi{10.1099/ijsem.0.004332}.", + citation = "Parte, AC *et al.* (2020). **List of Prokaryotic names with Standing in Nomenclature (LPSN) moves to the DSMZ.** International Journal of Systematic and Evolutionary Microbiology, 70, 5607-5612; \\doi{10.1099/ijsem.0.004332}.", url = "https://lpsn.dsmz.de" ), SNOMED = list( diff --git a/R/aa_helper_functions.R b/R/aa_helper_functions.R index a95b890d3..c6856b8f3 100755 --- a/R/aa_helper_functions.R +++ b/R/aa_helper_functions.R @@ -989,7 +989,7 @@ unique_call_id <- function(entire_session = FALSE, match_fn = NULL) { message_not_thrown_before <- function(fn, ..., entire_session = FALSE) { # this is to prevent that messages/notes will be printed for every dplyr group or more than once per session # e.g. this would show a msg 4 times: example_isolates %>% group_by(ward) %>% filter(mo_is_gram_negative()) - salt <- gsub("[^a-zA-Z0-9|_-]", "?", paste(c(...)[seq_len(min(50, length(c(...))))], sep = "|", collapse = "|"), perl = TRUE) + salt <- gsub("[^a-zA-Z0-9|_-]", "?", substr(paste(c(...), sep = "|", collapse = "|"), 1, 512), perl = TRUE) not_thrown_before <- is.null(pkg_env[[paste0("thrown_msg.", fn, ".", salt)]]) || !identical( pkg_env[[paste0("thrown_msg.", fn, ".", salt)]], @@ -1361,6 +1361,31 @@ time_track <- function(name = NULL) { paste("(until now:", trimws(round(as.double(Sys.time()) * 1000) - pkg_env$time_start), "ms)") } +trimws2 <- function(..., whitespace = "[\u0009\u000A\u000B\u000C\u000D\u0020\u0085\u00A0\u1680\u180E\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u200B\u200C\u200D\u2028\u2029\u202F\u205F\u2060\u3000\uFEFF]") { + # this is even faster than trimws() itself which sets " \t\n\r". + trimws(..., whitespace = whitespace) +} + + +# Faster data.table implementations ---- + +match <- function(x, ...) { + if (isTRUE(pkg_env$has_data.table) && is.character(x)) { + # data.table::chmatch() is 35% faster than base::match() for character + getExportedValue(name = "chmatch", ns = asNamespace("data.table"))(x, ...) + } else { + base::match(x, ...) + } +} +`%in%` <- function(x, ...) { + if (isTRUE(pkg_env$has_data.table) && is.character(x)) { + # data.table::`%chin%`() is 20% faster than base::`%in%`() for character + getExportedValue(name = "%chin%", ns = asNamespace("data.table"))(x, ...) + } else { + base::`%in%`(x, ...) + } +} + # nolint start # Register S3 methods ---- diff --git a/R/ab_from_text.R b/R/ab_from_text.R index 35a48cc0c..9949c9ea6 100644 --- a/R/ab_from_text.R +++ b/R/ab_from_text.R @@ -114,7 +114,7 @@ ab_from_text <- function(text, meet_criteria(thorough_search, allow_class = "logical", has_length = 1, allow_NULL = TRUE) meet_criteria(info, allow_class = "logical", has_length = 1) - type <- tolower(trimws(type)) + type <- tolower(trimws2(type)) text <- tolower(as.character(text)) text_split_all <- strsplit(text, "[ ;.,:\\|]") diff --git a/R/ab_selectors.R b/R/ab_selectors.R index ea639b23b..3324b8717 100644 --- a/R/ab_selectors.R +++ b/R/ab_selectors.R @@ -766,12 +766,12 @@ any.ab_selector_any_all <- function(..., na.rm = FALSE) { } is_any <- function(el1) { - syscalls <- paste0(trimws(deparse(sys.calls())), collapse = " ") + syscalls <- paste0(trimws2(deparse(sys.calls())), collapse = " ") el1 <- gsub("(.*),.*", "\\1", el1) syscalls %like% paste0("[^_a-zA-Z0-9]any\\(", "(c\\()?", el1) } is_all <- function(el1) { - syscalls <- paste0(trimws(deparse(sys.calls())), collapse = " ") + syscalls <- paste0(trimws2(deparse(sys.calls())), collapse = " ") el1 <- gsub("(.*),.*", "\\1", el1) syscalls %like% paste0("[^_a-zA-Z0-9]all\\(", "(c\\()?", el1) } diff --git a/R/disk.R b/R/disk.R index 004d76c99..463522c48 100644 --- a/R/disk.R +++ b/R/disk.R @@ -76,7 +76,7 @@ as.disk <- function(x, na.rm = FALSE) { if (na.rm == TRUE) { x <- x[!is.na(x)] } - x[trimws(x) == ""] <- NA + x[trimws2(x) == ""] <- NA x.bak <- x na_before <- length(x[is.na(x)]) diff --git a/R/eucast_rules.R b/R/eucast_rules.R index e59e20c5c..93d01b91c 100755 --- a/R/eucast_rules.R +++ b/R/eucast_rules.R @@ -332,7 +332,7 @@ eucast_rules <- function(x, x <- x %pm>% strsplit(",") %pm>% unlist() %pm>% - trimws() %pm>% + trimws2() %pm>% vapply(FUN.VALUE = character(1), function(x) if (x %in% AMR::antibiotics$ab) ab_name(x, language = NULL, tolower = TRUE, fast_mode = TRUE) else x) %pm>% sort() %pm>% paste(collapse = ", ") @@ -343,8 +343,8 @@ eucast_rules <- function(x, x } format_antibiotic_names <- function(ab_names, ab_results) { - ab_names <- trimws(unlist(strsplit(ab_names, ","))) - ab_results <- trimws(unlist(strsplit(ab_results, ","))) + ab_names <- trimws2(unlist(strsplit(ab_names, ","))) + ab_results <- trimws2(unlist(strsplit(ab_results, ","))) if (length(ab_results) == 1) { if (length(ab_names) == 1) { # like FOX S diff --git a/R/guess_ab_col.R b/R/guess_ab_col.R index 21074a6f8..9185496a9 100755 --- a/R/guess_ab_col.R +++ b/R/guess_ab_col.R @@ -329,7 +329,7 @@ get_column_abx <- function(x, get_ab_from_namespace <- function(x, cols_ab) { # cols_ab comes from get_column_abx() - x <- trimws(unique(toupper(unlist(strsplit(x, ",", fixed = TRUE))))) + x <- trimws2(unique(toupper(unlist(strsplit(x, ",", fixed = TRUE))))) x_new <- character() for (val in x) { if (paste0("AB_", val) %in% ls(envir = asNamespace("AMR"))) { diff --git a/R/mic.R b/R/mic.R index 806f18212..1f6607085 100755 --- a/R/mic.R +++ b/R/mic.R @@ -179,7 +179,7 @@ as.mic <- function(x, na.rm = FALSE) { if (na.rm == TRUE) { x <- x[!is.na(x)] } - x[trimws(x) == ""] <- NA + x[trimws2(x) == ""] <- NA x.bak <- x # comma to period @@ -214,7 +214,7 @@ as.mic <- function(x, na.rm = FALSE) { # never end with dot x <- gsub("[.]$", "", x, perl = TRUE) # trim it - x <- trimws(x) + x <- trimws2(x) ## previously unempty values now empty - should return a warning later on x[x.bak != "" & x == ""] <- "invalid" diff --git a/R/mo.R b/R/mo.R index 02034217e..734fdff02 100755 --- a/R/mo.R +++ b/R/mo.R @@ -31,15 +31,15 @@ #' #' Use this function to determine a valid microorganism code ([`mo`]). Determination is done using intelligent rules and the complete taxonomic kingdoms `r vector_and(unique(microorganisms$kingdom[which(!grepl("(unknown|Fungi)", microorganisms$kingdom))]), quotes = FALSE)`, and most microbial species from the kingdom Fungi (see *Source*). The input can be almost anything: a full name (like `"Staphylococcus aureus"`), an abbreviated name (such as `"S. aureus"`), an abbreviation known in the field (such as `"MRSA"`), or just a genus. See *Examples*. #' @param x a [character] vector or a [data.frame] with one or two columns -#' @param Becker a [logical] to indicate whether staphylococci should be categorised into coagulase-negative staphylococci ("CoNS") and coagulase-positive staphylococci ("CoPS") instead of their own species, according to Karsten Becker *et al.* (1,2,3). +#' @param Becker a [logical] to indicate whether staphylococci should be categorised into coagulase-negative staphylococci ("CoNS") and coagulase-positive staphylococci ("CoPS") instead of their own species, according to Karsten Becker *et al.* (see Source). #' #' This excludes *Staphylococcus aureus* at default, use `Becker = "all"` to also categorise *S. aureus* as "CoPS". -#' @param Lancefield a [logical] to indicate whether a beta-haemolytic *Streptococcus* should be categorised into Lancefield groups instead of their own species, according to Rebecca C. Lancefield (4). These streptococci will be categorised in their first group, e.g. *Streptococcus dysgalactiae* will be group C, although officially it was also categorised into groups G and L. +#' @param Lancefield a [logical] to indicate whether a beta-haemolytic *Streptococcus* should be categorised into Lancefield groups instead of their own species, according to Rebecca C. Lancefield (see Source). These streptococci will be categorised in their first group, e.g. *Streptococcus dysgalactiae* will be group C, although officially it was also categorised into groups G and L. #' #' This excludes enterococci at default (who are in group D), use `Lancefield = "all"` to also categorise all enterococci as group D. #' @param minimum_matching_score a numeric value to set as the lower limit for the [MO matching score][mo_matching_score()]. When left blank, this will be determined automatically based on the character length of `x`, its [taxonomic kingdom][microorganisms] and [human pathogenicity][mo_matching_score()]. #' @param allow_uncertain a number between `0` (or `"none"`) and `3` (or `"all"`), or `TRUE` (= `2`) or `FALSE` (= `0`) to indicate whether the input should be checked for less probable results, see *Details* -#' @param keep_synonyms a [logical] to indicate if old, previously valid taxonomic names must be preserved and not be corrected to currently accepted names. The default is `TRUE`, which will return a note if old taxonomic names are returned. The default can be set with `options(AMR_keep_synonyms = ...)`. +#' @param keep_synonyms a [logical] to indicate if old, previously valid taxonomic names must be preserved and not be corrected to currently accepted names. The default is `FALSE`, which will return a note if old taxonomic names were processed. The default can be set with `options(AMR_keep_synonyms = TRUE)` or `options(AMR_keep_synonyms = FALSE)`. #' @param reference_df a [data.frame] to be used for extra reference when translating `x` to a valid [`mo`]. See [set_mo_source()] and [get_mo_source()] to automate the usage of your own codes (e.g. used in your analysis or organisation). #' @param ignore_pattern a regular expression (case-insensitive) of which all matches in `x` must return `NA`. This can be convenient to exclude known non-relevant input and can also be set with the option `AMR_ignore_pattern`, e.g. `options(AMR_ignore_pattern = "(not reported|contaminated flora)")`. #' @param language language to translate text like "no growth", which defaults to the system language (see [get_AMR_locale()]) @@ -83,18 +83,16 @@ #' #' ### Coping with Uncertain Results #' -#' In addition, the [as.mo()] function can differentiate four levels of uncertainty to guess valid results: -#' - Uncertainty level 0: no additional rules are applied; -#' - Uncertainty level 1: allow previously accepted (but now invalid) taxonomic names and minor spelling errors; -#' - Uncertainty level 2: allow all of level 1, strip values between brackets, inverse the words of the input, strip off text elements from the end keeping at least two elements; -#' - Uncertainty level 3: allow all of level 1 and 2, strip off text elements from the end, allow any part of a taxonomic name. +#' Users can control the coercion rules by setting the `allow_uncertain` argument in the [as.mo()] function. The following values or levels can be used: +#' +#' - `0`: no additional rules are applied; +#' - `1`: allow previously accepted (but now invalid) taxonomic names +#' - `2`: allow all of `1`, strip values between brackets, inverse the words of the input, strip off text elements from the end keeping at least two elements; +#' - `3`: allow all of level `1` and `2`, strip off text elements from the end, allow any part of a taxonomic name; +#' - `TRUE` (default): equivalent to `2`; +#' - `FALSE`: equivalent to `0``. #' -#' The level of uncertainty can be set using the argument `allow_uncertain`. The default is `allow_uncertain = TRUE`, which is equal to uncertainty level 2. Using `allow_uncertain = FALSE` is equal to uncertainty level 0 and will skip all rules. You can also use e.g. `as.mo(..., allow_uncertain = 1)` to only allow up to level 1 uncertainty. -#' -#' With the default setting (`allow_uncertain = TRUE`, level 2), below examples will lead to valid results: -#' - `"Streptococcus group B (known as S. agalactiae)"`. The text between brackets will be removed and a warning will be thrown that the result *Streptococcus group B* (`B_STRPT_GRPB`) needs review. -#' - `"S. aureus - please mind: MRSA"`. The last word will be stripped, after which the function will try to find a match. If it does not, the second last word will be stripped, etc. Again, a warning will be thrown that the result *Staphylococcus aureus* (`B_STPHY_AURS`) needs review. -#' - `"Fluoroquinolone-resistant Neisseria gonorrhoeae"`. The first word will be stripped, after which the function will try to find a match. A warning will be thrown that the result *Neisseria gonorrhoeae* (`B_NESSR_GNRR`) needs review. +#' The default is `allow_uncertain = TRUE`, which is equal to uncertainty level 2. Using `allow_uncertain = FALSE` is equal to uncertainty level 0 and will skip all rules. You can also use e.g. `as.mo(..., allow_uncertain = 1)` to only allow up to level 1 uncertainty. #' #' There are three helper functions that can be run after using the [as.mo()] function: #' - Use [mo_uncertainties()] to get a [data.frame] that prints in a pretty format with all taxonomic names that were guessed. The output contains the matching score for all matches (see *Matching Score for Microorganisms* below). @@ -105,16 +103,18 @@ #' #' The coercion rules consider the prevalence of microorganisms in humans grouped into three groups, which is available as the `prevalence` columns in the [microorganisms] data set. The grouping into human pathogenic prevalence is explained in the section *Matching Score for Microorganisms* below. #' @inheritSection mo_matching_score Matching Score for Microorganisms -# (source as a section here, so it can be inherited by other man pages:) +#' +# (source as a section here, so it can be inherited by other man pages) #' @section Source: -#' 1. Becker K. *et al.* (2014). **Coagulase-Negative Staphylococci.** *Clin Microbiol Rev.* 27(4): 870-926; \doi{10.1128/CMR.00109-13} -#' 2. Becker K. *et al.* (2019). **Implications of identifying the recently defined members of the *S. aureus* complex, *S. argenteus* and *S. schweitzeri*: A position paper of members of the ESCMID Study Group for staphylococci and Staphylococcal Diseases (ESGS).** *Clin Microbiol Infect*; \doi{10.1016/j.cmi.2019.02.028} -#' 3. Becker K. *et al.* (2020). **Emergence of coagulase-negative staphylococci** *Expert Rev Anti Infect Ther.* 18(4):349-366; \doi{10.1080/14787210.2020.1730813} -#' 4. Lancefield R.C. (1933). **A serological differentiation of human and other groups of hemolytic streptococci**. *J Exp Med.* 57(4): 571-95; \doi{10.1084/jem.57.4.571} -#' 5. Berends M.S. *et al.* (2022). **Trends in Occurrence and Phenotypic Resistance of Coagulase-Negative Staphylococci (CoNS) Found in Human Blood in the Northern Netherlands between 2013 and 2019** *Microorganisms* 10(9), 1801; \doi{10.3390/microorganisms10091801} -#' 6. `r TAXONOMY_VERSION$LPSN$citation` Accessed from <`r TAXONOMY_VERSION$LPSN$url`> on `r documentation_date(TAXONOMY_VERSION$LPSN$accessed_date)`. -#' 7. `r TAXONOMY_VERSION$GBIF$citation` Accessed from <`r TAXONOMY_VERSION$GBIF$url`> on `r documentation_date(TAXONOMY_VERSION$GBIF$accessed_date)`. -#' 8. `r TAXONOMY_VERSION$SNOMED$citation` URL: <`r TAXONOMY_VERSION$SNOMED$url`> +#' 1. Berends MS *et al.* (2022). **AMR: An R Package for Working with Antimicrobial Resistance Data**. *Journal of Statistical Software*, 104(3), 1-31; \doi{10.18637/jss.v104.i03} +#' 2. Becker K *et al.* (2014). **Coagulase-Negative Staphylococci.** *Clin Microbiol Rev.* 27(4): 870-926; \doi{10.1128/CMR.00109-13} +#' 3. Becker K *et al.* (2019). **Implications of identifying the recently defined members of the *S. aureus* complex, *S. argenteus* and *S. schweitzeri*: A position paper of members of the ESCMID Study Group for staphylococci and Staphylococcal Diseases (ESGS).** *Clin Microbiol Infect*; \doi{10.1016/j.cmi.2019.02.028} +#' 4. Becker K *et al.* (2020). **Emergence of coagulase-negative staphylococci** *Expert Rev Anti Infect Ther.* 18(4):349-366; \doi{10.1080/14787210.2020.1730813} +#' 5. Lancefield RC (1933). **A serological differentiation of human and other groups of hemolytic streptococci**. *J Exp Med.* 57(4): 571-95; \doi{10.1084/jem.57.4.571} +#' 6. Berends MS *et al.* (2022). **Trends in Occurrence and Phenotypic Resistance of Coagulase-Negative Staphylococci (CoNS) Found in Human Blood in the Northern Netherlands between 2013 and 2019** *Microorganisms* 10(9), 1801; \doi{10.3390/microorganisms10091801} +#' 7. `r TAXONOMY_VERSION$LPSN$citation` Accessed from <`r TAXONOMY_VERSION$LPSN$url`> on `r documentation_date(TAXONOMY_VERSION$LPSN$accessed_date)`. +#' 8. `r TAXONOMY_VERSION$GBIF$citation` Accessed from <`r TAXONOMY_VERSION$GBIF$url`> on `r documentation_date(TAXONOMY_VERSION$GBIF$accessed_date)`. +#' 9. `r TAXONOMY_VERSION$SNOMED$citation` URL: <`r TAXONOMY_VERSION$SNOMED$url`> #' @export #' @return A [character] [vector] with additional class [`mo`] #' @seealso [microorganisms] for the [data.frame] that is being used to determine ID's. @@ -166,7 +166,7 @@ as.mo <- function(x, Lancefield = FALSE, minimum_matching_score = NULL, allow_uncertain = TRUE, - keep_synonyms = getOption("AMR_keep_synonyms", TRUE), + keep_synonyms = getOption("AMR_keep_synonyms", FALSE), reference_df = get_mo_source(), ignore_pattern = getOption("AMR_ignore_pattern", NULL), language = get_AMR_locale(), @@ -175,21 +175,31 @@ as.mo <- function(x, meet_criteria(x, allow_class = c("mo", "data.frame", "list", "character", "numeric", "integer", "factor"), allow_NA = TRUE) meet_criteria(Becker, allow_class = c("logical", "character"), has_length = 1) meet_criteria(Lancefield, allow_class = c("logical", "character"), has_length = 1) + meet_criteria(allow_uncertain, allow_class = c("logical", "numeric", "integer"), has_length = 1) meet_criteria(keep_synonyms, allow_class = "logical", has_length = 1) meet_criteria(minimum_matching_score, allow_class = c("numeric", "integer"), has_length = 1, allow_NULL = TRUE) meet_criteria(reference_df, allow_class = "data.frame", allow_NULL = TRUE) meet_criteria(ignore_pattern, allow_class = "character", has_length = 1, allow_NULL = TRUE) language <- validate_language(language) meet_criteria(info, allow_class = "logical", has_length = 1) - - if (tryCatch(all(x[!is.na(x)] %in% AMR::microorganisms$mo) & - isFALSE(Becker) & - isTRUE(keep_synonyms) && + + # set the microorganisms data set to use for all lookup + mo_data <- MO_lookup + + allow_uncertain <- translate_allow_uncertain(allow_uncertain) + if (allow_uncertain < 1) { + # do not allow old names + mo_data <- mo_data[which(mo_data$status == "accepted"), , drop = FALSE] + } + + if (tryCatch(all(x %in% c(mo_data$mo, NA)) && + isFALSE(Becker) && isFALSE(Lancefield), error = function(e) FALSE)) { # don't look into valid MO codes, just return them # is.mo() won't work - MO codes might change between package versions return(set_clean_class(x, new_class = c("mo", "character"))) } + # start off with replaced language-specific non-ASCII characters with ASCII characters x <- parse_and_convert(x) @@ -211,13 +221,13 @@ as.mo <- function(x, out[x %in% reference_df[[1]]] <- reference_df[[2]][match(x[x %in% reference_df[[1]]], reference_df[[1]])] } # From MO code ---- - out[is.na(out) & x %in% AMR::microorganisms$mo] <- x[is.na(out) & x %in% AMR::microorganisms$mo] + out[is.na(out) & x %in% mo_data$mo] <- x[is.na(out) & x %in% mo_data$mo] # From full name ---- - out[is.na(out) & x %in% AMR::microorganisms$fullname] <- AMR::microorganisms$mo[match(x[is.na(out) & x %in% AMR::microorganisms$fullname], AMR::microorganisms$fullname)] + out[is.na(out) & x %in% mo_data$fullname] <- mo_data$mo[match(x[is.na(out) & x %in% mo_data$fullname], mo_data$fullname)] # From known codes ---- out[is.na(out) & x %in% AMR::microorganisms.codes$code] <- AMR::microorganisms.codes$mo[match(x[is.na(out) & x %in% AMR::microorganisms.codes$code], AMR::microorganisms.codes$code)] # From SNOMED ---- - if (any(is.na(out) & x %in% unlist(microorganisms$snomed), na.rm = TRUE)) { + if (any(is.na(out) & !is.na(x)) && any(is.na(out) & x %in% unlist(microorganisms$snomed), na.rm = TRUE)) { # found this extremely fast gem here: https://stackoverflow.com/a/11002456/4575331 out[is.na(out) & x %in% unlist(microorganisms$snomed)] <- microorganisms$mo[rep(seq_along(microorganisms$snomed), vapply(FUN.VALUE = double(1), microorganisms$snomed, length))[match(x[is.na(out) & x %in% unlist(microorganisms$snomed)], unlist(microorganisms$snomed))]] } @@ -228,13 +238,13 @@ as.mo <- function(x, old <- out out[is.na(out) & x %in% pkg_env$mo_previously_coerced$x] <- pkg_env$mo_previously_coerced$mo[match(x[is.na(out) & x %in% pkg_env$mo_previously_coerced$x], pkg_env$mo_previously_coerced$x)] new <- out - if (isTRUE(info) && message_not_thrown_before("as.mo", old[seq_len(min(100, length(old)))], new[seq_len(min(100, length(new)))], entire_session = TRUE) && any(is.na(old) & !is.na(new), na.rm = TRUE)) { + if (isTRUE(info) && message_not_thrown_before("as.mo", old, new, entire_session = TRUE) && any(is.na(old) & !is.na(new), na.rm = TRUE)) { message_( "Returning previously coerced value", ifelse(sum(is.na(old) & !is.na(new)) > 1, "s", ""), " for ", vector_and(x[is.na(old) & !is.na(new)]), ". Run `mo_reset_session()` to reset this." ) } - + # For all other input ---- if (any(is.na(out) & !is.na(x))) { # reset uncertainties @@ -257,29 +267,36 @@ as.mo <- function(x, progress$tick() # some required cleaning steps - x_out <- trimws(x_search) + x_out <- trimws2(x_search) x_out <- gsub("[^A-Za-z-]+", " ", x_out, perl = TRUE) x_out <- gsub(" +", " ", x_out, perl = TRUE) x_out <- gsub("(^| )(e?spp|e?ssp|e?ss|e?sp|e?subsp|sube?species|biovar|biotype|serovar|e?species)( |$)", "", x_out, ignore.case = TRUE, perl = TRUE) x_search_cleaned <- x_out x_out <- tolower(x_out) + if (allow_uncertain == 2) { + + } + if (allow_uncertain == 3) { + + } + # take out the parts, split by space x_parts <- strsplit(gsub("-", " ", x_out, fixed = TRUE), " ", fixed = TRUE)[[1]] # do a pre-match on first character (and if it contains a space, first chars of first two terms) if (length(x_parts) %in% c(2, 3)) { # for genus + species + subspecies - filtr <- which(MO_lookup$full_first == substr(x_parts[1], 1, 1) & MO_lookup$species_first == substr(x_parts[2], 1, 1)) + filtr <- which(mo_data$full_first == substr(x_parts[1], 1, 1) & mo_data$species_first == substr(x_parts[2], 1, 1)) } else if (length(x_parts) > 3) { first_chars <- paste0("(^| )", "[", paste(substr(x_parts, 1, 1), collapse = ""), "]") - filtr <- which(MO_lookup$full_first %like_case% first_chars) + filtr <- which(mo_data$full_first %like_case% first_chars) } else if (nchar(x_out) == 4) { # no space and 4 characters - probably a code such as STAU or ESCO! if (isTRUE(info)) { message_("Input \"", x_search, "\" is assumed to be a microorganism code - trying to match on ", vector_and(c(substr(x_out, 1, 2), substr(x_out, 3, 4)), sort = FALSE)) } - filtr <- which(MO_lookup$fullname_lower %like_case% paste0("(^| )", substr(x_out, 1, 2), ".* ", substr(x_out, 3, 4))) + filtr <- which(mo_data$fullname_lower %like_case% paste0("(^| )", substr(x_out, 1, 2), ".* ", substr(x_out, 3, 4))) } else if (nchar(x_out) <= 6) { # no space and 5-6 characters - probably a code such as STAAUR or ESCCOL! first_part <- paste0(substr(x_out, 1, 2), "[a-z]*", substr(x_out, 3, 3)) @@ -287,14 +304,14 @@ as.mo <- function(x, if (isTRUE(info)) { message_("Input \"", x_search, "\" is assumed to be a microorganism code - trying to match on ", vector_and(c(gsub("[a-z]*", "(...)", first_part, fixed = TRUE), second_part), sort = FALSE)) } - filtr <- which(MO_lookup$fullname_lower %like_case% paste0("(^| )", first_part, ".* ", second_part)) + filtr <- which(mo_data$fullname_lower %like_case% paste0("(^| )", first_part, ".* ", second_part)) } else { - filtr <- which(MO_lookup$full_first == substr(x_out, 1, 1)) + filtr <- which(mo_data$full_first == substr(x_out, 1, 1)) } if (length(filtr) == 0) { - mo_to_search <- MO_lookup$fullname + mo_to_search <- mo_data$fullname } else { - mo_to_search <- MO_lookup$fullname[filtr] + mo_to_search <- mo_data$fullname[filtr] } pkg_env$mo_to_search <- mo_to_search # determine the matching score on the original search value @@ -383,35 +400,12 @@ as.mo <- function(x, gbif_matches = gbif_matches[!is.na(gbif_matches) | !is.na(lpsn_matches)], lpsn_matches = lpsn_matches[!is.na(gbif_matches) | !is.na(lpsn_matches)]) if (isFALSE(keep_synonyms)) { - out_old <- out - - gbif_matches[!gbif_matches %in% AMR::microorganisms$gbif] <- NA out[which(!is.na(gbif_matches))] <- AMR::microorganisms$mo[match(gbif_matches[which(!is.na(gbif_matches))], AMR::microorganisms$gbif)] - - lpsn_matches[!lpsn_matches %in% AMR::microorganisms$lpsn] <- NA out[which(!is.na(lpsn_matches))] <- AMR::microorganisms$mo[match(lpsn_matches[which(!is.na(lpsn_matches))], AMR::microorganisms$lpsn)] - - # if (isTRUE(info) && (any(!is.na(gbif_matches)) || any(!is.na(lpsn_matches))) && message_not_thrown_before("as.mo", gbif_matches[which(!is.na(gbif_matches))], lpsn_matches[which(!is.na(lpsn_matches))]) && length(c(lpsn_matches, gbif_matches)) > 0) { - # mo_old <- out_old[which(!is.na(gbif_matches) | !is.na(lpsn_matches))] - # mo_new <- out[which(!is.na(gbif_matches) | !is.na(lpsn_matches))] - # - # mo_new <- mo_new[!duplicated(mo_old)] - # mo_old <- mo_old[!duplicated(mo_old)] - # - # mo_new <- mo_new[order(mo_old)] - # mo_old <- mo_old[order(mo_old)] - # - # ref_old <- microorganisms$ref[match(mo_old, microorganisms$mo)] - # ref_old[!is.na(ref_old)] <- paste0(" (", ref_old[!is.na(ref_old)], ")") - # ref_old[is.na(ref_old)] <- "" - # ref_new <- microorganisms$ref[match(mo_new, microorganisms$mo)] - # ref_new[!is.na(ref_new)] <- paste0(" (", ref_new[!is.na(ref_new)], ")") - # ref_new[is.na(ref_new)] <- "" - # - # pkg_env$mo_renamed <- list(mo_old = mo_old, mo_new = mo_new) - # print(mo_renamed(), extra_txt = " (use `keep_synonyms = TRUE` to leave uncorrected)") - # } - } else if (is.null(getOption("AMR_keep_synonyms")) && any(!is.na(c(gbif_matches, lpsn_matches))) && message_not_thrown_before("as.mo", "keep_synonyms_warning", entire_session = TRUE)) { + if (isTRUE(info) && length(pkg_env$mo_renamed$old) > 0) { + print(mo_renamed(), extra_txt = " (use `keep_synonyms = TRUE` to leave uncorrected)") + } + } else if (is.null(getOption("AMR_keep_synonyms")) && length(pkg_env$mo_renamed$old) > 0 && message_not_thrown_before("as.mo", "keep_synonyms_warning", entire_session = TRUE)) { # keep synonyms is TRUE, so check if any do have synonyms warning_("Function `as.mo()` returned some old taxonomic names. Use `as.mo(..., keep_synonyms = FALSE)` to clean the input to currently accepted taxonomic names, or set the R option `AMR_keep_synonyms` to `FALSE`. This warning will be shown once per session.") } @@ -505,7 +499,7 @@ pillar_shaft.mo <- function(x, ...) { mo_cols <- NULL } - if (!all(x[!is.na(x)] %in% AMR::microorganisms$mo) | + if (!all(x %in% c(AMR::microorganisms$mo, NA)) || (!is.null(df) && !all(unlist(df[, which(mo_cols), drop = FALSE]) %in% AMR::microorganisms$mo))) { # markup old mo codes out[!x %in% AMR::microorganisms$mo] <- font_italic(font_na(x[!x %in% AMR::microorganisms$mo], @@ -605,7 +599,7 @@ print.mo <- function(x, print.shortnames = FALSE, ...) { } x <- as.character(x) names(x) <- x_names - if (!all(x[!is.na(x)] %in% AMR::microorganisms$mo)) { + if (!all(x %in% c(AMR::microorganisms$mo, NA))) { warning_( "Some MO codes are from a previous AMR package version. ", "Please update the MO codes with `as.mo()`." @@ -637,7 +631,7 @@ summary.mo <- function(object, ...) { #' @export #' @noRd as.data.frame.mo <- function(x, ...) { - if (!all(x[!is.na(x)] %in% AMR::microorganisms$mo)) { + if (!all(x %in% c(AMR::microorganisms$mo, NA))) { warning_( "The data contains old MO codes (from a previous AMR package version). ", "Please update your MO codes with `as.mo()`." @@ -730,7 +724,7 @@ mo_uncertainties <- function() { #' @noRd print.mo_uncertainties <- function(x, ...) { if (NROW(x) == 0) { - cat(word_wrap("No uncertainties to show. Only uncertainties of the last call of `as.mo()` or any `mo_*()` function are stored.", add_fn = font_blue)) + cat(word_wrap("No uncertainties to show. Only uncertainties of the last call of `as.mo()` or any `mo_*()` function are stored.\n", add_fn = font_blue)) return(invisible(NULL)) } @@ -819,31 +813,47 @@ print.mo_uncertainties <- function(x, ...) { #' @rdname as.mo #' @export mo_renamed <- function() { - set_clean_class(pkg_env$mo_renamed, new_class = c("mo_renamed", "list")) + x <- pkg_env$mo_renamed + + x$new <- ifelse(is.na(x$lpsn_matches), + AMR::microorganisms$mo[match(x$gbif_matches, AMR::microorganisms$gbif)], + AMR::microorganisms$mo[match(x$lpsn_matches, AMR::microorganisms$lpsn)]) + mo_old <- AMR::microorganisms$fullname[match(x$old, AMR::microorganisms$mo)] + mo_new <- AMR::microorganisms$fullname[match(x$new, AMR::microorganisms$mo)] + ref_old <- AMR::microorganisms$ref[match(x$old, AMR::microorganisms$mo)] + ref_new <- AMR::microorganisms$ref[match(x$new, AMR::microorganisms$mo)] + + df_renamed <- data.frame(old = mo_old, + new = mo_new, + ref_old = ref_old, + ref_new = ref_new, + stringsAsFactors = FALSE) + df_renamed <- unique(df_renamed) + df_renamed <- df_renamed[order(df_renamed$old), , drop = FALSE] + set_clean_class(df_renamed, new_class = c("mo_renamed", "data.frame")) } #' @method print mo_renamed #' @export #' @noRd -print.mo_renamed <- function(x, extra_txt = "", ...) { - if (length(x) == 0) { - cat(word_wrap("No renamed taxonomy to show. Only renamed taxonomy of the last call of `as.mo()` or any `mo_*()` function are stored.", add_fn = font_blue)) +print.mo_renamed <- function(x, extra_txt = "", n = 25, ...) { + if (NROW(x) == 0) { + cat(word_wrap("No renamed taxonomy to show. Only renamed taxonomy of the last call of `as.mo()` or any `mo_*()` function are stored.\n", add_fn = font_blue)) return(invisible(NULL)) } - ref_old <- AMR::microorganisms$ref[match(x$mo_old, AMR::microorganisms$mo)] - ref_new <- AMR::microorganisms$ref[match(x$mo_new, AMR::microorganisms$mo)] - ref_old[!is.na(ref_old)] <- paste0(" (", gsub("et al.", font_italic("et al.", collapse = NULL), ref_old[!is.na(ref_old)], fixed = TRUE), ")") - ref_new[!is.na(ref_new)] <- paste0(" (", gsub("et al.", font_italic("et al.", collapse = NULL), ref_new[!is.na(ref_new)], fixed = TRUE), ")") - + x$ref_old[!is.na(x$ref_old)] <- paste0(" (", gsub("et al.", font_italic("et al."), x$ref_old[!is.na(x$ref_old)], fixed = TRUE), ")") + x$ref_new[!is.na(x$ref_new)] <- paste0(" (", gsub("et al.", font_italic("et al."), x$ref_new[!is.na(x$ref_new)], fixed = TRUE), ")") + + rows <- seq_len(min(NROW(x), n)) + message_( - "The following microorganism", ifelse(length(x$mo_old) > 1, "s were", " was"), " taxonomically renamed", extra_txt, ":\n", - paste0(" \u2022 ", font_italic(AMR::microorganisms$fullname[match(x$mo_old, AMR::microorganisms$mo)], collapse = NULL), - ref_old, - " -> ", font_italic(AMR::microorganisms$fullname[match(x$mo_new, AMR::microorganisms$mo)], collapse = NULL), - ref_new, + "The following microorganism", ifelse(NROW(x) > 1, "s were", " was"), " taxonomically renamed", extra_txt, ":\n", + paste0(" \u2022 ", font_italic(x$old[rows], collapse = NULL), x$ref_old[rows], + " -> ", font_italic(x$new[rows], collapse = NULL), x$ref_new[rows], collapse = "\n" - ) + ), + ifelse(NROW(x) > n, paste0("\n\nOnly the first ", n, " (out of ", NROW(x), ") are shown. Run `print(mo_renamed(), n = ...)` to view more entries (might be slow), or save `mo_renamed()` to an object."), "") ) } @@ -898,11 +908,6 @@ load_mo_uncertainties <- function(metadata) { pkg_env$mo_uncertainties <- metadata$uncertainties } -trimws2 <- function(..., whitespace = "[\u0009\u000A\u000B\u000C\u000D\u0020\u0085\u00A0\u1680\u180E\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u200B\u200C\u200D\u2028\u2029\u202F\u205F\u2060\u3000\uFEFF]") { - # this is even faster than trimws() itself which sets " \t\n\r". - trimws(..., whitespace = whitespace) -} - parse_and_convert <- function(x) { if (tryCatch(is.character(x) && all(Encoding(x) == "unknown", na.rm = TRUE), error = function(e) FALSE)) { return(trimws2(x)) @@ -927,7 +932,6 @@ parse_and_convert <- function(x) { parsed[is.na(parsed) & !is.na(x)] <- iconv(x[is.na(parsed) & !is.na(x)], from = "Latin1", to = "ASCII//TRANSLIT") parsed <- gsub('"', "", parsed, fixed = TRUE) parsed <- gsub(" +", " ", parsed, perl = TRUE) - parsed <- trimws(parsed) parsed }, error = function(e) stop(e$message, call. = FALSE) diff --git a/R/mo_property.R b/R/mo_property.R index bf4470099..11d5230ed 100755 --- a/R/mo_property.R +++ b/R/mo_property.R @@ -172,7 +172,7 @@ #' # SNOMED codes, and URL to the online database #' mo_info("Klebsiella pneumoniae") #' } -mo_name <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", TRUE), ...) { +mo_name <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ...) { if (missing(x)) { # this tries to find the data and an column x <- find_mo_col(fn = "mo_name") @@ -194,7 +194,7 @@ mo_fullname <- mo_name #' @rdname mo_property #' @export -mo_shortname <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", TRUE), ...) { +mo_shortname <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ...) { if (missing(x)) { # this tries to find the data and an column x <- find_mo_col(fn = "mo_shortname") @@ -224,7 +224,7 @@ mo_shortname <- function(x, language = get_AMR_locale(), keep_synonyms = getOpti # exceptions for streptococci: Group A Streptococcus -> GAS shortnames[shortnames %like% "S. group [ABCDFGHK]"] <- paste0("G", gsub("S. group ([ABCDFGHK])", "\\1", shortnames[shortnames %like% "S. group [ABCDFGHK]"], perl = TRUE), "S") # unknown species etc. - shortnames[shortnames %like% "unknown"] <- paste0("(", trimws(gsub("[^a-zA-Z -]", "", shortnames[shortnames %like% "unknown"], perl = TRUE)), ")") + shortnames[shortnames %like% "unknown"] <- paste0("(", trimws2(gsub("[^a-zA-Z -]", "", shortnames[shortnames %like% "unknown"], perl = TRUE)), ")") shortnames[is.na(x.mo)] <- NA_character_ load_mo_uncertainties(metadata) @@ -235,7 +235,7 @@ mo_shortname <- function(x, language = get_AMR_locale(), keep_synonyms = getOpti #' @rdname mo_property #' @export -mo_subspecies <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", TRUE), ...) { +mo_subspecies <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ...) { if (missing(x)) { # this tries to find the data and an column x <- find_mo_col(fn = "mo_subspecies") @@ -249,7 +249,7 @@ mo_subspecies <- function(x, language = get_AMR_locale(), keep_synonyms = getOpt #' @rdname mo_property #' @export -mo_species <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", TRUE), ...) { +mo_species <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ...) { if (missing(x)) { # this tries to find the data and an column x <- find_mo_col(fn = "mo_species") @@ -263,7 +263,7 @@ mo_species <- function(x, language = get_AMR_locale(), keep_synonyms = getOption #' @rdname mo_property #' @export -mo_genus <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", TRUE), ...) { +mo_genus <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ...) { if (missing(x)) { # this tries to find the data and an column x <- find_mo_col(fn = "mo_genus") @@ -277,7 +277,7 @@ mo_genus <- function(x, language = get_AMR_locale(), keep_synonyms = getOption(" #' @rdname mo_property #' @export -mo_family <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", TRUE), ...) { +mo_family <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ...) { if (missing(x)) { # this tries to find the data and an column x <- find_mo_col(fn = "mo_family") @@ -291,7 +291,7 @@ mo_family <- function(x, language = get_AMR_locale(), keep_synonyms = getOption( #' @rdname mo_property #' @export -mo_order <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", TRUE), ...) { +mo_order <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ...) { if (missing(x)) { # this tries to find the data and an column x <- find_mo_col(fn = "mo_order") @@ -305,7 +305,7 @@ mo_order <- function(x, language = get_AMR_locale(), keep_synonyms = getOption(" #' @rdname mo_property #' @export -mo_class <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", TRUE), ...) { +mo_class <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ...) { if (missing(x)) { # this tries to find the data and an column x <- find_mo_col(fn = "mo_class") @@ -319,7 +319,7 @@ mo_class <- function(x, language = get_AMR_locale(), keep_synonyms = getOption(" #' @rdname mo_property #' @export -mo_phylum <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", TRUE), ...) { +mo_phylum <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ...) { if (missing(x)) { # this tries to find the data and an column x <- find_mo_col(fn = "mo_phylum") @@ -333,7 +333,7 @@ mo_phylum <- function(x, language = get_AMR_locale(), keep_synonyms = getOption( #' @rdname mo_property #' @export -mo_kingdom <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", TRUE), ...) { +mo_kingdom <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ...) { if (missing(x)) { # this tries to find the data and an column x <- find_mo_col(fn = "mo_kingdom") @@ -351,7 +351,7 @@ mo_domain <- mo_kingdom #' @rdname mo_property #' @export -mo_type <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", TRUE), ...) { +mo_type <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ...) { if (missing(x)) { # this tries to find the data and an column x <- find_mo_col(fn = "mo_type") @@ -368,7 +368,7 @@ mo_type <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("A #' @rdname mo_property #' @export -mo_status <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", TRUE), ...) { +mo_status <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ...) { if (missing(x)) { # this tries to find the data and an column x <- find_mo_col(fn = "mo_status") @@ -382,7 +382,7 @@ mo_status <- function(x, language = get_AMR_locale(), keep_synonyms = getOption( #' @rdname mo_property #' @export -mo_gramstain <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", TRUE), ...) { +mo_gramstain <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ...) { if (missing(x)) { # this tries to find the data and an column x <- find_mo_col(fn = "mo_gramstain") @@ -417,7 +417,7 @@ mo_gramstain <- function(x, language = get_AMR_locale(), keep_synonyms = getOpti #' @rdname mo_property #' @export -mo_is_gram_negative <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", TRUE), ...) { +mo_is_gram_negative <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ...) { if (missing(x)) { # this tries to find the data and an column x <- find_mo_col(fn = "mo_is_gram_negative") @@ -437,7 +437,7 @@ mo_is_gram_negative <- function(x, language = get_AMR_locale(), keep_synonyms = #' @rdname mo_property #' @export -mo_is_gram_positive <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", TRUE), ...) { +mo_is_gram_positive <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ...) { if (missing(x)) { # this tries to find the data and an column x <- find_mo_col(fn = "mo_is_gram_positive") @@ -457,7 +457,7 @@ mo_is_gram_positive <- function(x, language = get_AMR_locale(), keep_synonyms = #' @rdname mo_property #' @export -mo_is_yeast <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", TRUE), ...) { +mo_is_yeast <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ...) { if (missing(x)) { # this tries to find the data and an column x <- find_mo_col(fn = "mo_is_yeast") @@ -482,7 +482,7 @@ mo_is_yeast <- function(x, language = get_AMR_locale(), keep_synonyms = getOptio #' @rdname mo_property #' @export -mo_is_intrinsic_resistant <- function(x, ab, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", TRUE), ...) { +mo_is_intrinsic_resistant <- function(x, ab, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ...) { if (missing(x)) { # this tries to find the data and an column x <- find_mo_col(fn = "mo_is_intrinsic_resistant") @@ -519,7 +519,7 @@ mo_is_intrinsic_resistant <- function(x, ab, language = get_AMR_locale(), keep_s #' @rdname mo_property #' @export -mo_snomed <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", TRUE), ...) { +mo_snomed <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ...) { if (missing(x)) { # this tries to find the data and an column x <- find_mo_col(fn = "mo_snomed") @@ -533,7 +533,7 @@ mo_snomed <- function(x, language = get_AMR_locale(), keep_synonyms = getOption( #' @rdname mo_property #' @export -mo_ref <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", TRUE), ...) { +mo_ref <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ...) { if (missing(x)) { # this tries to find the data and an column x <- find_mo_col(fn = "mo_ref") @@ -547,7 +547,7 @@ mo_ref <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AM #' @rdname mo_property #' @export -mo_authors <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", TRUE), ...) { +mo_authors <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ...) { if (missing(x)) { # this tries to find the data and an column x <- find_mo_col(fn = "mo_authors") @@ -564,7 +564,7 @@ mo_authors <- function(x, language = get_AMR_locale(), keep_synonyms = getOption #' @rdname mo_property #' @export -mo_year <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", TRUE), ...) { +mo_year <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ...) { if (missing(x)) { # this tries to find the data and an column x <- find_mo_col(fn = "mo_year") @@ -581,7 +581,7 @@ mo_year <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("A #' @rdname mo_property #' @export -mo_lpsn <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", TRUE), ...) { +mo_lpsn <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ...) { if (missing(x)) { # this tries to find the data and an column x <- find_mo_col(fn = "mo_lpsn") @@ -595,7 +595,7 @@ mo_lpsn <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("A #' @rdname mo_property #' @export -mo_gbif <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", TRUE), ...) { +mo_gbif <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ...) { if (missing(x)) { # this tries to find the data and an column x <- find_mo_col(fn = "mo_gbif") @@ -609,7 +609,7 @@ mo_gbif <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("A #' @rdname mo_property #' @export -mo_rank <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", TRUE), ...) { +mo_rank <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ...) { if (missing(x)) { # this tries to find the data and an column x <- find_mo_col(fn = "mo_rank") @@ -623,7 +623,7 @@ mo_rank <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("A #' @rdname mo_property #' @export -mo_taxonomy <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", TRUE), ...) { +mo_taxonomy <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ...) { if (missing(x)) { # this tries to find the data and an column x <- find_mo_col(fn = "mo_taxonomy") @@ -652,7 +652,7 @@ mo_taxonomy <- function(x, language = get_AMR_locale(), keep_synonyms = getOptio #' @rdname mo_property #' @export -mo_synonyms <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", TRUE), ...) { +mo_synonyms <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ...) { if (missing(x)) { # this tries to find the data and an column x <- find_mo_col(fn = "mo_synonyms") @@ -688,7 +688,7 @@ mo_synonyms <- function(x, language = get_AMR_locale(), keep_synonyms = getOptio #' @rdname mo_property #' @export -mo_info <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", TRUE), ...) { +mo_info <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ...) { if (missing(x)) { # this tries to find the data and an column x <- find_mo_col(fn = "mo_info") @@ -726,7 +726,7 @@ mo_info <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("A #' @rdname mo_property #' @export -mo_url <- function(x, open = FALSE, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", TRUE), ...) { +mo_url <- function(x, open = FALSE, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ...) { if (missing(x)) { # this tries to find the data and an column x <- find_mo_col(fn = "mo_url") @@ -765,7 +765,7 @@ mo_url <- function(x, open = FALSE, language = get_AMR_locale(), keep_synonyms = #' @rdname mo_property #' @export -mo_property <- function(x, property = "fullname", language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", TRUE), ...) { +mo_property <- function(x, property = "fullname", language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ...) { if (missing(x)) { # this tries to find the data and an column x <- find_mo_col(fn = "mo_property") @@ -796,13 +796,17 @@ mo_validate <- function(x, property, language, keep_synonyms = keep_synonyms, .. Lancefield <- FALSE } has_Becker_or_Lancefield <- Becker %in% c(TRUE, "all") || Lancefield %in% c(TRUE, "all") + + # get microorganisms data set, but remove synonyms if keep_synonyms is FALSE + mo_data_check <- AMR::microorganisms[which(AMR::microorganisms$status %in% if (isTRUE(keep_synonyms)) c("synonym", "accepted") else "accepted"), , drop = FALSE] - if (all(x %in% AMR::microorganisms$mo, na.rm = TRUE) && !has_Becker_or_Lancefield && isTRUE(keep_synonyms)) { + if (all(x %in% c(mo_data_check$mo, NA)) && !has_Becker_or_Lancefield) { # do nothing, just don't run the other if-else's - } else if (all(x %in% AMR::microorganisms[[property]], na.rm = TRUE) && !has_Becker_or_Lancefield && isTRUE(keep_synonyms)) { + } else if (all(x %in% c(mo_data_check[[property]], NA)) && !has_Becker_or_Lancefield) { # no need to do anything, just return it return(x) } else { + # we need to get MO codes now x <- replace_old_mo_codes(x, property = property) x <- as.mo(x, language = language, keep_synonyms = keep_synonyms, ...) } diff --git a/R/translate.R b/R/translate.R index 71db39530..cbe8e0827 100755 --- a/R/translate.R +++ b/R/translate.R @@ -151,7 +151,7 @@ translate_AMR <- function(x, language = get_AMR_locale()) { validate_language <- function(language, extra_txt = character(0)) { - if (isTRUE(trimws(tolower(language[1])) %in% c("en", "english", "", "false", NA)) || length(language) == 0) { + if (isTRUE(trimws2(tolower(language[1])) %in% c("en", "english", "", "false", NA)) || length(language) == 0) { return("en") } lang <- find_language(language[1], fallback = FALSE) diff --git a/R/zzz.R b/R/zzz.R index 78ea664d7..6014c6ca3 100755 --- a/R/zzz.R +++ b/R/zzz.R @@ -60,6 +60,7 @@ pkg_env$rsi_interpretation_history <- data.frame( interpretation = character(0), stringsAsFactors = FALSE ) +pkg_env$has_data.table <- pkg_is_available("data.table", also_load = FALSE) # determine info icon for messages utf8_supported <- isTRUE(base::l10n_info()$`UTF-8`) diff --git a/README.md b/README.md index f5133ba5a..dd571ca58 100755 --- a/README.md +++ b/README.md @@ -2,10 +2,6 @@ # `AMR` (for R) -![R-code-check](https://github.com/msberends/AMR/workflows/R-code-check/badge.svg?branch=main) -[![CodeFactor](https://www.codefactor.io/repository/github/msberends/amr/badge)](https://www.codefactor.io/repository/github/msberends/amr) -[![Codecov](https://codecov.io/gh/msberends/AMR/branch/main/graph/badge.svg)](https://app.codecov.io/gh/msberends/AMR?branch=main) - This work was published in the Journal of Statistical Software (Volume 104(3); [DOI 10.18637/jss.v104.i03](https://doi.org/10.18637/jss.v104.i03)) and formed the basis of two PhD theses ([DOI 10.33612/diss.177417131](https://doi.org/10.33612/diss.177417131) and [DOI 10.33612/diss.192486375](https://doi.org/10.33612/diss.192486375)). diff --git a/data-raw/read_EUCAST.R b/data-raw/read_EUCAST.R index 222631db6..70dee5f3a 100644 --- a/data-raw/read_EUCAST.R +++ b/data-raw/read_EUCAST.R @@ -107,8 +107,8 @@ read_EUCAST <- function(sheet, file, guideline_name) { get_mo <- function(x) { for (i in seq_len(length(x))) { - y <- trimws(unlist(strsplit(x[i], "(,|and)"))) - y <- trimws(gsub("[(].*[)]", "", y)) + y <- trimws2(unlist(strsplit(x[i], "(,|and)"))) + y <- trimws2(gsub("[(].*[)]", "", y)) y <- suppressWarnings(suppressMessages(as.mo(y, allow_uncertain = FALSE))) if (!is.null(mo_uncertainties())) uncertainties <<- add_uncertainties(uncertainties, mo_uncertainties()) y <- y[!is.na(y) & y != "UNKNOWN"] diff --git a/data-raw/reproduction_of_microorganisms.R b/data-raw/reproduction_of_microorganisms.R index f3e4e92d7..3371d6001 100644 --- a/data-raw/reproduction_of_microorganisms.R +++ b/data-raw/reproduction_of_microorganisms.R @@ -678,7 +678,7 @@ taxonomy <- taxonomy %>% # Add prevalence ---------------------------------------------------------- -# update prevalence based on taxonomy (our own JSS paper: Berends et al., 2022) +# update prevalence based on taxonomy (our own JSS paper: Berends MS et al. (2022), DOI 10.18637/jss.v104.i03) taxonomy <- taxonomy %>% mutate(prevalence = case_when( class == "Gammaproteobacteria" | diff --git a/inst/tinytest/test-_misc.R b/inst/tinytest/test-_misc.R index 80fcbcaf7..9908041d4 100755 --- a/inst/tinytest/test-_misc.R +++ b/inst/tinytest/test-_misc.R @@ -41,6 +41,9 @@ expect_equal(strrep(c("A", "B"), c(5, 2)), c("AAAAA", "BB")) expect_equal(trimws(" test "), "test") expect_equal(trimws(" test ", "l"), "test ") expect_equal(trimws(" test ", "r"), " test") +expect_equal(trimws2(" test "), "test") +expect_equal(trimws2(" test ", "l"), "test ") +expect_equal(trimws2(" test ", "r"), " test") expect_warning(AMR:::generate_warning_abs_missing(c("AMP", "AMX"))) expect_warning(AMR:::generate_warning_abs_missing(c("AMP", "AMX"), any = TRUE)) diff --git a/inst/tinytest/test-zzz.R b/inst/tinytest/test-zzz.R index 5702c3329..47d5490c2 100644 --- a/inst/tinytest/test-zzz.R +++ b/inst/tinytest/test-zzz.R @@ -32,7 +32,9 @@ # functions used by import_fn() import_functions <- c( + "%chin%" = "data.table", "anti_join" = "dplyr", + "chmatch" = "data.table", "cur_column" = "dplyr", "full_join" = "dplyr", "has_internet" = "curl", diff --git a/man/as.mo.Rd b/man/as.mo.Rd index e47f05600..dedbabfc7 100644 --- a/man/as.mo.Rd +++ b/man/as.mo.Rd @@ -16,7 +16,7 @@ as.mo( Lancefield = FALSE, minimum_matching_score = NULL, allow_uncertain = TRUE, - keep_synonyms = getOption("AMR_keep_synonyms", TRUE), + keep_synonyms = getOption("AMR_keep_synonyms", FALSE), reference_df = get_mo_source(), ignore_pattern = getOption("AMR_ignore_pattern", NULL), language = get_AMR_locale(), @@ -37,11 +37,11 @@ mo_reset_session() \arguments{ \item{x}{a \link{character} vector or a \link{data.frame} with one or two columns} -\item{Becker}{a \link{logical} to indicate whether staphylococci should be categorised into coagulase-negative staphylococci ("CoNS") and coagulase-positive staphylococci ("CoPS") instead of their own species, according to Karsten Becker \emph{et al.} (1,2,3). +\item{Becker}{a \link{logical} to indicate whether staphylococci should be categorised into coagulase-negative staphylococci ("CoNS") and coagulase-positive staphylococci ("CoPS") instead of their own species, according to Karsten Becker \emph{et al.} (see Source). This excludes \emph{Staphylococcus aureus} at default, use \code{Becker = "all"} to also categorise \emph{S. aureus} as "CoPS".} -\item{Lancefield}{a \link{logical} to indicate whether a beta-haemolytic \emph{Streptococcus} should be categorised into Lancefield groups instead of their own species, according to Rebecca C. Lancefield (4). These streptococci will be categorised in their first group, e.g. \emph{Streptococcus dysgalactiae} will be group C, although officially it was also categorised into groups G and L. +\item{Lancefield}{a \link{logical} to indicate whether a beta-haemolytic \emph{Streptococcus} should be categorised into Lancefield groups instead of their own species, according to Rebecca C. Lancefield (see Source). These streptococci will be categorised in their first group, e.g. \emph{Streptococcus dysgalactiae} will be group C, although officially it was also categorised into groups G and L. This excludes enterococci at default (who are in group D), use \code{Lancefield = "all"} to also categorise all enterococci as group D.} @@ -49,7 +49,7 @@ This excludes enterococci at default (who are in group D), use \code{Lancefield \item{allow_uncertain}{a number between \code{0} (or \code{"none"}) and \code{3} (or \code{"all"}), or \code{TRUE} (= \code{2}) or \code{FALSE} (= \code{0}) to indicate whether the input should be checked for less probable results, see \emph{Details}} -\item{keep_synonyms}{a \link{logical} to indicate if old, previously valid taxonomic names must be preserved and not be corrected to currently accepted names. The default is \code{TRUE}, which will return a note if old taxonomic names are returned. The default can be set with \code{options(AMR_keep_synonyms = ...)}.} +\item{keep_synonyms}{a \link{logical} to indicate if old, previously valid taxonomic names must be preserved and not be corrected to currently accepted names. The default is \code{FALSE}, which will return a note if old taxonomic names were processed. The default can be set with \code{options(AMR_keep_synonyms = TRUE)} or \code{options(AMR_keep_synonyms = FALSE)}.} \item{reference_df}{a \link{data.frame} to be used for extra reference when translating \code{x} to a valid \code{\link{mo}}. See \code{\link[=set_mo_source]{set_mo_source()}} and \code{\link[=get_mo_source]{get_mo_source()}} to automate the usage of your own codes (e.g. used in your analysis or organisation).} @@ -104,22 +104,17 @@ This will lead to the effect that e.g. \code{"E. coli"} (a microorganism highly \subsection{Coping with Uncertain Results}{ -In addition, the \code{\link[=as.mo]{as.mo()}} function can differentiate four levels of uncertainty to guess valid results: +Users can control the coercion rules by setting the \code{allow_uncertain} argument in the \code{\link[=as.mo]{as.mo()}} function. The following values or levels can be used: \itemize{ -\item Uncertainty level 0: no additional rules are applied; -\item Uncertainty level 1: allow previously accepted (but now invalid) taxonomic names and minor spelling errors; -\item Uncertainty level 2: allow all of level 1, strip values between brackets, inverse the words of the input, strip off text elements from the end keeping at least two elements; -\item Uncertainty level 3: allow all of level 1 and 2, strip off text elements from the end, allow any part of a taxonomic name. +\item \code{0}: no additional rules are applied; +\item \code{1}: allow previously accepted (but now invalid) taxonomic names +\item \code{2}: allow all of \code{1}, strip values between brackets, inverse the words of the input, strip off text elements from the end keeping at least two elements; +\item \code{3}: allow all of level \code{1} and \code{2}, strip off text elements from the end, allow any part of a taxonomic name; +\item \code{TRUE} (default): equivalent to \code{2}; +\item \code{FALSE}: equivalent to `0``. } -The level of uncertainty can be set using the argument \code{allow_uncertain}. The default is \code{allow_uncertain = TRUE}, which is equal to uncertainty level 2. Using \code{allow_uncertain = FALSE} is equal to uncertainty level 0 and will skip all rules. You can also use e.g. \code{as.mo(..., allow_uncertain = 1)} to only allow up to level 1 uncertainty. - -With the default setting (\code{allow_uncertain = TRUE}, level 2), below examples will lead to valid results: -\itemize{ -\item \code{"Streptococcus group B (known as S. agalactiae)"}. The text between brackets will be removed and a warning will be thrown that the result \emph{Streptococcus group B} (\code{B_STRPT_GRPB}) needs review. -\item \code{"S. aureus - please mind: MRSA"}. The last word will be stripped, after which the function will try to find a match. If it does not, the second last word will be stripped, etc. Again, a warning will be thrown that the result \emph{Staphylococcus aureus} (\code{B_STPHY_AURS}) needs review. -\item \code{"Fluoroquinolone-resistant Neisseria gonorrhoeae"}. The first word will be stripped, after which the function will try to find a match. A warning will be thrown that the result \emph{Neisseria gonorrhoeae} (\code{B_NESSR_GNRR}) needs review. -} +The default is \code{allow_uncertain = TRUE}, which is equal to uncertainty level 2. Using \code{allow_uncertain = FALSE} is equal to uncertainty level 0 and will skip all rules. You can also use e.g. \code{as.mo(..., allow_uncertain = 1)} to only allow up to level 1 uncertainty. There are three helper functions that can be run after using the \code{\link[=as.mo]{as.mo()}} function: \itemize{ @@ -137,12 +132,13 @@ The coercion rules consider the prevalence of microorganisms in humans grouped i \section{Source}{ \enumerate{ -\item Becker K. \emph{et al.} (2014). \strong{Coagulase-Negative Staphylococci.} \emph{Clin Microbiol Rev.} 27(4): 870-926; \doi{10.1128/CMR.00109-13} -\item Becker K. \emph{et al.} (2019). \strong{Implications of identifying the recently defined members of the \emph{S. aureus} complex, \emph{S. argenteus} and \emph{S. schweitzeri}: A position paper of members of the ESCMID Study Group for staphylococci and Staphylococcal Diseases (ESGS).} \emph{Clin Microbiol Infect}; \doi{10.1016/j.cmi.2019.02.028} -\item Becker K. \emph{et al.} (2020). \strong{Emergence of coagulase-negative staphylococci} \emph{Expert Rev Anti Infect Ther.} 18(4):349-366; \doi{10.1080/14787210.2020.1730813} -\item Lancefield R.C. (1933). \strong{A serological differentiation of human and other groups of hemolytic streptococci}. \emph{J Exp Med.} 57(4): 571-95; \doi{10.1084/jem.57.4.571} -\item Berends M.S. \emph{et al.} (2022). \strong{Trends in Occurrence and Phenotypic Resistance of Coagulase-Negative Staphylococci (CoNS) Found in Human Blood in the Northern Netherlands between 2013 and 2019} \emph{Microorganisms} 10(9), 1801; \doi{10.3390/microorganisms10091801} -\item Parte, A.C. \emph{et al.} (2020). \strong{List of Prokaryotic names with Standing in Nomenclature (LPSN) moves to the DSMZ.} International Journal of Systematic and Evolutionary Microbiology, 70, 5607-5612; \doi{10.1099/ijsem.0.004332}. Accessed from \url{https://lpsn.dsmz.de} on 12 September, 2022. +\item Berends MS \emph{et al.} (2022). \strong{AMR: An R Package for Working with Antimicrobial Resistance Data}. \emph{Journal of Statistical Software}, 104(3), 1-31; \doi{10.18637/jss.v104.i03} +\item Becker K \emph{et al.} (2014). \strong{Coagulase-Negative Staphylococci.} \emph{Clin Microbiol Rev.} 27(4): 870-926; \doi{10.1128/CMR.00109-13} +\item Becker K \emph{et al.} (2019). \strong{Implications of identifying the recently defined members of the \emph{S. aureus} complex, \emph{S. argenteus} and \emph{S. schweitzeri}: A position paper of members of the ESCMID Study Group for staphylococci and Staphylococcal Diseases (ESGS).} \emph{Clin Microbiol Infect}; \doi{10.1016/j.cmi.2019.02.028} +\item Becker K \emph{et al.} (2020). \strong{Emergence of coagulase-negative staphylococci} \emph{Expert Rev Anti Infect Ther.} 18(4):349-366; \doi{10.1080/14787210.2020.1730813} +\item Lancefield RC (1933). \strong{A serological differentiation of human and other groups of hemolytic streptococci}. \emph{J Exp Med.} 57(4): 571-95; \doi{10.1084/jem.57.4.571} +\item Berends MS \emph{et al.} (2022). \strong{Trends in Occurrence and Phenotypic Resistance of Coagulase-Negative Staphylococci (CoNS) Found in Human Blood in the Northern Netherlands between 2013 and 2019} \emph{Microorganisms} 10(9), 1801; \doi{10.3390/microorganisms10091801} +\item Parte, AC \emph{et al.} (2020). \strong{List of Prokaryotic names with Standing in Nomenclature (LPSN) moves to the DSMZ.} International Journal of Systematic and Evolutionary Microbiology, 70, 5607-5612; \doi{10.1099/ijsem.0.004332}. Accessed from \url{https://lpsn.dsmz.de} on 12 September, 2022. \item GBIF Secretariat (November 26, 2021). GBIF Backbone Taxonomy. Checklist dataset \doi{10.15468/39omei}. Accessed from \url{https://www.gbif.org} on 12 September, 2022. \item Public Health Information Network Vocabulary Access and Distribution System (PHIN VADS). US Edition of SNOMED CT from 1 September 2020. Value Set Name 'Microoganism', OID 2.16.840.1.114222.4.11.1009 (v12). URL: \url{https://phinvads.cdc.gov} } diff --git a/man/microorganisms.Rd b/man/microorganisms.Rd index 5cc47c84c..e78fb249d 100755 --- a/man/microorganisms.Rd +++ b/man/microorganisms.Rd @@ -26,7 +26,7 @@ A \link[tibble:tibble]{tibble} with 48,787 observations and 22 variables: } \source{ \itemize{ -\item Parte, A.C. \emph{et al.} (2020). \strong{List of Prokaryotic names with Standing in Nomenclature (LPSN) moves to the DSMZ.} International Journal of Systematic and Evolutionary Microbiology, 70, 5607-5612; \doi{10.1099/ijsem.0.004332}. Accessed from \url{https://lpsn.dsmz.de} on 12 September, 2022. +\item Parte, AC \emph{et al.} (2020). \strong{List of Prokaryotic names with Standing in Nomenclature (LPSN) moves to the DSMZ.} International Journal of Systematic and Evolutionary Microbiology, 70, 5607-5612; \doi{10.1099/ijsem.0.004332}. Accessed from \url{https://lpsn.dsmz.de} on 12 September, 2022. \item GBIF Secretariat (November 26, 2021). GBIF Backbone Taxonomy. Checklist dataset \doi{10.15468/39omei}. Accessed from \url{https://www.gbif.org} on 12 September, 2022. \item Public Health Information Network Vocabulary Access and Distribution System (PHIN VADS). US Edition of SNOMED CT from 1 September 2020. Value Set Name 'Microoganism', OID 2.16.840.1.114222.4.11.1009 (v12). URL: \url{https://phinvads.cdc.gov} } diff --git a/man/mo_property.Rd b/man/mo_property.Rd index 8ca6d7c86..8c7cef819 100644 --- a/man/mo_property.Rd +++ b/man/mo_property.Rd @@ -37,126 +37,126 @@ mo_name( x, language = get_AMR_locale(), - keep_synonyms = getOption("AMR_keep_synonyms", TRUE), + keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ... ) mo_fullname( x, language = get_AMR_locale(), - keep_synonyms = getOption("AMR_keep_synonyms", TRUE), + keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ... ) mo_shortname( x, language = get_AMR_locale(), - keep_synonyms = getOption("AMR_keep_synonyms", TRUE), + keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ... ) mo_subspecies( x, language = get_AMR_locale(), - keep_synonyms = getOption("AMR_keep_synonyms", TRUE), + keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ... ) mo_species( x, language = get_AMR_locale(), - keep_synonyms = getOption("AMR_keep_synonyms", TRUE), + keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ... ) mo_genus( x, language = get_AMR_locale(), - keep_synonyms = getOption("AMR_keep_synonyms", TRUE), + keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ... ) mo_family( x, language = get_AMR_locale(), - keep_synonyms = getOption("AMR_keep_synonyms", TRUE), + keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ... ) mo_order( x, language = get_AMR_locale(), - keep_synonyms = getOption("AMR_keep_synonyms", TRUE), + keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ... ) mo_class( x, language = get_AMR_locale(), - keep_synonyms = getOption("AMR_keep_synonyms", TRUE), + keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ... ) mo_phylum( x, language = get_AMR_locale(), - keep_synonyms = getOption("AMR_keep_synonyms", TRUE), + keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ... ) mo_kingdom( x, language = get_AMR_locale(), - keep_synonyms = getOption("AMR_keep_synonyms", TRUE), + keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ... ) mo_domain( x, language = get_AMR_locale(), - keep_synonyms = getOption("AMR_keep_synonyms", TRUE), + keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ... ) mo_type( x, language = get_AMR_locale(), - keep_synonyms = getOption("AMR_keep_synonyms", TRUE), + keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ... ) mo_status( x, language = get_AMR_locale(), - keep_synonyms = getOption("AMR_keep_synonyms", TRUE), + keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ... ) mo_gramstain( x, language = get_AMR_locale(), - keep_synonyms = getOption("AMR_keep_synonyms", TRUE), + keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ... ) mo_is_gram_negative( x, language = get_AMR_locale(), - keep_synonyms = getOption("AMR_keep_synonyms", TRUE), + keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ... ) mo_is_gram_positive( x, language = get_AMR_locale(), - keep_synonyms = getOption("AMR_keep_synonyms", TRUE), + keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ... ) mo_is_yeast( x, language = get_AMR_locale(), - keep_synonyms = getOption("AMR_keep_synonyms", TRUE), + keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ... ) @@ -164,77 +164,77 @@ mo_is_intrinsic_resistant( x, ab, language = get_AMR_locale(), - keep_synonyms = getOption("AMR_keep_synonyms", TRUE), + keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ... ) mo_snomed( x, language = get_AMR_locale(), - keep_synonyms = getOption("AMR_keep_synonyms", TRUE), + keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ... ) mo_ref( x, language = get_AMR_locale(), - keep_synonyms = getOption("AMR_keep_synonyms", TRUE), + keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ... ) mo_authors( x, language = get_AMR_locale(), - keep_synonyms = getOption("AMR_keep_synonyms", TRUE), + keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ... ) mo_year( x, language = get_AMR_locale(), - keep_synonyms = getOption("AMR_keep_synonyms", TRUE), + keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ... ) mo_lpsn( x, language = get_AMR_locale(), - keep_synonyms = getOption("AMR_keep_synonyms", TRUE), + keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ... ) mo_gbif( x, language = get_AMR_locale(), - keep_synonyms = getOption("AMR_keep_synonyms", TRUE), + keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ... ) mo_rank( x, language = get_AMR_locale(), - keep_synonyms = getOption("AMR_keep_synonyms", TRUE), + keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ... ) mo_taxonomy( x, language = get_AMR_locale(), - keep_synonyms = getOption("AMR_keep_synonyms", TRUE), + keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ... ) mo_synonyms( x, language = get_AMR_locale(), - keep_synonyms = getOption("AMR_keep_synonyms", TRUE), + keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ... ) mo_info( x, language = get_AMR_locale(), - keep_synonyms = getOption("AMR_keep_synonyms", TRUE), + keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ... ) @@ -242,7 +242,7 @@ mo_url( x, open = FALSE, language = get_AMR_locale(), - keep_synonyms = getOption("AMR_keep_synonyms", TRUE), + keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ... ) @@ -250,7 +250,7 @@ mo_property( x, property = "fullname", language = get_AMR_locale(), - keep_synonyms = getOption("AMR_keep_synonyms", TRUE), + keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ... ) } @@ -259,7 +259,7 @@ mo_property( \item{language}{language to translate text like "no growth", which defaults to the system language (see \code{\link[=get_AMR_locale]{get_AMR_locale()}})} -\item{keep_synonyms}{a \link{logical} to indicate if old, previously valid taxonomic names must be preserved and not be corrected to currently accepted names. The default is \code{TRUE}, which will return a note if old taxonomic names are returned. The default can be set with \code{options(AMR_keep_synonyms = ...)}.} +\item{keep_synonyms}{a \link{logical} to indicate if old, previously valid taxonomic names must be preserved and not be corrected to currently accepted names. The default is \code{FALSE}, which will return a note if old taxonomic names were processed. The default can be set with \code{options(AMR_keep_synonyms = TRUE)} or \code{options(AMR_keep_synonyms = FALSE)}.} \item{...}{other arguments passed on to \code{\link[=as.mo]{as.mo()}}, such as 'allow_uncertain' and 'ignore_pattern'} @@ -337,12 +337,13 @@ All matches are sorted descending on their matching score and for all user input \section{Source}{ \enumerate{ -\item Becker K. \emph{et al.} (2014). \strong{Coagulase-Negative Staphylococci.} \emph{Clin Microbiol Rev.} 27(4): 870-926; \doi{10.1128/CMR.00109-13} -\item Becker K. \emph{et al.} (2019). \strong{Implications of identifying the recently defined members of the \emph{S. aureus} complex, \emph{S. argenteus} and \emph{S. schweitzeri}: A position paper of members of the ESCMID Study Group for staphylococci and Staphylococcal Diseases (ESGS).} \emph{Clin Microbiol Infect}; \doi{10.1016/j.cmi.2019.02.028} -\item Becker K. \emph{et al.} (2020). \strong{Emergence of coagulase-negative staphylococci} \emph{Expert Rev Anti Infect Ther.} 18(4):349-366; \doi{10.1080/14787210.2020.1730813} -\item Lancefield R.C. (1933). \strong{A serological differentiation of human and other groups of hemolytic streptococci}. \emph{J Exp Med.} 57(4): 571-95; \doi{10.1084/jem.57.4.571} -\item Berends M.S. \emph{et al.} (2022). \strong{Trends in Occurrence and Phenotypic Resistance of Coagulase-Negative Staphylococci (CoNS) Found in Human Blood in the Northern Netherlands between 2013 and 2019} \emph{Microorganisms} 10(9), 1801; \doi{10.3390/microorganisms10091801} -\item Parte, A.C. \emph{et al.} (2020). \strong{List of Prokaryotic names with Standing in Nomenclature (LPSN) moves to the DSMZ.} International Journal of Systematic and Evolutionary Microbiology, 70, 5607-5612; \doi{10.1099/ijsem.0.004332}. Accessed from \url{https://lpsn.dsmz.de} on 12 September, 2022. +\item Berends MS \emph{et al.} (2022). \strong{AMR: An R Package for Working with Antimicrobial Resistance Data}. \emph{Journal of Statistical Software}, 104(3), 1-31; \doi{10.18637/jss.v104.i03} +\item Becker K \emph{et al.} (2014). \strong{Coagulase-Negative Staphylococci.} \emph{Clin Microbiol Rev.} 27(4): 870-926; \doi{10.1128/CMR.00109-13} +\item Becker K \emph{et al.} (2019). \strong{Implications of identifying the recently defined members of the \emph{S. aureus} complex, \emph{S. argenteus} and \emph{S. schweitzeri}: A position paper of members of the ESCMID Study Group for staphylococci and Staphylococcal Diseases (ESGS).} \emph{Clin Microbiol Infect}; \doi{10.1016/j.cmi.2019.02.028} +\item Becker K \emph{et al.} (2020). \strong{Emergence of coagulase-negative staphylococci} \emph{Expert Rev Anti Infect Ther.} 18(4):349-366; \doi{10.1080/14787210.2020.1730813} +\item Lancefield RC (1933). \strong{A serological differentiation of human and other groups of hemolytic streptococci}. \emph{J Exp Med.} 57(4): 571-95; \doi{10.1084/jem.57.4.571} +\item Berends MS \emph{et al.} (2022). \strong{Trends in Occurrence and Phenotypic Resistance of Coagulase-Negative Staphylococci (CoNS) Found in Human Blood in the Northern Netherlands between 2013 and 2019} \emph{Microorganisms} 10(9), 1801; \doi{10.3390/microorganisms10091801} +\item Parte, AC \emph{et al.} (2020). \strong{List of Prokaryotic names with Standing in Nomenclature (LPSN) moves to the DSMZ.} International Journal of Systematic and Evolutionary Microbiology, 70, 5607-5612; \doi{10.1099/ijsem.0.004332}. Accessed from \url{https://lpsn.dsmz.de} on 12 September, 2022. \item GBIF Secretariat (November 26, 2021). GBIF Backbone Taxonomy. Checklist dataset \doi{10.15468/39omei}. Accessed from \url{https://www.gbif.org} on 12 September, 2022. \item Public Health Information Network Vocabulary Access and Distribution System (PHIN VADS). US Edition of SNOMED CT from 1 September 2020. Value Set Name 'Microoganism', OID 2.16.840.1.114222.4.11.1009 (v12). URL: \url{https://phinvads.cdc.gov} } diff --git a/tests/tinytest.R b/tests/tinytest.R index 3508620ce..b576cadc1 100644 --- a/tests/tinytest.R +++ b/tests/tinytest.R @@ -40,7 +40,7 @@ if (identical(Sys.getenv("R_RUN_TINYTEST"), "true")) { library(AMR) # set language set_AMR_locale("English") - # get dir.exists(), trimws() and strrep() if on old R + # set some functions if on old R if (getRversion() < "3.2.0") { anyNA <- AMR:::anyNA dir.exists <- AMR:::dir.exists @@ -54,6 +54,7 @@ if (identical(Sys.getenv("R_RUN_TINYTEST"), "true")) { } if (getRversion() < "3.5.0") { isFALSE <- AMR:::isFALSE + # trims() was introduced in 3.3.0, but its argument `whitespace` only in 3.5.0 trimws <- AMR:::trimws } if (getRversion() < "3.6.0") {