diff --git a/DESCRIPTION b/DESCRIPTION index 1c069aec..dc718674 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR -Version: 0.5.0.9024 -Date: 2019-03-18 +Version: 0.5.0.9025 +Date: 2019-03-26 Title: Antimicrobial Resistance Analysis Authors@R: c( person( diff --git a/NAMESPACE b/NAMESPACE index ed636cd8..37afc23d 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -23,6 +23,7 @@ S3method(plot,mic) S3method(plot,resistance_predict) S3method(plot,rsi) S3method(print,atc) +S3method(print,catalogue_of_life_version) S3method(print,frequency_tbl) S3method(print,mic) S3method(print,mo) @@ -191,6 +192,7 @@ exportMethods(plot.frequency_tbl) exportMethods(plot.mic) exportMethods(plot.rsi) exportMethods(print.atc) +exportMethods(print.catalogue_of_life_version) exportMethods(print.frequency_tbl) exportMethods(print.mic) exportMethods(print.mo) @@ -221,6 +223,7 @@ importFrom(crayon,red) importFrom(crayon,silver) importFrom(crayon,strip_style) importFrom(crayon,underline) +importFrom(crayon,white) importFrom(crayon,yellow) importFrom(data.table,as.data.table) importFrom(data.table,data.table) @@ -292,3 +295,4 @@ importFrom(stats,sd) importFrom(utils,browseURL) importFrom(utils,browseVignettes) importFrom(utils,installed.packages) +importFrom(utils,menu) diff --git a/R/catalogue_of_life.R b/R/catalogue_of_life.R index 3cd597f0..f15d7c80 100755 --- a/R/catalogue_of_life.R +++ b/R/catalogue_of_life.R @@ -81,10 +81,10 @@ NULL #' #' This function returns information about the included data from the Catalogue of Life. It also shows if the included version is their latest annual release. The Catalogue of Life releases their annual release in March each year. #' @seealso \code{\link{microorganisms}} -#' @details The list item \code{is_latest_annual_release} is based on the system date. +#' @details The list item \code{...$catalogue_of_life$is_latest_annual_release} is based on the system date. #' #' For DSMZ, see \code{?microorganisms}. -#' @return a \code{list}, invisibly +#' @return a \code{list}, which prints in pretty format #' @inheritSection catalogue_of_life Catalogue of Life #' @inheritSection AMR Read more on our website! #' @importFrom crayon bold underline @@ -99,8 +99,8 @@ catalogue_of_life_version <- function() { lst <- list(catalogue_of_life = list(version = gsub("{year}", catalogue_of_life$year, catalogue_of_life$version, fixed = TRUE), url = gsub("{year}", catalogue_of_life$year, catalogue_of_life$url_CoL, fixed = TRUE), - # annual release always somewhere in March, so before April is TRUE, FALSE otherwise - is_latest_annual_release = Sys.Date() < as.Date(paste0(catalogue_of_life$year + 1, "-04-01")), + # annual release always somewhere in May, so before June is TRUE, FALSE otherwise + is_latest_annual_release = Sys.Date() < as.Date(paste0(catalogue_of_life$year + 1, "-06-01")), n = nrow(filter(AMR::microorganisms, source == "CoL"))), deutsche_sammlung_von_mikroorganismen_und_zellkulturen = list(version = "Prokaryotic Nomenclature Up-to-Date from DSMZ", @@ -112,7 +112,16 @@ catalogue_of_life_version <- function() { n_total_species = nrow(AMR::microorganisms), n_total_synonyms = nrow(AMR::microorganisms.old))) - cat(paste0(bold("Included in this package are:\n\n"), + structure(.Data = lst, + class = c("catalogue_of_life_version", "list")) +} + +#' @exportMethod print.catalogue_of_life_version +#' @export +#' @noRd +print.catalogue_of_life_version <- function(x, ...) { + lst <- x + cat(paste0(bold("Included in this AMR package are:\n\n"), underline(lst$catalogue_of_life$version), "\n", " Available at: ", lst$catalogue_of_life$url, "\n", " Number of included species: ", format(lst$catalogue_of_life$n, big.mark = ","), "\n", @@ -121,9 +130,7 @@ catalogue_of_life_version <- function() { lst$deutsche_sammlung_von_mikroorganismen_und_zellkulturen$yearmonth, ")")), "\n", " Available at: ", lst$deutsche_sammlung_von_mikroorganismen_und_zellkulturen$url, "\n", " Number of included species: ", format(lst$deutsche_sammlung_von_mikroorganismen_und_zellkulturen$n, big.mark = ","), "\n\n", - "Total number of species included: ", format(lst$total_included$n_total_species, big.mark = ","), "\n", - "Total number of synonyms included: ", format(lst$total_included$n_total_synonyms, big.mark = ","), "\n\n", + "=> Total number of species included: ", format(lst$total_included$n_total_species, big.mark = ","), "\n", + "=> Total number of synonyms included: ", format(lst$total_included$n_total_synonyms, big.mark = ","), "\n\n", "See for more info ?microorganisms and ?catalogue_of_life.\n")) - - return(base::invisible(lst)) } diff --git a/R/count.R b/R/count.R index 13791582..b1036cdc 100755 --- a/R/count.R +++ b/R/count.R @@ -69,7 +69,7 @@ #' S = count_S(cipr), #' n1 = count_all(cipr), # the actual total; sum of all three #' n2 = n_rsi(cipr), # same - analogous to n_distinct -#' total = n()) # NOT the amount of tested isolates! +#' total = n()) # NOT the number of tested isolates! #' #' # Count co-resistance between amoxicillin/clav acid and gentamicin, #' # so we can see that combination therapy does a lot more than mono therapy. diff --git a/R/filter_ab_class.R b/R/filter_ab_class.R index 9884ae42..5d4c71a9 100644 --- a/R/filter_ab_class.R +++ b/R/filter_ab_class.R @@ -19,11 +19,11 @@ # Visit our website for more info: https://msberends.gitab.io/AMR. # # ==================================================================== # -#' Filter on antibiotic class +#' Filter isolates on result in antibiotic class #' -#' Filter on specific antibiotic variables based on their class (ATC groups). +#' Filter isolates on results in specific antibiotic variables based on their class (ATC groups). This makes it easy to get a list of isolates that were tested for e.g. any aminoglycoside. #' @param tbl a data set -#' @param ab_class an antimicrobial class, like \code{"carbapenems"} +#' @param ab_class an antimicrobial class, like \code{"carbapenems"}. More specifically, this should be a text that can be found in a 4th level ATC group (chemical subgroup) or a 5th level ATC group (chemical substance), please see \href{https://www.whocc.no/atc/structure_and_principles/}{this explanation on the WHOCC website}. #' @param result an antibiotic result: S, I or R (or a combination of more of them) #' @param scope the scope to check which variables to check, can be \code{"any"} (default) or \code{"all"} #' @param ... parameters passed on to \code{\link[dplyr]{filter_at}} @@ -54,8 +54,14 @@ #' # filter on isolates that show resistance to #' # any aminoglycoside and any fluoroquinolone #' septic_patients %>% -#' filter_aminoglycosides("R", "any") %>% -#' filter_fluoroquinolones("R", "any") +#' filter_aminoglycosides("R") %>% +#' filter_fluoroquinolones("R") +#' +#' # filter on isolates that show resistance to +#' # all aminoglycosides and all fluoroquinolones +#' septic_patients %>% +#' filter_aminoglycosides("R", "all") %>% +#' filter_fluoroquinolones("R", "all") filter_ab_class <- function(tbl, ab_class, result = NULL, @@ -65,6 +71,8 @@ filter_ab_class <- function(tbl, if (is.null(result)) { result <- c("S", "I", "R") } + # make result = "IR" work too: + result <- unlist(strsplit(result, "")) if (!all(result %in% c("S", "I", "R"))) { stop("`result` must be one or more of: S, I, R", call. = FALSE) @@ -88,12 +96,20 @@ filter_ab_class <- function(tbl, } else { scope_txt <- " and " scope_fn <- all_vars + if (length(vars_df) > 1) { + operator <- gsub("is", "are", operator) + } } - message(blue(paste0("Filtering on ", atc_groups, ": ", scope, " of ", + if (length(vars_df) > 1) { + scope <- paste(scope, "of ") + } else { + scope <- "" + } + message(blue(paste0("Filtering on ", atc_groups, ": ", scope, paste(bold(paste0("`", vars_df, "`")), collapse = scope_txt), operator, toString(result)))) tbl %>% - filter_at(.vars = vars(vars_df), - .vars_predicate = scope_fn(. %in% result), + filter_at(vars(vars_df), + scope_fn(. %in% result), ...) } else { warning(paste0("no antibiotics of class ", atc_groups, " found, leaving data unchanged"), call. = FALSE) @@ -244,7 +260,7 @@ filter_tetracyclines <- function(tbl, ...) } -#' @importFrom dplyr %>% filter_at any_vars select +#' @importFrom dplyr %>% filter_at vars any_vars select ab_class_vars <- function(ab_class) { ab_vars <- AMR::antibiotics %>% filter_at(vars(c("atc_group1", "atc_group2")), any_vars(. %like% ab_class)) %>% @@ -260,10 +276,23 @@ ab_class_vars <- function(ab_class) { #' @importFrom dplyr %>% filter pull ab_class_atcgroups <- function(ab_class) { - AMR::antibiotics %>% - filter(atc %in% ab_class_vars(ab_class)) %>% - pull("atc_group2") %>% - unique() %>% - tolower() %>% - paste(collapse = "/") + ifelse(ab_class %in% c("aminoglycoside", + "carbapenem", + "cephalosporin", + "first-generation cephalosporin", + "second-generation cephalosporin", + "third-generation cephalosporin", + "fourth-generation cephalosporin", + "fluoroquinolone", + "glycopeptide", + "macrolide", + "tetracycline"), + paste0(ab_class, "s"), + AMR::antibiotics %>% + filter(atc %in% ab_class_vars(ab_class)) %>% + pull("atc_group2") %>% + unique() %>% + tolower() %>% + paste(collapse = "/") + ) } diff --git a/R/freq.R b/R/freq.R index d498932b..cdb116eb 100755 --- a/R/freq.R +++ b/R/freq.R @@ -417,9 +417,9 @@ frequency_tbl <- function(x, header_list$outliers_unique <- n_distinct(boxplot.stats(x)$out) } - if (NROW(x) > 0 & any(class(x) == "rsi")) { - header_list$count_S <- sum(x == "S", na.rm = TRUE) - header_list$count_IR <- sum(x %in% c("I", "R"), na.rm = TRUE) + if (any(class(x) == "rsi")) { + header_list$count_S <- max(0, sum(x == "S", na.rm = TRUE), na.rm = TRUE) + header_list$count_IR <- max(0, sum(x %in% c("I", "R"), na.rm = TRUE), na.rm = TRUE) } formatdates <- "%e %B %Y" # = d mmmm yyyy @@ -564,18 +564,14 @@ format_header <- function(x, markdown = FALSE, decimal.mark = ".", big.mark = ", # FORMATTING # rsi if (has_length == TRUE & any(x_class == "rsi")) { - if (header$count_S < header$count_IR) { - ratio <- paste0(green(1), ":", red(format(header$count_IR / header$count_S, - digits = 1, nsmall = 1, decimal.mark = decimal.mark, big.mark = big.mark))) - } else { - ratio <- paste0(green(format(header$count_S / header$count_IR, - digits = 1, nsmall = 1, decimal.mark = decimal.mark, big.mark = big.mark)), - ":", red(1)) + ab <- tryCatch(atc_name(attributes(x)$opt$vars), error = function(e) NA) + if (!is.na(ab)) { + header$drug <- ab[1L] } - header$`%IR` <- paste((header$count_IR / header$length) %>% percent(force_zero = TRUE, round = digits, decimal.mark = decimal.mark), - paste0("(ratio ", ratio, ")")) - header <- header[!names(header) %in% c("count_S", "count_IR")] + header$`%IR` <- percent(header$count_IR / (header$count_S + header$count_IR), + force_zero = TRUE, round = digits, decimal.mark = decimal.mark) } + header <- header[!names(header) %in% c("count_S", "count_IR")] # dates if (!is.null(header$date_format)) { if (header$date_format == "%H:%M:%S") { diff --git a/R/ggplot_rsi.R b/R/ggplot_rsi.R index bb0961b2..9a2d8730 100755 --- a/R/ggplot_rsi.R +++ b/R/ggplot_rsi.R @@ -164,9 +164,7 @@ ggplot_rsi <- function(data, datalabels.colour = "grey15", ...) { - if (!"ggplot2" %in% rownames(installed.packages())) { - stop('this function requires the ggplot2 package.', call. = FALSE) - } + stopifnot_installed_package("ggplot2") fun_name <- deparse(substitute(fun)) if (!fun_name %in% c("portion_df", "count_df")) { @@ -235,6 +233,8 @@ geom_rsi <- function(position = NULL, fun = count_df, ...) { + stopifnot_installed_package("ggplot2") + fun_name <- deparse(substitute(fun)) if (!fun_name %in% c("portion_df", "count_df", "fun")) { stop("`fun` must be portion_df or count_df") @@ -279,6 +279,8 @@ geom_rsi <- function(position = NULL, #' @export facet_rsi <- function(facet = c("Interpretation", "Antibiotic"), nrow = NULL) { + stopifnot_installed_package("ggplot2") + facet <- facet[1] # we work with aes_string later on @@ -302,6 +304,8 @@ facet_rsi <- function(facet = c("Interpretation", "Antibiotic"), nrow = NULL) { #' @rdname ggplot_rsi #' @export scale_y_percent <- function(breaks = seq(0, 1, 0.1), limits = NULL) { + stopifnot_installed_package("ggplot2") + if (all(breaks[breaks != 0] > 1)) { breaks <- breaks / 100 } @@ -313,6 +317,7 @@ scale_y_percent <- function(breaks = seq(0, 1, 0.1), limits = NULL) { #' @rdname ggplot_rsi #' @export scale_rsi_colours <- function() { + stopifnot_installed_package("ggplot2") #ggplot2::scale_fill_brewer(palette = "RdYlGn") ggplot2::scale_fill_manual(values = c("#b22222", "#ae9c20", "#7cfc00")) } @@ -320,6 +325,7 @@ scale_rsi_colours <- function() { #' @rdname ggplot_rsi #' @export theme_rsi <- function() { + stopifnot_installed_package("ggplot2") ggplot2::theme_minimal() + ggplot2::theme(panel.grid.major.x = ggplot2::element_blank(), panel.grid.minor = ggplot2::element_blank(), @@ -332,6 +338,7 @@ labels_rsi_count <- function(position = NULL, x = "Antibiotic", datalabels.size = 3, datalabels.colour = "grey15") { + stopifnot_installed_package("ggplot2") if (is.null(position)) { position <- "fill" } @@ -357,3 +364,4 @@ getlbls <- function(data) { " (n=", Value, ")")) %>% mutate(lbl = ifelse(lbl == "0.0% (n=0)", "", lbl)) } + diff --git a/R/globals.R b/R/globals.R index 7c480b9b..51acb22b 100755 --- a/R/globals.R +++ b/R/globals.R @@ -92,7 +92,7 @@ globalVariables(c(".", "Sex", "shortname", "species", - "superprevalent", + "species_id", "trade_name", "transmute", "tsn", diff --git a/R/misc.R b/R/misc.R index b153c951..2217b850 100755 --- a/R/misc.R +++ b/R/misc.R @@ -194,3 +194,9 @@ search_type_in_df <- function(tbl, type) { } found } + +stopifnot_installed_package <- function(package) { + if (!package %in% base::rownames(utils::installed.packages())) { + stop("this function requires the ", package, " package.", call. = FALSE) + } +} diff --git a/R/mo.R b/R/mo.R index fbadb762..b65baff9 100755 --- a/R/mo.R +++ b/R/mo.R @@ -23,10 +23,10 @@ #' #' Use this function to determine a valid microorganism ID (\code{mo}). Determination is done using intelligent rules and the complete taxonomic kingdoms Bacteria, Chromista, Protozoa, Archaea and most microbial species from the kingdom Fungi (see Source). The input can be almost anything: a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), an abbreviation known in the field (like \code{"MRSA"}), or just a genus. Please see Examples. #' @param x a character vector or a \code{data.frame} with one or two columns -#' @param Becker a logical to indicate whether \emph{Staphylococci} should be categorised into Coagulase Negative \emph{Staphylococci} ("CoNS") and Coagulase Positive \emph{Staphylococci} ("CoPS") instead of their own species, according to Karsten Becker \emph{et al.} [1]. Note that this does not include species that were newly named after this publication. +#' @param Becker a logical to indicate whether \emph{Staphylococci} should be categorised into coagulase-negative \emph{Staphylococci} ("CoNS") and coagulase-positive \emph{Staphylococci} ("CoPS") instead of their own species, according to Karsten Becker \emph{et al.} [1,2]. Note that this does not include species that were newly named after these publications, like \emph{S. caeli}. #' #' This excludes \emph{Staphylococcus aureus} at default, use \code{Becker = "all"} to also categorise \emph{S. aureus} as "CoPS". -#' @param Lancefield a logical to indicate whether beta-haemolytic \emph{Streptococci} should be categorised into Lancefield groups instead of their own species, according to Rebecca C. Lancefield [2]. These \emph{Streptococci} will be categorised in their first group, e.g. \emph{Streptococcus dysgalactiae} will be group C, although officially it was also categorised into groups G and L. +#' @param Lancefield a logical to indicate whether beta-haemolytic \emph{Streptococci} should be categorised into Lancefield groups instead of their own species, according to Rebecca C. Lancefield [3]. These \emph{Streptococci} will be categorised in their first group, e.g. \emph{Streptococcus dysgalactiae} will be group C, although officially it was also categorised into groups G and L. #' #' This excludes \emph{Enterococci} at default (who are in group D), use \code{Lancefield = "all"} to also categorise all \emph{Enterococci} as group D. #' @param allow_uncertain a logical (\code{TRUE} or \code{FALSE}) or a value between 0 and 3 to indicate whether the input should be checked for less possible results, see Details @@ -49,18 +49,19 @@ #' | | | ----> subspecies, a 3-4 letter acronym #' | | ----> species, a 3-4 letter acronym #' | ----> genus, a 5-7 letter acronym, mostly without vowels -#' ----> taxonomic kingdom: A (Archaea), AN (Animalia), B (Bacteria), C (Chromista), -#' F (Fungi), P (Protozoa) or PL (Plantae) +#' ----> taxonomic kingdom: A (Archaea), AN (Animalia), B (Bacteria), +#' C (Chromista), F (Fungi), P (Protozoa) or +#' PL (Plantae) #' } #' -#' Values that cannot be coered will be considered 'unknown' and have an MO code \code{UNKNOWN}. +#' Values that cannot be coered will be considered 'unknown' and will get the MO code \code{UNKNOWN}. #' #' Use the \code{\link{mo_property}_*} functions to get properties based on the returned code, see Examples. #' #' The algorithm uses data from the Catalogue of Life (see below) and from one other source (see \code{?microorganisms}). #' #' \strong{Self-learning algoritm} \cr -#' The \code{as.mo()} function gains experience from previously determined microbial IDs and learns from it. This drastically improves both speed and reliability. Use \code{clean_mo_history()} to reset the algorithms. Only experience from your current \code{AMR} package version is used. This is done because in the future the taxonomic tree (which is included in this package) may change for any organism and it consequently has to rebuild its knowledge. Usually, any guess after the first try runs 90-95\% faster than the first try. The algorithm saves its previous findings to \code{~/.Rhistory_mo}. +#' The \code{as.mo()} function gains experience from previously determined microbial IDs and learns from it. This drastically improves both speed and reliability. Use \code{clean_mo_history()} to reset the algorithms. Only experience from your current \code{AMR} package version is used. This is done because in the future the taxonomic tree (which is included in this package) may change for any organism and it consequently has to rebuild its knowledge. Usually, any guess after the first try runs 80-95\% faster than the first try. The algorithm saves its previous findings to \code{~/.Rhistory_mo}. #' #' \strong{Intelligent rules} \cr #' This function uses intelligent rules to help getting fast and logical results. It tries to find matches in this order: @@ -80,7 +81,7 @@ #' This means that looking up human pathogenic microorganisms takes less time than looking up human non-pathogenic microorganisms. #' #' \strong{Uncertain results} \cr -#' The algorithm can additionally use three different levels of uncertainty to guess valid results. The default is \code{allow_uncertain = TRUE}, which is uqual to uncertainty level 2. Using \code{allow_uncertain = FALSE} will skip all of these additional rules: +#' The algorithm can additionally use three different levels of uncertainty to guess valid results. The default is \code{allow_uncertain = TRUE}, which is equal to uncertainty level 2. Using \code{allow_uncertain = FALSE} will skip all of these additional rules: #' \itemize{ #' \item{(uncertainty level 1): It tries to look for only matching genera} #' \item{(uncertainty level 1): It tries to look for previously accepted (but now invalid) taxonomic names} @@ -121,11 +122,13 @@ #' @section Source: #' [1] Becker K \emph{et al.} \strong{Coagulase-Negative Staphylococci}. 2014. Clin Microbiol Rev. 27(4): 870–926. \url{https://dx.doi.org/10.1128/CMR.00109-13} #' -#' [2] Lancefield RC \strong{A serological differentiation of human and other groups of hemolytic streptococci}. 1933. J Exp Med. 57(4): 571–95. \url{https://dx.doi.org/10.1084/jem.57.4.571} +#' [2] Becker K \emph{et al.} \strong{Implications of identifying the recently defined members of the S. aureus complex, S. argenteus and S. schweitzeri: A position paper of members of the ESCMID Study Group for staphylococci and Staphylococcal Diseases (ESGS).}. 2019. Clin Microbiol Infect. 2019 Mar 11. \url{https://doi.org/10.1016/j.cmi.2019.02.028} #' -#' [3] Catalogue of Life: Annual Checklist (public online taxonomic database), \url{www.catalogueoflife.org} (check included annual version with \code{\link{catalogue_of_life_version}()}). +#' [3] Lancefield RC \strong{A serological differentiation of human and other groups of hemolytic streptococci}. 1933. J Exp Med. 57(4): 571–95. \url{https://dx.doi.org/10.1084/jem.57.4.571} +#' +#' [4] Catalogue of Life: Annual Checklist (public online taxonomic database), \url{www.catalogueoflife.org} (check included annual version with \code{\link{catalogue_of_life_version}()}). #' @export -#' @return Character (vector) with class \code{"mo"}. Unknown values will return \code{NA}. +#' @return Character (vector) with class \code{"mo"} #' @seealso \code{\link{microorganisms}} for the \code{data.frame} that is being used to determine ID's. \cr #' The \code{\link{mo_property}} functions (like \code{\link{mo_genus}}, \code{\link{mo_gramstain}}) to get properties based on the returned code. #' @inheritSection AMR Read more on our website! @@ -188,7 +191,8 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE, # check onLoad() in R/zzz.R: data tables are created there. } - mo_hist <- get_mo_history(x, force = isTRUE(list(...)$force_mo_history)) + uncertainty_level <- translate_allow_uncertain(allow_uncertain) + mo_hist <- get_mo_history(x, uncertainty_level, force = isTRUE(list(...)$force_mo_history)) if (mo_source_isvalid(reference_df) & isFALSE(Becker) @@ -247,13 +251,13 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE, "mo"][[1]] } # save them to history - set_mo_history(x, y, force = isTRUE(list(...)$force_mo_history)) + set_mo_history(x, y, 0, force = isTRUE(list(...)$force_mo_history)) } else { # will be checked for mo class in validation and uses exec_as.mo internally if necessary y <- mo_validate(x = x, property = "mo", Becker = Becker, Lancefield = Lancefield, - allow_uncertain = allow_uncertain, reference_df = reference_df, + allow_uncertain = uncertainty_level, reference_df = reference_df, force_mo_history = isTRUE(list(...)$force_mo_history)) } @@ -320,15 +324,8 @@ exec_as.mo <- function(x, fullname = character(0), mo = character(0)) failures <- character(0) - if (isTRUE(allow_uncertain)) { - # default to uncertainty level 2 - allow_uncertain <- 2 - } else { - allow_uncertain <- as.integer(allow_uncertain) - if (!allow_uncertain %in% c(0:3)) { - stop("`allow_uncertain` must be a number between 0 (none) and 3 (all), or TRUE (= 2) or FALSE (= 0).", call. = FALSE) - } - } + uncertainty_level <- translate_allow_uncertain(allow_uncertain) + x_input <- x # already strip leading and trailing spaces x <- trimws(x, which = "both") @@ -341,7 +338,6 @@ exec_as.mo <- function(x, & !identical(x, "") & !identical(x, "xxx") & !identical(x, "con")] - x_input_backup <- x # conversion of old MO codes from v0.5.0 (ITIS) to later versions (Catalogue of Life) if (any(x %like% "^[BFP]_[A-Z]{3,7}") & !all(x %in% microorganisms$mo)) { @@ -406,9 +402,13 @@ exec_as.mo <- function(x, } x <- y - } else if (all(x %in% read_mo_history(force = force_mo_history)$x)) { + } else if (all(x %in% read_mo_history(uncertainty_level, + force = force_mo_history)$x)) { # previously found code - x <- microorganismsDT[data.table(mo = get_mo_history(x, force = force_mo_history)), on = "mo", ..property][[1]] + x <- microorganismsDT[data.table(mo = get_mo_history(x, + uncertainty_level, + force = force_mo_history)), + on = "mo", ..property][[1]] } else if (all(tolower(x) %in% microorganismsDT$fullname_lower)) { # we need special treatment for very prevalent full names, they are likely! @@ -430,7 +430,7 @@ exec_as.mo <- function(x, # commonly used MO codes y <- as.data.table(AMR::microorganisms.codes)[data.table(code = toupper(x)), on = "code", ] # save them to history - set_mo_history(x, y$mo, force = force_mo_history) + set_mo_history(x, y$mo, 0, force = force_mo_history) x <- microorganismsDT[data.table(mo = y[["mo"]]), on = "mo", ..property][[1]] @@ -502,11 +502,16 @@ exec_as.mo <- function(x, progress$tick()$print() - found <- microorganismsDT[mo == get_mo_history(x_backup[i], force = force_mo_history), ..property][[1]] - # previously found result - if (length(found) > 0) { - x[i] <- found[1L] - next + if (initial_search == TRUE) { + found <- microorganismsDT[mo == get_mo_history(x_backup[i], + uncertainty_level, + force = force_mo_history), + ..property][[1]] + # previously found result + if (length(found) > 0) { + x[i] <- found[1L] + next + } } found <- microorganismsDT[mo == toupper(x_backup[i]), ..property][[1]] @@ -521,7 +526,7 @@ exec_as.mo <- function(x, if (length(found) > 0) { x[i] <- found[1L] if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } next } @@ -535,7 +540,7 @@ exec_as.mo <- function(x, # empty and nonsense values, ignore without warning x[i] <- microorganismsDT[mo == "UNKNOWN", ..property][[1]] if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } next } @@ -552,7 +557,7 @@ exec_as.mo <- function(x, if (length(found) > 0) { x[i] <- found[1L] if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } next } @@ -562,7 +567,7 @@ exec_as.mo <- function(x, x[i] <- microorganismsDT[mo == "UNKNOWN", ..property][[1]] if (initial_search == TRUE) { failures <- c(failures, x_backup[i]) - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } next } @@ -572,7 +577,7 @@ exec_as.mo <- function(x, x[i] <- microorganismsDT[mo == "UNKNOWN", ..property][[1]] if (initial_search == TRUE) { failures <- c(failures, x_backup[i]) - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } next } @@ -582,14 +587,14 @@ exec_as.mo <- function(x, if (toupper(x_backup_without_spp[i]) %in% c('MRSA', 'MSSA', 'VISA', 'VRSA')) { x[i] <- microorganismsDT[mo == 'B_STPHY_AUR', ..property][[1]][1L] if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } next } if (toupper(x_backup_without_spp[i]) %in% c('MRSE', 'MSSE')) { x[i] <- microorganismsDT[mo == 'B_STPHY_EPI', ..property][[1]][1L] if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } next } @@ -597,14 +602,14 @@ exec_as.mo <- function(x, | x_backup_without_spp[i] %like% '(enterococci|enterokok|enterococo)[a-z]*?$') { x[i] <- microorganismsDT[mo == 'B_ENTRC', ..property][[1]][1L] if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } next } if (toupper(x_backup_without_spp[i]) %in% c("EHEC", "EPEC", "EIEC", "STEC", "ATEC")) { x[i] <- microorganismsDT[mo == 'B_ESCHR_COL', ..property][[1]][1L] if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } next } @@ -612,7 +617,7 @@ exec_as.mo <- function(x, # multi resistant P. aeruginosa x[i] <- microorganismsDT[mo == 'B_PSDMN_AER', ..property][[1]][1L] if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } next } @@ -621,7 +626,7 @@ exec_as.mo <- function(x, # co-trim resistant S. maltophilia x[i] <- microorganismsDT[mo == 'B_STNTR_MAL', ..property][[1]][1L] if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } next } @@ -629,7 +634,7 @@ exec_as.mo <- function(x, # peni I, peni R, vanco I, vanco R: S. pneumoniae x[i] <- microorganismsDT[mo == 'B_STRPT_PNE', ..property][[1]][1L] if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } next } @@ -637,7 +642,7 @@ exec_as.mo <- function(x, # Streptococci, like GBS = Group B Streptococci (B_STRPT_GRB) x[i] <- microorganismsDT[mo == gsub("G([ABCDFGHK])S", "B_STRPT_GR\\1", x_backup_without_spp[i], ignore.case = TRUE), ..property][[1]][1L] if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } next } @@ -645,7 +650,7 @@ exec_as.mo <- function(x, # Streptococci in different languages, like "estreptococos grupo B" x[i] <- microorganismsDT[mo == gsub(".*(streptococ|streptokok|estreptococ).* ([ABCDFGHK])$", "B_STRPT_GR\\2", x_backup_without_spp[i], ignore.case = TRUE), ..property][[1]][1L] if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } next } @@ -653,7 +658,7 @@ exec_as.mo <- function(x, # Streptococci in different languages, like "Group A Streptococci" x[i] <- microorganismsDT[mo == gsub(".*group ([ABCDFGHK]) (streptococ|streptokok|estreptococ).*", "B_STRPT_GR\\1", x_backup_without_spp[i], ignore.case = TRUE), ..property][[1]][1L] if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } next } @@ -664,7 +669,7 @@ exec_as.mo <- function(x, # coerce S. coagulase negative x[i] <- microorganismsDT[mo == 'B_STPHY_CNS', ..property][[1]][1L] if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } next } @@ -674,7 +679,7 @@ exec_as.mo <- function(x, # coerce S. coagulase positive x[i] <- microorganismsDT[mo == 'B_STPHY_CPS', ..property][[1]][1L] if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } next } @@ -684,7 +689,7 @@ exec_as.mo <- function(x, # coerce Gram negatives x[i] <- microorganismsDT[mo == 'B_GRAMN', ..property][[1]][1L] if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } next } @@ -694,7 +699,7 @@ exec_as.mo <- function(x, # coerce Gram positives x[i] <- microorganismsDT[mo == 'B_GRAMP', ..property][[1]][1L] if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } next } @@ -703,7 +708,7 @@ exec_as.mo <- function(x, # Salmonella Group A to Z, just return S. species for now x[i] <- microorganismsDT[mo == 'B_SLMNL', ..property][[1]][1L] if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } options(mo_renamed = c(getOption("mo_renamed"), magenta(paste0("Note: ", @@ -715,7 +720,7 @@ exec_as.mo <- function(x, # Salmonella with capital letter species like "Salmonella Goettingen" - they're all S. enterica x[i] <- microorganismsDT[mo == 'B_SLMNL_ENT', ..property][[1]][1L] if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } options(mo_renamed = c(getOption("mo_renamed"), magenta(paste0("Note: ", @@ -735,7 +740,7 @@ exec_as.mo <- function(x, if (length(found) > 0) { x[i] <- found[1L] if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } next } @@ -744,7 +749,7 @@ exec_as.mo <- function(x, if (length(found) > 0) { x[i] <- found[1L] if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } next } @@ -759,7 +764,7 @@ exec_as.mo <- function(x, if (length(mo_found) > 0) { x[i] <- microorganismsDT[mo == mo_found, ..property][[1]][1L] if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } next } @@ -782,7 +787,7 @@ exec_as.mo <- function(x, x[i] <- microorganismsDT[mo == "UNKNOWN", ..property][[1]] if (initial_search == TRUE) { failures <- c(failures, x_backup[i]) - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } next } @@ -869,7 +874,7 @@ exec_as.mo <- function(x, g.x_backup_without_spp = x_backup_without_spp[i]) if (!empty_result(x[i])) { if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } next } @@ -884,7 +889,7 @@ exec_as.mo <- function(x, g.x_backup_without_spp = x_backup_without_spp[i]) if (!empty_result(x[i])) { if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } next } @@ -899,7 +904,7 @@ exec_as.mo <- function(x, g.x_backup_without_spp = x_backup_without_spp[i]) if (!empty_result(x[i])) { if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } next } @@ -925,7 +930,7 @@ exec_as.mo <- function(x, ref_new = microorganismsDT[col_id == found[1, col_id_new], ref], mo = microorganismsDT[col_id == found[1, col_id_new], mo]) if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } next } @@ -938,12 +943,12 @@ exec_as.mo <- function(x, f.x_withspaces_end_only, g.x_backup_without_spp) { - if (allow_uncertain == 0) { + if (uncertainty_level == 0) { # do not allow uncertainties return(NA_character_) } - if (allow_uncertain >= 1) { + if (uncertainty_level >= 1) { # (1) look again for old taxonomic names, now for G. species ---- found <- microorganisms.oldDT[fullname %like% c.x_withspaces_start_end | fullname %like% d.x_withspaces_start_only] @@ -966,11 +971,14 @@ exec_as.mo <- function(x, input = a.x_backup, fullname = found[1, fullname], mo = paste("CoL", found[1, col_id]))) + if (initial_search == TRUE) { + set_mo_history(a.x_backup, get_mo_code(x, property), 1, force = force_mo_history) + } return(x) } } - if (allow_uncertain >= 2) { + if (uncertainty_level >= 2) { # (3) look for genus only, part of name ---- if (nchar(g.x_backup_without_spp) > 4 & !b.x_trimmed %like% " ") { if (!grepl("^[A-Z][a-z]+", b.x_trimmed, ignore.case = FALSE)) { @@ -983,6 +991,9 @@ exec_as.mo <- function(x, input = a.x_backup, fullname = microorganismsDT[mo == found[1L], fullname][[1]], mo = found[1L])) + if (initial_search == TRUE) { + set_mo_history(a.x_backup, get_mo_code(x, property), 2, force = force_mo_history) + } return(x) } } @@ -1000,6 +1011,9 @@ exec_as.mo <- function(x, input = a.x_backup, fullname = microorganismsDT[mo == found_result[1L], fullname][[1]], mo = found_result[1L])) + if (initial_search == TRUE) { + set_mo_history(a.x_backup, get_mo_code(found[1L], property), 2, force = force_mo_history) + } return(found[1L]) } @@ -1018,6 +1032,33 @@ exec_as.mo <- function(x, input = a.x_backup, fullname = microorganismsDT[mo == found_result[1L], fullname][[1]], mo = found_result[1L])) + if (initial_search == TRUE) { + set_mo_history(a.x_backup, get_mo_code(found[1L], property), 2, force = force_mo_history) + } + return(found[1L]) + } + } + } + } + # (6) try to strip off one element from start and check the remains (only allow 2-part name outcome) ---- + x_strip <- a.x_backup %>% strsplit(" ") %>% unlist() + if (length(x_strip) > 1 & nchar(g.x_backup_without_spp) >= 6) { + for (i in 2:(length(x_strip))) { + x_strip_collapsed <- paste(x_strip[i:length(x_strip)], collapse = " ") + found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, initial_search = FALSE, allow_uncertain = FALSE))) + if (!empty_result(found)) { + found_result <- found + found <- microorganismsDT[mo == found_result[1L], ..property][[1]] + # uncertainty level 2 only if the fullname contains a space (otherwise it will be found with lvl 3) + if (microorganismsDT[mo == found_result[1L], fullname][[1]] %like% " ") { + uncertainties <<- rbind(uncertainties, + data.frame(uncertainty = 2, + input = a.x_backup, + fullname = microorganismsDT[mo == found_result[1L], fullname][[1]], + mo = found_result[1L])) + if (initial_search == TRUE) { + set_mo_history(a.x_backup, get_mo_code(found[1L], property), 2, force = force_mo_history) + } return(found[1L]) } } @@ -1025,8 +1066,8 @@ exec_as.mo <- function(x, } } - if (allow_uncertain >= 3) { - # (6) try to strip off one element from start and check the remains ---- + if (uncertainty_level >= 3) { + # (7) try to strip off one element from start and check the remains ---- x_strip <- a.x_backup %>% strsplit(" ") %>% unlist() if (length(x_strip) > 1 & nchar(g.x_backup_without_spp) >= 6) { for (i in 2:(length(x_strip))) { @@ -1040,12 +1081,15 @@ exec_as.mo <- function(x, input = a.x_backup, fullname = microorganismsDT[mo == found_result[1L], fullname][[1]], mo = found_result[1L])) + if (initial_search == TRUE) { + set_mo_history(a.x_backup, get_mo_code(found[1L], property), 3, force = force_mo_history) + } return(found[1L]) } } } - # (7) part of a name (very unlikely match) ---- + # (8) part of a name (very unlikely match) ---- found <- microorganismsDT[fullname %like% f.x_withspaces_end_only] if (nrow(found) > 0) { found_result <- found[["mo"]] @@ -1056,6 +1100,9 @@ exec_as.mo <- function(x, input = a.x_backup, fullname = microorganismsDT[mo == found_result[1L], fullname][[1]], mo = found_result[1L])) + if (initial_search == TRUE) { + set_mo_history(a.x_backup, get_mo_code(found[1L], property), 3, force = force_mo_history) + } return(found[1L]) } } @@ -1071,7 +1118,7 @@ exec_as.mo <- function(x, x_withspaces_end_only[i], x_backup_without_spp[i]) if (!empty_result(x[i])) { - # no set_mo_history here; these are uncertain + # no set_mo_history: is already set in uncertain_fn() next } @@ -1079,7 +1126,7 @@ exec_as.mo <- function(x, x[i] <- microorganismsDT[mo == "UNKNOWN", ..property][[1]] if (initial_search == TRUE) { failures <- c(failures, x_backup[i]) - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } } } @@ -1127,8 +1174,8 @@ exec_as.mo <- function(x, MOs_staph <- microorganismsDT[genus == "Staphylococcus"] setkey(MOs_staph, species) CoNS <- MOs_staph[species %in% c("arlettae", "auricularis", "capitis", - "caprae", "carnosus", "cohnii", "condimenti", - "devriesei", "epidermidis", "equorum", + "caprae", "carnosus", "chromogenes", "cohnii", "condimenti", + "devriesei", "epidermidis", "equorum", "felis", "fleurettii", "gallinarum", "haemolyticus", "hominis", "jettensis", "kloosii", "lentus", "lugdunensis", "massiliensis", "microti", @@ -1136,16 +1183,31 @@ exec_as.mo <- function(x, "pettenkoferi", "piscifermentans", "rostri", "saccharolyticus", "saprophyticus", "sciuri", "stepanovicii", "simulans", "succinus", - "vitulinus", "warneri", "xylosus"), ..property][[1]] - CoPS <- MOs_staph[species %in% c("simiae", "agnetis", "chromogenes", - "delphini", "felis", "lutrae", + "vitulinus", "warneri", "xylosus") + | (species == "schleiferi" & subspecies %in% c("schleiferi", "")), ..property][[1]] + CoPS <- MOs_staph[species %in% c("simiae", "agnetis", + "delphini", "lutrae", "hyicus", "intermedius", "pseudintermedius", "pseudointermedius", - "schleiferi"), ..property][[1]] + "schweitzeri", "argenteus") + | (species == "schleiferi" & subspecies == "coagulans"), ..property][[1]] + + # warn when species found that are not in Becker (2014, PMID 25278577) and Becker (2019, PMID 30872103) + post_Becker <- c("argensis", "caeli", "cornubiensis", "edaphicus") + if (any(x %in% MOs_staph[species %in% post_Becker, ..property][[1]])) { + + warning("Becker ", italic("et al."), " (2014) does not contain species named after their publication: ", + italic(paste("S.", + sort(mo_species(unique(x[x %in% MOs_staph[species %in% post_Becker, ..property][[1]]]))), + collapse = ", ")), + call. = FALSE, + immediate. = TRUE) + } + x[x %in% CoNS] <- microorganismsDT[mo == 'B_STPHY_CNS', ..property][[1]][1L] x[x %in% CoPS] <- microorganismsDT[mo == 'B_STPHY_CPS', ..property][[1]][1L] if (Becker == "all") { - x[x == microorganismsDT[mo == 'B_STPHY_AUR', ..property][[1]][1L]] <- microorganismsDT[mo == 'B_STPHY_CPS', ..property][[1]][1L] + x[x %in% microorganismsDT[mo %like% '^B_STPHY_AUR', ..property][[1]]] <- microorganismsDT[mo == 'B_STPHY_CPS', ..property][[1]][1L] } } @@ -1305,7 +1367,7 @@ mo_uncertainties <- function() { } #' @exportMethod print.mo_uncertainties -#' @importFrom crayon green yellow red bgGreen bgYellow bgRed +#' @importFrom crayon green yellow red white bgGreen bgYellow bgRed #' @export #' @noRd print.mo_uncertainties <- function(x, ...) { @@ -1321,16 +1383,16 @@ print.mo_uncertainties <- function(x, ...) { for (i in 1:nrow(x)) { if (x[i, "uncertainty"] == 1) { colour1 <- green - colour2 <- bgGreen + colour2 <- function(...) bgGreen(white(...)) } else if (x[i, "uncertainty"] == 2) { colour1 <- yellow colour2 <- bgYellow } else { colour1 <- red - colour2 <- bgRed + colour2 <- function(...) bgRed(white(...)) } msg <- paste(msg, - paste0("[", colour2(paste0(" ", x[i, "uncertainty"], " ")), '] - "', x[i, "input"], '" -> ', + paste0(colour2(paste0(" [", x[i, "uncertainty"], "] ")), ' "', x[i, "input"], '" -> ', colour1(paste0(italic(x[i, "fullname"]), " (", x[i, "mo"], ")"))), sep = "\n") } @@ -1373,3 +1435,16 @@ get_mo_code <- function(x, property) { AMR::microorganisms[base::which(AMR::microorganisms[, property] %in% x),]$mo } } + +translate_allow_uncertain <- function(allow_uncertain) { + if (isTRUE(allow_uncertain)) { + # default to uncertainty level 2 + allow_uncertain <- 2 + } else { + allow_uncertain <- as.integer(allow_uncertain) + if (!allow_uncertain %in% c(0:3)) { + stop("`allow_uncertain` must be a number between 0 (none) and 3 (all), or TRUE (= 2) or FALSE (= 0).", call. = FALSE) + } + } + allow_uncertain +} diff --git a/R/mo_history.R b/R/mo_history.R index e9c52bf4..9217eab9 100644 --- a/R/mo_history.R +++ b/R/mo_history.R @@ -21,10 +21,10 @@ # print successful as.mo coercions to file, not uncertain ones #' @importFrom dplyr %>% distinct filter -set_mo_history <- function(x, mo, force = FALSE) { +set_mo_history <- function(x, mo, uncertainty_level, force = FALSE) { file_location <- base::path.expand('~/.Rhistory_mo') if (base::interactive() | force == TRUE) { - mo_hist <- read_mo_history(force = force) + mo_hist <- read_mo_history(uncertainty_level = uncertainty_level, force = force) df <- data.frame(x, mo, stringsAsFactors = FALSE) %>% distinct(x, .keep_all = TRUE) %>% filter(!is.na(x) & !is.na(mo)) @@ -35,10 +35,12 @@ set_mo_history <- function(x, mo, force = FALSE) { mo <- df$mo for (i in 1:length(x)) { # save package version too, as both the as.mo() algorithm and the reference data set may change - if (NROW(mo_hist[base::which(mo_hist$x == x[i] & mo_hist$package_version == utils::packageVersion("AMR")),]) == 0) { - base::write(x = c(x[i], mo[i], base::as.character(utils::packageVersion("AMR"))), + if (NROW(mo_hist[base::which(mo_hist$x == x[i] & + mo_hist$uncertainty_level >= uncertainty_level & + mo_hist$package_version == utils::packageVersion("AMR")),]) == 0) { + base::write(x = c(x[i], mo[i], uncertainty_level, base::as.character(utils::packageVersion("AMR"))), file = file_location, - ncolumns = 3, + ncolumns = 4, append = TRUE, sep = "\t") } @@ -47,8 +49,8 @@ set_mo_history <- function(x, mo, force = FALSE) { return(base::invisible()) } -get_mo_history <- function(x, force = FALSE) { - file_read <- read_mo_history(force = force) +get_mo_history <- function(x, uncertainty_level, force = FALSE) { + file_read <- read_mo_history(uncertainty_level = uncertainty_level, force = force) if (base::is.null(file_read)) { NA } else { @@ -59,30 +61,57 @@ get_mo_history <- function(x, force = FALSE) { } #' @importFrom dplyr %>% filter distinct -read_mo_history <- function(force = FALSE) { +read_mo_history <- function(uncertainty_level = 2, force = FALSE, unfiltered = FALSE) { file_location <- base::path.expand('~/.Rhistory_mo') if (!base::file.exists(file_location) | (!base::interactive() & force == FALSE)) { return(NULL) } + uncertainty_level_param <- uncertainty_level file_read <- utils::read.table(file = file_location, header = FALSE, sep = "\t", - col.names = c("x", "mo", "package_version"), + col.names = c("x", "mo", "uncertainty_level", "package_version"), stringsAsFactors = FALSE) # Below: filter on current package version. # Even current fullnames may be replaced by new taxonomic names, so new versions of # the Catalogue of Life must not lead to data corruption. - file_read %>% - filter(package_version == utils::packageVersion("AMR")) %>% - distinct(x, mo, .keep_all = TRUE) -} -#' @rdname as.mo -#' @export -clean_mo_history <- function() { - file_location <- base::path.expand('~/.Rhistory_mo') - if (base::file.exists(file_location)) { - base::unlink(file_location) + if (unfiltered == FALSE) { + file_read <- file_read %>% + filter(package_version == utils::packageVersion("AMR"), + # only take unknowns if uncertainty_level_param is higher + ((mo == "UNKNOWN" & uncertainty_level_param == uncertainty_level) | + (mo != "UNKNOWN" & uncertainty_level_param >= uncertainty_level))) %>% + arrange(desc(uncertainty_level)) %>% + distinct(x, mo, .keep_all = TRUE) + } + + if (nrow(file_read) == 0) { + NULL + } else { + file_read + } +} + +#' @rdname as.mo +#' @importFrom crayon red +#' @importFrom utils menu +#' @export +clean_mo_history <- function(...) { + file_location <- base::path.expand('~/.Rhistory_mo') + if (file.exists(file_location)) { + if (interactive() & !isTRUE(list(...)$force)) { + q <- menu(title = paste("This will remove all", + format(nrow(read_mo_history(999, unfiltered = TRUE)), big.mark = ","), + "previously determined microbial IDs. Are you sure?"), + choices = c("Yes", "No"), + graphics = FALSE) + if (q != 1) { + return(invisible()) + } + } + unlink(file_location) + cat(red("File", file_location, "removed.")) } } diff --git a/R/mo_property.R b/R/mo_property.R index e0fb4a24..1d3faccb 100755 --- a/R/mo_property.R +++ b/R/mo_property.R @@ -446,6 +446,8 @@ mo_translate <- function(x, language) { # Spanish language == "es" ~ x[x_tobetranslated] %>% + # not 'negativa' + # https://www.sciencedirect.com/science/article/pii/S0123939215000739 gsub("Coagulase-negative Staphylococcus","Staphylococcus coagulasa negativo", ., fixed = TRUE) %>% gsub("Coagulase-positive Staphylococcus","Staphylococcus coagulasa positivo", ., fixed = TRUE) %>% gsub("Beta-haemolytic Streptococcus", "Streptococcus Beta-hemol\u00edtico", ., fixed = TRUE) %>% @@ -461,6 +463,8 @@ mo_translate <- function(x, language) { gsub("unknown species", "especie desconocida", ., fixed = TRUE) %>% gsub("unknown subspecies", "subespecie desconocida", ., fixed = TRUE) %>% gsub("unknown rank", "rango desconocido", ., fixed = TRUE) %>% + gsub("(CoNS)", "(SCN)", ., fixed = TRUE) %>% + gsub("(CoPS)", "(SCP)", ., fixed = TRUE) %>% gsub("Gram negative", "Gram negativo", ., fixed = TRUE) %>% gsub("Gram positive", "Gram positivo", ., fixed = TRUE) %>% gsub("Bacteria", "Bacterias", ., fixed = TRUE) %>% @@ -579,7 +583,12 @@ mo_validate <- function(x, property, ...) { # check onLoad() in R/zzz.R: data tables are created there. } - if (!all(x %in% microorganisms[, property]) + # try to catch an error when inputting an invalid parameter + # so the call can be set to FALSE + tryCatch(x[1L] %in% AMR::microorganisms[1, property], + error = function(e) stop(e$message, call. = FALSE)) + + if (!all(x %in% AMR::microorganisms[, property]) | Becker %in% c(TRUE, "all") | Lancefield %in% c(TRUE, "all")) { exec_as.mo(x, property = property, ...) diff --git a/R/portion.R b/R/portion.R index 38593462..278c9723 100755 --- a/R/portion.R +++ b/R/portion.R @@ -21,11 +21,11 @@ #' Calculate resistance of isolates #' -#' @description These functions can be used to calculate the (co-)resistance of microbial isolates (i.e. percentage S, SI, I, IR or R). All functions support quasiquotation with pipes, can be used in \code{dplyr}s \code{\link[dplyr]{summarise}} and support grouped variables, see \emph{Examples}. +#' @description These functions can be used to calculate the (co-)resistance of microbial isolates (i.e. percentage of S, SI, I, IR or R). All functions support quasiquotation with pipes, can be used in \code{dplyr}s \code{\link[dplyr]{summarise}} and support grouped variables, see \emph{Examples}. #' #' \code{portion_R} and \code{portion_IR} can be used to calculate resistance, \code{portion_S} and \code{portion_SI} can be used to calculate susceptibility.\cr #' @param ... one or more vectors (or columns) with antibiotic interpretations. They will be transformed internally with \code{\link{as.rsi}} if needed. Use multiple columns to calculate (the lack of) co-resistance: the probability where one of two drugs have a resistant or susceptible result. See Examples. -#' @param minimum the minimal amount of available isolates. Any number lower than \code{minimum} will return \code{NA} with a warning. The default number of \code{30} isolates is advised by the Clinical and Laboratory Standards Institute (CLSI) as best practice, see Source. +#' @param minimum the minimum allowed number of available (tested) isolates. Any isolate count lower than \code{minimum} will return \code{NA} with a warning. The default number of \code{30} isolates is advised by the Clinical and Laboratory Standards Institute (CLSI) as best practice, see Source. #' @param as_percent a logical to indicate whether the output must be returned as a hundred fold with \% sign (a character). A value of \code{0.123456} will then be returned as \code{"12.3\%"}. #' @param also_single_tested a logical to indicate whether (in combination therapies) also observations should be included where not all antibiotics were tested, but at least one of the tested antibiotics contains a target interpretation (e.g. S in case of \code{portion_S} and R in case of \code{portion_R}). \strong{This would lead to selection bias in almost all cases.} #' @param data a \code{data.frame} containing columns with class \code{rsi} (see \code{\link{as.rsi}}) @@ -39,19 +39,21 @@ #' #' The old \code{\link{rsi}} function is still available for backwards compatibility but is deprecated. #' \if{html}{ +# (created with https://www.latex4technics.com/) #' \cr\cr #' To calculate the probability (\emph{p}) of susceptibility of one antibiotic, we use this formula: -#' \out{
AMR.Rmd
Note: values on this page will change with every website update since they are based on randomly created values and the page was written in RMarkdown. However, the methodology remains unchanged. This page was generated on 15 March 2019.
+Note: values on this page will change with every website update since they are based on randomly created values and the page was written in RMarkdown. However, the methodology remains unchanged. This page was generated on 26 March 2019.
So, we can draw at least two conclusions immediately. From a data scientist perspective, the data looks clean: only values M
and F
. From a researcher perspective: there are slightly more men. Nothing we didn’t already know.
The data is already quite clean, but we still need to transform some variables. The bacteria
column now consists of text, and we want to add more variables based on microbial IDs later on. So, we will transform this column to valid IDs. The mutate()
function of the dplyr
package makes this really easy:
data <- data %>%
@@ -443,10 +443,10 @@
#> Kingella kingae (no changes)
#>
#> EUCAST Expert Rules, Intrinsic Resistance and Exceptional Phenotypes (v3.1, 2016)
-#> Table 1: Intrinsic resistance in Enterobacteriaceae (1315 changes)
+#> Table 1: Intrinsic resistance in Enterobacteriaceae (1262 changes)
#> Table 2: Intrinsic resistance in non-fermentative Gram-negative bacteria (no changes)
#> Table 3: Intrinsic resistance in other Gram-negative bacteria (no changes)
-#> Table 4: Intrinsic resistance in Gram-positive bacteria (2799 changes)
+#> Table 4: Intrinsic resistance in Gram-positive bacteria (2756 changes)
#> Table 8: Interpretive rules for B-lactam agents and Gram-positive cocci (no changes)
#> Table 9: Interpretive rules for B-lactam agents and Gram-negative rods (no changes)
#> Table 10: Interpretive rules for B-lactam agents and other Gram-negative bacteria (no changes)
@@ -462,9 +462,9 @@
#> Non-EUCAST: piperacillin/tazobactam = S where piperacillin = S (no changes)
#> Non-EUCAST: trimethoprim/sulfa = S where trimethoprim = S (no changes)
#>
-#> => EUCAST rules affected 7,488 out of 20,000 rows
+#> => EUCAST rules affected 7,403 out of 20,000 rows
#> -> added 0 test results
-#> -> changed 4,114 test results (0 to S; 0 to I; 4,114 to R)
So only 28.4% is suitable for resistance analysis! We can now filter on it with the filter()
function, also from the dplyr
package:
So only 28.2% is suitable for resistance analysis! We can now filter on it with the filter()
function, also from the dplyr
package:
For future use, the above two syntaxes can be shortened with the filter_first_isolate()
function:
isolate | @@ -654,10 +654,10 @@|||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | -2010-04-01 | -K1 | +2010-01-29 | +P7 | B_ESCHR_COL | -R | +S | S | S | S | @@ -666,95 +666,95 @@|||
2 | -2010-04-30 | -K1 | +2010-05-18 | +P7 | B_ESCHR_COL | +S | R | S | S | -S | -FALSE | FALSE | +TRUE |
3 | -2010-10-12 | -K1 | +2010-06-01 | +P7 | B_ESCHR_COL | R | S | -S | +R | S | FALSE | -FALSE | +TRUE |
4 | -2010-12-05 | -K1 | +2010-07-21 | +P7 | B_ESCHR_COL | S | +I | S | S | -R | FALSE | TRUE | |
5 | -2011-01-19 | -K1 | -B_ESCHR_COL | -S | -S | -S | -S | -FALSE | -TRUE | -||||
6 | -2011-04-07 | -K1 | -B_ESCHR_COL | -S | -S | -S | -S | -TRUE | -TRUE | -||||
7 | -2011-06-16 | -K1 | -B_ESCHR_COL | -S | -S | -S | -S | -FALSE | -FALSE | -||||
8 | -2011-07-16 | -K1 | +2010-08-20 | +P7 | B_ESCHR_COL | S | R | S | S | FALSE | +FALSE | +||
6 | +2010-12-14 | +P7 | +B_ESCHR_COL | +S | +I | +S | +S | +FALSE | +FALSE | +||||
7 | +2011-03-02 | +P7 | +B_ESCHR_COL | +S | +S | +S | +R | +TRUE | +TRUE | +||||
8 | +2011-03-14 | +P7 | +B_ESCHR_COL | +S | +S | +R | +S | +FALSE | TRUE | ||||
9 | -2011-08-25 | -K1 | +2011-05-28 | +P7 | B_ESCHR_COL | -R | S | +I | S | S | FALSE | @@ -762,23 +762,23 @@||
10 | -2011-09-11 | -K1 | +2011-08-09 | +P7 | B_ESCHR_COL | -R | +I | S | R | -R | +S | FALSE | TRUE |
Instead of 2, now 7 isolates are flagged. In total, 79.7% of all isolates are marked ‘first weighted’ - 51.3% more than when using the CLSI guideline. In real life, this novel algorithm will yield 5-10% more isolates than the classic CLSI guideline.
+Instead of 2, now 8 isolates are flagged. In total, 79.5% of all isolates are marked ‘first weighted’ - 51.2% more than when using the CLSI guideline. In real life, this novel algorithm will yield 5-10% more isolates than the classic CLSI guideline.
As with filter_first_isolate()
, there’s a shortcut for this new algorithm too:
So we end up with 15,948 isolates for analysis.
+So we end up with 15,891 isolates for analysis.
We can remove unneeded columns:
@@ -804,13 +804,13 @@Or can be used like the dplyr
way, which is easier readable:
Frequency table of genus
and species
from a data.frame
(15,948 x 13)
Frequency table of genus
and species
from a data.frame
(15,891 x 13)
Columns: 2
-Length: 15,948 (of which NA: 0 = 0.00%)
+Length: 15,891 (of which NA: 0 = 0.00%)
Unique: 4
Shortest: 16
Longest: 24
The functions portion_R()
, portion_RI()
, portion_I()
, portion_IS()
and portion_S()
can be used to determine the portion of a specific antimicrobial outcome. They can be used on their own:
The functions portion_S()
, portion_SI()
, portion_I()
, portion_IR()
and portion_R()
can be used to determine the portion of a specific antimicrobial outcome. They can be used on their own:
Or can be used in conjuction with group_by()
and summarise()
, both from the dplyr
package:
data_1st %>%
group_by(hospital) %>%
@@ -984,19 +984,19 @@ Longest: 24
Hospital A
-0.4801481
+0.4674370
Hospital B
-0.4811895
+0.4698925
Hospital C
-0.4707087
+0.4813574
Hospital D
-0.4915144
+0.4712389
@@ -1014,23 +1014,23 @@ Longest: 24
Hospital A
-0.4801481
-4861
+0.4674370
+4760
Hospital B
-0.4811895
-5582
+0.4698925
+5580
Hospital C
-0.4707087
-2441
+0.4813574
+2387
Hospital D
-0.4915144
-3064
+0.4712389
+3164
@@ -1050,27 +1050,27 @@ Longest: 24
Escherichia
-0.7282445
-0.9031690
-0.9756036
+0.7272384
+0.9034205
+0.9763581
Klebsiella
-0.7270456
-0.9000625
-0.9787633
+0.7457847
+0.9014267
+0.9760052
Staphylococcus
-0.7220793
-0.9184251
-0.9796706
+0.7245186
+0.9181001
+0.9756098
Streptococcus
-0.7182144
+0.7234213
0.0000000
-0.7182144
+0.7234213
@@ -1081,36 +1081,36 @@ Longest: 24
"2. Gentamicin" = portion_S(gent),
"3. Amoxi/clav + gent" = portion_S(amcl, gent)) %>%
tidyr::gather("Antibiotic", "S", -genus) %>%
- ggplot(aes(x = genus,
+ ggplot(aes(x = genus,
y = S,
fill = Antibiotic)) +
- geom_col(position = "dodge2")
To show results in plots, most R users would nowadays use the ggplot2
package. This package lets you create plots in layers. You can read more about it on their website. A quick example would look like these syntaxes:
ggplot(data = a_data_set,
- mapping = aes(x = year,
+ggplot(data = a_data_set,
+ mapping = aes(x = year,
y = value)) +
- geom_col() +
- labs(title = "A title",
+ geom_col() +
+ labs(title = "A title",
subtitle = "A subtitle",
x = "My X axis",
y = "My Y axis")
# or as short as:
-ggplot(a_data_set) +
- geom_bar(aes(year))
+ggplot(a_data_set) +
+ geom_bar(aes(year))
The AMR
package contains functions to extend this ggplot2
package, for example geom_rsi()
. It automatically transforms data with count_df()
or portion_df()
and show results in stacked bars. Its simplest and shortest example:
ggplot(data_1st) +
+
Omit the translate_ab = FALSE
to have the antibiotic codes (amox, amcl, cipr, gent) translated to official WHO names (amoxicillin, amoxicillin and betalactamase inhibitor, ciprofloxacin, gentamicin).
If we group on e.g. the genus
column and add some additional functions from our package, we can create this:
# group the data on `genus`
-ggplot(data_1st %>% group_by(genus)) +
+ggplot(data_1st %>% group_by(genus)) +
# create bars with genus on x axis
# it looks for variables with class `rsi`,
# of which we have 4 (earlier created with `as.rsi`)
@@ -1122,13 +1122,13 @@ Longest: 24
# show percentages on y axis
scale_y_percent(breaks = 0:4 * 25) +
# turn 90 degrees, make it bars instead of columns
- coord_flip() +
+ coord_flip() +
# add labels
- labs(title = "Resistance per genus and antibiotic",
+ labs(title = "Resistance per genus and antibiotic",
subtitle = "(this is fake data)") +
# and print genus in italic to follow our convention
# (is now y axis because we turned the plot)
- theme(axis.text.y = element_text(face = "italic"))
+ theme(axis.text.y = element_text(face = "italic"))
To simplify this, we also created the ggplot_rsi()
function, which combines almost all above functions:
data_1st %>%
@@ -1137,7 +1137,7 @@ Longest: 24
facet = "Antibiotic",
breaks = 0:4 * 25,
datalabels = FALSE) +
- coord_flip()
Becker | -a logical to indicate whether Staphylococci should be categorised into Coagulase Negative Staphylococci ("CoNS") and Coagulase Positive Staphylococci ("CoPS") instead of their own species, according to Karsten Becker et al. [1]. Note that this does not include species that were newly named after this publication. + | a logical to indicate whether Staphylococci should be categorised into coagulase-negative Staphylococci ("CoNS") and coagulase-positive Staphylococci ("CoPS") instead of their own species, according to Karsten Becker et al. [1,2]. Note that this does not include species that were newly named after these publications, like S. caeli. This excludes Staphylococcus aureus at default, use |
|
---|---|---|---|
Lancefield | -a logical to indicate whether beta-haemolytic Streptococci should be categorised into Lancefield groups instead of their own species, according to Rebecca C. Lancefield [2]. These Streptococci will be categorised in their first group, e.g. Streptococcus dysgalactiae will be group C, although officially it was also categorised into groups G and L. + | a logical to indicate whether beta-haemolytic Streptococci should be categorised into Lancefield groups instead of their own species, according to Rebecca C. Lancefield [3]. These Streptococci will be categorised in their first group, e.g. Streptococcus dysgalactiae will be group C, although officially it was also categorised into groups G and L. This excludes Enterococci at default (who are in group D), use |
|
ab_class | -an antimicrobial class, like |
+ an antimicrobial class, like |
|
result | @@ -317,8 +317,14 @@ # filter on isolates that show resistance to # any aminoglycoside and any fluoroquinolone septic_patients %>% - filter_aminoglycosides("R", "any") %>% - filter_fluoroquinolones("R", "any") + filter_aminoglycosides("R") %>% + filter_fluoroquinolones("R") + +# filter on isolates that show resistance to +# all aminoglycosides and all fluoroquinolones +septic_patients %>% + filter_aminoglycosides("R", "all") %>% + filter_fluoroquinolones("R", "all") # } @@ -431,7 +431,7 @@
|
- Filter on antibiotic class |
+ Filter isolates on result in antibiotic class |
diff --git a/docs/reference/microorganisms.html b/docs/reference/microorganisms.html
index c773534c..bf77573d 100644
--- a/docs/reference/microorganisms.html
+++ b/docs/reference/microorganisms.html
@@ -80,7 +80,7 @@
[1] Becker K et al. Coagulase-Negative Staphylococci. 2014. Clin Microbiol Rev. 27(4): 870–926. https://dx.doi.org/10.1128/CMR.00109-13 -[2] Lancefield RC A serological differentiation of human and other groups of hemolytic streptococci. 1933. J Exp Med. 57(4): 571–95. https://dx.doi.org/10.1084/jem.57.4.571 -[3] Catalogue of Life: Annual Checklist (public online taxonomic database), www.catalogueoflife.org (check included annual version with [2] Becker K et al. Implications of identifying the recently defined members of the S. aureus complex, S. argenteus and S. schweitzeri: A position paper of members of the ESCMID Study Group for staphylococci and Staphylococcal Diseases (ESGS).. 2019. Clin Microbiol Infect. 2019 Mar 11. https://doi.org/10.1016/j.cmi.2019.02.028 +[3] Lancefield RC A serological differentiation of human and other groups of hemolytic streptococci. 1933. J Exp Med. 57(4): 571–95. https://dx.doi.org/10.1084/jem.57.4.571 +[4] Catalogue of Life: Annual Checklist (public online taxonomic database), www.catalogueoflife.org (check included annual version with Read more on our website!diff --git a/docs/reference/portion.html b/docs/reference/portion.html index 28e94e0c..90d311cb 100644 --- a/docs/reference/portion.html +++ b/docs/reference/portion.html @@ -47,7 +47,7 @@ - @@ -81,7 +81,7 @@ portion_R and portion_IR can be used to calculate resistance, portion_S and port @@ -238,7 +238,7 @@ portion_R and portion_IR can be used to calculate resistance, portion_S and port
-
@@ -270,7 +270,7 @@ portion_R and portion_IR can be used to calculate resistance, portion_S and port
These functions can be used to calculate the (co-)resistance of microbial isolates (i.e. percentage S, SI, I, IR or R). All functions support quasiquotation with pipes, can be used in These functions can be used to calculate the (co-)resistance of microbial isolates (i.e. percentage of S, SI, I, IR or R). All functions support quasiquotation with pipes, can be used in
|
diff --git a/docs/reference/microorganisms.old.html b/docs/reference/microorganisms.old.html
index 7a5e5f37..dde94272 100644
--- a/docs/reference/microorganisms.old.html
+++ b/docs/reference/microorganisms.old.html
@@ -80,7 +80,7 @@
diff --git a/docs/reference/mo_property.html b/docs/reference/mo_property.html
index 9e2ef6b1..314bbc64 100644
--- a/docs/reference/mo_property.html
+++ b/docs/reference/mo_property.html
@@ -80,7 +80,7 @@
@@ -341,8 +341,9 @@ This package contains the complete taxonomic tree of almost all microorganisms (
|||
minimum | -the minimal amount of available isolates. Any number lower than |
+ the minimum allowed number of available (tested) isolates. Any isolate count lower than |
|
as_percent | @@ -311,14 +311,14 @@ portion_R and portion_IR can be used to calculate resistance, portion_S and port|||
minimum | -the minimal amount of available isolates. Any number lower than |
+ the minimum allowed number of available (tested) isolates. Any isolate count lower than |
|
as_percent | diff --git a/man/as.mo.Rd b/man/as.mo.Rd index d64d3062..69fc3cf6 100644 --- a/man/as.mo.Rd +++ b/man/as.mo.Rd @@ -21,16 +21,16 @@ mo_uncertainties() mo_renamed() -clean_mo_history() +clean_mo_history(...) } \arguments{ \item{x}{a character vector or a \code{data.frame} with one or two columns} -\item{Becker}{a logical to indicate whether \emph{Staphylococci} should be categorised into Coagulase Negative \emph{Staphylococci} ("CoNS") and Coagulase Positive \emph{Staphylococci} ("CoPS") instead of their own species, according to Karsten Becker \emph{et al.} [1]. Note that this does not include species that were newly named after this publication. +\item{Becker}{a logical to indicate whether \emph{Staphylococci} should be categorised into coagulase-negative \emph{Staphylococci} ("CoNS") and coagulase-positive \emph{Staphylococci} ("CoPS") instead of their own species, according to Karsten Becker \emph{et al.} [1,2]. Note that this does not include species that were newly named after these publications, like \emph{S. caeli}. This excludes \emph{Staphylococcus aureus} at default, use \code{Becker = "all"} to also categorise \emph{S. aureus} as "CoPS".} -\item{Lancefield}{a logical to indicate whether beta-haemolytic \emph{Streptococci} should be categorised into Lancefield groups instead of their own species, according to Rebecca C. Lancefield [2]. These \emph{Streptococci} will be categorised in their first group, e.g. \emph{Streptococcus dysgalactiae} will be group C, although officially it was also categorised into groups G and L. +\item{Lancefield}{a logical to indicate whether beta-haemolytic \emph{Streptococci} should be categorised into Lancefield groups instead of their own species, according to Rebecca C. Lancefield [3]. These \emph{Streptococci} will be categorised in their first group, e.g. \emph{Streptococcus dysgalactiae} will be group C, although officially it was also categorised into groups G and L. This excludes \emph{Enterococci} at default (who are in group D), use \code{Lancefield = "all"} to also categorise all \emph{Enterococci} as group D.} @@ -41,7 +41,7 @@ clean_mo_history() \item{...}{other parameters passed on to functions} } \value{ -Character (vector) with class \code{"mo"}. Unknown values will return \code{NA}. +Character (vector) with class \code{"mo"} } \description{ Use this function to determine a valid microorganism ID (\code{mo}). Determination is done using intelligent rules and the complete taxonomic kingdoms Bacteria, Chromista, Protozoa, Archaea and most microbial species from the kingdom Fungi (see Source). The input can be almost anything: a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), an abbreviation known in the field (like \code{"MRSA"}), or just a genus. Please see Examples. @@ -60,18 +60,19 @@ A microbial ID from this package (class: \code{mo}) typically looks like these e | | | ----> subspecies, a 3-4 letter acronym | | ----> species, a 3-4 letter acronym | ----> genus, a 5-7 letter acronym, mostly without vowels - ----> taxonomic kingdom: A (Archaea), AN (Animalia), B (Bacteria), C (Chromista), - F (Fungi), P (Protozoa) or PL (Plantae) + ----> taxonomic kingdom: A (Archaea), AN (Animalia), B (Bacteria), + C (Chromista), F (Fungi), P (Protozoa) or + PL (Plantae) } -Values that cannot be coered will be considered 'unknown' and have an MO code \code{UNKNOWN}. +Values that cannot be coered will be considered 'unknown' and will get the MO code \code{UNKNOWN}. Use the \code{\link{mo_property}_*} functions to get properties based on the returned code, see Examples. The algorithm uses data from the Catalogue of Life (see below) and from one other source (see \code{?microorganisms}). \strong{Self-learning algoritm} \cr -The \code{as.mo()} function gains experience from previously determined microbial IDs and learns from it. This drastically improves both speed and reliability. Use \code{clean_mo_history()} to reset the algorithms. Only experience from your current \code{AMR} package version is used. This is done because in the future the taxonomic tree (which is included in this package) may change for any organism and it consequently has to rebuild its knowledge. Usually, any guess after the first try runs 90-95\% faster than the first try. The algorithm saves its previous findings to \code{~/.Rhistory_mo}. +The \code{as.mo()} function gains experience from previously determined microbial IDs and learns from it. This drastically improves both speed and reliability. Use \code{clean_mo_history()} to reset the algorithms. Only experience from your current \code{AMR} package version is used. This is done because in the future the taxonomic tree (which is included in this package) may change for any organism and it consequently has to rebuild its knowledge. Usually, any guess after the first try runs 80-95\% faster than the first try. The algorithm saves its previous findings to \code{~/.Rhistory_mo}. \strong{Intelligent rules} \cr This function uses intelligent rules to help getting fast and logical results. It tries to find matches in this order: @@ -91,7 +92,7 @@ A couple of effects because of these rules: This means that looking up human pathogenic microorganisms takes less time than looking up human non-pathogenic microorganisms. \strong{Uncertain results} \cr -The algorithm can additionally use three different levels of uncertainty to guess valid results. The default is \code{allow_uncertain = TRUE}, which is uqual to uncertainty level 2. Using \code{allow_uncertain = FALSE} will skip all of these additional rules: +The algorithm can additionally use three different levels of uncertainty to guess valid results. The default is \code{allow_uncertain = TRUE}, which is equal to uncertainty level 2. Using \code{allow_uncertain = FALSE} will skip all of these additional rules: \itemize{ \item{(uncertainty level 1): It tries to look for only matching genera} \item{(uncertainty level 1): It tries to look for previously accepted (but now invalid) taxonomic names} @@ -132,9 +133,11 @@ Group 2 probably contains all other microbial pathogens ever found in humans. [1] Becker K \emph{et al.} \strong{Coagulase-Negative Staphylococci}. 2014. Clin Microbiol Rev. 27(4): 870–926. \url{https://dx.doi.org/10.1128/CMR.00109-13} -[2] Lancefield RC \strong{A serological differentiation of human and other groups of hemolytic streptococci}. 1933. J Exp Med. 57(4): 571–95. \url{https://dx.doi.org/10.1084/jem.57.4.571} +[2] Becker K \emph{et al.} \strong{Implications of identifying the recently defined members of the S. aureus complex, S. argenteus and S. schweitzeri: A position paper of members of the ESCMID Study Group for staphylococci and Staphylococcal Diseases (ESGS).}. 2019. Clin Microbiol Infect. 2019 Mar 11. \url{https://doi.org/10.1016/j.cmi.2019.02.028} -[3] Catalogue of Life: Annual Checklist (public online taxonomic database), \url{www.catalogueoflife.org} (check included annual version with \code{\link{catalogue_of_life_version}()}). +[3] Lancefield RC \strong{A serological differentiation of human and other groups of hemolytic streptococci}. 1933. J Exp Med. 57(4): 571–95. \url{https://dx.doi.org/10.1084/jem.57.4.571} + +[4] Catalogue of Life: Annual Checklist (public online taxonomic database), \url{www.catalogueoflife.org} (check included annual version with \code{\link{catalogue_of_life_version}()}). } \section{Catalogue of Life}{ diff --git a/man/catalogue_of_life_version.Rd b/man/catalogue_of_life_version.Rd index 6bdce96f..9decac39 100644 --- a/man/catalogue_of_life_version.Rd +++ b/man/catalogue_of_life_version.Rd @@ -7,13 +7,13 @@ catalogue_of_life_version() } \value{ -a \code{list}, invisibly +a \code{list}, which prints in pretty format } \description{ This function returns information about the included data from the Catalogue of Life. It also shows if the included version is their latest annual release. The Catalogue of Life releases their annual release in March each year. } \details{ -The list item \code{is_latest_annual_release} is based on the system date. +The list item \code{...$catalogue_of_life$is_latest_annual_release} is based on the system date. For DSMZ, see \code{?microorganisms}. } diff --git a/man/count.Rd b/man/count.Rd index 3ab59c1b..adebc5b3 100644 --- a/man/count.Rd +++ b/man/count.Rd @@ -93,7 +93,7 @@ septic_patients \%>\% S = count_S(cipr), n1 = count_all(cipr), # the actual total; sum of all three n2 = n_rsi(cipr), # same - analogous to n_distinct - total = n()) # NOT the amount of tested isolates! + total = n()) # NOT the number of tested isolates! # Count co-resistance between amoxicillin/clav acid and gentamicin, # so we can see that combination therapy does a lot more than mono therapy. diff --git a/man/filter_ab_class.Rd b/man/filter_ab_class.Rd index 78ef7433..19df430c 100644 --- a/man/filter_ab_class.Rd +++ b/man/filter_ab_class.Rd @@ -13,7 +13,7 @@ \alias{filter_glycopeptides} \alias{filter_macrolides} \alias{filter_tetracyclines} -\title{Filter on antibiotic class} +\title{Filter isolates on result in antibiotic class} \usage{ filter_ab_class(tbl, ab_class, result = NULL, scope = "any", ...) @@ -42,7 +42,7 @@ filter_tetracyclines(tbl, result = NULL, scope = "any", ...) \arguments{ \item{tbl}{a data set} -\item{ab_class}{an antimicrobial class, like \code{"carbapenems"}} +\item{ab_class}{an antimicrobial class, like \code{"carbapenems"}. More specifically, this should be a text that can be found in a 4th level ATC group (chemical subgroup) or a 5th level ATC group (chemical substance), please see \href{https://www.whocc.no/atc/structure_and_principles/}{this explanation on the WHOCC website}.} \item{result}{an antibiotic result: S, I or R (or a combination of more of them)} @@ -51,7 +51,7 @@ filter_tetracyclines(tbl, result = NULL, scope = "any", ...) \item{...}{parameters passed on to \code{\link[dplyr]{filter_at}}} } \description{ -Filter on specific antibiotic variables based on their class (ATC groups). +Filter isolates on results in specific antibiotic variables based on their class (ATC groups). This makes it easy to get a list of isolates that were tested for e.g. any aminoglycoside. } \details{ The \code{\link{antibiotics}} data set will be searched for \code{ab_class} in the columns \code{atc_group1} and \code{atc_group2} (case-insensitive). Next, \code{tbl} will be checked for column names with a value in any abbreviations, codes or official names found in the \code{antibiotics} data set. @@ -77,8 +77,14 @@ septic_patients \%>\% filter_aminoglycosides("R", "all") # filter on isolates that show resistance to # any aminoglycoside and any fluoroquinolone septic_patients \%>\% - filter_aminoglycosides("R", "any") \%>\% - filter_fluoroquinolones("R", "any") + filter_aminoglycosides("R") \%>\% + filter_fluoroquinolones("R") + +# filter on isolates that show resistance to +# all aminoglycosides and all fluoroquinolones +septic_patients \%>\% + filter_aminoglycosides("R", "all") \%>\% + filter_fluoroquinolones("R", "all") } \keyword{fillter_class} \keyword{filter} diff --git a/man/mo_property.Rd b/man/mo_property.Rd index dc549af9..86092391 100644 --- a/man/mo_property.Rd +++ b/man/mo_property.Rd @@ -111,9 +111,11 @@ This package contains the complete taxonomic tree of almost all microorganisms ( [1] Becker K \emph{et al.} \strong{Coagulase-Negative Staphylococci}. 2014. Clin Microbiol Rev. 27(4): 870–926. \url{https://dx.doi.org/10.1128/CMR.00109-13} -[2] Lancefield RC \strong{A serological differentiation of human and other groups of hemolytic streptococci}. 1933. J Exp Med. 57(4): 571–95. \url{https://dx.doi.org/10.1084/jem.57.4.571} +[2] Becker K \emph{et al.} \strong{Implications of identifying the recently defined members of the S. aureus complex, S. argenteus and S. schweitzeri: A position paper of members of the ESCMID Study Group for staphylococci and Staphylococcal Diseases (ESGS).}. 2019. Clin Microbiol Infect. 2019 Mar 11. \url{https://doi.org/10.1016/j.cmi.2019.02.028} -[3] Catalogue of Life: Annual Checklist (public online taxonomic database), \url{www.catalogueoflife.org} (check included annual version with \code{\link{catalogue_of_life_version}()}). +[3] Lancefield RC \strong{A serological differentiation of human and other groups of hemolytic streptococci}. 1933. J Exp Med. 57(4): 571–95. \url{https://dx.doi.org/10.1084/jem.57.4.571} + +[4] Catalogue of Life: Annual Checklist (public online taxonomic database), \url{www.catalogueoflife.org} (check included annual version with \code{\link{catalogue_of_life_version}()}). } \section{Read more on our website!}{ diff --git a/man/portion.Rd b/man/portion.Rd index 1fbd5296..ee3f044e 100644 --- a/man/portion.Rd +++ b/man/portion.Rd @@ -36,7 +36,7 @@ portion_df(data, translate_ab = getOption("get_antibiotic_names", \arguments{ \item{...}{one or more vectors (or columns) with antibiotic interpretations. They will be transformed internally with \code{\link{as.rsi}} if needed. Use multiple columns to calculate (the lack of) co-resistance: the probability where one of two drugs have a resistant or susceptible result. See Examples.} -\item{minimum}{the minimal amount of available isolates. Any number lower than \code{minimum} will return \code{NA} with a warning. The default number of \code{30} isolates is advised by the Clinical and Laboratory Standards Institute (CLSI) as best practice, see Source.} +\item{minimum}{the minimum allowed number of available (tested) isolates. Any isolate count lower than \code{minimum} will return \code{NA} with a warning. The default number of \code{30} isolates is advised by the Clinical and Laboratory Standards Institute (CLSI) as best practice, see Source.} \item{as_percent}{a logical to indicate whether the output must be returned as a hundred fold with \% sign (a character). A value of \code{0.123456} will then be returned as \code{"12.3\%"}.} @@ -52,7 +52,7 @@ portion_df(data, translate_ab = getOption("get_antibiotic_names", Double or, when \code{as_percent = TRUE}, a character. } \description{ -These functions can be used to calculate the (co-)resistance of microbial isolates (i.e. percentage S, SI, I, IR or R). All functions support quasiquotation with pipes, can be used in \code{dplyr}s \code{\link[dplyr]{summarise}} and support grouped variables, see \emph{Examples}. +These functions can be used to calculate the (co-)resistance of microbial isolates (i.e. percentage of S, SI, I, IR or R). All functions support quasiquotation with pipes, can be used in \code{dplyr}s \code{\link[dplyr]{summarise}} and support grouped variables, see \emph{Examples}. \code{portion_R} and \code{portion_IR} can be used to calculate resistance, \code{portion_S} and \code{portion_SI} can be used to calculate susceptibility.\cr } @@ -67,14 +67,14 @@ The old \code{\link{rsi}} function is still available for backwards compatibilit \if{html}{ \cr\cr To calculate the probability (\emph{p}) of susceptibility of one antibiotic, we use this formula: - \out{