diff --git a/DESCRIPTION b/DESCRIPTION index 1c069aec..dc718674 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR -Version: 0.5.0.9024 -Date: 2019-03-18 +Version: 0.5.0.9025 +Date: 2019-03-26 Title: Antimicrobial Resistance Analysis Authors@R: c( person( diff --git a/NAMESPACE b/NAMESPACE index ed636cd8..37afc23d 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -23,6 +23,7 @@ S3method(plot,mic) S3method(plot,resistance_predict) S3method(plot,rsi) S3method(print,atc) +S3method(print,catalogue_of_life_version) S3method(print,frequency_tbl) S3method(print,mic) S3method(print,mo) @@ -191,6 +192,7 @@ exportMethods(plot.frequency_tbl) exportMethods(plot.mic) exportMethods(plot.rsi) exportMethods(print.atc) +exportMethods(print.catalogue_of_life_version) exportMethods(print.frequency_tbl) exportMethods(print.mic) exportMethods(print.mo) @@ -221,6 +223,7 @@ importFrom(crayon,red) importFrom(crayon,silver) importFrom(crayon,strip_style) importFrom(crayon,underline) +importFrom(crayon,white) importFrom(crayon,yellow) importFrom(data.table,as.data.table) importFrom(data.table,data.table) @@ -292,3 +295,4 @@ importFrom(stats,sd) importFrom(utils,browseURL) importFrom(utils,browseVignettes) importFrom(utils,installed.packages) +importFrom(utils,menu) diff --git a/R/catalogue_of_life.R b/R/catalogue_of_life.R index 3cd597f0..f15d7c80 100755 --- a/R/catalogue_of_life.R +++ b/R/catalogue_of_life.R @@ -81,10 +81,10 @@ NULL #' #' This function returns information about the included data from the Catalogue of Life. It also shows if the included version is their latest annual release. The Catalogue of Life releases their annual release in March each year. #' @seealso \code{\link{microorganisms}} -#' @details The list item \code{is_latest_annual_release} is based on the system date. +#' @details The list item \code{...$catalogue_of_life$is_latest_annual_release} is based on the system date. #' #' For DSMZ, see \code{?microorganisms}. -#' @return a \code{list}, invisibly +#' @return a \code{list}, which prints in pretty format #' @inheritSection catalogue_of_life Catalogue of Life #' @inheritSection AMR Read more on our website! #' @importFrom crayon bold underline @@ -99,8 +99,8 @@ catalogue_of_life_version <- function() { lst <- list(catalogue_of_life = list(version = gsub("{year}", catalogue_of_life$year, catalogue_of_life$version, fixed = TRUE), url = gsub("{year}", catalogue_of_life$year, catalogue_of_life$url_CoL, fixed = TRUE), - # annual release always somewhere in March, so before April is TRUE, FALSE otherwise - is_latest_annual_release = Sys.Date() < as.Date(paste0(catalogue_of_life$year + 1, "-04-01")), + # annual release always somewhere in May, so before June is TRUE, FALSE otherwise + is_latest_annual_release = Sys.Date() < as.Date(paste0(catalogue_of_life$year + 1, "-06-01")), n = nrow(filter(AMR::microorganisms, source == "CoL"))), deutsche_sammlung_von_mikroorganismen_und_zellkulturen = list(version = "Prokaryotic Nomenclature Up-to-Date from DSMZ", @@ -112,7 +112,16 @@ catalogue_of_life_version <- function() { n_total_species = nrow(AMR::microorganisms), n_total_synonyms = nrow(AMR::microorganisms.old))) - cat(paste0(bold("Included in this package are:\n\n"), + structure(.Data = lst, + class = c("catalogue_of_life_version", "list")) +} + +#' @exportMethod print.catalogue_of_life_version +#' @export +#' @noRd +print.catalogue_of_life_version <- function(x, ...) { + lst <- x + cat(paste0(bold("Included in this AMR package are:\n\n"), underline(lst$catalogue_of_life$version), "\n", " Available at: ", lst$catalogue_of_life$url, "\n", " Number of included species: ", format(lst$catalogue_of_life$n, big.mark = ","), "\n", @@ -121,9 +130,7 @@ catalogue_of_life_version <- function() { lst$deutsche_sammlung_von_mikroorganismen_und_zellkulturen$yearmonth, ")")), "\n", " Available at: ", lst$deutsche_sammlung_von_mikroorganismen_und_zellkulturen$url, "\n", " Number of included species: ", format(lst$deutsche_sammlung_von_mikroorganismen_und_zellkulturen$n, big.mark = ","), "\n\n", - "Total number of species included: ", format(lst$total_included$n_total_species, big.mark = ","), "\n", - "Total number of synonyms included: ", format(lst$total_included$n_total_synonyms, big.mark = ","), "\n\n", + "=> Total number of species included: ", format(lst$total_included$n_total_species, big.mark = ","), "\n", + "=> Total number of synonyms included: ", format(lst$total_included$n_total_synonyms, big.mark = ","), "\n\n", "See for more info ?microorganisms and ?catalogue_of_life.\n")) - - return(base::invisible(lst)) } diff --git a/R/count.R b/R/count.R index 13791582..b1036cdc 100755 --- a/R/count.R +++ b/R/count.R @@ -69,7 +69,7 @@ #' S = count_S(cipr), #' n1 = count_all(cipr), # the actual total; sum of all three #' n2 = n_rsi(cipr), # same - analogous to n_distinct -#' total = n()) # NOT the amount of tested isolates! +#' total = n()) # NOT the number of tested isolates! #' #' # Count co-resistance between amoxicillin/clav acid and gentamicin, #' # so we can see that combination therapy does a lot more than mono therapy. diff --git a/R/filter_ab_class.R b/R/filter_ab_class.R index 9884ae42..5d4c71a9 100644 --- a/R/filter_ab_class.R +++ b/R/filter_ab_class.R @@ -19,11 +19,11 @@ # Visit our website for more info: https://msberends.gitab.io/AMR. # # ==================================================================== # -#' Filter on antibiotic class +#' Filter isolates on result in antibiotic class #' -#' Filter on specific antibiotic variables based on their class (ATC groups). +#' Filter isolates on results in specific antibiotic variables based on their class (ATC groups). This makes it easy to get a list of isolates that were tested for e.g. any aminoglycoside. #' @param tbl a data set -#' @param ab_class an antimicrobial class, like \code{"carbapenems"} +#' @param ab_class an antimicrobial class, like \code{"carbapenems"}. More specifically, this should be a text that can be found in a 4th level ATC group (chemical subgroup) or a 5th level ATC group (chemical substance), please see \href{https://www.whocc.no/atc/structure_and_principles/}{this explanation on the WHOCC website}. #' @param result an antibiotic result: S, I or R (or a combination of more of them) #' @param scope the scope to check which variables to check, can be \code{"any"} (default) or \code{"all"} #' @param ... parameters passed on to \code{\link[dplyr]{filter_at}} @@ -54,8 +54,14 @@ #' # filter on isolates that show resistance to #' # any aminoglycoside and any fluoroquinolone #' septic_patients %>% -#' filter_aminoglycosides("R", "any") %>% -#' filter_fluoroquinolones("R", "any") +#' filter_aminoglycosides("R") %>% +#' filter_fluoroquinolones("R") +#' +#' # filter on isolates that show resistance to +#' # all aminoglycosides and all fluoroquinolones +#' septic_patients %>% +#' filter_aminoglycosides("R", "all") %>% +#' filter_fluoroquinolones("R", "all") filter_ab_class <- function(tbl, ab_class, result = NULL, @@ -65,6 +71,8 @@ filter_ab_class <- function(tbl, if (is.null(result)) { result <- c("S", "I", "R") } + # make result = "IR" work too: + result <- unlist(strsplit(result, "")) if (!all(result %in% c("S", "I", "R"))) { stop("`result` must be one or more of: S, I, R", call. = FALSE) @@ -88,12 +96,20 @@ filter_ab_class <- function(tbl, } else { scope_txt <- " and " scope_fn <- all_vars + if (length(vars_df) > 1) { + operator <- gsub("is", "are", operator) + } } - message(blue(paste0("Filtering on ", atc_groups, ": ", scope, " of ", + if (length(vars_df) > 1) { + scope <- paste(scope, "of ") + } else { + scope <- "" + } + message(blue(paste0("Filtering on ", atc_groups, ": ", scope, paste(bold(paste0("`", vars_df, "`")), collapse = scope_txt), operator, toString(result)))) tbl %>% - filter_at(.vars = vars(vars_df), - .vars_predicate = scope_fn(. %in% result), + filter_at(vars(vars_df), + scope_fn(. %in% result), ...) } else { warning(paste0("no antibiotics of class ", atc_groups, " found, leaving data unchanged"), call. = FALSE) @@ -244,7 +260,7 @@ filter_tetracyclines <- function(tbl, ...) } -#' @importFrom dplyr %>% filter_at any_vars select +#' @importFrom dplyr %>% filter_at vars any_vars select ab_class_vars <- function(ab_class) { ab_vars <- AMR::antibiotics %>% filter_at(vars(c("atc_group1", "atc_group2")), any_vars(. %like% ab_class)) %>% @@ -260,10 +276,23 @@ ab_class_vars <- function(ab_class) { #' @importFrom dplyr %>% filter pull ab_class_atcgroups <- function(ab_class) { - AMR::antibiotics %>% - filter(atc %in% ab_class_vars(ab_class)) %>% - pull("atc_group2") %>% - unique() %>% - tolower() %>% - paste(collapse = "/") + ifelse(ab_class %in% c("aminoglycoside", + "carbapenem", + "cephalosporin", + "first-generation cephalosporin", + "second-generation cephalosporin", + "third-generation cephalosporin", + "fourth-generation cephalosporin", + "fluoroquinolone", + "glycopeptide", + "macrolide", + "tetracycline"), + paste0(ab_class, "s"), + AMR::antibiotics %>% + filter(atc %in% ab_class_vars(ab_class)) %>% + pull("atc_group2") %>% + unique() %>% + tolower() %>% + paste(collapse = "/") + ) } diff --git a/R/freq.R b/R/freq.R index d498932b..cdb116eb 100755 --- a/R/freq.R +++ b/R/freq.R @@ -417,9 +417,9 @@ frequency_tbl <- function(x, header_list$outliers_unique <- n_distinct(boxplot.stats(x)$out) } - if (NROW(x) > 0 & any(class(x) == "rsi")) { - header_list$count_S <- sum(x == "S", na.rm = TRUE) - header_list$count_IR <- sum(x %in% c("I", "R"), na.rm = TRUE) + if (any(class(x) == "rsi")) { + header_list$count_S <- max(0, sum(x == "S", na.rm = TRUE), na.rm = TRUE) + header_list$count_IR <- max(0, sum(x %in% c("I", "R"), na.rm = TRUE), na.rm = TRUE) } formatdates <- "%e %B %Y" # = d mmmm yyyy @@ -564,18 +564,14 @@ format_header <- function(x, markdown = FALSE, decimal.mark = ".", big.mark = ", # FORMATTING # rsi if (has_length == TRUE & any(x_class == "rsi")) { - if (header$count_S < header$count_IR) { - ratio <- paste0(green(1), ":", red(format(header$count_IR / header$count_S, - digits = 1, nsmall = 1, decimal.mark = decimal.mark, big.mark = big.mark))) - } else { - ratio <- paste0(green(format(header$count_S / header$count_IR, - digits = 1, nsmall = 1, decimal.mark = decimal.mark, big.mark = big.mark)), - ":", red(1)) + ab <- tryCatch(atc_name(attributes(x)$opt$vars), error = function(e) NA) + if (!is.na(ab)) { + header$drug <- ab[1L] } - header$`%IR` <- paste((header$count_IR / header$length) %>% percent(force_zero = TRUE, round = digits, decimal.mark = decimal.mark), - paste0("(ratio ", ratio, ")")) - header <- header[!names(header) %in% c("count_S", "count_IR")] + header$`%IR` <- percent(header$count_IR / (header$count_S + header$count_IR), + force_zero = TRUE, round = digits, decimal.mark = decimal.mark) } + header <- header[!names(header) %in% c("count_S", "count_IR")] # dates if (!is.null(header$date_format)) { if (header$date_format == "%H:%M:%S") { diff --git a/R/ggplot_rsi.R b/R/ggplot_rsi.R index bb0961b2..9a2d8730 100755 --- a/R/ggplot_rsi.R +++ b/R/ggplot_rsi.R @@ -164,9 +164,7 @@ ggplot_rsi <- function(data, datalabels.colour = "grey15", ...) { - if (!"ggplot2" %in% rownames(installed.packages())) { - stop('this function requires the ggplot2 package.', call. = FALSE) - } + stopifnot_installed_package("ggplot2") fun_name <- deparse(substitute(fun)) if (!fun_name %in% c("portion_df", "count_df")) { @@ -235,6 +233,8 @@ geom_rsi <- function(position = NULL, fun = count_df, ...) { + stopifnot_installed_package("ggplot2") + fun_name <- deparse(substitute(fun)) if (!fun_name %in% c("portion_df", "count_df", "fun")) { stop("`fun` must be portion_df or count_df") @@ -279,6 +279,8 @@ geom_rsi <- function(position = NULL, #' @export facet_rsi <- function(facet = c("Interpretation", "Antibiotic"), nrow = NULL) { + stopifnot_installed_package("ggplot2") + facet <- facet[1] # we work with aes_string later on @@ -302,6 +304,8 @@ facet_rsi <- function(facet = c("Interpretation", "Antibiotic"), nrow = NULL) { #' @rdname ggplot_rsi #' @export scale_y_percent <- function(breaks = seq(0, 1, 0.1), limits = NULL) { + stopifnot_installed_package("ggplot2") + if (all(breaks[breaks != 0] > 1)) { breaks <- breaks / 100 } @@ -313,6 +317,7 @@ scale_y_percent <- function(breaks = seq(0, 1, 0.1), limits = NULL) { #' @rdname ggplot_rsi #' @export scale_rsi_colours <- function() { + stopifnot_installed_package("ggplot2") #ggplot2::scale_fill_brewer(palette = "RdYlGn") ggplot2::scale_fill_manual(values = c("#b22222", "#ae9c20", "#7cfc00")) } @@ -320,6 +325,7 @@ scale_rsi_colours <- function() { #' @rdname ggplot_rsi #' @export theme_rsi <- function() { + stopifnot_installed_package("ggplot2") ggplot2::theme_minimal() + ggplot2::theme(panel.grid.major.x = ggplot2::element_blank(), panel.grid.minor = ggplot2::element_blank(), @@ -332,6 +338,7 @@ labels_rsi_count <- function(position = NULL, x = "Antibiotic", datalabels.size = 3, datalabels.colour = "grey15") { + stopifnot_installed_package("ggplot2") if (is.null(position)) { position <- "fill" } @@ -357,3 +364,4 @@ getlbls <- function(data) { " (n=", Value, ")")) %>% mutate(lbl = ifelse(lbl == "0.0% (n=0)", "", lbl)) } + diff --git a/R/globals.R b/R/globals.R index 7c480b9b..51acb22b 100755 --- a/R/globals.R +++ b/R/globals.R @@ -92,7 +92,7 @@ globalVariables(c(".", "Sex", "shortname", "species", - "superprevalent", + "species_id", "trade_name", "transmute", "tsn", diff --git a/R/misc.R b/R/misc.R index b153c951..2217b850 100755 --- a/R/misc.R +++ b/R/misc.R @@ -194,3 +194,9 @@ search_type_in_df <- function(tbl, type) { } found } + +stopifnot_installed_package <- function(package) { + if (!package %in% base::rownames(utils::installed.packages())) { + stop("this function requires the ", package, " package.", call. = FALSE) + } +} diff --git a/R/mo.R b/R/mo.R index fbadb762..b65baff9 100755 --- a/R/mo.R +++ b/R/mo.R @@ -23,10 +23,10 @@ #' #' Use this function to determine a valid microorganism ID (\code{mo}). Determination is done using intelligent rules and the complete taxonomic kingdoms Bacteria, Chromista, Protozoa, Archaea and most microbial species from the kingdom Fungi (see Source). The input can be almost anything: a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), an abbreviation known in the field (like \code{"MRSA"}), or just a genus. Please see Examples. #' @param x a character vector or a \code{data.frame} with one or two columns -#' @param Becker a logical to indicate whether \emph{Staphylococci} should be categorised into Coagulase Negative \emph{Staphylococci} ("CoNS") and Coagulase Positive \emph{Staphylococci} ("CoPS") instead of their own species, according to Karsten Becker \emph{et al.} [1]. Note that this does not include species that were newly named after this publication. +#' @param Becker a logical to indicate whether \emph{Staphylococci} should be categorised into coagulase-negative \emph{Staphylococci} ("CoNS") and coagulase-positive \emph{Staphylococci} ("CoPS") instead of their own species, according to Karsten Becker \emph{et al.} [1,2]. Note that this does not include species that were newly named after these publications, like \emph{S. caeli}. #' #' This excludes \emph{Staphylococcus aureus} at default, use \code{Becker = "all"} to also categorise \emph{S. aureus} as "CoPS". -#' @param Lancefield a logical to indicate whether beta-haemolytic \emph{Streptococci} should be categorised into Lancefield groups instead of their own species, according to Rebecca C. Lancefield [2]. These \emph{Streptococci} will be categorised in their first group, e.g. \emph{Streptococcus dysgalactiae} will be group C, although officially it was also categorised into groups G and L. +#' @param Lancefield a logical to indicate whether beta-haemolytic \emph{Streptococci} should be categorised into Lancefield groups instead of their own species, according to Rebecca C. Lancefield [3]. These \emph{Streptococci} will be categorised in their first group, e.g. \emph{Streptococcus dysgalactiae} will be group C, although officially it was also categorised into groups G and L. #' #' This excludes \emph{Enterococci} at default (who are in group D), use \code{Lancefield = "all"} to also categorise all \emph{Enterococci} as group D. #' @param allow_uncertain a logical (\code{TRUE} or \code{FALSE}) or a value between 0 and 3 to indicate whether the input should be checked for less possible results, see Details @@ -49,18 +49,19 @@ #' | | | ----> subspecies, a 3-4 letter acronym #' | | ----> species, a 3-4 letter acronym #' | ----> genus, a 5-7 letter acronym, mostly without vowels -#' ----> taxonomic kingdom: A (Archaea), AN (Animalia), B (Bacteria), C (Chromista), -#' F (Fungi), P (Protozoa) or PL (Plantae) +#' ----> taxonomic kingdom: A (Archaea), AN (Animalia), B (Bacteria), +#' C (Chromista), F (Fungi), P (Protozoa) or +#' PL (Plantae) #' } #' -#' Values that cannot be coered will be considered 'unknown' and have an MO code \code{UNKNOWN}. +#' Values that cannot be coered will be considered 'unknown' and will get the MO code \code{UNKNOWN}. #' #' Use the \code{\link{mo_property}_*} functions to get properties based on the returned code, see Examples. #' #' The algorithm uses data from the Catalogue of Life (see below) and from one other source (see \code{?microorganisms}). #' #' \strong{Self-learning algoritm} \cr -#' The \code{as.mo()} function gains experience from previously determined microbial IDs and learns from it. This drastically improves both speed and reliability. Use \code{clean_mo_history()} to reset the algorithms. Only experience from your current \code{AMR} package version is used. This is done because in the future the taxonomic tree (which is included in this package) may change for any organism and it consequently has to rebuild its knowledge. Usually, any guess after the first try runs 90-95\% faster than the first try. The algorithm saves its previous findings to \code{~/.Rhistory_mo}. +#' The \code{as.mo()} function gains experience from previously determined microbial IDs and learns from it. This drastically improves both speed and reliability. Use \code{clean_mo_history()} to reset the algorithms. Only experience from your current \code{AMR} package version is used. This is done because in the future the taxonomic tree (which is included in this package) may change for any organism and it consequently has to rebuild its knowledge. Usually, any guess after the first try runs 80-95\% faster than the first try. The algorithm saves its previous findings to \code{~/.Rhistory_mo}. #' #' \strong{Intelligent rules} \cr #' This function uses intelligent rules to help getting fast and logical results. It tries to find matches in this order: @@ -80,7 +81,7 @@ #' This means that looking up human pathogenic microorganisms takes less time than looking up human non-pathogenic microorganisms. #' #' \strong{Uncertain results} \cr -#' The algorithm can additionally use three different levels of uncertainty to guess valid results. The default is \code{allow_uncertain = TRUE}, which is uqual to uncertainty level 2. Using \code{allow_uncertain = FALSE} will skip all of these additional rules: +#' The algorithm can additionally use three different levels of uncertainty to guess valid results. The default is \code{allow_uncertain = TRUE}, which is equal to uncertainty level 2. Using \code{allow_uncertain = FALSE} will skip all of these additional rules: #' \itemize{ #' \item{(uncertainty level 1): It tries to look for only matching genera} #' \item{(uncertainty level 1): It tries to look for previously accepted (but now invalid) taxonomic names} @@ -121,11 +122,13 @@ #' @section Source: #' [1] Becker K \emph{et al.} \strong{Coagulase-Negative Staphylococci}. 2014. Clin Microbiol Rev. 27(4): 870–926. \url{https://dx.doi.org/10.1128/CMR.00109-13} #' -#' [2] Lancefield RC \strong{A serological differentiation of human and other groups of hemolytic streptococci}. 1933. J Exp Med. 57(4): 571–95. \url{https://dx.doi.org/10.1084/jem.57.4.571} +#' [2] Becker K \emph{et al.} \strong{Implications of identifying the recently defined members of the S. aureus complex, S. argenteus and S. schweitzeri: A position paper of members of the ESCMID Study Group for staphylococci and Staphylococcal Diseases (ESGS).}. 2019. Clin Microbiol Infect. 2019 Mar 11. \url{https://doi.org/10.1016/j.cmi.2019.02.028} #' -#' [3] Catalogue of Life: Annual Checklist (public online taxonomic database), \url{www.catalogueoflife.org} (check included annual version with \code{\link{catalogue_of_life_version}()}). +#' [3] Lancefield RC \strong{A serological differentiation of human and other groups of hemolytic streptococci}. 1933. J Exp Med. 57(4): 571–95. \url{https://dx.doi.org/10.1084/jem.57.4.571} +#' +#' [4] Catalogue of Life: Annual Checklist (public online taxonomic database), \url{www.catalogueoflife.org} (check included annual version with \code{\link{catalogue_of_life_version}()}). #' @export -#' @return Character (vector) with class \code{"mo"}. Unknown values will return \code{NA}. +#' @return Character (vector) with class \code{"mo"} #' @seealso \code{\link{microorganisms}} for the \code{data.frame} that is being used to determine ID's. \cr #' The \code{\link{mo_property}} functions (like \code{\link{mo_genus}}, \code{\link{mo_gramstain}}) to get properties based on the returned code. #' @inheritSection AMR Read more on our website! @@ -188,7 +191,8 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE, # check onLoad() in R/zzz.R: data tables are created there. } - mo_hist <- get_mo_history(x, force = isTRUE(list(...)$force_mo_history)) + uncertainty_level <- translate_allow_uncertain(allow_uncertain) + mo_hist <- get_mo_history(x, uncertainty_level, force = isTRUE(list(...)$force_mo_history)) if (mo_source_isvalid(reference_df) & isFALSE(Becker) @@ -247,13 +251,13 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE, "mo"][[1]] } # save them to history - set_mo_history(x, y, force = isTRUE(list(...)$force_mo_history)) + set_mo_history(x, y, 0, force = isTRUE(list(...)$force_mo_history)) } else { # will be checked for mo class in validation and uses exec_as.mo internally if necessary y <- mo_validate(x = x, property = "mo", Becker = Becker, Lancefield = Lancefield, - allow_uncertain = allow_uncertain, reference_df = reference_df, + allow_uncertain = uncertainty_level, reference_df = reference_df, force_mo_history = isTRUE(list(...)$force_mo_history)) } @@ -320,15 +324,8 @@ exec_as.mo <- function(x, fullname = character(0), mo = character(0)) failures <- character(0) - if (isTRUE(allow_uncertain)) { - # default to uncertainty level 2 - allow_uncertain <- 2 - } else { - allow_uncertain <- as.integer(allow_uncertain) - if (!allow_uncertain %in% c(0:3)) { - stop("`allow_uncertain` must be a number between 0 (none) and 3 (all), or TRUE (= 2) or FALSE (= 0).", call. = FALSE) - } - } + uncertainty_level <- translate_allow_uncertain(allow_uncertain) + x_input <- x # already strip leading and trailing spaces x <- trimws(x, which = "both") @@ -341,7 +338,6 @@ exec_as.mo <- function(x, & !identical(x, "") & !identical(x, "xxx") & !identical(x, "con")] - x_input_backup <- x # conversion of old MO codes from v0.5.0 (ITIS) to later versions (Catalogue of Life) if (any(x %like% "^[BFP]_[A-Z]{3,7}") & !all(x %in% microorganisms$mo)) { @@ -406,9 +402,13 @@ exec_as.mo <- function(x, } x <- y - } else if (all(x %in% read_mo_history(force = force_mo_history)$x)) { + } else if (all(x %in% read_mo_history(uncertainty_level, + force = force_mo_history)$x)) { # previously found code - x <- microorganismsDT[data.table(mo = get_mo_history(x, force = force_mo_history)), on = "mo", ..property][[1]] + x <- microorganismsDT[data.table(mo = get_mo_history(x, + uncertainty_level, + force = force_mo_history)), + on = "mo", ..property][[1]] } else if (all(tolower(x) %in% microorganismsDT$fullname_lower)) { # we need special treatment for very prevalent full names, they are likely! @@ -430,7 +430,7 @@ exec_as.mo <- function(x, # commonly used MO codes y <- as.data.table(AMR::microorganisms.codes)[data.table(code = toupper(x)), on = "code", ] # save them to history - set_mo_history(x, y$mo, force = force_mo_history) + set_mo_history(x, y$mo, 0, force = force_mo_history) x <- microorganismsDT[data.table(mo = y[["mo"]]), on = "mo", ..property][[1]] @@ -502,11 +502,16 @@ exec_as.mo <- function(x, progress$tick()$print() - found <- microorganismsDT[mo == get_mo_history(x_backup[i], force = force_mo_history), ..property][[1]] - # previously found result - if (length(found) > 0) { - x[i] <- found[1L] - next + if (initial_search == TRUE) { + found <- microorganismsDT[mo == get_mo_history(x_backup[i], + uncertainty_level, + force = force_mo_history), + ..property][[1]] + # previously found result + if (length(found) > 0) { + x[i] <- found[1L] + next + } } found <- microorganismsDT[mo == toupper(x_backup[i]), ..property][[1]] @@ -521,7 +526,7 @@ exec_as.mo <- function(x, if (length(found) > 0) { x[i] <- found[1L] if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } next } @@ -535,7 +540,7 @@ exec_as.mo <- function(x, # empty and nonsense values, ignore without warning x[i] <- microorganismsDT[mo == "UNKNOWN", ..property][[1]] if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } next } @@ -552,7 +557,7 @@ exec_as.mo <- function(x, if (length(found) > 0) { x[i] <- found[1L] if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } next } @@ -562,7 +567,7 @@ exec_as.mo <- function(x, x[i] <- microorganismsDT[mo == "UNKNOWN", ..property][[1]] if (initial_search == TRUE) { failures <- c(failures, x_backup[i]) - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } next } @@ -572,7 +577,7 @@ exec_as.mo <- function(x, x[i] <- microorganismsDT[mo == "UNKNOWN", ..property][[1]] if (initial_search == TRUE) { failures <- c(failures, x_backup[i]) - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } next } @@ -582,14 +587,14 @@ exec_as.mo <- function(x, if (toupper(x_backup_without_spp[i]) %in% c('MRSA', 'MSSA', 'VISA', 'VRSA')) { x[i] <- microorganismsDT[mo == 'B_STPHY_AUR', ..property][[1]][1L] if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } next } if (toupper(x_backup_without_spp[i]) %in% c('MRSE', 'MSSE')) { x[i] <- microorganismsDT[mo == 'B_STPHY_EPI', ..property][[1]][1L] if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } next } @@ -597,14 +602,14 @@ exec_as.mo <- function(x, | x_backup_without_spp[i] %like% '(enterococci|enterokok|enterococo)[a-z]*?$') { x[i] <- microorganismsDT[mo == 'B_ENTRC', ..property][[1]][1L] if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } next } if (toupper(x_backup_without_spp[i]) %in% c("EHEC", "EPEC", "EIEC", "STEC", "ATEC")) { x[i] <- microorganismsDT[mo == 'B_ESCHR_COL', ..property][[1]][1L] if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } next } @@ -612,7 +617,7 @@ exec_as.mo <- function(x, # multi resistant P. aeruginosa x[i] <- microorganismsDT[mo == 'B_PSDMN_AER', ..property][[1]][1L] if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } next } @@ -621,7 +626,7 @@ exec_as.mo <- function(x, # co-trim resistant S. maltophilia x[i] <- microorganismsDT[mo == 'B_STNTR_MAL', ..property][[1]][1L] if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } next } @@ -629,7 +634,7 @@ exec_as.mo <- function(x, # peni I, peni R, vanco I, vanco R: S. pneumoniae x[i] <- microorganismsDT[mo == 'B_STRPT_PNE', ..property][[1]][1L] if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } next } @@ -637,7 +642,7 @@ exec_as.mo <- function(x, # Streptococci, like GBS = Group B Streptococci (B_STRPT_GRB) x[i] <- microorganismsDT[mo == gsub("G([ABCDFGHK])S", "B_STRPT_GR\\1", x_backup_without_spp[i], ignore.case = TRUE), ..property][[1]][1L] if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } next } @@ -645,7 +650,7 @@ exec_as.mo <- function(x, # Streptococci in different languages, like "estreptococos grupo B" x[i] <- microorganismsDT[mo == gsub(".*(streptococ|streptokok|estreptococ).* ([ABCDFGHK])$", "B_STRPT_GR\\2", x_backup_without_spp[i], ignore.case = TRUE), ..property][[1]][1L] if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } next } @@ -653,7 +658,7 @@ exec_as.mo <- function(x, # Streptococci in different languages, like "Group A Streptococci" x[i] <- microorganismsDT[mo == gsub(".*group ([ABCDFGHK]) (streptococ|streptokok|estreptococ).*", "B_STRPT_GR\\1", x_backup_without_spp[i], ignore.case = TRUE), ..property][[1]][1L] if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } next } @@ -664,7 +669,7 @@ exec_as.mo <- function(x, # coerce S. coagulase negative x[i] <- microorganismsDT[mo == 'B_STPHY_CNS', ..property][[1]][1L] if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } next } @@ -674,7 +679,7 @@ exec_as.mo <- function(x, # coerce S. coagulase positive x[i] <- microorganismsDT[mo == 'B_STPHY_CPS', ..property][[1]][1L] if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } next } @@ -684,7 +689,7 @@ exec_as.mo <- function(x, # coerce Gram negatives x[i] <- microorganismsDT[mo == 'B_GRAMN', ..property][[1]][1L] if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } next } @@ -694,7 +699,7 @@ exec_as.mo <- function(x, # coerce Gram positives x[i] <- microorganismsDT[mo == 'B_GRAMP', ..property][[1]][1L] if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } next } @@ -703,7 +708,7 @@ exec_as.mo <- function(x, # Salmonella Group A to Z, just return S. species for now x[i] <- microorganismsDT[mo == 'B_SLMNL', ..property][[1]][1L] if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } options(mo_renamed = c(getOption("mo_renamed"), magenta(paste0("Note: ", @@ -715,7 +720,7 @@ exec_as.mo <- function(x, # Salmonella with capital letter species like "Salmonella Goettingen" - they're all S. enterica x[i] <- microorganismsDT[mo == 'B_SLMNL_ENT', ..property][[1]][1L] if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } options(mo_renamed = c(getOption("mo_renamed"), magenta(paste0("Note: ", @@ -735,7 +740,7 @@ exec_as.mo <- function(x, if (length(found) > 0) { x[i] <- found[1L] if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } next } @@ -744,7 +749,7 @@ exec_as.mo <- function(x, if (length(found) > 0) { x[i] <- found[1L] if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } next } @@ -759,7 +764,7 @@ exec_as.mo <- function(x, if (length(mo_found) > 0) { x[i] <- microorganismsDT[mo == mo_found, ..property][[1]][1L] if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } next } @@ -782,7 +787,7 @@ exec_as.mo <- function(x, x[i] <- microorganismsDT[mo == "UNKNOWN", ..property][[1]] if (initial_search == TRUE) { failures <- c(failures, x_backup[i]) - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } next } @@ -869,7 +874,7 @@ exec_as.mo <- function(x, g.x_backup_without_spp = x_backup_without_spp[i]) if (!empty_result(x[i])) { if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } next } @@ -884,7 +889,7 @@ exec_as.mo <- function(x, g.x_backup_without_spp = x_backup_without_spp[i]) if (!empty_result(x[i])) { if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } next } @@ -899,7 +904,7 @@ exec_as.mo <- function(x, g.x_backup_without_spp = x_backup_without_spp[i]) if (!empty_result(x[i])) { if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } next } @@ -925,7 +930,7 @@ exec_as.mo <- function(x, ref_new = microorganismsDT[col_id == found[1, col_id_new], ref], mo = microorganismsDT[col_id == found[1, col_id_new], mo]) if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } next } @@ -938,12 +943,12 @@ exec_as.mo <- function(x, f.x_withspaces_end_only, g.x_backup_without_spp) { - if (allow_uncertain == 0) { + if (uncertainty_level == 0) { # do not allow uncertainties return(NA_character_) } - if (allow_uncertain >= 1) { + if (uncertainty_level >= 1) { # (1) look again for old taxonomic names, now for G. species ---- found <- microorganisms.oldDT[fullname %like% c.x_withspaces_start_end | fullname %like% d.x_withspaces_start_only] @@ -966,11 +971,14 @@ exec_as.mo <- function(x, input = a.x_backup, fullname = found[1, fullname], mo = paste("CoL", found[1, col_id]))) + if (initial_search == TRUE) { + set_mo_history(a.x_backup, get_mo_code(x, property), 1, force = force_mo_history) + } return(x) } } - if (allow_uncertain >= 2) { + if (uncertainty_level >= 2) { # (3) look for genus only, part of name ---- if (nchar(g.x_backup_without_spp) > 4 & !b.x_trimmed %like% " ") { if (!grepl("^[A-Z][a-z]+", b.x_trimmed, ignore.case = FALSE)) { @@ -983,6 +991,9 @@ exec_as.mo <- function(x, input = a.x_backup, fullname = microorganismsDT[mo == found[1L], fullname][[1]], mo = found[1L])) + if (initial_search == TRUE) { + set_mo_history(a.x_backup, get_mo_code(x, property), 2, force = force_mo_history) + } return(x) } } @@ -1000,6 +1011,9 @@ exec_as.mo <- function(x, input = a.x_backup, fullname = microorganismsDT[mo == found_result[1L], fullname][[1]], mo = found_result[1L])) + if (initial_search == TRUE) { + set_mo_history(a.x_backup, get_mo_code(found[1L], property), 2, force = force_mo_history) + } return(found[1L]) } @@ -1018,6 +1032,33 @@ exec_as.mo <- function(x, input = a.x_backup, fullname = microorganismsDT[mo == found_result[1L], fullname][[1]], mo = found_result[1L])) + if (initial_search == TRUE) { + set_mo_history(a.x_backup, get_mo_code(found[1L], property), 2, force = force_mo_history) + } + return(found[1L]) + } + } + } + } + # (6) try to strip off one element from start and check the remains (only allow 2-part name outcome) ---- + x_strip <- a.x_backup %>% strsplit(" ") %>% unlist() + if (length(x_strip) > 1 & nchar(g.x_backup_without_spp) >= 6) { + for (i in 2:(length(x_strip))) { + x_strip_collapsed <- paste(x_strip[i:length(x_strip)], collapse = " ") + found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, initial_search = FALSE, allow_uncertain = FALSE))) + if (!empty_result(found)) { + found_result <- found + found <- microorganismsDT[mo == found_result[1L], ..property][[1]] + # uncertainty level 2 only if the fullname contains a space (otherwise it will be found with lvl 3) + if (microorganismsDT[mo == found_result[1L], fullname][[1]] %like% " ") { + uncertainties <<- rbind(uncertainties, + data.frame(uncertainty = 2, + input = a.x_backup, + fullname = microorganismsDT[mo == found_result[1L], fullname][[1]], + mo = found_result[1L])) + if (initial_search == TRUE) { + set_mo_history(a.x_backup, get_mo_code(found[1L], property), 2, force = force_mo_history) + } return(found[1L]) } } @@ -1025,8 +1066,8 @@ exec_as.mo <- function(x, } } - if (allow_uncertain >= 3) { - # (6) try to strip off one element from start and check the remains ---- + if (uncertainty_level >= 3) { + # (7) try to strip off one element from start and check the remains ---- x_strip <- a.x_backup %>% strsplit(" ") %>% unlist() if (length(x_strip) > 1 & nchar(g.x_backup_without_spp) >= 6) { for (i in 2:(length(x_strip))) { @@ -1040,12 +1081,15 @@ exec_as.mo <- function(x, input = a.x_backup, fullname = microorganismsDT[mo == found_result[1L], fullname][[1]], mo = found_result[1L])) + if (initial_search == TRUE) { + set_mo_history(a.x_backup, get_mo_code(found[1L], property), 3, force = force_mo_history) + } return(found[1L]) } } } - # (7) part of a name (very unlikely match) ---- + # (8) part of a name (very unlikely match) ---- found <- microorganismsDT[fullname %like% f.x_withspaces_end_only] if (nrow(found) > 0) { found_result <- found[["mo"]] @@ -1056,6 +1100,9 @@ exec_as.mo <- function(x, input = a.x_backup, fullname = microorganismsDT[mo == found_result[1L], fullname][[1]], mo = found_result[1L])) + if (initial_search == TRUE) { + set_mo_history(a.x_backup, get_mo_code(found[1L], property), 3, force = force_mo_history) + } return(found[1L]) } } @@ -1071,7 +1118,7 @@ exec_as.mo <- function(x, x_withspaces_end_only[i], x_backup_without_spp[i]) if (!empty_result(x[i])) { - # no set_mo_history here; these are uncertain + # no set_mo_history: is already set in uncertain_fn() next } @@ -1079,7 +1126,7 @@ exec_as.mo <- function(x, x[i] <- microorganismsDT[mo == "UNKNOWN", ..property][[1]] if (initial_search == TRUE) { failures <- c(failures, x_backup[i]) - set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history) + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history) } } } @@ -1127,8 +1174,8 @@ exec_as.mo <- function(x, MOs_staph <- microorganismsDT[genus == "Staphylococcus"] setkey(MOs_staph, species) CoNS <- MOs_staph[species %in% c("arlettae", "auricularis", "capitis", - "caprae", "carnosus", "cohnii", "condimenti", - "devriesei", "epidermidis", "equorum", + "caprae", "carnosus", "chromogenes", "cohnii", "condimenti", + "devriesei", "epidermidis", "equorum", "felis", "fleurettii", "gallinarum", "haemolyticus", "hominis", "jettensis", "kloosii", "lentus", "lugdunensis", "massiliensis", "microti", @@ -1136,16 +1183,31 @@ exec_as.mo <- function(x, "pettenkoferi", "piscifermentans", "rostri", "saccharolyticus", "saprophyticus", "sciuri", "stepanovicii", "simulans", "succinus", - "vitulinus", "warneri", "xylosus"), ..property][[1]] - CoPS <- MOs_staph[species %in% c("simiae", "agnetis", "chromogenes", - "delphini", "felis", "lutrae", + "vitulinus", "warneri", "xylosus") + | (species == "schleiferi" & subspecies %in% c("schleiferi", "")), ..property][[1]] + CoPS <- MOs_staph[species %in% c("simiae", "agnetis", + "delphini", "lutrae", "hyicus", "intermedius", "pseudintermedius", "pseudointermedius", - "schleiferi"), ..property][[1]] + "schweitzeri", "argenteus") + | (species == "schleiferi" & subspecies == "coagulans"), ..property][[1]] + + # warn when species found that are not in Becker (2014, PMID 25278577) and Becker (2019, PMID 30872103) + post_Becker <- c("argensis", "caeli", "cornubiensis", "edaphicus") + if (any(x %in% MOs_staph[species %in% post_Becker, ..property][[1]])) { + + warning("Becker ", italic("et al."), " (2014) does not contain species named after their publication: ", + italic(paste("S.", + sort(mo_species(unique(x[x %in% MOs_staph[species %in% post_Becker, ..property][[1]]]))), + collapse = ", ")), + call. = FALSE, + immediate. = TRUE) + } + x[x %in% CoNS] <- microorganismsDT[mo == 'B_STPHY_CNS', ..property][[1]][1L] x[x %in% CoPS] <- microorganismsDT[mo == 'B_STPHY_CPS', ..property][[1]][1L] if (Becker == "all") { - x[x == microorganismsDT[mo == 'B_STPHY_AUR', ..property][[1]][1L]] <- microorganismsDT[mo == 'B_STPHY_CPS', ..property][[1]][1L] + x[x %in% microorganismsDT[mo %like% '^B_STPHY_AUR', ..property][[1]]] <- microorganismsDT[mo == 'B_STPHY_CPS', ..property][[1]][1L] } } @@ -1305,7 +1367,7 @@ mo_uncertainties <- function() { } #' @exportMethod print.mo_uncertainties -#' @importFrom crayon green yellow red bgGreen bgYellow bgRed +#' @importFrom crayon green yellow red white bgGreen bgYellow bgRed #' @export #' @noRd print.mo_uncertainties <- function(x, ...) { @@ -1321,16 +1383,16 @@ print.mo_uncertainties <- function(x, ...) { for (i in 1:nrow(x)) { if (x[i, "uncertainty"] == 1) { colour1 <- green - colour2 <- bgGreen + colour2 <- function(...) bgGreen(white(...)) } else if (x[i, "uncertainty"] == 2) { colour1 <- yellow colour2 <- bgYellow } else { colour1 <- red - colour2 <- bgRed + colour2 <- function(...) bgRed(white(...)) } msg <- paste(msg, - paste0("[", colour2(paste0(" ", x[i, "uncertainty"], " ")), '] - "', x[i, "input"], '" -> ', + paste0(colour2(paste0(" [", x[i, "uncertainty"], "] ")), ' "', x[i, "input"], '" -> ', colour1(paste0(italic(x[i, "fullname"]), " (", x[i, "mo"], ")"))), sep = "\n") } @@ -1373,3 +1435,16 @@ get_mo_code <- function(x, property) { AMR::microorganisms[base::which(AMR::microorganisms[, property] %in% x),]$mo } } + +translate_allow_uncertain <- function(allow_uncertain) { + if (isTRUE(allow_uncertain)) { + # default to uncertainty level 2 + allow_uncertain <- 2 + } else { + allow_uncertain <- as.integer(allow_uncertain) + if (!allow_uncertain %in% c(0:3)) { + stop("`allow_uncertain` must be a number between 0 (none) and 3 (all), or TRUE (= 2) or FALSE (= 0).", call. = FALSE) + } + } + allow_uncertain +} diff --git a/R/mo_history.R b/R/mo_history.R index e9c52bf4..9217eab9 100644 --- a/R/mo_history.R +++ b/R/mo_history.R @@ -21,10 +21,10 @@ # print successful as.mo coercions to file, not uncertain ones #' @importFrom dplyr %>% distinct filter -set_mo_history <- function(x, mo, force = FALSE) { +set_mo_history <- function(x, mo, uncertainty_level, force = FALSE) { file_location <- base::path.expand('~/.Rhistory_mo') if (base::interactive() | force == TRUE) { - mo_hist <- read_mo_history(force = force) + mo_hist <- read_mo_history(uncertainty_level = uncertainty_level, force = force) df <- data.frame(x, mo, stringsAsFactors = FALSE) %>% distinct(x, .keep_all = TRUE) %>% filter(!is.na(x) & !is.na(mo)) @@ -35,10 +35,12 @@ set_mo_history <- function(x, mo, force = FALSE) { mo <- df$mo for (i in 1:length(x)) { # save package version too, as both the as.mo() algorithm and the reference data set may change - if (NROW(mo_hist[base::which(mo_hist$x == x[i] & mo_hist$package_version == utils::packageVersion("AMR")),]) == 0) { - base::write(x = c(x[i], mo[i], base::as.character(utils::packageVersion("AMR"))), + if (NROW(mo_hist[base::which(mo_hist$x == x[i] & + mo_hist$uncertainty_level >= uncertainty_level & + mo_hist$package_version == utils::packageVersion("AMR")),]) == 0) { + base::write(x = c(x[i], mo[i], uncertainty_level, base::as.character(utils::packageVersion("AMR"))), file = file_location, - ncolumns = 3, + ncolumns = 4, append = TRUE, sep = "\t") } @@ -47,8 +49,8 @@ set_mo_history <- function(x, mo, force = FALSE) { return(base::invisible()) } -get_mo_history <- function(x, force = FALSE) { - file_read <- read_mo_history(force = force) +get_mo_history <- function(x, uncertainty_level, force = FALSE) { + file_read <- read_mo_history(uncertainty_level = uncertainty_level, force = force) if (base::is.null(file_read)) { NA } else { @@ -59,30 +61,57 @@ get_mo_history <- function(x, force = FALSE) { } #' @importFrom dplyr %>% filter distinct -read_mo_history <- function(force = FALSE) { +read_mo_history <- function(uncertainty_level = 2, force = FALSE, unfiltered = FALSE) { file_location <- base::path.expand('~/.Rhistory_mo') if (!base::file.exists(file_location) | (!base::interactive() & force == FALSE)) { return(NULL) } + uncertainty_level_param <- uncertainty_level file_read <- utils::read.table(file = file_location, header = FALSE, sep = "\t", - col.names = c("x", "mo", "package_version"), + col.names = c("x", "mo", "uncertainty_level", "package_version"), stringsAsFactors = FALSE) # Below: filter on current package version. # Even current fullnames may be replaced by new taxonomic names, so new versions of # the Catalogue of Life must not lead to data corruption. - file_read %>% - filter(package_version == utils::packageVersion("AMR")) %>% - distinct(x, mo, .keep_all = TRUE) -} -#' @rdname as.mo -#' @export -clean_mo_history <- function() { - file_location <- base::path.expand('~/.Rhistory_mo') - if (base::file.exists(file_location)) { - base::unlink(file_location) + if (unfiltered == FALSE) { + file_read <- file_read %>% + filter(package_version == utils::packageVersion("AMR"), + # only take unknowns if uncertainty_level_param is higher + ((mo == "UNKNOWN" & uncertainty_level_param == uncertainty_level) | + (mo != "UNKNOWN" & uncertainty_level_param >= uncertainty_level))) %>% + arrange(desc(uncertainty_level)) %>% + distinct(x, mo, .keep_all = TRUE) + } + + if (nrow(file_read) == 0) { + NULL + } else { + file_read + } +} + +#' @rdname as.mo +#' @importFrom crayon red +#' @importFrom utils menu +#' @export +clean_mo_history <- function(...) { + file_location <- base::path.expand('~/.Rhistory_mo') + if (file.exists(file_location)) { + if (interactive() & !isTRUE(list(...)$force)) { + q <- menu(title = paste("This will remove all", + format(nrow(read_mo_history(999, unfiltered = TRUE)), big.mark = ","), + "previously determined microbial IDs. Are you sure?"), + choices = c("Yes", "No"), + graphics = FALSE) + if (q != 1) { + return(invisible()) + } + } + unlink(file_location) + cat(red("File", file_location, "removed.")) } } diff --git a/R/mo_property.R b/R/mo_property.R index e0fb4a24..1d3faccb 100755 --- a/R/mo_property.R +++ b/R/mo_property.R @@ -446,6 +446,8 @@ mo_translate <- function(x, language) { # Spanish language == "es" ~ x[x_tobetranslated] %>% + # not 'negativa' + # https://www.sciencedirect.com/science/article/pii/S0123939215000739 gsub("Coagulase-negative Staphylococcus","Staphylococcus coagulasa negativo", ., fixed = TRUE) %>% gsub("Coagulase-positive Staphylococcus","Staphylococcus coagulasa positivo", ., fixed = TRUE) %>% gsub("Beta-haemolytic Streptococcus", "Streptococcus Beta-hemol\u00edtico", ., fixed = TRUE) %>% @@ -461,6 +463,8 @@ mo_translate <- function(x, language) { gsub("unknown species", "especie desconocida", ., fixed = TRUE) %>% gsub("unknown subspecies", "subespecie desconocida", ., fixed = TRUE) %>% gsub("unknown rank", "rango desconocido", ., fixed = TRUE) %>% + gsub("(CoNS)", "(SCN)", ., fixed = TRUE) %>% + gsub("(CoPS)", "(SCP)", ., fixed = TRUE) %>% gsub("Gram negative", "Gram negativo", ., fixed = TRUE) %>% gsub("Gram positive", "Gram positivo", ., fixed = TRUE) %>% gsub("Bacteria", "Bacterias", ., fixed = TRUE) %>% @@ -579,7 +583,12 @@ mo_validate <- function(x, property, ...) { # check onLoad() in R/zzz.R: data tables are created there. } - if (!all(x %in% microorganisms[, property]) + # try to catch an error when inputting an invalid parameter + # so the call can be set to FALSE + tryCatch(x[1L] %in% AMR::microorganisms[1, property], + error = function(e) stop(e$message, call. = FALSE)) + + if (!all(x %in% AMR::microorganisms[, property]) | Becker %in% c(TRUE, "all") | Lancefield %in% c(TRUE, "all")) { exec_as.mo(x, property = property, ...) diff --git a/R/portion.R b/R/portion.R index 38593462..278c9723 100755 --- a/R/portion.R +++ b/R/portion.R @@ -21,11 +21,11 @@ #' Calculate resistance of isolates #' -#' @description These functions can be used to calculate the (co-)resistance of microbial isolates (i.e. percentage S, SI, I, IR or R). All functions support quasiquotation with pipes, can be used in \code{dplyr}s \code{\link[dplyr]{summarise}} and support grouped variables, see \emph{Examples}. +#' @description These functions can be used to calculate the (co-)resistance of microbial isolates (i.e. percentage of S, SI, I, IR or R). All functions support quasiquotation with pipes, can be used in \code{dplyr}s \code{\link[dplyr]{summarise}} and support grouped variables, see \emph{Examples}. #' #' \code{portion_R} and \code{portion_IR} can be used to calculate resistance, \code{portion_S} and \code{portion_SI} can be used to calculate susceptibility.\cr #' @param ... one or more vectors (or columns) with antibiotic interpretations. They will be transformed internally with \code{\link{as.rsi}} if needed. Use multiple columns to calculate (the lack of) co-resistance: the probability where one of two drugs have a resistant or susceptible result. See Examples. -#' @param minimum the minimal amount of available isolates. Any number lower than \code{minimum} will return \code{NA} with a warning. The default number of \code{30} isolates is advised by the Clinical and Laboratory Standards Institute (CLSI) as best practice, see Source. +#' @param minimum the minimum allowed number of available (tested) isolates. Any isolate count lower than \code{minimum} will return \code{NA} with a warning. The default number of \code{30} isolates is advised by the Clinical and Laboratory Standards Institute (CLSI) as best practice, see Source. #' @param as_percent a logical to indicate whether the output must be returned as a hundred fold with \% sign (a character). A value of \code{0.123456} will then be returned as \code{"12.3\%"}. #' @param also_single_tested a logical to indicate whether (in combination therapies) also observations should be included where not all antibiotics were tested, but at least one of the tested antibiotics contains a target interpretation (e.g. S in case of \code{portion_S} and R in case of \code{portion_R}). \strong{This would lead to selection bias in almost all cases.} #' @param data a \code{data.frame} containing columns with class \code{rsi} (see \code{\link{as.rsi}}) @@ -39,19 +39,21 @@ #' #' The old \code{\link{rsi}} function is still available for backwards compatibility but is deprecated. #' \if{html}{ +# (created with https://www.latex4technics.com/) #' \cr\cr #' To calculate the probability (\emph{p}) of susceptibility of one antibiotic, we use this formula: -#' \out{
}\figure{mono_therapy.png}\out{
} +#' \out{
}\figure{combi_therapy_2.png}\out{
} #' To calculate the probability (\emph{p}) of susceptibility of more antibiotics (i.e. combination therapy), we need to check whether one of them has a susceptible result (as numerator) and count all cases where all antibiotics were tested (as denominator). \cr #' \cr #' For two antibiotics: -#' \out{
}\figure{combi_therapy_2.png}\out{
} +#' \out{
}\figure{combi_therapy_2.png}\out{
} #' \cr #' For three antibiotics: -#' \out{
}\figure{combi_therapy_3.png}\out{
} +#' \out{
}\figure{combi_therapy_2.png}\out{
} #' \cr #' And so on. #' } +#' #' @source \strong{M39 Analysis and Presentation of Cumulative Antimicrobial Susceptibility Test Data, 4th Edition}, 2014, \emph{Clinical and Laboratory Standards Institute (CLSI)}. \url{https://clsi.org/standards/products/microbiology/documents/m39/}. #' #' Wickham H. \strong{Tidy Data.} The Journal of Statistical Software, vol. 59, 2014. \url{http://vita.had.co.nz/papers/tidy-data.html} @@ -92,8 +94,9 @@ #' summarise(R = portion_R(cipr, as_percent = TRUE), #' I = portion_I(cipr, as_percent = TRUE), #' S = portion_S(cipr, as_percent = TRUE), -#' n = n_rsi(cipr), # works like n_distinct in dplyr -#' total = n()) # NOT the amount of tested isolates! +#' n1 = count_all(cipr), # the actual total; sum of all three +#' n2 = n_rsi(cipr), # same - analogous to n_distinct +#' total = n()) # NOT the number of tested isolates! #' #' # Calculate co-resistance between amoxicillin/clav acid and gentamicin, #' # so we can see that combination therapy does a lot more than mono therapy: diff --git a/data/antibiotics.rda b/data/antibiotics.rda index 2f309d6c..0c9a1c9e 100755 Binary files a/data/antibiotics.rda and b/data/antibiotics.rda differ diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index 5fab3eb1..a8fb96d7 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -78,7 +78,7 @@ AMR (for R) - 0.5.0.9024 + 0.5.0.9025 diff --git a/docs/articles/AMR.html b/docs/articles/AMR.html index f06c27bd..6c761450 100644 --- a/docs/articles/AMR.html +++ b/docs/articles/AMR.html @@ -40,7 +40,7 @@ AMR (for R) - 0.5.0.9023 + 0.5.0.9025 @@ -192,7 +192,7 @@

How to conduct AMR analysis

Matthijs S. Berends

-

15 March 2019

+

26 March 2019

@@ -201,7 +201,7 @@ -

Note: values on this page will change with every website update since they are based on randomly created values and the page was written in RMarkdown. However, the methodology remains unchanged. This page was generated on 15 March 2019.

+

Note: values on this page will change with every website update since they are based on randomly created values and the page was written in RMarkdown. However, the methodology remains unchanged. This page was generated on 26 March 2019.

Introduction

@@ -217,21 +217,21 @@ -2019-03-15 +2019-03-26 abcd Escherichia coli S S -2019-03-15 +2019-03-26 abcd Escherichia coli S R -2019-03-15 +2019-03-26 efgh Escherichia coli R @@ -327,54 +327,32 @@ -2011-03-23 -H4 -Hospital B +2014-08-13 +C5 +Hospital C Escherichia coli +R S -S -S +R S M -2016-02-07 -A10 -Hospital B +2017-11-08 +R4 +Hospital A Escherichia coli S -S -S -S -M - - -2017-05-30 -Q9 -Hospital D -Escherichia coli -S -S -S -S -F - - -2016-09-19 -U5 -Hospital B -Escherichia coli -S -S +I S S F -2016-03-20 -X10 +2015-01-27 +U9 Hospital D -Streptococcus pneumoniae +Klebsiella pneumoniae S S S @@ -382,9 +360,31 @@ F -2012-07-29 -D10 -Hospital D +2010-09-17 +R7 +Hospital A +Escherichia coli +R +I +R +S +F + + +2017-04-07 +Z10 +Hospital B +Staphylococcus aureus +S +S +S +S +F + + +2015-08-27 +C7 +Hospital A Escherichia coli S S @@ -411,8 +411,8 @@ #> #> Item Count Percent Cum. Count Cum. Percent #> --- ----- ------- -------- ----------- ------------- -#> 1 M 10,422 52.1% 10,422 52.1% -#> 2 F 9,578 47.9% 20,000 100.0% +#> 1 M 10,435 52.2% 10,435 52.2% +#> 2 F 9,565 47.8% 20,000 100.0%

So, we can draw at least two conclusions immediately. From a data scientist perspective, the data looks clean: only values M and F. From a researcher perspective: there are slightly more men. Nothing we didn’t already know.

The data is already quite clean, but we still need to transform some variables. The bacteria column now consists of text, and we want to add more variables based on microbial IDs later on. So, we will transform this column to valid IDs. The mutate() function of the dplyr package makes this really easy:

data <- data %>%
@@ -443,10 +443,10 @@
 #> Kingella kingae (no changes)
 #> 
 #> EUCAST Expert Rules, Intrinsic Resistance and Exceptional Phenotypes (v3.1, 2016)
-#> Table 1:  Intrinsic resistance in Enterobacteriaceae (1315 changes)
+#> Table 1:  Intrinsic resistance in Enterobacteriaceae (1262 changes)
 #> Table 2:  Intrinsic resistance in non-fermentative Gram-negative bacteria (no changes)
 #> Table 3:  Intrinsic resistance in other Gram-negative bacteria (no changes)
-#> Table 4:  Intrinsic resistance in Gram-positive bacteria (2799 changes)
+#> Table 4:  Intrinsic resistance in Gram-positive bacteria (2756 changes)
 #> Table 8:  Interpretive rules for B-lactam agents and Gram-positive cocci (no changes)
 #> Table 9:  Interpretive rules for B-lactam agents and Gram-negative rods (no changes)
 #> Table 10: Interpretive rules for B-lactam agents and other Gram-negative bacteria (no changes)
@@ -462,9 +462,9 @@
 #> Non-EUCAST: piperacillin/tazobactam = S where piperacillin = S (no changes)
 #> Non-EUCAST: trimethoprim/sulfa = S where trimethoprim = S (no changes)
 #> 
-#> => EUCAST rules affected 7,488 out of 20,000 rows
+#> => EUCAST rules affected 7,403 out of 20,000 rows
 #>    -> added 0 test results
-#>    -> changed 4,114 test results (0 to S; 0 to I; 4,114 to R)
+#> -> changed 4,018 test results (0 to S; 0 to I; 4,018 to R)

@@ -489,8 +489,8 @@ #> NOTE: Using column `bacteria` as input for `col_mo`. #> NOTE: Using column `date` as input for `col_date`. #> NOTE: Using column `patient_id` as input for `col_patient_id`. -#> => Found 5,688 first isolates (28.4% of total)

-

So only 28.4% is suitable for resistance analysis! We can now filter on it with the filter() function, also from the dplyr package:

+#> => Found 5,648 first isolates (28.2% of total) +

So only 28.2% is suitable for resistance analysis! We can now filter on it with the filter() function, also from the dplyr package:

data_1st <- data %>% 
   filter(first == TRUE)

For future use, the above two syntaxes can be shortened with the filter_first_isolate() function:

@@ -516,10 +516,10 @@ 1 -2010-04-01 -K1 +2010-01-29 +P7 B_ESCHR_COL -R +S S S S @@ -527,101 +527,101 @@ 2 -2010-04-30 -K1 +2010-05-18 +P7 B_ESCHR_COL -R S +R S S FALSE 3 -2010-10-12 -K1 +2010-06-01 +P7 B_ESCHR_COL R S -S +R S FALSE 4 -2010-12-05 -K1 +2010-07-21 +P7 B_ESCHR_COL S +I S S -R FALSE 5 -2011-01-19 -K1 +2010-08-20 +P7 B_ESCHR_COL S -S +R S S FALSE 6 -2011-04-07 -K1 +2010-12-14 +P7 B_ESCHR_COL S -S -S -S -TRUE - - -7 -2011-06-16 -K1 -B_ESCHR_COL -S -S +I S S FALSE - -8 -2011-07-16 -K1 + +7 +2011-03-02 +P7 B_ESCHR_COL S -R S S +R +TRUE + + +8 +2011-03-14 +P7 +B_ESCHR_COL +S +S +R +S FALSE 9 -2011-08-25 -K1 +2011-05-28 +P7 B_ESCHR_COL -R S +I S S FALSE 10 -2011-09-11 -K1 +2011-08-09 +P7 B_ESCHR_COL -R +I S R -R +S FALSE @@ -637,7 +637,7 @@ #> NOTE: Using column `patient_id` as input for `col_patient_id`. #> NOTE: Using column `keyab` as input for `col_keyantibiotics`. Use col_keyantibiotics = FALSE to prevent this. #> [Criterion] Inclusion based on key antibiotics, ignoring I. -#> => Found 15,948 first weighted isolates (79.7% of total) +#> => Found 15,891 first weighted isolates (79.5% of total) @@ -654,10 +654,10 @@ - - + + - + @@ -666,95 +666,95 @@ - - + + + - - + - - + + - + - + - - + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - + + - + @@ -762,23 +762,23 @@ - - + + - + - +
isolate
12010-04-01K12010-01-29P7 B_ESCHR_COLRS S S S
22010-04-30K12010-05-18P7 B_ESCHR_COLS R S SSFALSE FALSETRUE
32010-10-12K12010-06-01P7 B_ESCHR_COL R SSR S FALSEFALSETRUE
42010-12-05K12010-07-21P7 B_ESCHR_COL SI S SR FALSE TRUE
52011-01-19K1B_ESCHR_COLSSSSFALSETRUE
62011-04-07K1B_ESCHR_COLSSSSTRUETRUE
72011-06-16K1B_ESCHR_COLSSSSFALSEFALSE
82011-07-16K12010-08-20P7 B_ESCHR_COL S R S S FALSEFALSE
62010-12-14P7B_ESCHR_COLSISSFALSEFALSE
72011-03-02P7B_ESCHR_COLSSSRTRUETRUE
82011-03-14P7B_ESCHR_COLSSRSFALSE TRUE
92011-08-25K12011-05-28P7 B_ESCHR_COLR SI S S FALSE
102011-09-11K12011-08-09P7 B_ESCHR_COLRI S RRS FALSE TRUE
-

Instead of 2, now 7 isolates are flagged. In total, 79.7% of all isolates are marked ‘first weighted’ - 51.3% more than when using the CLSI guideline. In real life, this novel algorithm will yield 5-10% more isolates than the classic CLSI guideline.

+

Instead of 2, now 8 isolates are flagged. In total, 79.5% of all isolates are marked ‘first weighted’ - 51.2% more than when using the CLSI guideline. In real life, this novel algorithm will yield 5-10% more isolates than the classic CLSI guideline.

As with filter_first_isolate(), there’s a shortcut for this new algorithm too:

data_1st <- data %>% 
   filter_first_weighted_isolate()
-

So we end up with 15,948 isolates for analysis.

+

So we end up with 15,891 isolates for analysis.

We can remove unneeded columns:

data_1st <- data_1st %>% 
   select(-c(first, keyab))
@@ -804,13 +804,13 @@ 1 -2011-03-23 -H4 -Hospital B +2014-08-13 +C5 +Hospital C B_ESCHR_COL +R S -S -S +R S M Gram negative @@ -819,16 +819,16 @@ TRUE -2 -2016-02-07 -A10 -Hospital B +4 +2010-09-17 +R7 +Hospital A B_ESCHR_COL +R +I +R S -S -S -S -M +F Gram negative Escherichia coli @@ -836,41 +836,41 @@ 5 -2016-03-20 -X10 -Hospital D -B_STRPT_PNE +2017-04-07 +Z10 +Hospital B +B_STPHY_AUR +S S S S -R F Gram positive -Streptococcus -pneumoniae +Staphylococcus +aureus TRUE 7 -2015-08-01 -Q4 +2012-04-03 +J2 Hospital A B_ESCHR_COL -S -I -S R -F +R +R +S +M Gram negative Escherichia coli TRUE -8 -2012-03-10 -Z10 -Hospital C +9 +2017-09-09 +U3 +Hospital A B_ESCHR_COL R S @@ -883,10 +883,10 @@ TRUE -11 -2014-10-21 -G8 -Hospital C +10 +2015-12-21 +E1 +Hospital B B_ESCHR_COL S S @@ -915,9 +915,9 @@
freq(paste(data_1st$genus, data_1st$species))

Or can be used like the dplyr way, which is easier readable:

data_1st %>% freq(genus, species)
-

Frequency table of genus and species from a data.frame (15,948 x 13)

+

Frequency table of genus and species from a data.frame (15,891 x 13)

Columns: 2
-Length: 15,948 (of which NA: 0 = 0.00%)
+Length: 15,891 (of which NA: 0 = 0.00%)
Unique: 4

Shortest: 16
Longest: 24

@@ -935,32 +935,32 @@ Longest: 24

1 Escherichia coli 7,952 -49.9% +50.0% 7,952 -49.9% +50.0% 2 Staphylococcus aureus -3,886 -24.4% -11,838 -74.2% +3,895 +24.5% +11,847 +74.6% 3 Streptococcus pneumoniae -2,509 +2,502 15.7% -14,347 -90.0% +14,349 +90.3% 4 Klebsiella pneumoniae -1,601 -10.0% -15,948 +1,542 +9.7% +15,891 100.0% @@ -969,9 +969,9 @@ Longest: 24

Resistance percentages

-

The functions portion_R(), portion_RI(), portion_I(), portion_IS() and portion_S() can be used to determine the portion of a specific antimicrobial outcome. They can be used on their own:

+

The functions portion_S(), portion_SI(), portion_I(), portion_IR() and portion_R() can be used to determine the portion of a specific antimicrobial outcome. They can be used on their own:

data_1st %>% portion_IR(amox)
-#> [1] 0.4812516
+#> [1] 0.4711472

Or can be used in conjuction with group_by() and summarise(), both from the dplyr package:

data_1st %>% 
   group_by(hospital) %>% 
@@ -984,19 +984,19 @@ Longest: 24

Hospital A -0.4801481 +0.4674370 Hospital B -0.4811895 +0.4698925 Hospital C -0.4707087 +0.4813574 Hospital D -0.4915144 +0.4712389 @@ -1014,23 +1014,23 @@ Longest: 24

Hospital A -0.4801481 -4861 +0.4674370 +4760 Hospital B -0.4811895 -5582 +0.4698925 +5580 Hospital C -0.4707087 -2441 +0.4813574 +2387 Hospital D -0.4915144 -3064 +0.4712389 +3164 @@ -1050,27 +1050,27 @@ Longest: 24

Escherichia -0.7282445 -0.9031690 -0.9756036 +0.7272384 +0.9034205 +0.9763581 Klebsiella -0.7270456 -0.9000625 -0.9787633 +0.7457847 +0.9014267 +0.9760052 Staphylococcus -0.7220793 -0.9184251 -0.9796706 +0.7245186 +0.9181001 +0.9756098 Streptococcus -0.7182144 +0.7234213 0.0000000 -0.7182144 +0.7234213 @@ -1081,36 +1081,36 @@ Longest: 24

"2. Gentamicin" = portion_S(gent), "3. Amoxi/clav + gent" = portion_S(amcl, gent)) %>% tidyr::gather("Antibiotic", "S", -genus) %>% - ggplot(aes(x = genus, + ggplot(aes(x = genus, y = S, fill = Antibiotic)) + - geom_col(position = "dodge2")
+ geom_col(position = "dodge2")

Plots

To show results in plots, most R users would nowadays use the ggplot2 package. This package lets you create plots in layers. You can read more about it on their website. A quick example would look like these syntaxes:

-
ggplot(data = a_data_set,
-       mapping = aes(x = year,
+
ggplot(data = a_data_set,
+       mapping = aes(x = year,
                      y = value)) +
-  geom_col() +
-  labs(title = "A title",
+  geom_col() +
+  labs(title = "A title",
        subtitle = "A subtitle",
        x = "My X axis",
        y = "My Y axis")
 
 # or as short as:
-ggplot(a_data_set) +
-  geom_bar(aes(year))
+ggplot(a_data_set) + + geom_bar(aes(year))

The AMR package contains functions to extend this ggplot2 package, for example geom_rsi(). It automatically transforms data with count_df() or portion_df() and show results in stacked bars. Its simplest and shortest example:

-
ggplot(data_1st) +
+
ggplot(data_1st) +
   geom_rsi(translate_ab = FALSE)

Omit the translate_ab = FALSE to have the antibiotic codes (amox, amcl, cipr, gent) translated to official WHO names (amoxicillin, amoxicillin and betalactamase inhibitor, ciprofloxacin, gentamicin).

If we group on e.g. the genus column and add some additional functions from our package, we can create this:

+ theme(axis.text.y = element_text(face = "italic"))

To simplify this, we also created the ggplot_rsi() function, which combines almost all above functions:

data_1st %>% 
@@ -1137,7 +1137,7 @@ Longest: 24

facet = "Antibiotic", breaks = 0:4 * 25, datalabels = FALSE) + - coord_flip()
+ coord_flip()

diff --git a/docs/articles/AMR_files/figure-html/plot 1-1.png b/docs/articles/AMR_files/figure-html/plot 1-1.png index c93a5298..bb00739e 100644 Binary files a/docs/articles/AMR_files/figure-html/plot 1-1.png and b/docs/articles/AMR_files/figure-html/plot 1-1.png differ diff --git a/docs/articles/AMR_files/figure-html/plot 3-1.png b/docs/articles/AMR_files/figure-html/plot 3-1.png index 464e8829..1eb0f5f2 100644 Binary files a/docs/articles/AMR_files/figure-html/plot 3-1.png and b/docs/articles/AMR_files/figure-html/plot 3-1.png differ diff --git a/docs/articles/AMR_files/figure-html/plot 4-1.png b/docs/articles/AMR_files/figure-html/plot 4-1.png index 7da245da..8232c403 100644 Binary files a/docs/articles/AMR_files/figure-html/plot 4-1.png and b/docs/articles/AMR_files/figure-html/plot 4-1.png differ diff --git a/docs/articles/AMR_files/figure-html/plot 5-1.png b/docs/articles/AMR_files/figure-html/plot 5-1.png index df3534bd..5a888aaf 100644 Binary files a/docs/articles/AMR_files/figure-html/plot 5-1.png and b/docs/articles/AMR_files/figure-html/plot 5-1.png differ diff --git a/docs/articles/index.html b/docs/articles/index.html index 331cf09a..9f777dc6 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -78,7 +78,7 @@ AMR (for R) - 0.5.0.9024 + 0.5.0.9025
diff --git a/docs/authors.html b/docs/authors.html index f1ee4ea0..dac0b881 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -78,7 +78,7 @@ AMR (for R) - 0.5.0.9024 + 0.5.0.9025 diff --git a/docs/index.html b/docs/index.html index 8a6ba10e..7712b63e 100644 --- a/docs/index.html +++ b/docs/index.html @@ -42,7 +42,7 @@ AMR (for R) - 0.5.0.9024 + 0.5.0.9025 diff --git a/docs/news/index.html b/docs/news/index.html index 4c8c600a..c5ab9efe 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -78,7 +78,7 @@ AMR (for R) - 0.5.0.9024 + 0.5.0.9025 diff --git a/docs/reference/as.mo.html b/docs/reference/as.mo.html index eaa6f33b..af1ff7bd 100644 --- a/docs/reference/as.mo.html +++ b/docs/reference/as.mo.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9024 + 0.5.0.9025 @@ -252,7 +252,7 @@ mo_renamed() -clean_mo_history() +clean_mo_history(...)

Arguments

@@ -263,12 +263,12 @@ - - @@ -287,7 +287,7 @@

Value

-

Character (vector) with class "mo". Unknown values will return NA.

+

Character (vector) with class "mo"

Details

@@ -303,14 +303,15 @@ A microbial ID from this package (class: mo) typically looks like t | | | ----> subspecies, a 3-4 letter acronym | | ----> species, a 3-4 letter acronym | ----> genus, a 5-7 letter acronym, mostly without vowels - ----> taxonomic kingdom: A (Archaea), AN (Animalia), B (Bacteria), C (Chromista), - F (Fungi), P (Protozoa) or PL (Plantae) + ----> taxonomic kingdom: A (Archaea), AN (Animalia), B (Bacteria), + C (Chromista), F (Fungi), P (Protozoa) or + PL (Plantae) -

Values that cannot be coered will be considered 'unknown' and have an MO code UNKNOWN.

+

Values that cannot be coered will be considered 'unknown' and will get the MO code UNKNOWN.

Use the mo_property_* functions to get properties based on the returned code, see Examples.

The algorithm uses data from the Catalogue of Life (see below) and from one other source (see ?microorganisms).

Self-learning algoritm
-The as.mo() function gains experience from previously determined microbial IDs and learns from it. This drastically improves both speed and reliability. Use clean_mo_history() to reset the algorithms. Only experience from your current AMR package version is used. This is done because in the future the taxonomic tree (which is included in this package) may change for any organism and it consequently has to rebuild its knowledge. Usually, any guess after the first try runs 90-95% faster than the first try. The algorithm saves its previous findings to ~/.Rhistory_mo.

+The as.mo() function gains experience from previously determined microbial IDs and learns from it. This drastically improves both speed and reliability. Use clean_mo_history() to reset the algorithms. Only experience from your current AMR package version is used. This is done because in the future the taxonomic tree (which is included in this package) may change for any organism and it consequently has to rebuild its knowledge. Usually, any guess after the first try runs 80-95% faster than the first try. The algorithm saves its previous findings to ~/.Rhistory_mo.

Intelligent rules
This function uses intelligent rules to help getting fast and logical results. It tries to find matches in this order:

This means that looking up human pathogenic microorganisms takes less time than looking up human non-pathogenic microorganisms.

Uncertain results
-The algorithm can additionally use three different levels of uncertainty to guess valid results. The default is allow_uncertain = TRUE, which is uqual to uncertainty level 2. Using allow_uncertain = FALSE will skip all of these additional rules:

- + @@ -317,8 +317,14 @@ # filter on isolates that show resistance to# any aminoglycoside and any fluoroquinoloneseptic_patients%>% - filter_aminoglycosides("R", "any") %>% - filter_fluoroquinolones("R", "any") + filter_aminoglycosides("R") %>% + filter_fluoroquinolones("R") + +# filter on isolates that show resistance to +# all aminoglycosides and all fluoroquinolones +septic_patients%>% + filter_aminoglycosides("R", "all") %>% + filter_fluoroquinolones("R", "all") # } @@ -431,7 +431,7 @@ - + - + @@ -311,14 +311,14 @@ portion_R and portion_IR can be used to calculate resistance, portion_S and port

The old rsi function is still available for backwards compatibility but is deprecated.

To calculate the probability (p) of susceptibility of one antibiotic, we use this formula: -

+
To calculate the probability (p) of susceptibility of more antibiotics (i.e. combination therapy), we need to check whether one of them has a susceptible result (as numerator) and count all cases where all antibiotics were tested (as denominator).

For two antibiotics: -
+

For three antibiotics: -
+

And so on.

@@ -362,8 +362,9 @@ portion_R and portion_IR can be used to calculate resistance, portion_S and port summarise(R=portion_R(cipr, as_percent=TRUE), I=portion_I(cipr, as_percent=TRUE), S=portion_S(cipr, as_percent=TRUE), - n=n_rsi(cipr), # works like n_distinct in dplyr - total=n()) # NOT the amount of tested isolates! + n1=count_all(cipr), # the actual total; sum of all three + n2=n_rsi(cipr), # same - analogous to n_distinct + total=n()) # NOT the number of tested isolates!# Calculate co-resistance between amoxicillin/clav acid and gentamicin,# so we can see that combination therapy does a lot more than mono therapy: diff --git a/docs/reference/rsi.html b/docs/reference/rsi.html index d102b48c..ad19d7ab 100644 --- a/docs/reference/rsi.html +++ b/docs/reference/rsi.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9023 + 0.5.0.9025 @@ -257,7 +257,7 @@ - + diff --git a/man/as.mo.Rd b/man/as.mo.Rd index d64d3062..69fc3cf6 100644 --- a/man/as.mo.Rd +++ b/man/as.mo.Rd @@ -21,16 +21,16 @@ mo_uncertainties() mo_renamed() -clean_mo_history() +clean_mo_history(...) } \arguments{ \item{x}{a character vector or a \code{data.frame} with one or two columns} -\item{Becker}{a logical to indicate whether \emph{Staphylococci} should be categorised into Coagulase Negative \emph{Staphylococci} ("CoNS") and Coagulase Positive \emph{Staphylococci} ("CoPS") instead of their own species, according to Karsten Becker \emph{et al.} [1]. Note that this does not include species that were newly named after this publication. +\item{Becker}{a logical to indicate whether \emph{Staphylococci} should be categorised into coagulase-negative \emph{Staphylococci} ("CoNS") and coagulase-positive \emph{Staphylococci} ("CoPS") instead of their own species, according to Karsten Becker \emph{et al.} [1,2]. Note that this does not include species that were newly named after these publications, like \emph{S. caeli}. This excludes \emph{Staphylococcus aureus} at default, use \code{Becker = "all"} to also categorise \emph{S. aureus} as "CoPS".} -\item{Lancefield}{a logical to indicate whether beta-haemolytic \emph{Streptococci} should be categorised into Lancefield groups instead of their own species, according to Rebecca C. Lancefield [2]. These \emph{Streptococci} will be categorised in their first group, e.g. \emph{Streptococcus dysgalactiae} will be group C, although officially it was also categorised into groups G and L. +\item{Lancefield}{a logical to indicate whether beta-haemolytic \emph{Streptococci} should be categorised into Lancefield groups instead of their own species, according to Rebecca C. Lancefield [3]. These \emph{Streptococci} will be categorised in their first group, e.g. \emph{Streptococcus dysgalactiae} will be group C, although officially it was also categorised into groups G and L. This excludes \emph{Enterococci} at default (who are in group D), use \code{Lancefield = "all"} to also categorise all \emph{Enterococci} as group D.} @@ -41,7 +41,7 @@ clean_mo_history() \item{...}{other parameters passed on to functions} } \value{ -Character (vector) with class \code{"mo"}. Unknown values will return \code{NA}. +Character (vector) with class \code{"mo"} } \description{ Use this function to determine a valid microorganism ID (\code{mo}). Determination is done using intelligent rules and the complete taxonomic kingdoms Bacteria, Chromista, Protozoa, Archaea and most microbial species from the kingdom Fungi (see Source). The input can be almost anything: a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), an abbreviation known in the field (like \code{"MRSA"}), or just a genus. Please see Examples. @@ -60,18 +60,19 @@ A microbial ID from this package (class: \code{mo}) typically looks like these e | | | ----> subspecies, a 3-4 letter acronym | | ----> species, a 3-4 letter acronym | ----> genus, a 5-7 letter acronym, mostly without vowels - ----> taxonomic kingdom: A (Archaea), AN (Animalia), B (Bacteria), C (Chromista), - F (Fungi), P (Protozoa) or PL (Plantae) + ----> taxonomic kingdom: A (Archaea), AN (Animalia), B (Bacteria), + C (Chromista), F (Fungi), P (Protozoa) or + PL (Plantae) } -Values that cannot be coered will be considered 'unknown' and have an MO code \code{UNKNOWN}. +Values that cannot be coered will be considered 'unknown' and will get the MO code \code{UNKNOWN}. Use the \code{\link{mo_property}_*} functions to get properties based on the returned code, see Examples. The algorithm uses data from the Catalogue of Life (see below) and from one other source (see \code{?microorganisms}). \strong{Self-learning algoritm} \cr -The \code{as.mo()} function gains experience from previously determined microbial IDs and learns from it. This drastically improves both speed and reliability. Use \code{clean_mo_history()} to reset the algorithms. Only experience from your current \code{AMR} package version is used. This is done because in the future the taxonomic tree (which is included in this package) may change for any organism and it consequently has to rebuild its knowledge. Usually, any guess after the first try runs 90-95\% faster than the first try. The algorithm saves its previous findings to \code{~/.Rhistory_mo}. +The \code{as.mo()} function gains experience from previously determined microbial IDs and learns from it. This drastically improves both speed and reliability. Use \code{clean_mo_history()} to reset the algorithms. Only experience from your current \code{AMR} package version is used. This is done because in the future the taxonomic tree (which is included in this package) may change for any organism and it consequently has to rebuild its knowledge. Usually, any guess after the first try runs 80-95\% faster than the first try. The algorithm saves its previous findings to \code{~/.Rhistory_mo}. \strong{Intelligent rules} \cr This function uses intelligent rules to help getting fast and logical results. It tries to find matches in this order: @@ -91,7 +92,7 @@ A couple of effects because of these rules: This means that looking up human pathogenic microorganisms takes less time than looking up human non-pathogenic microorganisms. \strong{Uncertain results} \cr -The algorithm can additionally use three different levels of uncertainty to guess valid results. The default is \code{allow_uncertain = TRUE}, which is uqual to uncertainty level 2. Using \code{allow_uncertain = FALSE} will skip all of these additional rules: +The algorithm can additionally use three different levels of uncertainty to guess valid results. The default is \code{allow_uncertain = TRUE}, which is equal to uncertainty level 2. Using \code{allow_uncertain = FALSE} will skip all of these additional rules: \itemize{ \item{(uncertainty level 1): It tries to look for only matching genera} \item{(uncertainty level 1): It tries to look for previously accepted (but now invalid) taxonomic names} @@ -132,9 +133,11 @@ Group 2 probably contains all other microbial pathogens ever found in humans. [1] Becker K \emph{et al.} \strong{Coagulase-Negative Staphylococci}. 2014. Clin Microbiol Rev. 27(4): 870–926. \url{https://dx.doi.org/10.1128/CMR.00109-13} -[2] Lancefield RC \strong{A serological differentiation of human and other groups of hemolytic streptococci}. 1933. J Exp Med. 57(4): 571–95. \url{https://dx.doi.org/10.1084/jem.57.4.571} +[2] Becker K \emph{et al.} \strong{Implications of identifying the recently defined members of the S. aureus complex, S. argenteus and S. schweitzeri: A position paper of members of the ESCMID Study Group for staphylococci and Staphylococcal Diseases (ESGS).}. 2019. Clin Microbiol Infect. 2019 Mar 11. \url{https://doi.org/10.1016/j.cmi.2019.02.028} -[3] Catalogue of Life: Annual Checklist (public online taxonomic database), \url{www.catalogueoflife.org} (check included annual version with \code{\link{catalogue_of_life_version}()}). +[3] Lancefield RC \strong{A serological differentiation of human and other groups of hemolytic streptococci}. 1933. J Exp Med. 57(4): 571–95. \url{https://dx.doi.org/10.1084/jem.57.4.571} + +[4] Catalogue of Life: Annual Checklist (public online taxonomic database), \url{www.catalogueoflife.org} (check included annual version with \code{\link{catalogue_of_life_version}()}). } \section{Catalogue of Life}{ diff --git a/man/catalogue_of_life_version.Rd b/man/catalogue_of_life_version.Rd index 6bdce96f..9decac39 100644 --- a/man/catalogue_of_life_version.Rd +++ b/man/catalogue_of_life_version.Rd @@ -7,13 +7,13 @@ catalogue_of_life_version() } \value{ -a \code{list}, invisibly +a \code{list}, which prints in pretty format } \description{ This function returns information about the included data from the Catalogue of Life. It also shows if the included version is their latest annual release. The Catalogue of Life releases their annual release in March each year. } \details{ -The list item \code{is_latest_annual_release} is based on the system date. +The list item \code{...$catalogue_of_life$is_latest_annual_release} is based on the system date. For DSMZ, see \code{?microorganisms}. } diff --git a/man/count.Rd b/man/count.Rd index 3ab59c1b..adebc5b3 100644 --- a/man/count.Rd +++ b/man/count.Rd @@ -93,7 +93,7 @@ septic_patients \%>\% S = count_S(cipr), n1 = count_all(cipr), # the actual total; sum of all three n2 = n_rsi(cipr), # same - analogous to n_distinct - total = n()) # NOT the amount of tested isolates! + total = n()) # NOT the number of tested isolates! # Count co-resistance between amoxicillin/clav acid and gentamicin, # so we can see that combination therapy does a lot more than mono therapy. diff --git a/man/filter_ab_class.Rd b/man/filter_ab_class.Rd index 78ef7433..19df430c 100644 --- a/man/filter_ab_class.Rd +++ b/man/filter_ab_class.Rd @@ -13,7 +13,7 @@ \alias{filter_glycopeptides} \alias{filter_macrolides} \alias{filter_tetracyclines} -\title{Filter on antibiotic class} +\title{Filter isolates on result in antibiotic class} \usage{ filter_ab_class(tbl, ab_class, result = NULL, scope = "any", ...) @@ -42,7 +42,7 @@ filter_tetracyclines(tbl, result = NULL, scope = "any", ...) \arguments{ \item{tbl}{a data set} -\item{ab_class}{an antimicrobial class, like \code{"carbapenems"}} +\item{ab_class}{an antimicrobial class, like \code{"carbapenems"}. More specifically, this should be a text that can be found in a 4th level ATC group (chemical subgroup) or a 5th level ATC group (chemical substance), please see \href{https://www.whocc.no/atc/structure_and_principles/}{this explanation on the WHOCC website}.} \item{result}{an antibiotic result: S, I or R (or a combination of more of them)} @@ -51,7 +51,7 @@ filter_tetracyclines(tbl, result = NULL, scope = "any", ...) \item{...}{parameters passed on to \code{\link[dplyr]{filter_at}}} } \description{ -Filter on specific antibiotic variables based on their class (ATC groups). +Filter isolates on results in specific antibiotic variables based on their class (ATC groups). This makes it easy to get a list of isolates that were tested for e.g. any aminoglycoside. } \details{ The \code{\link{antibiotics}} data set will be searched for \code{ab_class} in the columns \code{atc_group1} and \code{atc_group2} (case-insensitive). Next, \code{tbl} will be checked for column names with a value in any abbreviations, codes or official names found in the \code{antibiotics} data set. @@ -77,8 +77,14 @@ septic_patients \%>\% filter_aminoglycosides("R", "all") # filter on isolates that show resistance to # any aminoglycoside and any fluoroquinolone septic_patients \%>\% - filter_aminoglycosides("R", "any") \%>\% - filter_fluoroquinolones("R", "any") + filter_aminoglycosides("R") \%>\% + filter_fluoroquinolones("R") + +# filter on isolates that show resistance to +# all aminoglycosides and all fluoroquinolones +septic_patients \%>\% + filter_aminoglycosides("R", "all") \%>\% + filter_fluoroquinolones("R", "all") } \keyword{fillter_class} \keyword{filter} diff --git a/man/mo_property.Rd b/man/mo_property.Rd index dc549af9..86092391 100644 --- a/man/mo_property.Rd +++ b/man/mo_property.Rd @@ -111,9 +111,11 @@ This package contains the complete taxonomic tree of almost all microorganisms ( [1] Becker K \emph{et al.} \strong{Coagulase-Negative Staphylococci}. 2014. Clin Microbiol Rev. 27(4): 870–926. \url{https://dx.doi.org/10.1128/CMR.00109-13} -[2] Lancefield RC \strong{A serological differentiation of human and other groups of hemolytic streptococci}. 1933. J Exp Med. 57(4): 571–95. \url{https://dx.doi.org/10.1084/jem.57.4.571} +[2] Becker K \emph{et al.} \strong{Implications of identifying the recently defined members of the S. aureus complex, S. argenteus and S. schweitzeri: A position paper of members of the ESCMID Study Group for staphylococci and Staphylococcal Diseases (ESGS).}. 2019. Clin Microbiol Infect. 2019 Mar 11. \url{https://doi.org/10.1016/j.cmi.2019.02.028} -[3] Catalogue of Life: Annual Checklist (public online taxonomic database), \url{www.catalogueoflife.org} (check included annual version with \code{\link{catalogue_of_life_version}()}). +[3] Lancefield RC \strong{A serological differentiation of human and other groups of hemolytic streptococci}. 1933. J Exp Med. 57(4): 571–95. \url{https://dx.doi.org/10.1084/jem.57.4.571} + +[4] Catalogue of Life: Annual Checklist (public online taxonomic database), \url{www.catalogueoflife.org} (check included annual version with \code{\link{catalogue_of_life_version}()}). } \section{Read more on our website!}{ diff --git a/man/portion.Rd b/man/portion.Rd index 1fbd5296..ee3f044e 100644 --- a/man/portion.Rd +++ b/man/portion.Rd @@ -36,7 +36,7 @@ portion_df(data, translate_ab = getOption("get_antibiotic_names", \arguments{ \item{...}{one or more vectors (or columns) with antibiotic interpretations. They will be transformed internally with \code{\link{as.rsi}} if needed. Use multiple columns to calculate (the lack of) co-resistance: the probability where one of two drugs have a resistant or susceptible result. See Examples.} -\item{minimum}{the minimal amount of available isolates. Any number lower than \code{minimum} will return \code{NA} with a warning. The default number of \code{30} isolates is advised by the Clinical and Laboratory Standards Institute (CLSI) as best practice, see Source.} +\item{minimum}{the minimum allowed number of available (tested) isolates. Any isolate count lower than \code{minimum} will return \code{NA} with a warning. The default number of \code{30} isolates is advised by the Clinical and Laboratory Standards Institute (CLSI) as best practice, see Source.} \item{as_percent}{a logical to indicate whether the output must be returned as a hundred fold with \% sign (a character). A value of \code{0.123456} will then be returned as \code{"12.3\%"}.} @@ -52,7 +52,7 @@ portion_df(data, translate_ab = getOption("get_antibiotic_names", Double or, when \code{as_percent = TRUE}, a character. } \description{ -These functions can be used to calculate the (co-)resistance of microbial isolates (i.e. percentage S, SI, I, IR or R). All functions support quasiquotation with pipes, can be used in \code{dplyr}s \code{\link[dplyr]{summarise}} and support grouped variables, see \emph{Examples}. +These functions can be used to calculate the (co-)resistance of microbial isolates (i.e. percentage of S, SI, I, IR or R). All functions support quasiquotation with pipes, can be used in \code{dplyr}s \code{\link[dplyr]{summarise}} and support grouped variables, see \emph{Examples}. \code{portion_R} and \code{portion_IR} can be used to calculate resistance, \code{portion_S} and \code{portion_SI} can be used to calculate susceptibility.\cr } @@ -67,14 +67,14 @@ The old \code{\link{rsi}} function is still available for backwards compatibilit \if{html}{ \cr\cr To calculate the probability (\emph{p}) of susceptibility of one antibiotic, we use this formula: - \out{
}\figure{mono_therapy.png}\out{
} + \out{
}\figure{combi_therapy_2.png}\out{
} To calculate the probability (\emph{p}) of susceptibility of more antibiotics (i.e. combination therapy), we need to check whether one of them has a susceptible result (as numerator) and count all cases where all antibiotics were tested (as denominator). \cr \cr For two antibiotics: - \out{
}\figure{combi_therapy_2.png}\out{
} + \out{
}\figure{combi_therapy_2.png}\out{
} \cr For three antibiotics: - \out{
}\figure{combi_therapy_3.png}\out{
} + \out{
}\figure{combi_therapy_2.png}\out{
} \cr And so on. } @@ -113,8 +113,9 @@ septic_patients \%>\% summarise(R = portion_R(cipr, as_percent = TRUE), I = portion_I(cipr, as_percent = TRUE), S = portion_S(cipr, as_percent = TRUE), - n = n_rsi(cipr), # works like n_distinct in dplyr - total = n()) # NOT the amount of tested isolates! + n1 = count_all(cipr), # the actual total; sum of all three + n2 = n_rsi(cipr), # same - analogous to n_distinct + total = n()) # NOT the number of tested isolates! # Calculate co-resistance between amoxicillin/clav acid and gentamicin, # so we can see that combination therapy does a lot more than mono therapy: diff --git a/man/rsi.Rd b/man/rsi.Rd index def785c0..9a96902b 100644 --- a/man/rsi.Rd +++ b/man/rsi.Rd @@ -12,7 +12,7 @@ rsi(ab1, ab2 = NULL, interpretation = "IR", minimum = 30, \item{interpretation}{antimicrobial interpretation to check for} -\item{minimum}{the minimal amount of available isolates. Any number lower than \code{minimum} will return \code{NA} with a warning. The default number of \code{30} isolates is advised by the Clinical and Laboratory Standards Institute (CLSI) as best practice, see Source.} +\item{minimum}{the minimum allowed number of available (tested) isolates. Any isolate count lower than \code{minimum} will return \code{NA} with a warning. The default number of \code{30} isolates is advised by the Clinical and Laboratory Standards Institute (CLSI) as best practice, see Source.} \item{as_percent}{a logical to indicate whether the output must be returned as a hundred fold with \% sign (a character). A value of \code{0.123456} will then be returned as \code{"12.3\%"}.} diff --git a/tests/testthat/test-get_locale.R b/tests/testthat/test-get_locale.R index 8e9880c1..d4631dd3 100644 --- a/tests/testthat/test-get_locale.R +++ b/tests/testthat/test-get_locale.R @@ -28,7 +28,7 @@ test_that("get_locale works", { expect_identical(mo_fullname("CoNS", "en"), "Coagulase-negative Staphylococcus (CoNS)") expect_identical(mo_fullname("CoNS", "de"), "Koagulase-negative Staphylococcus (KNS)") expect_identical(mo_fullname("CoNS", "nl"), "Coagulase-negatieve Staphylococcus (CNS)") - expect_identical(mo_fullname("CoNS", "es"), "Staphylococcus coagulasa negativo (CoNS)") + expect_identical(mo_fullname("CoNS", "es"), "Staphylococcus coagulasa negativo (SCN)") expect_identical(mo_fullname("CoNS", "it"), "Staphylococcus negativo coagulasi (CoNS)") # expect_identical(mo_fullname("CoNS", "fr"), "Staphylococcus \u00e0 coagulase n\u00e9gative (CoNS)") expect_identical(mo_fullname("CoNS", "pt"), "Staphylococcus coagulase negativo (CoNS)") diff --git a/tests/testthat/test-mo_history.R b/tests/testthat/test-mo_history.R index 7b272118..a0a6daf3 100644 --- a/tests/testthat/test-mo_history.R +++ b/tests/testthat/test-mo_history.R @@ -22,13 +22,23 @@ context("mo_history.R") test_that("mo_history works", { - clean_mo_history() + clean_mo_history(force = TRUE) expect_equal(read_mo_history(force = TRUE), NULL) - set_mo_history("testsubject", "B_ESCHR_COL", force = TRUE) - expect_equal(get_mo_history("testsubject", force = TRUE), + expect_equal(as.character(suppressWarnings(as.mo("testsubject"))), "UNKNOWN") + + set_mo_history("testsubject", "B_ESCHR_COL", + uncertainty_level = translate_allow_uncertain(TRUE), + force = TRUE) + + expect_equal(get_mo_history("testsubject", + uncertainty_level = translate_allow_uncertain(TRUE), + force = TRUE), "B_ESCHR_COL") + + expect_equal(as.character(suppressWarnings(as.mo("testsubject"))), "B_ESCHR_COL") + expect_equal(colnames(read_mo_history(force = TRUE)), - c("x", "mo", "package_version")) + c("x", "mo", "uncertainty_level", "package_version")) }) diff --git a/vignettes/AMR.Rmd b/vignettes/AMR.Rmd index 6a0f2a01..a0aa0787 100755 --- a/vignettes/AMR.Rmd +++ b/vignettes/AMR.Rmd @@ -315,7 +315,7 @@ data_1st %>% ## Resistance percentages -The functions `portion_R()`, `portion_RI()`, `portion_I()`, `portion_IS()` and `portion_S()` can be used to determine the portion of a specific antimicrobial outcome. They can be used on their own: +The functions `portion_S()`, `portion_SI()`, `portion_I()`, `portion_IR()` and `portion_R()` can be used to determine the portion of a specific antimicrobial outcome. They can be used on their own: ```{r} data_1st %>% portion_IR(amox)
Becker

a logical to indicate whether Staphylococci should be categorised into Coagulase Negative Staphylococci ("CoNS") and Coagulase Positive Staphylococci ("CoPS") instead of their own species, according to Karsten Becker et al. [1]. Note that this does not include species that were newly named after this publication.

+

a logical to indicate whether Staphylococci should be categorised into coagulase-negative Staphylococci ("CoNS") and coagulase-positive Staphylococci ("CoPS") instead of their own species, according to Karsten Becker et al. [1,2]. Note that this does not include species that were newly named after these publications, like S. caeli.

This excludes Staphylococcus aureus at default, use Becker = "all" to also categorise S. aureus as "CoPS".

Lancefield

a logical to indicate whether beta-haemolytic Streptococci should be categorised into Lancefield groups instead of their own species, according to Rebecca C. Lancefield [2]. These Streptococci will be categorised in their first group, e.g. Streptococcus dysgalactiae will be group C, although officially it was also categorised into groups G and L.

+

a logical to indicate whether beta-haemolytic Streptococci should be categorised into Lancefield groups instead of their own species, according to Rebecca C. Lancefield [3]. These Streptococci will be categorised in their first group, e.g. Streptococcus dysgalactiae will be group C, although officially it was also categorised into groups G and L.

This excludes Enterococci at default (who are in group D), use Lancefield = "all" to also categorise all Enterococci as group D.

ab_class

an antimicrobial class, like "carbapenems"

an antimicrobial class, like "carbapenems". More specifically, this should be a text that can be found in a 4th level ATC group (chemical subgroup) or a 5th level ATC group (chemical substance), please see this explanation on the WHOCC website.

result

filter_ab_class() filter_aminoglycosides() filter_carbapenems() filter_cephalosporins() filter_1st_cephalosporins() filter_2nd_cephalosporins() filter_3rd_cephalosporins() filter_4th_cephalosporins() filter_fluoroquinolones() filter_glycopeptides() filter_macrolides() filter_tetracyclines()

Filter on antibiotic class

Filter isolates on result in antibiotic class

diff --git a/docs/reference/microorganisms.html b/docs/reference/microorganisms.html index c773534c..bf77573d 100644 --- a/docs/reference/microorganisms.html +++ b/docs/reference/microorganisms.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9024 + 0.5.0.9025 diff --git a/docs/reference/microorganisms.old.html b/docs/reference/microorganisms.old.html index 7a5e5f37..dde94272 100644 --- a/docs/reference/microorganisms.old.html +++ b/docs/reference/microorganisms.old.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9024 + 0.5.0.9025 diff --git a/docs/reference/mo_property.html b/docs/reference/mo_property.html index 9e2ef6b1..314bbc64 100644 --- a/docs/reference/mo_property.html +++ b/docs/reference/mo_property.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9024 + 0.5.0.9025 @@ -341,8 +341,9 @@ This package contains the complete taxonomic tree of almost all microorganisms (

[1] Becker K et al. Coagulase-Negative Staphylococci. 2014. Clin Microbiol Rev. 27(4): 870–926. https://dx.doi.org/10.1128/CMR.00109-13

-

[2] Lancefield RC A serological differentiation of human and other groups of hemolytic streptococci. 1933. J Exp Med. 57(4): 571–95. https://dx.doi.org/10.1084/jem.57.4.571

-

[3] Catalogue of Life: Annual Checklist (public online taxonomic database), www.catalogueoflife.org (check included annual version with catalogue_of_life_version()).

+

[2] Becker K et al. Implications of identifying the recently defined members of the S. aureus complex, S. argenteus and S. schweitzeri: A position paper of members of the ESCMID Study Group for staphylococci and Staphylococcal Diseases (ESGS).. 2019. Clin Microbiol Infect. 2019 Mar 11. https://doi.org/10.1016/j.cmi.2019.02.028

+

[3] Lancefield RC A serological differentiation of human and other groups of hemolytic streptococci. 1933. J Exp Med. 57(4): 571–95. https://dx.doi.org/10.1084/jem.57.4.571

+

[4] Catalogue of Life: Annual Checklist (public online taxonomic database), www.catalogueoflife.org (check included annual version with catalogue_of_life_version()).

Read more on our website!

diff --git a/docs/reference/portion.html b/docs/reference/portion.html index 28e94e0c..90d311cb 100644 --- a/docs/reference/portion.html +++ b/docs/reference/portion.html @@ -47,7 +47,7 @@ - @@ -81,7 +81,7 @@ portion_R and portion_IR can be used to calculate resistance, portion_S and port AMR (for R) - 0.5.0.9023 + 0.5.0.9025 @@ -238,7 +238,7 @@ portion_R and portion_IR can be used to calculate resistance, portion_S and port
-

These functions can be used to calculate the (co-)resistance of microbial isolates (i.e. percentage S, SI, I, IR or R). All functions support quasiquotation with pipes, can be used in dplyrs summarise and support grouped variables, see Examples.

+

These functions can be used to calculate the (co-)resistance of microbial isolates (i.e. percentage of S, SI, I, IR or R). All functions support quasiquotation with pipes, can be used in dplyrs summarise and support grouped variables, see Examples.

portion_R and portion_IR can be used to calculate resistance, portion_S and portion_SI can be used to calculate susceptibility.

@@ -270,7 +270,7 @@ portion_R and portion_IR can be used to calculate resistance, portion_S and port
minimum

the minimal amount of available isolates. Any number lower than minimum will return NA with a warning. The default number of 30 isolates is advised by the Clinical and Laboratory Standards Institute (CLSI) as best practice, see Source.

the minimum allowed number of available (tested) isolates. Any isolate count lower than minimum will return NA with a warning. The default number of 30 isolates is advised by the Clinical and Laboratory Standards Institute (CLSI) as best practice, see Source.

as_percent
minimum

the minimal amount of available isolates. Any number lower than minimum will return NA with a warning. The default number of 30 isolates is advised by the Clinical and Laboratory Standards Institute (CLSI) as best practice, see Source.

the minimum allowed number of available (tested) isolates. Any isolate count lower than minimum will return NA with a warning. The default number of 30 isolates is advised by the Clinical and Laboratory Standards Institute (CLSI) as best practice, see Source.

as_percent