new rsi_translation

2025-08-24 17:42:11 +02:00 · 2022-10-22 22:00:15 +02:00
parent d10651eb26
commit c2801ba7a1
43 changed files with 5290 additions and 7300 deletions
--- a/R/aa_helper_functions.R
+++ b/R/aa_helper_functions.R
@@ -1083,8 +1083,7 @@ try_colour <- function(..., before, after, collapse = " ") {
 font_black <- function(..., collapse = " ") {
  before <- "\033[38;5;232m"
  after <- "\033[39m"
-  theme_info <- import_fn("getThemeInfo", "rstudioapi", error_on_fail = FALSE)
-  if (!is.null(theme_info) && isTRUE(theme_info()$dark)) {
+  if (isTRUE(AMR_env$is_dark_theme)) {
    # white
    before <- "\033[37m"
    after <- "\033[39m"
@@ -1094,8 +1093,7 @@ font_black <- function(..., collapse = " ") {
 font_white <- function(..., collapse = " ") {
  before <- "\033[37m"
  after <- "\033[39m"
-  theme_info <- import_fn("getThemeInfo", "rstudioapi", error_on_fail = FALSE)
-  if (!is.null(theme_info) && isTRUE(theme_info()$dark)) {
+  if (isTRUE(AMR_env$is_dark_theme)) {
    # black
    before <- "\033[38;5;232m"
    after <- "\033[39m"
@@ -1191,7 +1189,7 @@ progress_ticker <- function(n = 1, n_min = 0, print = TRUE, ...) {
      # so we use progress::progress_bar
      # a close() method was also added, see below this function
      pb <- progress_bar$new(
-        format = "(:spin) [:bar] :percent (:current/:total,:eta)",
+        format = "[:bar] :percent (:current/:total,:eta)",
        total = n
      )
    } else {
--- a/R/amr.R
+++ b/R/amr.R
@@ -42,7 +42,7 @@
 #'
 #' This package can be used for:
 #' - Reference for the taxonomy of microorganisms, since the package contains all microbial (sub)species from the List of Prokaryotic names with Standing in Nomenclature (LPSN) and the Global Biodiversity Information Facility (GBIF)
-#' - Interpreting raw MIC and disk diffusion values, based on the latest CLSI or EUCAST guidelines
+#' - Interpreting raw MIC and disk diffusion values, based on any CLSI or EUCAST guideline from the last 10 years
 #' - Retrieving antimicrobial drug names, doses and forms of administration from clinical health care records
 #' - Determining first isolates to be used for AMR data analysis
 #' - Calculating antimicrobial resistance
--- a/R/bug_drug_combinations.R
+++ b/R/bug_drug_combinations.R
@@ -43,7 +43,6 @@
 #' @export
 #' @rdname bug_drug_combinations
 #' @return The function [bug_drug_combinations()] returns a [data.frame] with columns "mo", "ab", "S", "I", "R" and "total".
-#' @source \strong{M39 Analysis and Presentation of Cumulative Antimicrobial Susceptibility Test Data, 5th Edition}, 2022, *Clinical and Laboratory Standards Institute (CLSI)*. <https://clsi.org/standards/products/microbiology/documents/m39/>.
 #' @examples
 #' \donttest{
 #' x <- bug_drug_combinations(example_isolates)
--- a/R/custom_eucast_rules.R
+++ b/R/custom_eucast_rules.R
@@ -253,7 +253,7 @@ print.custom_eucast_rules <- function(x, ...) {
 }

 format_custom_query_rule <- function(query, colours = has_colour()) {
-  # font_black() is very expensive in RStudio because it checks if the theme is dark, so do it once:
+  # font_black() is a bit expensive so do it once:
  txt <- font_black("{text}")
  query <- gsub(" & ", sub("{text}", font_bold(" and "), txt, fixed = TRUE), query, fixed = TRUE)
  query <- gsub(" | ", sub("{text}", " or ", txt, fixed = TRUE), query, fixed = TRUE)
--- a/R/mo.R
+++ b/R/mo.R
@@ -236,6 +236,8 @@ as.mo <- function(x,
    # set up progress bar
    progress <- progress_ticker(n = length(x_unique), n_min = 10, print = info)
    on.exit(close(progress))
+    
+    msg <- character(0)

    # run it
    x_coerced <- vapply(FUN.VALUE = character(1), x_unique, function(x_search) {
@@ -249,12 +251,16 @@ as.mo <- function(x,
      x_search_cleaned <- x_out
      x_out <- tolower(x_out)

+      # first check if cleaning led to an exact result, case-insensitive
+      if (x_out %in% AMR_env$MO_lookup$fullname_lower) {
+        return(as.character(AMR_env$MO_lookup$mo[match(x_out, AMR_env$MO_lookup$fullname_lower)]))
+      }
+
      # input must not be too short
      if (nchar(x_out) < 3) {
        return("UNKNOWN")
      }

-
      # take out the parts, split by space
      x_parts <- strsplit(gsub("-", " ", x_out, fixed = TRUE), " ", fixed = TRUE)[[1]]

@@ -267,17 +273,13 @@ as.mo <- function(x,
        filtr <- which(AMR_env$MO_lookup$full_first %like_case% first_chars)
      } else if (nchar(x_out) == 4) {
        # no space and 4 characters - probably a code such as STAU or ESCO!
-        if (isTRUE(info)) {
-          message_("Input \"", x_search, "\" is assumed to be a microorganism code - trying to match on ", vector_and(c(substr(x_out, 1, 2), substr(x_out, 3, 4)), sort = FALSE))
-        }
+        msg <- c(msg, paste0("Input \"", x_search, "\" was assumed to be a microorganism code - tried to match on ", vector_and(c(substr(x_out, 1, 2), substr(x_out, 3, 4)), sort = FALSE)))
        filtr <- which(AMR_env$MO_lookup$fullname_lower %like_case% paste0("(^| )", substr(x_out, 1, 2), ".* ", substr(x_out, 3, 4)))
      } else if (nchar(x_out) <= 6) {
        # no space and 5-6 characters - probably a code such as STAAUR or ESCCOL!
        first_part <- paste0(substr(x_out, 1, 2), "[a-z]*", substr(x_out, 3, 3))
        second_part <- substr(x_out, 4, nchar(x_out))
-        if (isTRUE(info)) {
-          message_("Input \"", x_search, "\" is assumed to be a microorganism code - trying to match on ", vector_and(c(gsub("[a-z]*", "(...)", first_part, fixed = TRUE), second_part), sort = FALSE))
-        }
+        msg <- c(msg, paste0("Input \"", x_search, "\" was assumed to be a microorganism code - tried to match on ", vector_and(c(gsub("[a-z]*", "(...)", first_part, fixed = TRUE), second_part), sort = FALSE)))
        filtr <- which(AMR_env$MO_lookup$fullname_lower %like_case% paste0("(^| )", first_part, ".* ", second_part))
      } else {
        filtr <- which(AMR_env$MO_lookup$full_first == substr(x_out, 1, 1))
@@ -287,6 +289,7 @@ as.mo <- function(x,
      } else {
        mo_to_search <- AMR_env$MO_lookup$fullname[filtr]
      }
+      
      AMR_env$mo_to_search <- mo_to_search
      # determine the matching score on the original search value
      m <- mo_matching_score(x = x_search_cleaned, n = mo_to_search)
@@ -303,7 +306,7 @@ as.mo <- function(x,

      top_hits <- mo_to_search[order(m, decreasing = TRUE, na.last = NA)] # na.last = NA will remove the NAs
      if (length(top_hits) == 0) {
-        warning_("No hits found for \"", x_search, "\" with minimum_matching_score = ", ifelse(is.null(minimum_matching_score), paste0("NULL (=", round(min(minimum_matching_score_current, na.rm = TRUE), 3), ")"), minimum_matching_score), ". Try setting this value lower or even to 0.")
+        warning_("No hits found for \"", x_search, "\" with minimum_matching_score = ", ifelse(is.null(minimum_matching_score), paste0("NULL (=", round(min(minimum_matching_score_current, na.rm = TRUE), 3), ")"), minimum_matching_score), ". Try setting this value lower or even to 0.", call = FALSE)
        result_mo <- NA_character_
      } else {
        result_mo <- AMR_env$MO_lookup$mo[match(top_hits[1], AMR_env$MO_lookup$fullname)]
@@ -356,15 +359,18 @@ as.mo <- function(x,
        } else {
          examples <- paste0(nr2char(length(AMR_env$mo_uncertainties$original_input)), " microorganism", plural[1])
        }
-        msg <- paste0(
+        msg <- c(msg, paste0(
          "Microorganism translation was uncertain for ", examples,
          ". Run `mo_uncertainties()` to review ", plural[2], "."
-        )
-        message_(msg)
+        ))
+        
+        for (m in msg) {
+          message_(m)  
+        }
      }
    }
  } # end of loop over all yet unknowns
-
+  
  # Keep or replace synonyms ----
  gbif_matches <- AMR::microorganisms$gbif_renamed_to[match(out, AMR::microorganisms$mo)]
  gbif_matches[!gbif_matches %in% AMR::microorganisms$gbif] <- NA
@@ -383,7 +389,7 @@ as.mo <- function(x,
    }
  } else if (is.null(getOption("AMR_keep_synonyms")) && length(AMR_env$mo_renamed$old) > 0 && message_not_thrown_before("as.mo", "keep_synonyms_warning", entire_session = TRUE)) {
    # keep synonyms is TRUE, so check if any do have synonyms
-    warning_("Function `as.mo()` returned ", nr2char(length(unique(AMR_env$mo_renamed$old))), " old taxonomic name", ifelse(length(unique(AMR_env$mo_renamed$old)) > 1, "s", ""), ". Use `as.mo(..., keep_synonyms = FALSE)` to clean the input to currently accepted taxonomic names, or set the R option `AMR_keep_synonyms` to `FALSE`. This warning will be shown once per session.")
+    warning_("Function `as.mo()` returned ", nr2char(length(unique(AMR_env$mo_renamed$old))), " old taxonomic name", ifelse(length(unique(AMR_env$mo_renamed$old)) > 1, "s", ""), ". Use `as.mo(..., keep_synonyms = FALSE)` to clean the input to currently accepted taxonomic names, or set the R option `AMR_keep_synonyms` to `FALSE`. This warning will be shown once per session.", call = FALSE)
  }

  # Apply Becker ----
@@ -403,7 +409,7 @@ as.mo <- function(x,
        warning_("in `as.mo()`: Becker ", font_italic("et al."), " (2014, 2019, 2020) does not contain these species named after their publication: ",
          vector_and(font_italic(gsub("Staphylococcus", "S.", post_Becker, fixed = TRUE), collapse = NULL), quotes = FALSE),
          ". Categorisation to CoNS/CoPS was taken from the original scientific publication(s).",
-          immediate = TRUE
+          immediate = TRUE, call = FALSE
        )
      }
    }
@@ -442,7 +448,7 @@ as.mo <- function(x,
  out[is.na(out) & !is.na(x)] <- "UNKNOWN"
  AMR_env$mo_failures <- unique(x[out == "UNKNOWN" & !x %in% c("UNKNOWN", "con") & !x %like_case% "^[(]unknown [a-z]+[)]$" & !is.na(x)])
  if (length(AMR_env$mo_failures) > 0) {
-    warning_("The following input could not be coerced and was returned as \"UNKNOWN\": ", vector_and(AMR_env$mo_failures, quotes = TRUE), ".\nYou can retrieve this list with `mo_failures()`.")
+    warning_("The following input could not be coerced and was returned as \"UNKNOWN\": ", vector_and(AMR_env$mo_failures, quotes = TRUE), ".\nYou can retrieve this list with `mo_failures()`.", call = FALSE)
  }

  # Return class ----
@@ -542,10 +548,11 @@ pillar_shaft.mo <- function(x, ...) {
    mo_cols <- NULL
  }

-  if (!all(x %in% c(AMR::microorganisms$mo, NA)) ||
-    (!is.null(df) && !all(unlist(df[, which(mo_cols), drop = FALSE]) %in% AMR::microorganisms$mo))) {
+  all_mos <- c(AMR::microorganisms$mo, NA)
+  if (!all(x %in% all_mos) ||
+    (!is.null(df) && !all(unlist(df[, which(mo_cols), drop = FALSE]) %in% all_mos))) {
    # markup old mo codes
-    out[!x %in% AMR::microorganisms$mo] <- font_italic(font_na(x[!x %in% AMR::microorganisms$mo],
+    out[!x %in% all_mos] <- font_italic(font_na(x[!x %in% all_mos],
      collapse = NULL
    ),
    collapse = NULL
@@ -558,7 +565,7 @@ pillar_shaft.mo <- function(x, ...) {
    }
    warning_(
      col, " contains old MO codes (from a previous AMR package version). ",
-      "Please update your MO codes with `as.mo()`."
+      "Please update your MO codes with `as.mo()`.", call = FALSE
    )
  }

@@ -645,7 +652,7 @@ print.mo <- function(x, print.shortnames = FALSE, ...) {
  if (!all(x %in% c(AMR::microorganisms$mo, NA))) {
    warning_(
      "Some MO codes are from a previous AMR package version. ",
-      "Please update the MO codes with `as.mo()`."
+      "Please update the MO codes with `as.mo()`.", call = FALSE
    )
  }
  print.default(x, quote = FALSE)
@@ -911,11 +918,12 @@ convert_colloquial_input <- function(x) {
  # CoNS/CoPS in different languages (support for German, Dutch, Spanish, Portuguese)
  out[x %like_case% "([ck]oagulas[ea].negatie?[vf]|^[ck]o?ns[^a-z]*$)"] <- "B_STPHY_CONS"
  out[x %like_case% "([ck]oagulas[ea].positie?[vf]|^[ck]o?ps[^a-z]*$)"] <- "B_STPHY_COPS"
-
+  
  # Gram stains
-  out[x %like_case% "gram[ -]?neg.*|negatie?[vf]"] <- "B_GRAMN"
-  out[x %like_case% "gram[ -]?pos.*|positie?[vf]"] <- "B_GRAMP"
-
+  out[x %like_case% "gram[ -]?neg.*"] <- "B_GRAMN"
+  out[x %like_case% "gram[ -]?pos.*"] <- "B_GRAMP"
+  out[is.na(out) & x %like_case% "anaerob[a-z]+ (micro)?.*organism"] <- "B_ANAER"
+  
  # yeasts and fungi
  out[x %like_case% "^yeast?"] <- "F_YEAST"
  out[x %like_case% "^fung(us|i)"] <- "F_FUNGUS"
@@ -932,6 +940,10 @@ convert_colloquial_input <- function(x) {
  # unexisting names (xxx and con are WHONET codes)
  out[x %in% c("con", "other", "none", "unknown") | x %like_case% "virus"] <- "UNKNOWN"

+  # WHONET has a lot of E. coli and Vibrio cholerae names
+  out[x %like_case% "escherichia coli"] <- "B_ESCHR_COLI"
+  out[x %like_case% "vibrio cholerae"] <- "B_VIBRI_CHLR"
+
  out
 }

--- a/R/rsi.R
+++ b/R/rsi.R
@@ -100,6 +100,12 @@
 #' @aliases rsi
 #' @export
 #' @seealso [as.mic()], [as.disk()], [as.mo()]
+#' @source
+#' For interpretations of minimum inhibitory concentration (MIC) values and disk diffusion diameters:
+#' 
+#' - **M39 Analysis and Presentation of Cumulative Antimicrobial Susceptibility Test Data**, `r min(as.integer(gsub("[^0-9]", "", subset(rsi_translation, guideline %like% "CLSI")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(rsi_translation, guideline %like% "CLSI")$guideline)))`, *Clinical and Laboratory Standards Institute* (CLSI). <https://clsi.org/standards/products/microbiology/documents/m39/>.
+#' - **M100 Performance Standard for Antimicrobial Susceptibility Testing**, `r min(as.integer(gsub("[^0-9]", "", subset(rsi_translation, guideline %like% "CLSI")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(rsi_translation, guideline %like% "CLSI")$guideline)))`, *Clinical and Laboratory Standards Institute* (CLSI). <https://clsi.org/standards/products/microbiology/documents/m100/>.
+#' - **Breakpoint tables for interpretation of MICs and zone diameters**, `r min(as.integer(gsub("[^0-9]", "", subset(rsi_translation, guideline %like% "EUCAST")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(rsi_translation, guideline %like% "EUCAST")$guideline)))`, *European Committee on Antimicrobial Susceptibility Testing* (EUCAST). <https://www.eucast.org/clinical_breakpoints>.
 #' @inheritSection AMR Reference Data Publicly Available
 #' @examples
 #' example_isolates
@@ -332,11 +338,13 @@ as.rsi.default <- function(x, ...) {
    # set to capitals
    x <- toupper(x)
    x <- gsub("[^A-Z]+", "", x, perl = TRUE)
+    # CLSI uses SDD for "susceptible dose-dependent"
+    x <- gsub("SDD", "I", x, fixed = TRUE)
    # some labs now report "H" instead of "I" to not interfere with EUCAST prior to 2019
    x <- gsub("H", "I", x, fixed = TRUE)
-    # and MIPS uses D for Dose-dependent (which is I, but it will throw a note)
+    # MIPS uses D for Dose-dependent (which is I, but it will throw a note)
    x <- gsub("D", "I", x, fixed = TRUE)
-    # and MIPS uses U for "susceptible urine"
+    # MIPS uses U for "susceptible urine"
    x <- gsub("U", "S", x, fixed = TRUE)
    # in cases of "S;S" keep S, but in case of "S;I" make it NA
    x <- gsub("^S+$", "S", x)
@@ -368,6 +376,9 @@ as.rsi.default <- function(x, ...) {
      if (any(toupper(x.bak[!is.na(x.bak)]) == "D") && message_not_thrown_before("as.rsi", "D")) {
        warning_("in `as.rsi()`: 'D' (dose-dependent) was interpreted as 'I', following some laboratory systems")
      }
+      if (any(toupper(x.bak[!is.na(x.bak)]) == "SDD") && message_not_thrown_before("as.rsi", "SDD")) {
+        warning_("in `as.rsi()`: 'SDD' (susceptible dose-dependent, coined by CLSI) was interpreted as 'I' to comply with EUCAST's 'I'")
+      }
      if (any(toupper(x.bak[!is.na(x.bak)]) == "H") && message_not_thrown_before("as.rsi", "H")) {
        warning_("in `as.rsi()`: 'H' was interpreted as 'I', following some laboratory systems")
      }
@@ -875,9 +886,17 @@ as_rsi_method <- function(method_short,
        pm_filter(uti == FALSE) %pm>% # 'uti' is a column in rsi_translation
        pm_arrange(rank_index)
    }
-
+    
+    records_same_mo <- get_record[get_record$mo == get_record[1, "mo", drop = TRUE], , drop = FALSE]
+    if (message_not_thrown_before("as.rsi", "site", records_same_mo$mo[1]) && nrow(records_same_mo) > 1 && length(unique(records_same_mo$site)) > 1) {
+      warning_("in `as.rsi()`: assuming site '",
+               get_record[1L, "site", drop = FALSE], "' for ",
+               font_italic(suppressMessages(suppressWarnings(mo_name(records_same_mo$mo[1], language = NULL, keep_synonyms = FALSE)))),
+               call = FALSE)
+      rise_warning <- TRUE
+    }
    get_record <- get_record[1L, , drop = FALSE]
-
+    
    if (NROW(get_record) > 0) {
      if (is.na(x[i]) | (is.na(get_record$breakpoint_S) & is.na(get_record$breakpoint_R))) {
        new_rsi[i] <- NA_character_
--- a/R/sysdata.rda
+++ b/R/sysdata.rda
--- a/R/zzz.R
+++ b/R/zzz.R
@@ -68,6 +68,7 @@ AMR_env$rsi_interpretation_history <- data.frame(
 )
 AMR_env$has_data.table <- pkg_is_available("data.table", also_load = FALSE)
 AMR_env$custom_ab_codes <- character(0)
+AMR_env$is_dark_theme <- tryCatch(isTRUE(getExportedValue("getThemeInfo", ns = asNamespace("rstudioapi"))()$dark), error = function(e) FALSE)

 # determine info icon for messages
 utf8_supported <- isTRUE(base::l10n_info()$`UTF-8`)