(v2.1.1.9157) improved as.ab(), fixed knit_print of antibiogram

2026-03-07 16:41:35 +01:00 · 2025-02-26 13:27:20 +01:00
parent b10989f431
commit 195dfb4b91
20 changed files with 107 additions and 42 deletions
--- a/R/ab.R
+++ b/R/ab.R
@@ -97,11 +97,9 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
  meet_criteria(flag_multiple_results, allow_class = "logical", has_length = 1)
  meet_criteria(info, allow_class = "logical", has_length = 1)

-  if (is.ab(x)) {
-    return(x)
-  }
-  if (all(x %in% c(AMR_env$AB_lookup$ab, NA))) {
-    # all valid AB codes, but not yet right class
+  if (is.ab(x) || all(x %in% c(AMR_env$AB_lookup$ab, NA))) {
+    # all valid AB codes, but not yet right class or might have additional attributes as AMR selector
+    attributes(x) <- NULL
    return(set_clean_class(x,
      new_class = c("ab", "character")
    ))
@@ -130,9 +128,10 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {

  x <- unique(x_bak_clean) # this means that every x is in fact generalise_antibiotic_name(x)
  x_new <- rep(NA_character_, length(x))
+  x_uncertain <- character(0)
  x_unknown <- character(0)
  x_unknown_ATCs <- character(0)
-
+  
  note_if_more_than_one_found <- function(found, index, from_text) {
    if (loop_time == 1 && isTRUE(length(from_text) > 1)) {
      abnames <- ab_name(from_text, tolower = TRUE, loop_time = loop_time + 1)
@@ -176,6 +175,14 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
  x_new[known_codes_cid] <- AMR_env$AB_lookup$ab[match(x[known_codes_cid], AMR_env$AB_lookup$cid)]
  previously_coerced <- x %in% AMR_env$ab_previously_coerced$x
  x_new[previously_coerced & is.na(x_new)] <- AMR_env$ab_previously_coerced$ab[match(x[is.na(x_new) & x %in% AMR_env$ab_previously_coerced$x], AMR_env$ab_previously_coerced$x)]
+  prev <- x_bak[which(x[which(previously_coerced)] %in% x_bak_clean)]
+  if (any(previously_coerced) && isTRUE(info) && message_not_thrown_before("as.ab", prev, entire_session = TRUE)) {
+    message_(
+      "Returning previously coerced value", ifelse(length(unique(prev)) > 1, "s", ""),
+      " for ", vector_and(prev), ". Run `ab_reset_session()` to reset this. This note will be shown once per session for this input."
+    )
+  }
+
  already_known <- known_names | known_codes_ab | known_codes_atc | known_codes_cid | previously_coerced

  # fix for NAs
@@ -325,6 +332,18 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
    if (loop_time <= 2 && fast_mode == FALSE) {
      # only run on first and second try
      
+      # based on the Levenshtein distance function if length >= 6
+      if (nchar(x[i]) >= 6) {
+        l_dist <- as.double(utils::adist(x[i], AMR_env$AB_lookup$generalised_name,
+                                         ignore.case = FALSE,
+                                         fixed = TRUE,
+                                         costs = c(insertions = 1, deletions = 2, substitutions = 2),
+                                         counts = FALSE))
+        x_new[i] <- AMR_env$AB_lookup$ab[order(l_dist)][1]
+        x_uncertain <- c(x_uncertain, x_bak[x[i] == x_bak_clean][1])
+        next
+      }
+      
      # try by removing all spaces
      if (x[i] %like% " ") {
        found <- suppressWarnings(as.ab(gsub(" +", "", x[i], perl = TRUE), loop_time = loop_time + 2))
@@ -554,6 +573,8 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
      vector_and(x_unknown_ATCs), "."
    )
  }
+  
+  # Throw note about input that remained unknown
  x_unknown <- x_unknown[!x_unknown %in% x_unknown_ATCs]
  x_unknown <- c(
    x_unknown,
@@ -566,6 +587,28 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
      vector_and(x_unknown), "."
    )
  }
+  
+  # Throw note about uncertainties
+  if (isTRUE(info) && length(x_uncertain) > 0 && fast_mode == FALSE) {
+    if (message_not_thrown_before("as.ab", "uncertainties", x_bak)) {
+      plural <- c("", "this")
+      if (length(x_uncertain) > 1) {
+        plural <- c("s", "these uncertainties")
+      }
+      if (length(x_uncertain) <= 3) {
+        examples <- vector_and(
+          paste0(
+            '"', x_uncertain, '" (assumed ', 
+            ab_name(AMR_env$ab_previously_coerced$ab[which(AMR_env$ab_previously_coerced$x_bak %in% x_uncertain)], language = NULL, tolower = TRUE),
+            ", ", AMR_env$ab_previously_coerced$ab[which(AMR_env$ab_previously_coerced$x_bak %in% x_uncertain)], ")"),
+          quotes = FALSE)
+      } else {
+        examples <- paste0(nr2char(length(x_uncertain)), " antimicrobial", plural[1])
+      }
+      message_("Antimicrobial translation was uncertain for ", examples,
+               ". If required, use `add_custom_antimicrobials()` to add custom entries.")
+    }
+  }

  x_result <- x_new[match(x_bak_clean, x)]
  if (length(x_result) == 0) {
@@ -583,6 +626,18 @@ is.ab <- function(x) {
  inherits(x, "ab")
 }

+#' @rdname as.ab
+#' @export
+ab_reset_session <- function() { # empties the session store of previously coerced as.ab() input and reports it via message_()
+  if (NROW(AMR_env$ab_previously_coerced) > 0) { # only report a reset when the store holds at least one row
+    message_("Reset ", nr2char(NROW(AMR_env$ab_previously_coerced)), " previously matched input value", ifelse(NROW(AMR_env$ab_previously_coerced) > 1, "s", ""), ".")
+    AMR_env$ab_previously_coerced <- AMR_env$ab_previously_coerced[0, , drop = FALSE] # zero-row subset: drops all rows, keeps the columns
+    AMR_env$mo_uncertainties <- AMR_env$mo_uncertainties[0, , drop = FALSE] # NOTE(review): this empties the 'mo' uncertainties store from an 'ab' reset - possibly copied from a microorganism counterpart; confirm this is intended
+  } else {
+    message_("No previously matched input values to reset.")
+  }
+}
+
 # will be exported using s3_register() in R/zzz.R
 pillar_shaft.ab <- function(x, ...) {
  out <- trimws(format(x))
@@ -606,6 +661,15 @@ type_sum.ab <- function(x, ...) {
 #' @export
 #' @noRd
 print.ab <- function(x, ...) { # print method for class 'ab': optional usage note, then a header and the unquoted values
+  if (!is.null(attributes(x)$amr_selector)) { # attribute is set by amr_select_exec() when it found no data (selector run as a separate command)
+    function_name <- attributes(x)$amr_selector # name of the selector function that produced this vector
+    message_("This 'ab' vector was retrieved using `" , function_name, "()`, which should normally be used inside a `dplyr` verb or `data.frame` call, e.g.:\n",
+             "  ", AMR_env$bullet_icon, " your_data %>% select(", function_name, "())\n",
+             "  ", AMR_env$bullet_icon, " your_data %>% select(column_a, column_b, ", function_name, "())\n",
+             "  ", AMR_env$bullet_icon, " your_data %>% filter(any(", function_name, "() == \"R\"))\n",
+             "  ", AMR_env$bullet_icon, " your_data[, ", function_name, "()]\n",
+             "  ", AMR_env$bullet_icon, " your_data[, c(\"column_a\", \"column_b\", ", function_name, "())]")
+  }
  cat("Class 'ab'\n") # header line, printed for every 'ab' vector
  print(as.character(x), quote = FALSE) # values are shown as plain character strings without quotes
 }
@@ -692,7 +756,8 @@ generalise_antibiotic_name <- function(x) {
  # non-character, space or number should be a slash
  x <- gsub("[^A-Z0-9 -)(]", "/", x, perl = TRUE)
  # correct for 'high level' antibiotics
-  x <- gsub("([^A-Z0-9/ -]+)?(HIGH(.?LE?VE?L)?|[^A-Z0-9/]H[^A-Z0-9]?L)([^A-Z0-9 -]+)?", "-HIGH", x, perl = TRUE)
+  x <- trimws(gsub("([^A-Z0-9/ -]+)?(HIGH(.?LE?VE?L)?|[^A-Z0-9/]H[^A-Z0-9]?L)([^A-Z0-9 -]+)?", "-HIGH", x, perl = TRUE))
+  x <- trimws(gsub("^(-HIGH)(.*)", "\\2\\1", x))
  # remove part between brackets if that's followed by another string
  x <- gsub("(.*)+ [(].*[)]", "\\1", x)
  # spaces around non-characters must be removed: amox + clav -> amox/clav
--- a/R/amr_selectors.R
+++ b/R/amr_selectors.R
@@ -47,7 +47,7 @@
 #' @details
 #' These functions can be used in data set calls for selecting columns and filtering rows. They work with base \R, the Tidyverse, and `data.table`. They are heavily inspired by the [Tidyverse selection helpers][tidyselect::language] such as [`everything()`][tidyselect::everything()], but are not limited to `dplyr` verbs. Nonetheless, they are very convenient to use with `dplyr` functions such as [`select()`][dplyr::select()], [`filter()`][dplyr::filter()] and [`summarise()`][dplyr::summarise()], see *Examples*.
 #' 
-#' All selectors can also be used in `tidymodels` packages such as `recipe` and `parsnip`. See for more info [our tutorial](https://msberends.github.io/AMR/articles/AMR_with_tidymodels.html) on using these AMR functions for predictive modelling.
+#' All selectors can also be used in `tidymodels` packages such as `recipe` and `parsnip`. See for more info [our tutorial](https://msberends.github.io/AMR/articles/AMR_with_tidymodels.html) on using antimicrobial selectors for predictive modelling.
 #'
 #' All columns in the data in which these functions are called will be searched for known antimicrobial names, abbreviations, brand names, and codes (ATC, EARS-Net, WHO, etc.) according to the [antibiotics] data set. This means that a selector such as [aminoglycosides()] will pick up column names like 'gen', 'genta', 'J01GB03', 'tobra', 'Tobracin', etc.
 #'
@@ -747,16 +747,8 @@ amr_select_exec <- function(function_name,
  
  if (is.null(vars_df)) {
    # no data found, no antimicrobials, so no input. Happens if users run e.g. `aminoglycosides()` as a separate command.
-    examples <- paste0(
-      "  ", AMR_env$bullet_icon, " your_data %>% select(", function_name, "())\n",
-      "  ", AMR_env$bullet_icon, " your_data %>% select(column_a, column_b, ", function_name, "())\n",
-      "  ", AMR_env$bullet_icon, " your_data %>% filter(any(", function_name, "() == \"R\"))\n",
-      "  ", AMR_env$bullet_icon, " your_data[, ", function_name, "()]\n",
-      "  ", AMR_env$bullet_icon, " your_data[, c(\"column_a\", \"column_b\", ", function_name, "())]")
-    message_("The function `" , function_name, "()` should be used inside a `dplyr` verb or `data.frame` call, e.g.:\n",
-             examples,
-             "\n\nNow returning a vector of all possible antimicrobials that `" , function_name, "()` can select.")
-    return(sort(abx))
+    # print.ab will cover the additional printing text
+    return(structure(sort(abx), amr_selector = function_name))
  }
  
  # get the columns with a group names in the chosen ab class
--- a/R/antibiogram.R
+++ b/R/antibiogram.R
@@ -441,7 +441,7 @@ antibiogram.default <- function(x,
  x <- ascertain_sir_classes(x, "x")
  meet_criteria(wisca, allow_class = "logical", has_length = 1)
  if (isTRUE(wisca)) {
-    if (!is.null(mo_transform)) {
+    if (!is.null(mo_transform) && !missing(mo_transform)) {
      warning_("WISCA must be based on the species level as WISCA parameters are based on this. For that reason, `mo_transform` will be ignored.")
    }
    mo_transform <- function(x) suppressMessages(suppressWarnings(paste(mo_genus(x, keep_synonyms = TRUE, language = NULL), mo_species(x, keep_synonyms = TRUE, language = NULL))))
@@ -1245,10 +1245,14 @@ knit_print.antibiogram <- function(x, italicise = TRUE, na = getOption("knitr.ka
  meet_criteria(italicise, allow_class = "logical", has_length = 1)
  meet_criteria(na, allow_class = "character", has_length = 1, allow_NA = TRUE)
  
-  if (!isTRUE(attributes(x)$wisca) && isTRUE(italicise) && "mo" %in% colnames(attributes(x)$long_numeric)) {
-    # make all microorganism names italic, according to nomenclature
-    names_col <- ifelse(isTRUE(attributes(x)$has_syndromic_group), 2, 1)
-    x[[names_col]] <- italicise_taxonomy(x[[names_col]], type = "markdown")
+  add_MO_lookup_to_AMR_env()
+  
+  cols_with_mo_names <- vapply(FUN.VALUE = logical(1), x, function(x) any(x %in% AMR_env$MO_lookup$fullname, na.rm = TRUE))
+  if (any(cols_with_mo_names)) {
+    for (i in which(cols_with_mo_names)) {
+      # make all microorganism names italic, according to nomenclature
+      x[[i]] <- italicise_taxonomy(x[[i]], type = "markdown")
+    }
  }
  
  old_option <- getOption("knitr.kable.NA")
--- a/R/plotting.R
+++ b/R/plotting.R
@@ -218,12 +218,10 @@ create_scale_mic <- function(aest, keep_operators, mic_range = NULL, ...) {
    as.double(rescale_mic(x = as.double(as.mic(x)), keep_operators = keep_operators, mic_range = mic_range, as.mic = TRUE))
  }
  scale$transform_df <- function(self, df) {
-    stop_if(all(is.na(df[[aest]])),
-            "`scale_", aest, "_mic()`: All MIC values are `NA`. Check your input data.", call = FALSE)
    self$mic_values_rescaled <- rescale_mic(x = as.double(as.mic(df[[aest]])), keep_operators = keep_operators, mic_range = mic_range, as.mic = TRUE)
    # create new breaks and labels here
    lims <- range(self$mic_values_rescaled, na.rm = TRUE)
-    # support inner and outer mic_range settings (e.g., data ranges 0.5-8 and mic_range is set to 0.025-64)
+    # support inner and outer 'mic_range' settings (e.g., the data ranges 0.5-8 and 'mic_range' is set to 0.025-32)
    if (!is.null(mic_range) && !is.na(mic_range[1]) && !is.na(lims[1]) && mic_range[1] < lims[1]) {
      lims[1] <- mic_range[1]
    }