(v1.3.0.9026) eucast expert rules 3.2

2025-07-08 10:31:53 +02:00 · 2020-09-24 00:30:11 +02:00
parent a1411ddafc
commit c19095a3d5
107 changed files with 48638 additions and 3953 deletions
--- a/R/aa_helper_functions.R
+++ b/R/aa_helper_functions.R
@ -87,6 +87,8 @@ check_dataset_integrity <- function() {
  data_in_pkg <- data(package = "AMR", envir = asNamespace("AMR"))$results[, "Item", drop = TRUE]
  data_in_globalenv <- ls(envir = globalenv())
  overwritten <- data_in_pkg[data_in_pkg %in% data_in_globalenv]
+  # exception for example_isolates
+  overwritten <- overwritten[overwritten != "example_isolates"]
  stop_if(length(overwritten) > 0,
          "the following data set is overwritten by your global environment and prevents the AMR package from working correctly:\n",
          paste0("'", overwritten, "'", collapse = ", "),
@ -110,7 +112,7 @@ check_dataset_integrity <- function() {
  invisible(TRUE)
 }

-search_type_in_df <- function(x, type) {
+search_type_in_df <- function(x, type, info = TRUE) {
  # try to find columns based on type
  found <- NULL
  
@ -187,7 +189,7 @@ search_type_in_df <- function(x, type) {
    }
  }
  
-  if (!is.null(found)) {
+  if (!is.null(found) & info == TRUE) {
    msg <- paste0("NOTE: Using column `", font_bold(found), "` as input for `col_", type, "`.")
    if (type %in% c("keyantibiotics", "specimen")) {
      msg <- paste(msg, "Use", font_bold(paste0("col_", type), "= FALSE"), "to prevent this.")
@ -197,6 +199,11 @@ search_type_in_df <- function(x, type) {
  found
 }

+is_possibly_regex <- function(x) {
+  sapply(strsplit(x, ""),
+         function(y) any(y %in% c("$", "(", ")", "*", "+", "-", ".", "?", "[", "]", "^", "{", "|", "}", "\\"), na.rm = TRUE))
+}
+
 stop_ifnot_installed <- function(package) {
  # no "utils::installed.packages()" since it requires non-staged install since R 3.6.0
  # https://developer.r-project.org/Blog/public/2019/02/14/staged-install/index.html
@ -259,7 +266,7 @@ stop_if <- function(expr, ..., call = TRUE) {
 }

 stop_ifnot <- function(expr, ..., call = TRUE) {
-  if (!isTRUE(expr)) {
+  if (isFALSE(expr)) {
    if (isTRUE(call)) {
      call <- -1
    }
@ -317,6 +324,18 @@ dataset_UTF8_to_ASCII <- function(df) {
  df
 }

+create_ab_documentation <- function(ab) {
+  ab_names <- ab_name(ab, language = NULL, tolower = TRUE)
+  ab <- ab[order(ab_names)]
+  ab_names <- ab_names[order(ab_names)]
+  atcs <- ab_atc(ab)
+  atcs[!is.na(atcs)] <- paste0("[", atcs[!is.na(atcs)], "](", ab_url(ab[!is.na(atcs)]), ")")
+  atcs[is.na(atcs)] <- "no ATC code"
+  out <- paste0(ab_names, " (`", ab, "`, ", atcs, ")", collapse = ", ")
+  substr(out, 1, 1) <- toupper(substr(out, 1, 1))
+  out
+}
+
 has_colour <- function() {
  # this is a base R version of crayon::has_color
  enabled <- getOption("crayon.enabled")
--- a/R/ab.R
+++ b/R/ab.R
@ -110,8 +110,14 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) {
  
  note_if_more_than_one_found <- function(found, index, from_text) {
    if (initial_search == TRUE & isTRUE(length(from_text) > 1)) {
-      message(font_blue(paste0("NOTE: more than one result was found for item ", index, ": ",
-                               paste0(ab_name(from_text, tolower = TRUE, initial_search = FALSE), collapse = ", "))))
+      abnames <- ab_name(from_text, tolower = TRUE, initial_search = FALSE)
+      if (ab_name(found[1L], language = NULL) %like% "clavulanic acid") {
+        abnames <- abnames[!abnames == "clavulanic acid"]
+      }
+      if (length(abnames) > 1) {
+        message(font_blue(paste0("NOTE: more than one result was found for item ", index, ": ",
+                                 paste0(abnames, collapse = ", "))))
+      }
    }
    found[1L]
  }
--- a/R/ab_class_selectors.R
+++ b/R/ab_class_selectors.R
@ -149,7 +149,7 @@ ab_selector <- function(ab_class, function_name) {
  vars_vct <- peek_vars_tidyselect(fn = function_name)
  vars_df <- data.frame(as.list(vars_vct))[0, , drop = FALSE]
  colnames(vars_df) <- vars_vct
-  ab_in_data <- suppressMessages(get_column_abx(vars_df))
+  ab_in_data <- get_column_abx(vars_df, info = FALSE)
  
  if (length(ab_in_data) == 0) {
    message(font_blue("NOTE: no antimicrobial agents found."))
--- a/R/data.R
+++ b/R/data.R
@ -254,7 +254,7 @@ catalogue_of_life <- list(
 #' - `antibiotic`\cr Name of the antibiotic drug
 #' @details The repository of this `AMR` package contains a file comprising this exact data set: <https://github.com/msberends/AMR/blob/master/data-raw/intrinsic_resistant.txt>. This file **allows for machine reading EUCAST guidelines about intrinsic resistance**, which is almost impossible with the Excel and PDF files distributed by EUCAST. The file is updated automatically.
 #' 
-#' This data set is based on 'EUCAST Expert Rules, Intrinsic Resistance and Exceptional Phenotypes', version `r EUCAST_VERSION_EXPERT_RULES`.
+#' This data set is based on '`r EUCAST_VERSION_EXPERT_RULES[["3.2"]]$title`', `r EUCAST_VERSION_EXPERT_RULES[["3.2"]]$version_txt` from `r EUCAST_VERSION_EXPERT_RULES[["3.2"]]$year`.
 #' @inheritSection AMR Reference data publicly available
 #' @inheritSection AMR Read more on our website!
 #' @examples
--- a/R/eucast_rules.R
+++ b/R/eucast_rules.R
--- a/R/filter_ab_class.R
+++ b/R/filter_ab_class.R
@ -93,7 +93,7 @@ filter_ab_class <- function(x,
  stop_ifnot(all(scope %in% c("any", "all")), "`scope` must be one of: 'any', 'all'")
  
  # get all columns in data with names that resemble antibiotics
-  ab_in_data <- suppressMessages(get_column_abx(x))
+  ab_in_data <- get_column_abx(x, info = FALSE)
  if (length(ab_in_data) == 0) {
    message(font_blue("NOTE: no columns with class <rsi> found (see ?as.rsi), data left unchanged."))
    return(x.bak)
--- a/R/guess_ab_col.R
+++ b/R/guess_ab_col.R
@ -110,16 +110,21 @@ get_column_abx <- function(x,
                           soft_dependencies = NULL,
                           hard_dependencies = NULL,
                           verbose = FALSE,
+                           info = TRUE,
                           ...) {
  
-  message(font_blue("NOTE: Auto-guessing columns suitable for analysis"), appendLF = FALSE)
+  if (info == TRUE) {
+    message(font_blue("NOTE: Auto-guessing columns suitable for analysis"), appendLF = FALSE)
+  }
  
  x <- as.data.frame(x, stringsAsFactors = FALSE)
  if (NROW(x) > 10000) {
    # only test maximum of 10,000 values per column
-    message(font_blue(paste0(" (using only ", font_bold("the first 10,000 rows"), ")...")), appendLF = FALSE)
+    if (info == TRUE) {
+      message(font_blue(paste0(" (using only ", font_bold("the first 10,000 rows"), ")...")), appendLF = FALSE)
+    }
    x <- x[1:10000, , drop = FALSE]
-  } else {
+  } else if (info == TRUE) {
    message(font_blue("..."), appendLF = FALSE)
  }
  x_bak <- x
@ -130,8 +135,8 @@ get_column_abx <- function(x,
  vectr_antibiotics <- vectr_antibiotics[!is.na(vectr_antibiotics) & nchar(vectr_antibiotics) >= 3]
  x_columns <- sapply(colnames(x), function(col, df = x_bak) {
    if (toupper(col) %in% vectr_antibiotics | 
-        is.rsi(as.data.frame(df)[, col]) |
-        is.rsi.eligible(as.data.frame(df)[, col], threshold = 0.5)) {
+        is.rsi(as.data.frame(df)[, col, drop = TRUE]) |
+        is.rsi.eligible(as.data.frame(df)[, col, drop = TRUE], threshold = 0.5)) {
      return(col)
    } else {
      return(NA_character_)
@ -142,7 +147,7 @@ get_column_abx <- function(x,
  
  df_trans <- data.frame(colnames = colnames(x),
                         abcode = suppressWarnings(as.ab(colnames(x), info = FALSE)))
-  df_trans <- df_trans[!is.na(df_trans$abcode), ]
+  df_trans <- df_trans[!is.na(df_trans$abcode), , drop = FALSE]
  x <- as.character(df_trans$colnames)
  names(x) <- df_trans$abcode
  
@ -166,7 +171,9 @@ get_column_abx <- function(x,
  }
  
  if (length(x) == 0) {
-    message(font_blue("No columns found."))
+    if (info == TRUE) {
+      message(font_blue("No columns found."))
+    }
    return(x)
  }
  
@ -178,14 +185,16 @@ get_column_abx <- function(x,
  x <- x[order(names(x), x)]
  
  # succeeded with auto-guessing
-  message(font_blue("OK."))
+  if (info == TRUE) {
+    message(font_blue("OK."))
+  }
  
  for (i in seq_len(length(x))) {
-    if (verbose == TRUE & !names(x[i]) %in% names(duplicates)) {
+    if (info == TRUE & verbose == TRUE & !names(x[i]) %in% names(duplicates)) {
      message(font_blue(paste0("NOTE: Using column `", font_bold(x[i]), "` as input for `", names(x)[i],
                               "` (", ab_name(names(x)[i], tolower = TRUE, language = NULL), ").")))
    }
-    if (names(x[i]) %in% names(duplicates)) {
+    if (info == TRUE & names(x[i]) %in% names(duplicates)) {
      warning(font_red(paste0("Using column `", font_bold(x[i]), "` as input for `", names(x)[i],
                              "` (", ab_name(names(x)[i], tolower = TRUE, language = NULL),
                              "), although it was matched for multiple antibiotics or columns.")), 
@ -206,14 +215,19 @@ get_column_abx <- function(x,
  }
  if (!is.null(soft_dependencies)) {
    soft_dependencies <- unique(soft_dependencies)
-    if (!all(soft_dependencies %in% names(x))) {
+    if (info == TRUE & !all(soft_dependencies %in% names(x))) {
      # missing a soft dependency may lower the reliability
      missing <- soft_dependencies[!soft_dependencies %in% names(x)]
-      missing_txt <- paste(paste0(ab_name(missing, tolower = TRUE, language = NULL), 
-                                  " (", font_bold(missing, collapse = NULL), ")"), 
+      missing_msg <- paste(paste0(ab_name(missing, tolower = TRUE, language = NULL), 
+                                  " (", missing, ")"), 
                           collapse = ", ")
-      message(font_blue("NOTE: Reliability would be improved if these antimicrobial results would be available too:",
-                        missing_txt))
+      missing_msg <- paste("NOTE: Reliability would be improved if these antimicrobial results would be available too:",
+                           missing_msg)
+      wrapped <- strwrap(missing_msg,
+                         width = 0.95 * getOption("width"),
+                         exdent = 6)
+      wrapped <- gsub("\\((.*?)\\)", paste0("(", font_bold("\\1"), ")"), wrapped) # add bold abbreviations
+      message(font_blue(wrapped, collapse = "\n"))
    }
  }
  x
--- a/R/like.R
+++ b/R/like.R
@ -64,7 +64,7 @@
 #' }
 like <- function(x, pattern, ignore.case = TRUE) {
  # set to fixed if no regex found
-  fixed <- all(!grepl("[\\[$.^*?+-}{|)(]", pattern))
+  fixed <- !any(is_possibly_regex(pattern))
  if (ignore.case == TRUE) {
    # set here, otherwise if fixed = TRUE, this warning will be thrown: argument 'ignore.case = TRUE' will be ignored
    x <- tolower(x)
@ -140,3 +140,13 @@ like <- function(x, pattern, ignore.case = TRUE) {
 "%like_case%" <- function(x, pattern) {
  like(x, pattern, ignore.case = FALSE)
 }
+
+# don't export his one, it's just for convenience in eucast_rules()
+# match all Klebsiella and Raoultella, but not K. aerogenes: fullname %like_perl% "^(Klebsiella(?! aerogenes)|Raoultella)"
+"%like_perl%" <- function(x, pattern) {
+  grepl(x = tolower(x),
+        pattern = tolower(pattern),
+        perl = TRUE,
+        fixed = FALSE,
+        ignore.case = TRUE)
+}
--- a/R/mdro.R
+++ b/R/mdro.R
@ -141,7 +141,7 @@ mdro <- function(x,
  # try to find columns based on type
  # -- mo
  if (is.null(col_mo)) {
-    col_mo <- search_type_in_df(x = x, type = "mo")
+    col_mo <- search_type_in_df(x = x, type = "mo", info = info)
  }
  if (is.null(col_mo) & guideline$code == "tb") {
    message(font_blue("NOTE: No column found as input for `col_mo`,",
@ -303,7 +303,9 @@ mdro <- function(x,
                                "TCY",
                                "DOX",
                                "MNO"),
-                              verbose = verbose, ...)
+                              verbose = verbose,
+                              info = info,
+                              ...)
  } else if (guideline$code == "tb") {
    cols_ab <- get_column_abx(x = x,
                              soft_dependencies = c("CAP",
@ -314,7 +316,9 @@ mdro <- function(x,
                                                    "RIF",
                                                    "RIB",
                                                    "RFP"),
-                              verbose = verbose, ...)
+                              info = info,
+                              verbose = verbose,
+                              ...)
  } else if (guideline$code == "mrgn") {
    cols_ab <- get_column_abx(x = x,
                              soft_dependencies = c("PIP",
@ -323,9 +327,14 @@ mdro <- function(x,
                                                    "IPM",
                                                    "MEM",
                                                    "CIP"),
-                              verbose = verbose, ...)
+                              verbose = verbose,
+                              info = info,
+                              ...)
  } else {
-    cols_ab <- get_column_abx(x = x, verbose = verbose, ...)
+    cols_ab <- get_column_abx(x = x,
+                              verbose = verbose,
+                              info = info,
+                              ...)
  }
  
  AMC <- cols_ab["AMC"]
--- a/R/mo.R
+++ b/R/mo.R
@ -109,7 +109,7 @@
 #' 3. The level of uncertainty \eqn{U} needed to get to the result, as stated above (1 to 3);
 #' 4. The [Levenshtein distance](https://en.wikipedia.org/wiki/Levenshtein_distance) \eqn{L} is the distance between the user input and all taxonomic full names, with the text length of the user input being the maximum distance. A modified version of the Levenshtein distance \eqn{L'} based on the text length of the full name \eqn{F} is calculated as:
 #' 
-#' \deqn{L' = F - \frac{0.5L}{F}}{L' = (F - 0.5L) / F}
+#' \deqn{L' = 1 - \frac{0.5L}{F}}{L' = 1 - ((0.5 * L) / F)}
 #'   
 #' The final matching score \eqn{M} is calculated as:
 #' \deqn{M = L' \times \frac{1}{P K U} = \frac{F - 0.5L}{F P K U}}{M = L' * (1 / (P * K * U)) = (F - 0.5L) / (F * P * K * U)}
--- a/R/mo_matching_score.R
+++ b/R/mo_matching_score.R
@ -32,7 +32,7 @@
 #' 3. The level of uncertainty \eqn{U} that is needed to get to a result (1 to 3, see [as.mo()]);
 #' 4. The [Levenshtein distance](https://en.wikipedia.org/wiki/Levenshtein_distance) \eqn{L} is the distance between the user input and all taxonomic full names, with the text length of the user input being the maximum distance. A modified version of the Levenshtein distance \eqn{L'} based on the text length of the full name \eqn{F} is calculated as:
 #'  
-#' \deqn{L' = F - \frac{0.5L}{F}}{L' = (F - 0.5L) / F}
+#' \deqn{L' = 1 - \frac{0.5L}{F}}{L' = 1 - ((0.5 * L) / F)}
 #'   
 #' The final matching score \eqn{M} is calculated as:
 #' \deqn{M = L' \times \frac{1}{P K U} = \frac{F - 0.5L}{F P K U}}{M = L' * (1 / (P * K * U)) = (F - 0.5L) / (F * P * K * U)}
--- a/R/mo_source.R
+++ b/R/mo_source.R
@ -35,7 +35,7 @@
 #' 
 #' The created compressed data file `"~/.mo_source.rds"` will be used at default for MO determination (function [as.mo()] and consequently all `mo_*` functions like [mo_genus()] and [mo_gramstain()]). The location of the original file will be saved as an R option with `options(mo_source = path)`. Its timestamp will be saved with `options(mo_source_datetime = ...)`. 
 #' 
-#' The function [get_mo_source()] will return the data set by reading `"~/.mo_source.rds"` with [readRDS()]. If the original file has changed (by checking the aforementioned options `mo_source` and `mo_source_datetime`), it will call [set_mo_source()] to update the data file automatically.
+#' The function [get_mo_source()] will return the data set by reading `"~/.mo_source.rds"` with [readRDS()]. If the original file has changed (by checking the aforementioned options `mo_source` and `mo_source_datetime`), it will call [set_mo_source()] to update the data file automatically if used in an interactive session.
 #'
 #' Reading an Excel file (`.xlsx`) with only one row has a size of 8-9 kB. The compressed file created with [set_mo_source()] will then have a size of 0.1 kB and can be read by [get_mo_source()] in only a couple of microseconds (millionths of a second).
 #' 
@ -224,7 +224,7 @@ get_mo_source <- function() {
    set_mo_source("")
    return(NULL)
  }
-  if (new_time != old_time) {
+  if (interactive() && new_time != old_time) {
    # set updated source
    set_mo_source(getOption("mo_source"))
  }
--- a/R/rsi.R
+++ b/R/rsi.R
@ -187,7 +187,6 @@ is.rsi <- function(x) {
 #' @export
 is.rsi.eligible <- function(x, threshold = 0.05) {
  stop_if(NCOL(x) > 1, "`x` must be a one-dimensional vector.")
-  
  if (any(c("logical",
            "numeric",
            "integer",
@ -226,15 +225,12 @@ as.rsi.default <- function(x, ...) {
              class =  c("rsi", "ordered", "factor"))
  } else {
    
-    ab <- deparse(substitute(x))
    if (!any(x %like% "(R|S|I)", na.rm = TRUE)) {
-      if (!is.na(suppressWarnings(as.ab(ab)))) {
-        # check if they are actually MICs or disks now that the antibiotic name is valid
-        if (all_valid_mics(x)) {
-          as.rsi(as.mic(x), ab = ab, ...)
-        } else if (all_valid_disks(x)) {
-          as.rsi(as.disk(x), ab = ab, ...)
-        }
+      # check if they are actually MICs or disks now that the antibiotic name is valid
+      if (all_valid_mics(x)) {
+        warning("The input seems to be MIC values. Transform them with as.mic() before running as.rsi() to interpret them.")
+      } else if (all_valid_disks(x)) {
+        warning("The input seems to be disk diffusion values. Transform them with as.disk() before running as.rsi() to interpret them.")
      }
    }
    
--- a/R/sysdata.rda
+++ b/R/sysdata.rda