(v1.3.0.9002) intrinsic_resistant data set

2025-08-25 13:42:10 +02:00 · 2020-08-14 13:36:10 +02:00
parent 7d16bec21f
commit 08d62bb5d5
111 changed files with 50487 additions and 525 deletions
--- a/R/aa_helper_functions.R
+++ b/R/aa_helper_functions.R
@@ -48,6 +48,37 @@ distinct.default <- function(.data, ..., .keep_all = FALSE) {
 distinct.grouped_data <- function(.data, ..., .keep_all = FALSE) {
  apply_grouped_function(.data, "distinct", ..., .keep_all = .keep_all)
 }
+# faster implementation of left_join than using base::merge() by poorman - we use base::match():
+left_join <- function(x, y, by = NULL, suffix = c(".x", ".y")) {
+  if (is.null(by)) {
+    by <- intersect(names(x), names(y))[1L]
+    if (is.na(by)) {
+      stop_("no common column found for left_join()")
+    }
+    join_message(by)
+  } else if (!is.null(names(by))) {
+    by <- unname(c(names(by), by))
+  }
+  if (length(by) == 1) {
+    by <- rep(by, 2)
+  }
+  requires_suffix <- any(colnames(x) %in% colnames(y))
+  if (requires_suffix == TRUE) {
+    int_x <- colnames(x) %in% colnames(y) & colnames(x) != by[1]
+    int_y <- colnames(y) %in% colnames(x) & colnames(y) != by[2]
+    
+    colnames(x)[int_x] <- paste0(colnames(x)[int_x], suffix[1L])
+    colnames(y)[int_y] <- paste0(colnames(y)[int_y], suffix[2L])
+  }
+  merged <- cbind(x,
+                  y[match(x[, by[1], drop = TRUE],
+                          y[, by[2], drop = TRUE]),
+                    colnames(y)[!colnames(y) %in% colnames(x) & !colnames(y) == by[2]],
+                    drop = FALSE])
+  
+  rownames(merged) <- NULL
+  merged
+}
 filter_join_worker <- function(x, y, by = NULL, type = c("anti", "semi")) {
  type <- match.arg(type, choices = c("anti", "semi"), several.ok = FALSE)
  if (is.null(by)) {
@@ -92,9 +123,10 @@ check_dataset_integrity <- function() {
                               "synonyms", "oral_ddd", "oral_units", 
                               "iv_ddd", "iv_units", "loinc") %in% colnames(antibiotics),
                             na.rm = TRUE)
-  }, error = function(e)
-    stop_('please use the command \'library("AMR")\' before using this function, to load the required reference data.', call = FALSE)
-  )
+  }, error = function(e) {
+    # package not yet loaded
+    require("AMR")
+  })
  invisible(TRUE)
 }

--- a/R/aa_helper_functions_dplyr.R
+++ b/R/aa_helper_functions_dplyr.R
@@ -248,9 +248,9 @@ inner_join <- function(x, y, by = NULL, suffix = c(".x", ".y")) {
  join_worker(x = x, y = y, by = by, suffix = suffix, sort = FALSE)
 }

-left_join <- function(x, y, by = NULL, suffix = c(".x", ".y")) {
-  join_worker(x = x, y = y, by = by, suffix = suffix, all.x = TRUE)
-}
+# left_join <- function(x, y, by = NULL, suffix = c(".x", ".y")) {
+#   join_worker(x = x, y = y, by = by, suffix = suffix, all.x = TRUE)
+# }

 right_join <- function(x, y, by = NULL, suffix = c(".x", ".y")) {
  join_worker(x = x, y = y, by = by, suffix = suffix, all.y = TRUE)
--- a/R/ab.R
+++ b/R/ab.R
@@ -25,6 +25,7 @@
 #' @inheritSection lifecycle Maturing lifecycle
 #' @param x character vector to determine to antibiotic ID
 #' @param flag_multiple_results logical to indicate whether a note should be printed to the console that probably more than one antibiotic code or name can be retrieved from a single input value.
+#' @param info logical to indicate whether a progress bar should be printed
 #' @param ... arguments passed on to internal functions
 #' @rdname as.ab
 #' @inheritSection WHOCC WHOCC
@@ -75,7 +76,7 @@
 #' # they use as.ab() internally:
 #' ab_name("J01FA01")    # "Erythromycin"
 #' ab_name("eryt")       # "Erythromycin"
-as.ab <- function(x, flag_multiple_results = TRUE, ...) {
+as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) {
  
  check_dataset_integrity()
  
@@ -131,7 +132,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, ...) {
  }
  
  if (initial_search == TRUE) {
-    progress <- progress_estimated(n = length(x), n_min = 25) # start if n >= 25
+    progress <- progress_estimated(n = length(x), n_min = ifelse(isTRUE(info), 25, length(x) + 1)) # start if n >= 25
    on.exit(close(progress))
  }
  
@@ -158,6 +159,13 @@ as.ab <- function(x, flag_multiple_results = TRUE, ...) {
      from_text <- character(0)
    }
    
+    # exact name
+    found <- antibiotics[which(toupper(antibiotics$name) == x[i]), ]$ab
+    if (length(found) > 0) {
+      x_new[i] <- found[1L]
+      next
+    }
+    
    # exact AB code
    found <- antibiotics[which(antibiotics$ab == x[i]), ]$ab
    if (length(found) > 0) {
@@ -179,13 +187,6 @@ as.ab <- function(x, flag_multiple_results = TRUE, ...) {
      next
    }
    
-    # exact name
-    found <- antibiotics[which(toupper(antibiotics$name) == x[i]), ]$ab
-    if (length(found) > 0) {
-      x_new[i] <- note_if_more_than_one_found(found, i, from_text)
-      next
-    }
-    
    # exact LOINC code
    loinc_found <- unlist(lapply(antibiotics$loinc,
                                 function(s) x[i] %in% s))
--- a/R/data.R
+++ b/R/data.R
@@ -70,7 +70,7 @@
 #' European Commission Public Health PHARMACEUTICALS - COMMUNITY REGISTER: <http://ec.europa.eu/health/documents/community-register/html/atc.htm>
 #' @inheritSection WHOCC WHOCC
 #' @inheritSection AMR Read more on our website!
-#' @seealso [microorganisms]
+#' @seealso [microorganisms], [intrinsic_resistant]
 "antibiotics"

 #' @rdname antibiotics
@@ -119,7 +119,7 @@
 #'
 #' Leibniz Institute DSMZ-German Collection of Microorganisms and Cell Cultures, Germany, Prokaryotic Nomenclature Up-to-Date, <https://www.dsmz.de/services/online-tools/prokaryotic-nomenclature-up-to-date> and <https://lpsn.dsmz.de> (check included version with [catalogue_of_life_version()]).
 #' @inheritSection AMR Read more on our website!
-#' @seealso [as.mo()], [mo_property()], [microorganisms.codes]
+#' @seealso [as.mo()], [mo_property()], [microorganisms.codes], [intrinsic_resistant]
 "microorganisms"

 catalogue_of_life <- list(
@@ -235,4 +235,25 @@ catalogue_of_life <- list(
 #' - `uti`\cr A logical value (`TRUE`/`FALSE`) to indicate whether the rule applies to a urinary tract infection (UTI)
 #' @details The repository of this `AMR` package contains a file comprising this exact data set: <https://github.com/msberends/AMR/blob/master/data-raw/rsi_translation.txt>. This file **allows for machine reading EUCAST and CLSI guidelines**, which is almost impossible with the Excel and PDF files distributed by EUCAST and CLSI. The file is updated automatically.
 #' @inheritSection AMR Read more on our website!
+#' @seealso [intrinsic_resistant]
 "rsi_translation"
+
+#' Data set with bacterial intrinsic resistance
+#'
+#' Data set containing defined intrinsic resistance by EUCAST of all bug-drug combinations.
+#' @format A [`data.frame`] with `r format(nrow(intrinsic_resistant), big.mark = ",")` observations and `r ncol(intrinsic_resistant)` variables:
+#' - `microorganism`\cr Name of the microorganism
+#' - `antibiotic`\cr Name of the antibiotic drug
+#' @details The repository of this `AMR` package contains a file comprising this exact data set: <https://github.com/msberends/AMR/blob/master/data-raw/intrinsic_resistant.txt>. This file **allows for machine reading EUCAST guidelines about intrinsic resistance**, which is almost impossible with the Excel and PDF files distributed by EUCAST. The file is updated automatically.
+#' 
+#' This data set is based on 'EUCAST Expert Rules, Intrinsic Resistance and Exceptional Phenotypes', version `r EUCAST_VERSION_EXPERT_RULES`.
+#' @inheritSection AMR Read more on our website!
+#' @examples
+#' if (require("dplyr")) {
+#'   intrinsic_resistant %>%
+#'     filter(antibiotic == "Vancomycin", microorganism %like% "Enterococcus") %>% 
+#'     pull(microorganism)
+#'   # [1] "Enterococcus casseliflavus" "Enterococcus gallinarum"
+#' }
+#' @seealso [intrinsic_resistant]
+"intrinsic_resistant"
--- a/R/eucast_rules.R
+++ b/R/eucast_rules.R
@@ -668,7 +668,13 @@ eucast_rules <- function(x,
  
  # Official EUCAST rules ---------------------------------------------------
  eucast_notification_shown <- FALSE
-  eucast_rules_df <- eucast_rules_file # internal data file
+  if (!is.null(list(...)$eucast_rules_df)) {
+    # this allows: eucast_rules(x, eucast_rules_df = AMR:::eucast_rules_file %>% filter(is.na(have_these_values)))
+    eucast_rules_df <- list(...)$eucast_rules_df
+  } else {
+    # otherwise internal data file, created in data-raw/internals.R
+    eucast_rules_df <- eucast_rules_file
+  }
  for (i in seq_len(nrow(eucast_rules_df))) {
    
    rule_previous <- eucast_rules_df[max(1, i - 1), "reference.rule"]
--- a/R/guess_ab_col.R
+++ b/R/guess_ab_col.R
@@ -141,7 +141,7 @@ get_column_abx <- function(x,
  x <- x[, x_columns, drop = FALSE] # without drop = TRUE, x will become a vector when x_columns is length 1
  
  df_trans <- data.frame(colnames = colnames(x),
-                         abcode = suppressWarnings(as.ab(colnames(x))))
+                         abcode = suppressWarnings(as.ab(colnames(x), info = FALSE)))
  df_trans <- df_trans[!is.na(df_trans$abcode), ]
  x <- as.character(df_trans$colnames)
  names(x) <- df_trans$abcode
@@ -150,7 +150,7 @@ get_column_abx <- function(x,
  # such as get_column_abx(example_isolates %>% rename(thisone = AMX), amox = "thisone")
  dots <- list(...)
  if (length(dots) > 0) {
-    newnames <- suppressWarnings(as.ab(names(dots)))
+    newnames <- suppressWarnings(as.ab(names(dots), info = FALSE))
    if (any(is.na(newnames))) {
      warning("Invalid antibiotic reference(s): ", toString(names(dots)[is.na(newnames)]),
              call. = FALSE, immediate. = TRUE)
--- a/R/lifecycle.R
+++ b/R/lifecycle.R
@@ -28,7 +28,7 @@
 #' @rdname lifecycle
 #' @description Functions in this `AMR` package are categorised using [the lifecycle circle of the Tidyverse as found on www.tidyverse.org/lifecycle](https://www.Tidyverse.org/lifecycle).
 #' 
-#' \if{html}{\figure{lifecycle_Tidyverse.svg}{options: height=200px style=margin-bottom:5px} \cr}
+#' \if{html}{\figure{lifecycle_tidyverse.svg}{options: height=200px style=margin-bottom:5px} \cr}
 #' This page contains a section for every lifecycle (with text borrowed from the aforementioned Tidyverse website), so they can be used in the manual pages of the functions. 
 #' @section Experimental lifecycle:
 #' \if{html}{\figure{lifecycle_experimental.svg}{options: style=margin-bottom:5px} \cr}
--- a/R/mo.R
+++ b/R/mo.R
@@ -375,22 +375,20 @@ exec_as.mo <- function(x,
    x <- data.frame(fullname_lower = tolower(x), stringsAsFactors = FALSE) %>% 
      left_join_MO_lookup(by = "fullname_lower") %>% 
      pull(property)
-    # x <- reference_data_to_use[data.table(fullname_lower = tolower(x)),
-    #                            on = "fullname_lower",
-    #                            ..property][[1]]
    
+  } else if (all(x %in% reference_data_to_use$fullname)) {
+    # we need special treatment for very prevalent full names, they are likely!
+    # e.g. as.mo("Staphylococcus aureus")
+    x <- data.frame(fullname = x, stringsAsFactors = FALSE) %>% 
+      left_join_MO_lookup(by = "fullname") %>% 
+      pull(property)
+
  } else if (all(toupper(x) %in% microorganisms.codes$code)) {
    # commonly used MO codes
    x <- data.frame(code = toupper(x), stringsAsFactors = FALSE) %>%
      left_join(microorganisms.codes, by = "code") %>%
      left_join_MO_lookup(by = "mo") %>%
      pull(property)
-    # y <- as.data.table(microorganisms.codes)[data.table(code = toupper(x)),
-    #                                               on = "code", ]
-    # 
-    # x <- reference_data_to_use[data.table(mo = y[["mo"]]),
-    #                            on = "mo",
-    #                            ..property][[1]]
    
  } else if (!all(x %in% microorganisms[, property])) {
    
--- a/R/proportion.R
+++ b/R/proportion.R
@@ -44,7 +44,7 @@
 #'
 #' The function [proportion_df()] takes any variable from `data` that has an [`rsi`] class (created with [as.rsi()]) and calculates the proportions R, I and S. It also supports grouped variables. The function [rsi_df()] works exactly like [proportion_df()], but adds the number of isolates.
 #' @section Combination therapy:
-#' When using more than one variable for `...` (= combination therapy)), use `only_all_tested` to only count isolates that are tested for all antibiotics/variables that you test them for. See this example for two antibiotics, Drug A and Drug B, about how [susceptibility()] works to calculate the %SI:
+#' When using more than one variable for `...` (= combination therapy), use `only_all_tested` to only count isolates that are tested for all antibiotics/variables that you test them for. See this example for two antibiotics, Drug A and Drug B, about how [susceptibility()] works to calculate the %SI:
 #'
 #' ```
 #' --------------------------------------------------------------------
--- a/R/zzz.R
+++ b/R/zzz.R
@@ -30,13 +30,13 @@
 }

 .onAttach <- function(...) {
-  if (!interactive() || stats::runif(1) > 0.25 || isTRUE(as.logical(Sys.getenv("AMR_silentstart", FALSE)))) {
+  if (!interactive() || stats::runif(1) > 0.1 || isTRUE(as.logical(Sys.getenv("AMR_silentstart", FALSE)))) {
    return()
  }
  packageStartupMessage("Thank you for using the AMR package! ",
                        "If you have a minute, please anonymously fill in this short questionnaire to improve the package and its functionalities:",
                        "\nhttps://msberends.github.io/AMR/survey.html",
-                        "\n[ permanently turn this message off with: Sys.setenv(AMR_silentstart = TRUE) ]")
+                        "\n[ prevent his notice with suppressPackageStartupMessages(library(AMR)) or use Sys.setenv(AMR_silentstart = TRUE) ]")
 }

 create_MO_lookup <- function() {