(v1.5.0.9008) Internal data sets to pkg, speed for auto col determination

2025-08-25 07:02:10 +02:00 · 2021-01-22 10:20:41 +01:00
parent 27f084d819
commit 1ba44776a1
87 changed files with 408 additions and 292 deletions
--- a/R/aa_helper_functions.R
+++ b/R/aa_helper_functions.R
@@ -585,7 +585,7 @@ get_current_data <- function(arg_name, call) {
      stop_("argument `", arg_name, "` is missing with no default", call = call)
    }
  }
-
+  
  # try a (base R) method, by going over the complete system call stack with sys.frames()
  not_set <- TRUE
  frms <- lapply(sys.frames(), function(el) {
@@ -615,6 +615,7 @@ get_current_data <- function(arg_name, call) {
      NULL
    }
  })
+  
  vars_df <- tryCatch(frms[[which(!vapply(FUN.VALUE = logical(1), frms, is.null))]], error = function(e) NULL)
  if (is.data.frame(vars_df)) {
    return(vars_df)
--- a/R/ab_class_selectors.R
+++ b/R/ab_class_selectors.R
@@ -173,7 +173,15 @@ ab_selector <- function(ab_class, function_name) {
  }
  
  vars_df <- get_current_data(arg_name = NA, call = -3)
-  ab_in_data <- get_column_abx(vars_df, info = FALSE)
+  
+  # improve speed here so it will only run once when e.g. in one select call
+  if (!identical(pkg_env$ab_selector, unique_call_id())) {
+    ab_in_data <- get_column_abx(vars_df, info = FALSE)
+    pkg_env$ab_selector <- unique_call_id()
+    pkg_env$ab_selector_cols <- ab_in_data
+  } else {
+    ab_in_data <- pkg_env$ab_selector_cols
+  }
  
  if (length(ab_in_data) == 0) {
    message_("No antimicrobial agents found.")
@@ -199,13 +207,14 @@ ab_selector <- function(ab_class, function_name) {
    } else {
      agents_formatted <- paste0("column '", font_bold(agents, collapse = NULL), "'")
      agents_names <- ab_name(names(agents), tolower = TRUE, language = NULL)
-      agents_formatted[agents != agents_names] <- paste0(agents_formatted[agents != agents_names],
-                                                         " (", agents_names[agents != agents_names], ")")
+      need_name <- tolower(agents) != tolower(agents_names)
+      agents_formatted[need_name] <- paste0(agents_formatted[need_name],
+                                            " (", agents_names[need_name], ")")
      message_("Selecting ", ab_group, ": ", paste(agents_formatted, collapse = ", "),
               as_note = FALSE,
               extra_indent = nchar(paste0("Selecting ", ab_group, ": ")))
    }
-   remember_thrown_message(function_name)
- }
+    remember_thrown_message(function_name)
+  }
  unname(agents)
 }
--- a/R/eucast_rules.R
+++ b/R/eucast_rules.R
@@ -1049,6 +1049,7 @@ eucast_rules <- function(x,
    warn_lacking_rsi_class <- unique(warn_lacking_rsi_class)
    warning_("Not all columns with antimicrobial results are of class <rsi>. Transform them on beforehand, with e.g.:\n",
             "  ", x_deparsed, " %>% mutate_if(is.rsi.eligible, as.rsi)\n",
+             "  ", x_deparsed, " %>% mutate(across((is.rsi.eligible), as.rsi))\n",
             "  ", x_deparsed, " %>% as.rsi(", ifelse(length(warn_lacking_rsi_class) == 1, 
                                                      warn_lacking_rsi_class,
                                                      paste0(warn_lacking_rsi_class[1], ":", warn_lacking_rsi_class[length(warn_lacking_rsi_class)])), 
--- a/R/guess_ab_col.R
+++ b/R/guess_ab_col.R
@@ -137,25 +137,25 @@ get_column_abx <- function(x,
  } else if (info == TRUE) {
    message_("...", appendLF = FALSE, as_note = FALSE)
  }
-  x_bak <- x
+
  # only check columns that are a valid AB code, ATC code, name, abbreviation or synonym,
  # or already have the <rsi> class (as.rsi) 
  # and that they have no more than 50% invalid values
  vectr_antibiotics <- unique(toupper(unlist(antibiotics[, c("ab", "atc", "name", "abbreviations", "synonyms")])))
  vectr_antibiotics <- vectr_antibiotics[!is.na(vectr_antibiotics) & nchar(vectr_antibiotics) >= 3]
-  x_columns <- vapply(FUN.VALUE = character(1), colnames(x), function(col, df = x_bak) {
+  x_columns <- vapply(FUN.VALUE = character(1), colnames(x), function(col, df = x) {
    if (toupper(col) %in% vectr_antibiotics || 
-        is.rsi(as.data.frame(df, stringsAsFactors = FALSE)[, col, drop = TRUE]) ||
-        is.rsi.eligible(as.data.frame(df, stringsAsFactors = FALSE)[, col, drop = TRUE],
-                        threshold = 0.5)) {
+        is.rsi(x[, col, drop = TRUE]) ||
+        is.rsi.eligible(x[, col, drop = TRUE], threshold = 0.5)
+        ) {
      return(col)
    } else {
      return(NA_character_)
    }
  })
+  
  x_columns <- x_columns[!is.na(x_columns)]
  x <- x[, x_columns, drop = FALSE] # without drop = TRUE, x will become a vector when x_columns is length 1
-  
  df_trans <- data.frame(colnames = colnames(x),
                         abcode = suppressWarnings(as.ab(colnames(x), info = FALSE)),
                         stringsAsFactors = FALSE)
@@ -217,7 +217,6 @@ get_column_abx <- function(x,
    }
  }
  
-  
  if (!is.null(hard_dependencies)) {
    hard_dependencies <- unique(hard_dependencies)
    if (!all(hard_dependencies %in% names(x))) {
--- a/R/isolate_identifier.R
+++ b/R/isolate_identifier.R
@@ -26,9 +26,10 @@
 #' Create Identifier of an Isolate
 #' 
 #' This function will paste the microorganism code with all antimicrobial results into one string for each row in a data set. This is useful to compare isolates, e.g. between institutions or regions, when there is no genotyping available.
-#' @inheritSection lifecycle Maturing Lifecycle
+#' @inheritSection lifecycle Experimental Lifecycle
 #' @inheritParams eucast_rules
 #' @param cols_ab a character vector of column names of `x`, or (a combination with) an [antibiotic selector function]([ab_class()]), such as [carbapenems()] and [aminoglycosides()]
+#' @rdname isolate_identifier
 #' @export
 #' @inheritSection AMR Read more on Our Website!
 #' @examples 
@@ -43,7 +44,12 @@
 isolate_identifier <- function(x, col_mo = NULL, cols_ab = NULL) {
  if (is.null(col_mo)) {
    col_mo <- search_type_in_df(x, "mo")
+    if (is.null(col_mo)) {
+      # no column found, then ignore the argument
+      col_mo <- FALSE
+    }
  }
+  
  if (isFALSE(col_mo)) {
    # is FALSE then ignore mo column
    x$col_mo <- ""
@@ -60,14 +66,77 @@ isolate_identifier <- function(x, col_mo = NULL, cols_ab = NULL) {
                        # tryCatch adds 4 calls, so total is -5
                        error = function(e) stop_(e$message, call = -5))
  }
-  if (length(cols_ab) == 0) {
-    warning_("no columns with antimicrobial agents found", call = TRUE)
+  
+  # cope with empty values
+  if (length(cols_ab) == 0 && all(x[, col_mo, drop = TRUE] == "", na.rm = TRUE)) {
+    warning_("in isolate_identifier(): no column with microorganisms and no columns with antimicrobial agents found", call = FALSE)
+  } else if (length(cols_ab) == 0) {
+    warning_("in isolate_identifier(): no columns with antimicrobial agents found", call = FALSE)
  }
  
  out <- x[, c(col_mo, cols_ab), drop = FALSE]
  out <- do.call(paste, c(out, sep = ""))
  out <- gsub("NA", ".", out, fixed = TRUE)
-  set_clean_class(out, new_class = c("isolate_identifier", "character"))
+  out <- set_clean_class(out, new_class = c("isolate_identifier", "character"))
+  attr(out, "ab") <- cols_ab
+  out
+}
+
+#' @method all.equal isolate_identifier
+#' @rdname isolate_identifier
+#' @export
+all.equal.isolate_identifier <- function(target, current, ignore_empty_results = TRUE, ...) {
+  if (isTRUE(all.equal.character(target, current))) {
+    return(TRUE)
+  }
+  # vectorise over both target and current
+  if (length(target) > 1 && length(current) == 1) {
+    current <- rep(current, length(target))
+  } else if (length(current) > 1 && length(target) == 1) {
+    target <- rep(target, length(current))
+  }
+  stop_if(length(target) != length(current),
+          "length of `target` and `current` must be the same, or one must be 1")
+  
+  get_vector <- function(x) {
+    if (grepl("|", x, fixed = TRUE)) {
+      mo <- gsub("(.*)\\|.*", "\\1", x)
+    } else {
+      mo <- NULL
+    }
+    if (grepl("|", x, fixed = TRUE)) {
+      ab <- gsub(".*\\|(.*)", "\\1", x)
+    } else {
+      ab <- x
+    }
+    ab <- strsplit(ab, "")[[1L]]
+    if (is.null(mo)) {
+      out <- as.character(ab)
+      names(out) <- attributes(x)$ab
+    } else {
+      out <- as.character(c(mo, ab))
+      names(out) <- c("mo", attributes(x)$ab)
+    }
+    out
+  }
+  
+  # run it
+  for (i in seq_len(length(target))) {
+    if (i == 1) {
+      df <- data.frame(object = paste0(c("target[", "current["), i, "]"))
+    }
+    trgt <- get_vector(target[i])
+    crnt <- get_vector(current[i])
+    if (ignore_empty_results == TRUE) {
+      diff <- names(trgt[trgt != crnt & trgt != "." & crnt != "."])  
+    } else {
+      diff <- names(trgt[trgt != crnt])
+    }
+    
+  }
+  
+  stop("THIS FUNCTION IS WORK IN PROGRESS AND NOT AVAILABLE IN THIS BETA VERSION")
+  
 }

 #' @method print isolate_identifier
--- a/R/rsi.R
+++ b/R/rsi.R
@@ -49,16 +49,16 @@
 #' 2. For **interpreting minimum inhibitory concentration (MIC) values** according to EUCAST or CLSI. You must clean your MIC values first using [as.mic()], that also gives your columns the new data class [`mic`]. Also, be sure to have a column with microorganism names or codes. It will be found automatically, but can be set manually using the `mo` argument.
 #'    * Using `dplyr`, R/SI interpretation can be done very easily with either: 
 #'      ```
-#'      your_data %>% mutate_if(is.mic, as.rsi)             # until dplyr 1.0.0
-#'      your_data %>% mutate(across(where(is.mic), as.rsi)) # since dplyr 1.0.0
+#'      your_data %>% mutate_if(is.mic, as.rsi)         # until dplyr 1.0.0
+#'      your_data %>% mutate(across((is.mic), as.rsi))  # since dplyr 1.0.0
 #'      ```
 #'    * Operators like "<=" will be stripped before interpretation. When using `conserve_capped_values = TRUE`, an MIC value of e.g. ">2" will always return "R", even if the breakpoint according to the chosen guideline is ">=4". This is to prevent that capped values from raw laboratory data would not be treated conservatively. The default behaviour (`conserve_capped_values = FALSE`) considers ">2" to be lower than ">=4" and might in this case return "S" or "I".
 #'      
 #' 3. For **interpreting disk diffusion diameters** according to EUCAST or CLSI. You must clean your disk zones first using [as.disk()], that also gives your columns the new data class [`disk`]. Also, be sure to have a column with microorganism names or codes. It will be found automatically, but can be set manually using the `mo` argument.
 #'    * Using `dplyr`, R/SI interpretation can be done very easily with either: 
 #'      ```
-#'      your_data %>% mutate_if(is.disk, as.rsi)             # until dplyr 1.0.0
-#'      your_data %>% mutate(across(where(is.disk), as.rsi)) # since dplyr 1.0.0
+#'      your_data %>% mutate_if(is.disk, as.rsi)         # until dplyr 1.0.0
+#'      your_data %>% mutate(across((is.disk), as.rsi))  # since dplyr 1.0.0
 #'      ```
 #' 
 #' 4. For **interpreting a complete data set**, with automatic determination of MIC values, disk diffusion diameters, microorganism names or codes, and antimicrobial test results. This is done very simply by running `as.rsi(data)`.
@@ -133,7 +133,7 @@
 #' if (require("dplyr")) {
 #'   df %>% mutate_if(is.mic, as.rsi)
 #'   df %>% mutate_if(function(x) is.mic(x) | is.disk(x), as.rsi)
-#'   df %>% mutate(across(where(is.mic), as.rsi))
+#'   df %>% mutate(across((is.mic), as.rsi))
 #'   df %>% mutate_at(vars(AMP:TOB), as.rsi)
 #'   df %>% mutate(across(AMP:TOB, as.rsi))
 #'  
@@ -179,7 +179,7 @@
 #'     
 #'   # note: from dplyr 1.0.0 on, this will be: 
 #'   # example_isolates %>%
-#'   #   mutate(across(where(is.rsi.eligible), as.rsi))
+#'   #   mutate(across((is.rsi.eligible), as.rsi))
 #' }
 #' }
 as.rsi <- function(x, ...) {
@@ -202,14 +202,19 @@ is.rsi.eligible <- function(x, threshold = 0.05) {
            "numeric",
            "integer",
            "mo",
+            "ab",
            "Date",
-            "POSIXct",
+            "POSIXt",
            "rsi",
            "raw",
-            "hms")
+            "hms",
+            "mic",
+            "disk")
          %in% class(x))) {
    # no transformation needed
-    FALSE
+    return(FALSE)
+  } else if (!any(c("R", "S", "I") %in% x, na.rm = TRUE)) {
+    return(FALSE)
  } else {
    x <- x[!is.na(x) & !is.null(x) & !identical(x, "")]
    if (length(x) == 0) {
--- a/R/rsi_calc.R
+++ b/R/rsi_calc.R
@@ -148,7 +148,9 @@ rsi_calc <- function(...,
  
  if (print_warning == TRUE) {
    if (message_not_thrown_before("rsi_calc")) {
-      warning_("Increase speed by transforming to class <rsi> on beforehand: your_data %>% mutate_if(is.rsi.eligible, as.rsi)",
+      warning_("Increase speed by transforming to class <rsi> on beforehand:\n",
+               "  your_data %>% mutate_if(is.rsi.eligible, as.rsi)\n",
+               "  your_data %>% mutate(across((is.rsi.eligible), as.rsi))",
               call = FALSE)
      remember_thrown_message("rsi_calc")
    }
--- a/R/sysdata.rda
+++ b/R/sysdata.rda
--- a/R/zzz.R
+++ b/R/zzz.R
@@ -28,35 +28,6 @@ pkg_env <- new.env(hash = FALSE)
 pkg_env$mo_failed <- character(0)

 .onLoad <- function(libname, pkgname) {
-  
-  assign(x = "AB_lookup",
-         value = create_AB_lookup(),
-         envir = asNamespace("AMR"))
-  
-  assign(x = "MO_lookup",
-         value = create_MO_lookup(),
-         envir = asNamespace("AMR"))
-  
-  assign(x = "MO.old_lookup",
-         value = create_MO.old_lookup(),
-         envir = asNamespace("AMR"))
-  
-  assign(x = "INTRINSIC_R",
-         value = create_intr_resistance(),
-         envir = asNamespace("AMR"))
-  
-  assign(x = "LANGUAGES_SUPPORTED",
-         value = sort(c("en", unique(translations_file$lang))),
-         envir = asNamespace("AMR"))
-  
-  assign(x = "MO_CONS",
-         value = create_species_cons_cops("CoNS"),
-         envir = asNamespace("AMR"))
-  
-  assign(x = "MO_COPS",
-         value = create_species_cons_cops("CoPS"),
-         envir = asNamespace("AMR"))
-
  # Support for tibble headers (type_sum) and tibble columns content (pillar_shaft)
  # without the need to depend on other packages. This was suggested by the 
  # developers of the vctrs package: 
@@ -102,89 +73,5 @@ pkg_env$mo_failed <- character(0)
                                  font_bold("options(AMR_silentstart = TRUE)"), "]"))
 }

-create_intr_resistance <- function() {
-  # for mo_is_intrinsic_resistant() - saves a lot of time when executed on this vector
-  paste(AMR::microorganisms[match(AMR::intrinsic_resistant$microorganism, AMR::microorganisms$fullname), "mo", drop = TRUE],
-        AMR::antibiotics[match(AMR::intrinsic_resistant$antibiotic, AMR::antibiotics$name), "ab", drop = TRUE])
-}

-create_species_cons_cops <- function(type = c("CoNS", "CoPS")) {
-  # Determination of which staphylococcal species are CoNS/CoPS according to:
-  # - Becker et al. 2014, PMID 25278577
-  # - Becker et al. 2019, PMID 30872103
-  # - Becker et al. 2020, PMID 32056452
-  # this function returns class <mo>
-  MO_staph <- AMR::microorganisms
-  MO_staph <- MO_staph[which(MO_staph$genus == "Staphylococcus"), , drop = FALSE]
-  if (type == "CoNS") {
-    MO_staph[which(MO_staph$species %in% c("coagulase-negative", "argensis", "arlettae",
-                                           "auricularis", "caeli", "capitis", "caprae", 
-                                           "carnosus", "chromogenes", "cohnii", "condimenti",
-                                           "debuckii", "devriesei", "edaphicus", "epidermidis",
-                                           "equorum", "felis", "fleurettii", "gallinarum",
-                                           "haemolyticus", "hominis", "jettensis", "kloosii",
-                                           "lentus", "lugdunensis", "massiliensis", "microti",
-                                           "muscae", "nepalensis", "pasteuri", "petrasii",
-                                           "pettenkoferi", "piscifermentans", "pseudoxylosus",
-                                           "rostri", "saccharolyticus", "saprophyticus",
-                                           "sciuri", "simulans", "stepanovicii", "succinus",
-                                           "vitulinus", "warneri", "xylosus")
-                   | (MO_staph$species == "schleiferi" & MO_staph$subspecies %in% c("schleiferi", ""))),
-             "mo", drop = TRUE]
-  } else if (type == "CoPS") {
-    MO_staph[which(MO_staph$species %in% c("coagulase-positive",
-                                           "simiae", "agnetis",
-                                           "delphini", "lutrae",
-                                           "hyicus", "intermedius",
-                                           "pseudintermedius", "pseudointermedius",
-                                           "schweitzeri", "argenteus")
-                   | (MO_staph$species == "schleiferi" & MO_staph$subspecies == "coagulans")),
-             "mo", drop = TRUE]
-  }
-}

-create_AB_lookup <- function() {
-  AB_lookup <- AMR::antibiotics
-  AB_lookup$generalised_name <- generalise_antibiotic_name(AB_lookup$name)
-  AB_lookup$generalised_synonyms <- lapply(AB_lookup$synonyms, generalise_antibiotic_name)
-  AB_lookup$generalised_abbreviations <- lapply(AB_lookup$abbreviations, generalise_antibiotic_name)
-  AB_lookup$generalised_loinc <- lapply(AB_lookup$loinc, generalise_antibiotic_name)
-  AB_lookup
-}
-
-create_MO_lookup <- function() {
-  MO_lookup <- AMR::microorganisms
-  
-  MO_lookup$kingdom_index <- NA_real_
-  MO_lookup[which(MO_lookup$kingdom == "Bacteria" | MO_lookup$mo == "UNKNOWN"), "kingdom_index"] <- 1
-  MO_lookup[which(MO_lookup$kingdom == "Fungi"), "kingdom_index"] <- 2
-  MO_lookup[which(MO_lookup$kingdom == "Protozoa"), "kingdom_index"] <- 3
-  MO_lookup[which(MO_lookup$kingdom == "Archaea"), "kingdom_index"] <- 4
-  # all the rest
-  MO_lookup[which(is.na(MO_lookup$kingdom_index)), "kingdom_index"] <- 5
-  
-  # use this paste instead of `fullname` to work with Viridans Group Streptococci, etc.
-  MO_lookup$fullname_lower <- tolower(trimws(paste(MO_lookup$genus, 
-                                                   MO_lookup$species,
-                                                   MO_lookup$subspecies)))
-  ind <- MO_lookup$genus == "" | grepl("^[(]unknown ", MO_lookup$fullname)
-  MO_lookup[ind, "fullname_lower"] <- tolower(MO_lookup[ind, "fullname"])
-  MO_lookup$fullname_lower <- trimws(gsub("[^.a-z0-9/ \\-]+", "", MO_lookup$fullname_lower, perl = TRUE))
-  
-  # add a column with only "e coli" like combinations
-  MO_lookup$g_species <- gsub("^([a-z])[a-z]+ ([a-z]+) ?.*", "\\1 \\2", MO_lookup$fullname_lower, perl = TRUE)
-  
-  # so arrange data on prevalence first, then kingdom, then full name
-  MO_lookup[order(MO_lookup$prevalence, MO_lookup$kingdom_index, MO_lookup$fullname_lower), ]
-}
-
-create_MO.old_lookup <- function() {
-  MO.old_lookup <- AMR::microorganisms.old
-  MO.old_lookup$fullname_lower <- trimws(gsub("[^.a-z0-9/ \\-]+", "", tolower(trimws(MO.old_lookup$fullname))))
-  
-  # add a column with only "e coli"-like combinations
-  MO.old_lookup$g_species <- trimws(gsub("^([a-z])[a-z]+ ([a-z]+) ?.*", "\\1 \\2", MO.old_lookup$fullname_lower))
-  
-  # so arrange data on prevalence first, then full name
-  MO.old_lookup[order(MO.old_lookup$prevalence, MO.old_lookup$fullname_lower), ]
-}