(v1.7.1.9009) fix for ab class selectors

2025-08-25 01:42:10 +02:00 · 2021-07-03 21:56:53 +02:00
parent c8491d07f8
commit 3e26929838
28 changed files with 442 additions and 356 deletions
--- a/R/aa_helper_functions.R
+++ b/R/aa_helper_functions.R
@@ -736,22 +736,22 @@ get_current_data <- function(arg_name, call) {
  }
  
  # try a manual (base R) method, by going over all underlying environments with sys.frames()
-  for (el in sys.frames()) {
-    if (!is.null(el$`.Generic`)) {
-      # don't check `".Generic" %in% names(el)`, because in R < 3.2, `names(el)` is always NULL
+  for (env in sys.frames()) {
+    if (!is.null(env$`.Generic`)) {
+      # don't check `".Generic" %in% names(env)`, because in R < 3.2, `names(env)` is always NULL
      
-      if (!is.null(el$`.data`) && is.data.frame(el$`.data`)) {
+      if (!is.null(env$`.data`) && is.data.frame(env$`.data`)) {
        # an element `.data` will be in the environment when using `dplyr::select()`
        # (but not when using `dplyr::filter()`, `dplyr::mutate()` or `dplyr::summarise()`)
-        return(structure(el$`.data`, type = "dplyr_selector"))
+        return(structure(env$`.data`, type = "dplyr_selector"))
        
-      } else if (!is.null(el$xx) && is.data.frame(el$xx)) {
+      } else if (!is.null(env$xx) && is.data.frame(env$xx)) {
        # an element `xx` will be in the environment for rows + cols, e.g. `example_isolates[c(1:3), carbapenems()]`
-        return(structure(el$xx, type = "base_R"))
+        return(structure(env$xx, type = "base_R"))
        
-      } else if (!is.null(el$x) && is.data.frame(el$x)) {
+      } else if (!is.null(env$x) && is.data.frame(env$x)) {
        # an element `x` will be in the environment for only cols, e.g. `example_isolates[, carbapenems()]`
-        return(structure(el$x, type = "base_R"))
+        return(structure(env$x, type = "base_R"))
      }
    }
  }
@@ -786,19 +786,19 @@ get_current_column <- function() {
    }
  }
  
-  # cur_column() doesn't always work (only allowed for conditions set by dplyr), but it's probably still possible:
-  frms <- lapply(sys.frames(), function(el) {
-    if ("i" %in% names(el)) {
-      if ("tibble_vars" %in% names(el)) {
+  # cur_column() doesn't always work (only allowed for certain conditions set by dplyr), but it's probably still possible:
+  frms <- lapply(sys.frames(), function(env) {
+    if (!is.null(env$i)) {
+      if (!is.null(env$tibble_vars)) {
        # for mutate_if()
-        el$tibble_vars[el$i]
+        env$tibble_vars[env$i]
      } else {
        # for mutate(across())
        df <- tryCatch(get_current_data(NA, 0), error = function(e) NULL)
        if (is.data.frame(df)) {
-          colnames(df)[el$i]
+          colnames(df)[env$i]
        } else {
-          el$i
+          env$i
        }
      }
    } else {
@@ -816,7 +816,7 @@ get_current_column <- function() {
 }

 is_null_or_grouped_tbl <- function(x) {
-  # attribute "grouped_df" might change at one point, so only set in one place; here.
+  # class "grouped_df" might change at one point, so only set in one place; here.
  is.null(x) || inherits(x, "grouped_df")
 }

@@ -825,7 +825,7 @@ unique_call_id <- function(entire_session = FALSE) {
    c(envir = "session",
      call = "session")
  } else {
-    # combination of environment ID (like "0x7fed4ee8c848")
+    # combination of environment ID (such as "0x7fed4ee8c848")
    # and highest system call
    call <- paste0(deparse(sys.calls()[[1]]), collapse = "")
    if (!interactive() || call %like% "run_test_dir|test_all|tinytest|test_package|testthat") {
--- a/R/ab_class_selectors.R
+++ b/R/ab_class_selectors.R
@@ -299,7 +299,7 @@ ab_selector <- function(function_name,
  # get the columns with a group names in the chosen ab class
  agents <- ab_in_data[names(ab_in_data) %in% abx]
    
-  if (message_not_thrown_before(function_name)) {
+  if (message_not_thrown_before(paste0(function_name, ".", paste(pkg_env$get_column_abx.out, collapse = "|")))) {
    if (length(agents) == 0) {
      message_("No antimicrobial agents of class '", ab_group, "' found", examples, ".")
    } else {
@@ -315,7 +315,7 @@ ab_selector <- function(function_name,
               ifelse(length(agents) == 1, "column: ", "columns: "),
               vector_and(agents_formatted, quotes = FALSE, sort = FALSE))
    }
-    remember_thrown_message(function_name)
+    remember_thrown_message(paste0(function_name, ".", paste(pkg_env$get_column_abx.out, collapse = "|")))
  }
  
  if (!is.null(attributes(vars_df)$type) &&
--- a/R/guess_ab_col.R
+++ b/R/guess_ab_col.R
@@ -97,16 +97,39 @@ guess_ab_col <- function(x = NULL, search_string = NULL, verbose = FALSE, only_r
 }

 get_column_abx <- function(x,
+                           ...,
                           soft_dependencies = NULL,
                           hard_dependencies = NULL,
                           verbose = FALSE,
                           info = TRUE,
                           only_rsi_columns = FALSE,
                           sort = TRUE,
-                           ...) {
+                           reuse_previous_result = TRUE) {
  
  # check if retrieved before, then get it from package environment
-  if (identical(unique_call_id(entire_session = FALSE), pkg_env$get_column_abx.call)) {
+  if (isTRUE(reuse_previous_result) && identical(unique_call_id(entire_session = FALSE), pkg_env$get_column_abx.call)) {
+    # so within the same call, within the same environment, we got here again.
+    # but we could've come from another function within the same call, so now only check the columns that changed
+    
+    # first remove the columns that are not existing anymore
+    previous <- pkg_env$get_column_abx.out
+    current <- previous[previous %in% colnames(x)]
+    
+    # then compare columns in current call with columns in original call
+    new_cols <- colnames(x)[!colnames(x) %in% pkg_env$get_column_abx.checked_cols]
+    if (length(new_cols) > 0) {
+      # these columns did not exist in the last call, so add them
+      new_cols_rsi <- get_column_abx(x[, new_cols, drop = FALSE], reuse_previous_result = FALSE, info = FALSE, sort = FALSE)
+      current <- c(current, new_cols_rsi)
+      # order according to data in current call
+      current <- current[match(colnames(x)[colnames(x) %in% current], current)]
+    }
+    
+    # update pkg environment to improve speed on next run
+    pkg_env$get_column_abx.out <- current
+    pkg_env$get_column_abx.checked_cols <- colnames(x)
+
+    # and return right values
    return(pkg_env$get_column_abx.out)
  }
  
@@ -123,6 +146,7 @@ get_column_abx <- function(x,
  }
  
  x <- as.data.frame(x, stringsAsFactors = FALSE)
+  x.bak <- x
  if (only_rsi_columns == TRUE) {
    x <- x[, which(is.rsi(x)), drop = FALSE]
  }
@@ -163,8 +187,8 @@ get_column_abx <- function(x,
                         abcode = suppressWarnings(as.ab(colnames(x), info = FALSE)),
                         stringsAsFactors = FALSE)
  df_trans <- df_trans[!is.na(df_trans$abcode), , drop = FALSE]
-  x <- as.character(df_trans$colnames)
-  names(x) <- df_trans$abcode
+  out <- as.character(df_trans$colnames)
+  names(out) <- df_trans$abcode
  
  # add from self-defined dots (...):
  # such as get_column_abx(example_isolates %>% rename(thisone = AMX), amox = "thisone")
@@ -177,33 +201,34 @@ get_column_abx <- function(x,
               immediate = TRUE)
    }
    # turn all NULLs to NAs
-    dots <- unlist(lapply(dots, function(x) if (is.null(x)) NA else x))
+    dots <- unlist(lapply(dots, function(dot) if (is.null(dot)) NA else dot))
    names(dots) <- newnames
    dots <- dots[!is.na(names(dots))]
    # merge, but overwrite automatically determined ones by 'dots'
-    x <- c(x[!x %in% dots & !names(x) %in% names(dots)], dots)
+    out <- c(out[!out %in% dots & !names(out) %in% names(dots)], dots)
    # delete NAs, this will make e.g. eucast_rules(... TMP = NULL) work to prevent TMP from being used
-    x <- x[!is.na(x)]
+    out <- out[!is.na(out)]
  }
  
-  if (length(x) == 0) {
+  if (length(out) == 0) {
    if (info == TRUE) {
      message_("No columns found.")
    }
    pkg_env$get_column_abx.call <- unique_call_id(entire_session = FALSE)
-    pkg_env$get_column_abx.out <- x
-    return(x)
+    pkg_env$get_column_abx.checked_cols <- colnames(x.bak)
+    pkg_env$get_column_abx.out <- out
+    return(out)
  }
  
  # sort on name
  if (sort == TRUE) {
-    x <- x[order(names(x), x)]
+    out <- out[order(names(out), out)]
  }
-  duplicates <- c(x[duplicated(x)], x[duplicated(names(x))]) 
+  duplicates <- c(out[duplicated(out)], out[duplicated(names(out))]) 
  duplicates <- duplicates[unique(names(duplicates))]
-  x <- c(x[!names(x) %in% names(duplicates)], duplicates)
+  out <- c(out[!names(out) %in% names(duplicates)], duplicates)
  if (sort == TRUE) {
-    x <- x[order(names(x), x)]
+    out <- out[order(names(out), out)]
  }
  
  # succeeded with auto-guessing
@@ -211,14 +236,14 @@ get_column_abx <- function(x,
    message_(" OK.", add_fn = list(font_green, font_bold), as_note = FALSE)
  }
  
-  for (i in seq_len(length(x))) {
-    if (info == TRUE & verbose == TRUE & !names(x[i]) %in% names(duplicates)) {
-      message_("Using column '", font_bold(x[i]), "' as input for ", names(x)[i],
-               " (", ab_name(names(x)[i], tolower = TRUE, language = NULL), ").")
+  for (i in seq_len(length(out))) {
+    if (info == TRUE & verbose == TRUE & !names(out[i]) %in% names(duplicates)) {
+      message_("Using column '", font_bold(out[i]), "' as input for ", names(out)[i],
+               " (", ab_name(names(out)[i], tolower = TRUE, language = NULL), ").")
    }
-    if (info == TRUE & names(x[i]) %in% names(duplicates)) {
-      warning_(paste0("Using column '", font_bold(x[i]), "' as input for ", names(x)[i],
-                      " (", ab_name(names(x)[i], tolower = TRUE, language = NULL),
+    if (info == TRUE & names(out[i]) %in% names(duplicates)) {
+      warning_(paste0("Using column '", font_bold(out[i]), "' as input for ", names(out)[i],
+                      " (", ab_name(names(out)[i], tolower = TRUE, language = NULL),
                      "), although it was matched for multiple antibiotics or columns."),
               add_fn = font_red,
               call = FALSE, 
@@ -228,18 +253,18 @@ get_column_abx <- function(x,
  
  if (!is.null(hard_dependencies)) {
    hard_dependencies <- unique(hard_dependencies)
-    if (!all(hard_dependencies %in% names(x))) {
+    if (!all(hard_dependencies %in% names(out))) {
      # missing a hard dependency will return NA and consequently the data will not be analysed
-      missing <- hard_dependencies[!hard_dependencies %in% names(x)]
+      missing <- hard_dependencies[!hard_dependencies %in% names(out)]
      generate_warning_abs_missing(missing, any = FALSE)
      return(NA)
    }
  }
  if (!is.null(soft_dependencies)) {
    soft_dependencies <- unique(soft_dependencies)
-    if (info == TRUE & !all(soft_dependencies %in% names(x))) {
+    if (info == TRUE & !all(soft_dependencies %in% names(out))) {
      # missing a soft dependency may lower the reliability
-      missing <- soft_dependencies[!soft_dependencies %in% names(x)]
+      missing <- soft_dependencies[!soft_dependencies %in% names(out)]
      missing_msg <- vector_and(paste0(ab_name(missing, tolower = TRUE, language = NULL), 
                                       " (", font_bold(missing, collapse = NULL), ")"), 
                                quotes = FALSE)
@@ -249,8 +274,9 @@ get_column_abx <- function(x,
  }
  
  pkg_env$get_column_abx.call <- unique_call_id(entire_session = FALSE)
-  pkg_env$get_column_abx.out <- x
-  x
+  pkg_env$get_column_abx.checked_cols <- colnames(x.bak)
+  pkg_env$get_column_abx.out <- out
+  out
 }

 generate_warning_abs_missing <- function(missing, any = FALSE) {
--- a/R/rsi.R
+++ b/R/rsi.R
@@ -284,10 +284,26 @@ as.rsi.default <- function(x, ...) {
      }
    }
    
-    x <- as.character(unlist(x))
+    # trim leading and trailing spaces, new lines, etc.
+    x <- trimws2(as.character(unlist(x)))
    x.bak <- x
-    
    na_before <- length(x[is.na(x) | x == ""])
+    
+    # correct for translations
+    trans_R <- unlist(TRANSLATIONS[which(TRANSLATIONS$pattern == "Resistant"),
+                                   LANGUAGES_SUPPORTED[LANGUAGES_SUPPORTED %in% colnames(TRANSLATIONS)]])
+    trans_S <- unlist(TRANSLATIONS[which(TRANSLATIONS$pattern == "Susceptible"),
+                                   LANGUAGES_SUPPORTED[LANGUAGES_SUPPORTED %in% colnames(TRANSLATIONS)]])
+    trans_I <- unlist(TRANSLATIONS[which(TRANSLATIONS$pattern %in% c("Incr. exposure", "Intermediate")),
+                                   LANGUAGES_SUPPORTED[LANGUAGES_SUPPORTED %in% colnames(TRANSLATIONS)]])
+    x <- gsub(paste0(unique(trans_R[!is.na(trans_R)]), collapse = "|"), "R", x, ignore.case = TRUE)
+    x <- gsub(paste0(unique(trans_S[!is.na(trans_S)]), collapse = "|"), "S", x, ignore.case = TRUE)
+    x <- gsub(paste0(unique(trans_I[!is.na(trans_I)]), collapse = "|"), "I", x, ignore.case = TRUE)
+    # replace all English textual input
+    x <- gsub("res(is(tant)?)?", "R", x, ignore.case = TRUE)
+    x <- gsub("sus(cep(tible)?)?", "S", x, ignore.case = TRUE)
+    x <- gsub("int(er(mediate)?)?", "I", x, ignore.case = TRUE)
+    x <- gsub("inc(r(eased)?)? exp[a-z]*", "I", x, ignore.case = TRUE)
    # remove all spaces
    x <- gsub(" +", "", x)
    # remove all MIC-like values: numbers, operators and periods
--- a/R/rsi_calc.R
+++ b/R/rsi_calc.R
@@ -27,7 +27,12 @@ dots2vars <- function(...) {
  # this function is to give more informative output about 
  # variable names in count_* and proportion_* functions
  dots <- substitute(list(...))
-  vector_and(as.character(dots)[2:length(dots)], quotes = FALSE)
+  agents <- as.character(dots)[2:length(dots)]
+  agents_formatted <- paste0("'", font_bold(agents, collapse = NULL), "'")
+  agents_names <- ab_name(agents, tolower = TRUE, language = NULL)
+  need_name <- generalise_antibiotic_name(agents) != agents_names
+  agents_formatted[need_name] <- paste0(agents_formatted[need_name], " (", agents_names[need_name], ")")
+  vector_and(agents_formatted, quotes = FALSE)
 }

 rsi_calc <- function(...,
@@ -163,8 +168,30 @@ rsi_calc <- function(...,
  if (denominator < minimum) {
    if (data_vars != "") {
      data_vars <- paste(" for", data_vars)
+      # also add group name if used in dplyr::group_by()
+      cur_group <- import_fn("cur_group", "dplyr", error_on_fail = FALSE)
+      if (!is.null(cur_group)) {
+        group_df <- tryCatch(cur_group(), error = function(e) data.frame())
+        if (NCOL(group_df) > 0) {
+          # transform factors to characters
+          group <- vapply(FUN.VALUE = character(1), group_df, function(x) {
+            if (is.numeric(x)) {
+              format(x)
+            } else if (is.logical(x)) {
+              as.character(x)
+            } else {
+              paste0('"', x, '"')
+            }
+          })
+          data_vars <- paste0(data_vars, " in group: ", paste0(names(group), " = ", group, collapse = ", "))
+        }
+      }
    }
-    warning_("Introducing NA: only ", denominator, " results available", data_vars, " (`minimum` = ", minimum, ").", call = FALSE)
+    warning_("Introducing NA: ",
+             ifelse(denominator == 0, "no", paste("only", denominator)),
+             " results available",
+             data_vars,
+             " (`minimum` = ", minimum, ").", call = FALSE)
    fraction <- NA_real_
  } else {
    fraction <- numerator / denominator
--- a/R/sysdata.rda
+++ b/R/sysdata.rda