(v1.5.0.9039) handle first isolates for missing antibiograms

2025-07-12 13:41:49 +02:00 · 2021-03-08 02:38:32 +01:00
parent 461793dc34
commit a7c9b4c295
21 changed files with 79 additions and 62 deletions
--- a/R/ab.R
+++ b/R/ab.R
@ -109,23 +109,23 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) {
  x <- toupper(x)
  x_nonNA <- x[!is.na(x)]
  
-  # if (all(x_nonNA %in% antibiotics$ab, na.rm = TRUE)) {
-  #   # all valid AB codes, but not yet right class
-  #   return(set_clean_class(x,
-  #                          new_class = c("ab", "character")))
-  # }
-  # if (all(x_nonNA %in% toupper(antibiotics$name), na.rm = TRUE)) {
-  #   # all valid AB names
-  #   out <- antibiotics$ab[match(x, toupper(antibiotics$name))]
-  #   out[is.na(x)] <- NA_character_
-  #   return(out)
-  # }
-  # if (all(x_nonNA %in% antibiotics$atc, na.rm = TRUE)) {
-  #   # all valid ATC codes
-  #   out <- antibiotics$ab[match(x, antibiotics$atc)]
-  #   out[is.na(x)] <- NA_character_
-  #   return(out)
-  # }
+  if (all(x_nonNA %in% antibiotics$ab, na.rm = TRUE)) {
+    # all valid AB codes, but not yet right class
+    return(set_clean_class(x,
+                           new_class = c("ab", "character")))
+  }
+  if (all(x_nonNA %in% toupper(antibiotics$name), na.rm = TRUE)) {
+    # all valid AB names
+    out <- antibiotics$ab[match(x, toupper(antibiotics$name))]
+    out[is.na(x)] <- NA_character_
+    return(out)
+  }
+  if (all(x_nonNA %in% antibiotics$atc, na.rm = TRUE)) {
+    # all valid ATC codes
+    out <- antibiotics$ab[match(x, antibiotics$atc)]
+    out[is.na(x)] <- NA_character_
+    return(out)
+  }
  
  # remove diacritics
  x <- iconv(x, from = "UTF-8", to = "ASCII//TRANSLIT")
--- a/R/first_isolate.R
+++ b/R/first_isolate.R
@ -37,13 +37,14 @@
 #' @param col_keyantibiotics column name of the key antibiotics to determine first (weighted) isolates, see [key_antibiotics()]. Defaults to the first column that starts with 'key' followed by 'ab' or 'antibiotics' (case insensitive). Use `col_keyantibiotics = FALSE` to prevent this.
 #' @param episode_days episode in days after which a genus/species combination will be determined as 'first isolate' again. The default of 365 days is based on the guideline by CLSI, see *Source*. 
 #' @param testcodes_exclude character vector with test codes that should be excluded (case-insensitive)
-#' @param icu_exclude logical whether ICU isolates should be excluded (rows with value `TRUE` in the column set with `col_icu`)
+#' @param icu_exclude logical to indicate whether ICU isolates should be excluded (rows with value `TRUE` in the column set with `col_icu`)
 #' @param specimen_group value in the column set with `col_specimen` to filter on
 #' @param type type to determine weighed isolates; can be `"keyantibiotics"` or `"points"`, see *Details*
-#' @param ignore_I logical to determine whether antibiotic interpretations with `"I"` will be ignored when `type = "keyantibiotics"`, see *Details*
+#' @param ignore_I logical to indicate whether antibiotic interpretations with `"I"` will be ignored when `type = "keyantibiotics"`, see *Details*
 #' @param points_threshold points until the comparison of key antibiotics will lead to inclusion of an isolate when `type = "points"`, see *Details*
 #' @param info print progress
-#' @param include_unknown logical to determine whether 'unknown' microorganisms should be included too, i.e. microbial code `"UNKNOWN"`, which defaults to `FALSE`. For WHONET users, this means that all records with organism code `"con"` (*contamination*) will be excluded at default. Isolates with a microbial ID of `NA` will always be excluded as first isolate.
+#' @param include_unknown logical to indicate whether 'unknown' microorganisms should be included too, i.e. microbial code `"UNKNOWN"`, which defaults to `FALSE`. For WHONET users, this means that all records with organism code `"con"` (*contamination*) will be excluded at default. Isolates with a microbial ID of `NA` will always be excluded as first isolate.
+#' @param include_untested_rsi logical to indicate whether also rows without antibiotic results are still eligible for becoming a first isolate. Use `include_untested_rsi = FALSE` to always return `FALSE` for such rows. This checks the data set for columns of class `<rsi>` and consequently requires transforming columns with antibiotic results using [as.rsi()] first.
 #' @param ... arguments passed on to [first_isolate()] when using [filter_first_isolate()], or arguments passed on to [key_antibiotics()] when using [filter_first_weighted_isolate()]
 #' @details 
 #' These functions are context-aware. This means that then the `x` argument can be left blank, see *Examples*.
@ -159,6 +160,7 @@ first_isolate <- function(x = NULL,
                          points_threshold = 2,
                          info = interactive(),
                          include_unknown = FALSE,
+                          include_untested_rsi = TRUE,
                          ...) {
  if (is_null_or_grouped_tbl(x)) {
    # when `x` is left blank, auto determine it (get_current_data() also contains dplyr::cur_data_all())
@ -188,6 +190,7 @@ first_isolate <- function(x = NULL,
  meet_criteria(points_threshold, allow_class = c("numeric", "integer"), has_length = 1, is_positive = TRUE, is_finite = TRUE)
  meet_criteria(info, allow_class = "logical", has_length = 1)
  meet_criteria(include_unknown, allow_class = "logical", has_length = 1)
+  meet_criteria(include_untested_rsi, allow_class = "logical", has_length = 1)
  
  # remove data.table, grouping from tibbles, etc.
  x <- as.data.frame(x, stringsAsFactors = FALSE)
@ -472,6 +475,14 @@ first_isolate <- function(x = NULL,
  }
  x[which(is.na(x$newvar_mo)), "newvar_first_isolate"] <- FALSE
  
+  # handle isolates without antibiogram
+  if (include_untested_rsi == FALSE && any(is.rsi(x))) {
+    rsi_all_NA <- which(unname(vapply(FUN.VALUE = logical(1), 
+                                      as.data.frame(t(x[, is.rsi(x), drop = FALSE])),
+                                      function(rsi_values) all(is.na(rsi_values)))))
+    x[rsi_all_NA, "newvar_first_isolate"] <- FALSE
+  }
+  
  # arrange back according to original sorting again
  x <- x[order(x$newvar_row_index), ]
  rownames(x) <- NULL
--- a/R/rsi.R
+++ b/R/rsi.R
@ -337,18 +337,14 @@ as.rsi.mic <- function(x,
  meet_criteria(reference_data, allow_class = "data.frame")
  check_reference_data(reference_data)
  
-  pkg_env$strange <- list(before = ab)
-  
  # for dplyr's across()
  cur_column_dplyr <- import_fn("cur_column", "dplyr", error_on_fail = FALSE)
-  if (!is.null(cur_column_dplyr)) {
+  if (!is.null(cur_column_dplyr) && tryCatch(is.data.frame(get_current_data("ab", 0)), error = function(e) FALSE)) {
    # try to get current column, which will only be available when in across()
    ab <- tryCatch(cur_column_dplyr(),
                   error = function(e) ab)
  }
  
-  pkg_env$strange$afteracross <- ab
-  
  # for auto-determining mo
  mo_var_found <- ""
  if (is.null(mo)) {
@ -373,7 +369,6 @@ as.rsi.mic <- function(x,
  }
  
  ab_coerced <- suppressWarnings(as.ab(ab))
-  pkg_env$strange$coerced <- ab_coerced
  mo_coerced <- suppressWarnings(as.mo(mo))
  guideline_coerced <- get_guideline(guideline, reference_data)
  if (is.na(ab_coerced)) {
@ -433,7 +428,7 @@ as.rsi.disk <- function(x,
  
  # for dplyr's across()
  cur_column_dplyr <- import_fn("cur_column", "dplyr", error_on_fail = FALSE)
-  if (!is.null(cur_column_dplyr)) {
+  if (!is.null(cur_column_dplyr) && tryCatch(is.data.frame(get_current_data("ab", 0)), error = function(e) FALSE)) {
    # try to get current column, which will only be available when in across()
    ab <- tryCatch(cur_column_dplyr(),
                   error = function(e) ab)
@ -709,9 +704,6 @@ exec_as.rsi <- function(method,
                        conserve_capped_values, 
                        add_intrinsic_resistance,
                        reference_data) {
-  pkg_env$strange$exec <- ab
-  pkg_env$strange$names <- names(pkg_env$strange)
-  
  metadata_mo <- get_mo_failures_uncertainties_renamed()
  
  x_bak <- data.frame(x_mo = paste0(x, mo), stringsAsFactors = FALSE)