(v1.3.0.9038) prefinal 1.4.0

2025-08-25 13:42:10 +02:00 · 2020-10-04 19:26:43 +02:00
parent 3136bc54aa
commit baf510183c
89 changed files with 570 additions and 548 deletions
--- a/R/aa_helper_functions.R
+++ b/R/aa_helper_functions.R
@@ -188,8 +188,9 @@ search_type_in_df <- function(x, type, info = TRUE) {
 }

 is_possibly_regex <- function(x) {
-  sapply(strsplit(x, ""),
-         function(y) any(y %in% c("$", "(", ")", "*", "+", "-", ".", "?", "[", "]", "^", "{", "|", "}", "\\"), na.rm = TRUE))
+  tryCatch(sapply(strsplit(x, ""),
+                  function(y) any(y %in% c("$", "(", ")", "*", "+", "-", ".", "?", "[", "]", "^", "{", "|", "}", "\\"), na.rm = TRUE)),
+           error = function(e) rep(TRUE, length(x)))
 }

 stop_ifnot_installed <- function(package) {
--- a/R/eucast_rules.R
+++ b/R/eucast_rules.R
@@ -39,7 +39,7 @@ EUCAST_VERSION_EXPERT_RULES <- list("3.1" = list(version_txt = "v3.1",
 #' To improve the interpretation of the antibiogram before EUCAST rules are applied, some non-EUCAST rules can applied at default, see Details.
 #' @inheritSection lifecycle Stable lifecycle
 #' @param x data with antibiotic columns, such as `amox`, `AMX` and `AMC`
-#' @param info print progress
+#' @param info a logical to indicate whether progress should be printed to the console, defaults to only print while in interactive sessions
 #' @param rules a character vector that specifies which rules should be applied. Must be one or more of `"breakpoints"`, `"expert"`, `"other"`, `"all"`, and defaults to `c("breakpoints", "expert")`. The default value can be set to another value, e.g. using `options(AMR_eucastrules = "all")`.
 #' @param verbose a [logical] to turn Verbose mode on and off (default is off). In Verbose mode, the function does not apply rules to the data, but instead returns a data set in logbook form with extensive info about which rows and columns would be effected and in which way. Using Verbose mode takes a lot more time.
 #' @param version_breakpoints the version number to use for the EUCAST Clinical Breakpoints guideline. Currently supported: `r paste0(names(EUCAST_VERSION_BREAKPOINTS), collapse = ", ")`.
@@ -132,7 +132,7 @@ eucast_rules <- function(x,
                         ...) {
  
  x_deparsed <- deparse(substitute(x))
-  if (length(x_deparsed) > 0 || !all(x_deparsed %like% "[a-z]")) {
+  if (length(x_deparsed) > 1 || !all(x_deparsed %like% "[a-z]+")) {
    x_deparsed <- "your_data"
  }
  
@@ -188,7 +188,7 @@ eucast_rules <- function(x,
  }
  
  warned <- FALSE
-  warn_lacking_rsi_class <- FALSE
+  warn_lacking_rsi_class <- character(0)
  txt_ok <- function(n_added, n_changed, warned = FALSE) {
    if (warned == FALSE) {
      if (n_added + n_changed == 0) {
@@ -447,7 +447,7 @@ eucast_rules <- function(x,
  if (info == TRUE & NROW(x) > 10000) {
    message(font_blue("NOTE: Preparing data..."), appendLF = FALSE)
  }
-
+  
  # nolint start
  # antibiotic classes ----
  aminoglycosides <- c(AMK, DKB, GEN, ISE, KAN, NEO, NET, RST, SIS, STR, STR1, TOB)
@@ -612,9 +612,7 @@ eucast_rules <- function(x,
        # Set base to R where base + enzyme inhibitor is R
        rule_current <- paste0("Set ", ab_name_base, " (", cols_ab[ab_enzyme[i, ]$base_ab], ") = R where ",
                               ab_name_enzyme, " (", cols_ab[ab_enzyme[i, ]$ab], ") = R")
-        if (info == TRUE) {
-          cat(rule_current)
-        }
+        cat(rule_current)
        run_changes <- edit_rsi(x = x,
                                col_mo = col_mo,
                                to = "R",
@@ -629,7 +627,7 @@ eucast_rules <- function(x,
        n_changed <- n_changed + run_changes$changed
        verbose_info <- run_changes$verbose_info
        x <- run_changes$output
-        warn_lacking_rsi_class <- warn_lacking_rsi_class | run_changes$rsi_warn
+        warn_lacking_rsi_class <- c(warn_lacking_rsi_class, run_changes$rsi_warn)
        # Print number of new changes
        if (info == TRUE) {
          # print only on last one of rules in this group
@@ -659,7 +657,7 @@ eucast_rules <- function(x,
        n_changed <- n_changed + run_changes$changed
        verbose_info <- run_changes$verbose_info
        x <- run_changes$output
-        warn_lacking_rsi_class <- warn_lacking_rsi_class | run_changes$rsi_warn
+        warn_lacking_rsi_class <- c(warn_lacking_rsi_class, run_changes$rsi_warn)
        # Print number of new changes
        if (info == TRUE) {
          # print only on last one of rules in this group
@@ -691,7 +689,7 @@ eucast_rules <- function(x,
  if (any(c("all", "breakpoints") %in% rules)) {
    eucast_rules_df <- subset(eucast_rules_df,
                              !reference.rule_group %like% "breakpoint" |
-                              (reference.rule_group %like% "breakpoint" & reference.version == version_breakpoints))
+                                (reference.rule_group %like% "breakpoint" & reference.version == version_breakpoints))
  }
  if (any(c("all", "expert") %in% rules)) {
    eucast_rules_df <- subset(eucast_rules_df,
@@ -846,7 +844,7 @@ eucast_rules <- function(x,
    }
    
    cols <- get_antibiotic_columns(target_antibiotics, x)
-
+    
    # Apply rule on data ------------------------------------------------------
    # this will return the unique number of changes
    run_changes <- edit_rsi(x = x,
@@ -866,7 +864,7 @@ eucast_rules <- function(x,
    n_changed <- n_changed + run_changes$changed
    verbose_info <- run_changes$verbose_info
    x <- run_changes$output
-    warn_lacking_rsi_class <- warn_lacking_rsi_class | run_changes$rsi_warn
+    warn_lacking_rsi_class <- c(warn_lacking_rsi_class, run_changes$rsi_warn)
    # Print number of new changes ---------------------------------------------
    if (info == TRUE & rule_next != rule_current) {
      # print only on last one of rules in this group
@@ -878,8 +876,7 @@ eucast_rules <- function(x,
  }
  
  # Print overview ----------------------------------------------------------
-  if (info == TRUE) {
-    
+  if (info == TRUE | verbose == TRUE) {
    verbose_info <- x.bak %pm>%
      pm_mutate(row = pm_row_number()) %pm>%
      pm_select(`.rowid`, row) %pm>%
@@ -890,6 +887,9 @@ eucast_rules <- function(x,
      pm_filter(!is.na(new)) %pm>%
      pm_arrange(row, rule_group, rule_name, col)
    rownames(verbose_info) <- NULL
+  }
+  
+  if (info == TRUE) {
    
    if (verbose == TRUE) {
      wouldve <- "would have "
@@ -899,16 +899,16 @@ eucast_rules <- function(x,
    
    cat(paste0("\n", font_grey(strrep("-", 0.95 * options()$width)), "\n"))
    cat(paste0("The rules ", paste0(wouldve, "affected "),
-              font_bold(formatnr(pm_n_distinct(verbose_info$row)),
-                        "out of", formatnr(nrow(x.bak)),
-                        "rows"), 
-              ", making a total of ",
-              font_bold(formatnr(nrow(verbose_info)), "edits\n")))
-
-total_n_added <- verbose_info %pm>% pm_filter(is.na(old)) %pm>% nrow()
-total_n_changed <- verbose_info %pm>% pm_filter(!is.na(old)) %pm>% nrow()
-
-# print added values
+               font_bold(formatnr(pm_n_distinct(verbose_info$row)),
+                         "out of", formatnr(nrow(x.bak)),
+                         "rows"), 
+               ", making a total of ",
+               font_bold(formatnr(nrow(verbose_info)), "edits\n")))
+    
+    total_n_added <- verbose_info %pm>% pm_filter(is.na(old)) %pm>% nrow()
+    total_n_changed <- verbose_info %pm>% pm_filter(!is.na(old)) %pm>% nrow()
+    
+    # print added values
    if (total_n_added == 0) {
      colour <- cat # is function
    } else {
@@ -961,12 +961,14 @@ total_n_changed <- verbose_info %pm>% pm_filter(!is.na(old)) %pm>% nrow()
    }
  }
  
-
-  if (isTRUE(warn_lacking_rsi_class)) {
-    unique_cols <- colnames(x.bak)[colnames(x.bak) %in% verbose_info$col]
+  if (length(warn_lacking_rsi_class) > 0) {
+    warn_lacking_rsi_class <- unique(warn_lacking_rsi_class)
    warning("Not all columns with antimicrobial results are of class <rsi>. Transform them on beforehand, with e.g.:\n",
            "  ", x_deparsed, " %>% mutate_if(is.rsi.eligible, as.rsi)\n",
-            "  ", x_deparsed, " %>% as.rsi(", unique_cols[1], ":", unique_cols[length(unique_cols)], ")",
+            "  ", x_deparsed, " %>% as.rsi(", ifelse(length(warn_lacking_rsi_class) == 1, 
+                                                     warn_lacking_rsi_class,
+                                                     paste0(warn_lacking_rsi_class[1], ":", warn_lacking_rsi_class[length(warn_lacking_rsi_class)])), 
+            ")",
            call. = FALSE)
  }
  
@@ -1004,7 +1006,7 @@ edit_rsi <- function(x,
                        changed = 0,
                        output = x,
                        verbose_info = last_verbose_info,
-                        rsi_warn = FALSE)
+                        rsi_warn = character(0))
  
  txt_error <- function() {
    if (info == TRUE) cat("", font_red_bg(font_white(" ERROR ")), "\n\n") 
@@ -1019,7 +1021,7 @@ edit_rsi <- function(x,
  if (length(rows) > 0 & length(cols) > 0) {
    new_edits <- x
    if (any(!sapply(x[, cols, drop = FALSE], is.rsi), na.rm = TRUE)) {
-      track_changes$rsi_warn <- TRUE
+      track_changes$rsi_warn <- cols[!sapply(x[, cols, drop = FALSE], is.rsi)]
    }
    tryCatch(
      # insert into original table
--- a/R/mdro.R
+++ b/R/mdro.R
@@ -24,7 +24,6 @@
 #' Determine which isolates are multidrug-resistant organisms (MDRO) according to international and national guidelines.
 #' @inheritSection lifecycle Maturing lifecycle
 #' @param guideline a specific guideline to follow. When left empty, the publication by Magiorakos *et al.* (2012, Clinical Microbiology and Infection) will be followed, please see *Details*.
-#' @param info a logical to indicate whether progress should be printed to the console
 #' @inheritParams eucast_rules
 #' @param pct_required_classes minimal required percentage of antimicrobial classes that must be available per isolate, rounded down. For example, with the default guideline, 17 antimicrobial classes must be available for *S. aureus*. Setting this `pct_required_classes` argument to `0.5` (default) means that for every *S. aureus* isolate at least 8 different classes must be available. Any lower number of available classes will return `NA` for that isolate.
 #' @param combine_SI a [logical] to indicate whether all values of S and I must be merged into one, so resistance is only considered when isolates are R, not I. As this is the default behaviour of the [mdro()] function, it follows the redefinition by EUCAST about the interpretation of I (increased exposure) in 2019, see section 'Interpretation of S, I and R' below. When using `combine_SI = FALSE`, resistance is considered when isolates are R or I.
--- a/R/mo.R
+++ b/R/mo.R
@@ -157,7 +157,7 @@ as.mo <- function(x,
  
  check_dataset_integrity()
  
-  if (tryCatch(all(x %in% MO_lookup$mo, na.rm = TRUE)
+  if (tryCatch(all(x[!is.na(x)] %in% MO_lookup$mo)
               & isFALSE(Becker)
               & isFALSE(Lancefield), error = function(e) FALSE)) {
    # don't look into valid MO codes, just return them
@@ -212,7 +212,7 @@ as.mo <- function(x,
        pm_pull("mo")
    )
    
-  } else if (all(x %in% MO_lookup$mo)
+  } else if (all(x[!is.na(x)] %in% MO_lookup$mo)
             & isFALSE(Becker)
             & isFALSE(Lancefield)) {
    y <- x
@@ -1733,7 +1733,9 @@ print.mo_uncertainties <- function(x, ...) {
  if (NROW(x) == 0) {
    return(NULL)
  }
-  cat(font_blue(strwrap(c("Matching scores are based on human pathogenic prevalence and the resemblance between the input and the full taxonomic name. Please see ?mo_matching_score.")), collapse = "\n"))
+  cat(font_blue(strwrap("Matching scores are based on human pathogenic prevalence and the resemblance between the input and the full taxonomic name. Please see ?mo_matching_score.", 
+                        width = 0.98 * getOption("width")), 
+                collapse = "\n"))
  cat("\n")
  
  msg <- ""
@@ -1745,12 +1747,22 @@ print.mo_uncertainties <- function(x, ...) {
      candidates <- candidates[order(1 - scores)]
      scores_formatted <- trimws(formatC(round(scores, 3), format = "f", digits = 3))
      n_candidates <- length(candidates)
-      candidates <- paste0(font_italic(candidates, collapse = NULL), 
-                           " (", scores_formatted[order(1 - scores)], ")")
-      candidates <- paste(candidates, collapse = ", ")
+      candidates <- paste0(candidates, " (", scores_formatted[order(1 - scores)], ")", collapse = ", ")
      # align with input after arrow
-      candidates <- paste0("\n", strrep(" ", nchar(x[i, ]$input) + 6), 
-                           "Also matched", ifelse(n_candidates == 25, " (max 25)", ""), ": ", candidates)
+      candidates <- paste0("\n", 
+                           strwrap(paste0("Also matched",
+                                          ifelse(n_candidates >= 25, " (max 25)", ""), ": ", 
+                                          candidates), # this is already max 25 due to format_uncertainty_as_df()
+                                   indent = nchar(x[i, ]$input) + 6,
+                                   exdent = nchar(x[i, ]$input) + 6, 
+                                   width = 0.98 * getOption("width")),
+                           collapse = "")
+      # after strwrap, make taxonomic names italic
+      candidates <- gsub("([A-Za-z]+)", font_italic("\\1"), candidates)
+      candidates <- gsub(paste(font_italic(c("Also", "matched"), collapse = NULL), collapse = " "), 
+                         "Also matched",
+                         candidates, fixed = TRUE)
+      candidates <- gsub(font_italic("max"), "max", candidates, fixed = TRUE)
    } else {
      candidates <- ""
    }
@@ -1759,14 +1771,20 @@ print.mo_uncertainties <- function(x, ...) {
                                  3),
                            format = "f", digits = 3))
    msg <- paste(msg,
-                 paste0('"', x[i, ]$input, '" -> ',
-                        paste0(font_bold(font_italic(x[i, ]$fullname)),
-                               ifelse(!is.na(x[i, ]$renamed_to), paste(", renamed to", font_italic(x[i, ]$renamed_to)), ""),
-                               " (", x[i, ]$mo,
-                               ", matching score = ", score,
-                               ") "),
-                        candidates),
+                 paste0(
+                   strwrap(
+                     paste0('"', x[i, ]$input, '" -> ',
+                            paste0(font_bold(font_italic(x[i, ]$fullname)),
+                                   ifelse(!is.na(x[i, ]$renamed_to), paste(", renamed to", font_italic(x[i, ]$renamed_to)), ""),
+                                   " (", x[i, ]$mo,
+                                   ", matching score = ", score,
+                                   ") ")),
+                     width = 0.98 * getOption("width"),
+                     exdent = nchar(x[i, ]$input) + 6), 
+                   collapse = "\n"),
+                 candidates,
                 sep = "\n")
+    msg <- paste0(gsub("\n\n", "\n", msg), "\n\n")
  }
  cat(msg)
 }
--- a/R/mo_property.R
+++ b/R/mo_property.R
@@ -415,7 +415,7 @@ mo_validate <- function(x, property, language, ...) {
  
  check_dataset_integrity()
  
-  if (tryCatch(all(x %in% MO_lookup$mo) & length(list(...)) == 0, error = function(e) FALSE)) {
+  if (tryCatch(all(x[!is.na(x)] %in% MO_lookup$mo) & length(list(...)) == 0, error = function(e) FALSE)) {
    # special case for mo_* functions where class is already <mo>
    return(MO_lookup[match(x, MO_lookup$mo), property, drop = TRUE])
  }
--- a/R/mo_source.R
+++ b/R/mo_source.R
@@ -31,7 +31,7 @@
 #' @aliases set_mo_source get_mo_source
 #' @details The reference file can be a text file separated with commas (CSV) or tabs or pipes, an Excel file (either 'xls' or 'xlsx' format) or an R object file (extension '.rds'). To use an Excel file, you will need to have the `readxl` package installed.
 #'
-#' [set_mo_source()] will check the file for validity: it must be a [data.frame], must have a column named `"mo"` which contains values from [`microorganisms$mo`][microorganisms] and must have a reference column with your own defined values. If all tests pass, [set_mo_source()] will read the file into R and export it to `"~/.mo_source.rds"` after the user **specifically confirms and allows** that this file will be created. For this reason, this function only works in interactive sessions.
+#' [set_mo_source()] will check the file for validity: it must be a [data.frame], must have a column named `"mo"` which contains values from [`microorganisms$mo`][microorganisms] and must have a reference column with your own defined values. If all tests pass, [set_mo_source()] will read the file into R and will ask to export it to `"~/.mo_source.rds"`. The CRAN policy disallows packages to write to the file system, although '*exceptions may be allowed in interactive sessions if the package obtains confirmation from the user*'. For this reason, this function only works in interactive sessions so that the user can **specifically confirm and allow** that this file will be created. 
 #' 
 #' The created compressed data file `"~/.mo_source.rds"` will be used at default for MO determination (function [as.mo()] and consequently all `mo_*` functions like [mo_genus()] and [mo_gramstain()]). The location of the original file will be saved as an R option with `options(mo_source = path)`. Its timestamp will be saved with `options(mo_source_datetime = ...)`. 
 #'