(v1.7.1.9070) Better WHONET support

2025-07-08 11:11:54 +02:00 · 2021-12-13 10:18:28 +01:00
parent f90e27c1b0
commit 578e7dfee9
74 changed files with 20758 additions and 47402 deletions
--- a/R/aa_helper_functions.R
+++ b/R/aa_helper_functions.R
@ -1096,14 +1096,30 @@ progress_ticker <- function(n = 1, n_min = 0, print = TRUE, ...) {
    }
    set_clean_class(pb, new_class = "txtProgressBar")
  } else if (n >= n_min) {
-    pb <- utils::txtProgressBar(max = n, style = 3)
-    pb$tick <- function() {
-      pb$up(pb$getVal() + 1)
+    # rely on the progress package if it is available - it has a more verbose output
+    progress_bar <- import_fn("progress_bar", "progress", error_on_fail = FALSE)
+    if (!is.null(progress_bar)) {
+      # so we use progress::progress_bar
+      # a close() method was also added, see below this function
+      pb <- progress_bar$new(format = "[:bar] :percent (:current/:total)",
+                             total = n)
+    } else {
+      pb <- utils::txtProgressBar(max = n, style = 3)
+      pb$tick <- function() {
+        pb$up(pb$getVal() + 1)
+      }
    }
    pb
  }
 }

+#' @method close progress_bar
+#' @export
+#' @noRd
+close.progress_bar <- function(con, ...) {
+  con$terminate()
+}
+
 set_clean_class <- function(x, new_class) {
  # return the object with only the new class and no additional attributes where possible
  if (is.null(x)) {
--- a/R/ab.R
+++ b/R/ab.R
@ -155,7 +155,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
                                                x[known_codes_atc],
                                                function(x_) which(vapply(FUN.VALUE = logical(1),
                                                                          AB_lookup$atc,
-                                                                          function(atc) x_ %in% atc)),
+                                                                          function(atc) x_ %in% atc))[1L],
                                                USE.NAMES = FALSE)]
  x_new[known_codes_cid] <- AB_lookup$ab[match(x[known_codes_cid], AB_lookup$cid)]
  already_known <- known_names | known_codes_ab | known_codes_atc | known_codes_cid
--- a/R/ab_selectors.R
+++ b/R/ab_selectors.R
@ -478,7 +478,7 @@ ab_select_exec <- function(function_name,
                               sort = FALSE, fn = function_name)
  # untreatable drugs
  if (only_treatable == TRUE) {
-    untreatable <- antibiotics[which(antibiotics$name %like% "-high|EDTA|polysorbate|macromethod|screening"), "ab", drop = TRUE]
+    untreatable <- antibiotics[which(antibiotics$name %like% "-high|EDTA|polysorbate|macromethod|screening|/nacubactam"), "ab", drop = TRUE]
    if (any(untreatable %in% names(ab_in_data))) {
      if (message_not_thrown_before(function_name, "ab_class", "untreatable", entire_session = TRUE)) {
        warning_("Some agents in `", function_name, "()` were ignored since they cannot be used for treating patients: ",
--- a/R/data.R
+++ b/R/data.R
@ -246,13 +246,14 @@
 #' - `method`\cr Either `r vector_or(rsi_translation$method)`
 #' - `site`\cr Body site, e.g. "Oral" or "Respiratory"
 #' - `mo`\cr Microbial ID, see [as.mo()]
+#' - `rank_index`\cr Taxonomic rank index of `mo` from 1 (subspecies/infraspecies) to 5 (unknown microorganism)
 #' - `ab`\cr Antibiotic ID, see [as.ab()]
 #' - `ref_tbl`\cr Info about where the guideline rule can be found
 #' - `disk_dose`\cr Dose of the used disk diffusion method
 #' - `breakpoint_S`\cr Lowest MIC value or highest number of millimetres that leads to "S"
 #' - `breakpoint_R`\cr Highest MIC value or lowest number of millimetres that leads to "R"
 #' - `uti`\cr A [logical] value (`TRUE`/`FALSE`) to indicate whether the rule applies to a urinary tract infection (UTI)
-#' @details The repository of this `AMR` package contains a file comprising this exact data set: <https://github.com/msberends/AMR/blob/main/data-raw/rsi_translation.txt>. This file **allows for machine reading EUCAST and CLSI guidelines**, which is almost impossible with the Excel and PDF files distributed by EUCAST and CLSI. The file is updated automatically.
+#' @details The repository of this `AMR` package contains a file comprising this exact data set: <https://github.com/msberends/AMR/blob/main/data-raw/rsi_translation.txt>. This file **allows for machine reading EUCAST and CLSI guidelines**, which is almost impossible with the Excel and PDF files distributed by EUCAST and CLSI. The file is updated automatically and the `mo` and `ab` columns have been transformed to contain the full official names instead of codes.
 #' @inheritSection AMR Reference Data Publicly Available
 #' @inheritSection AMR Read more on Our Website!
 #' @seealso [intrinsic_resistant]
--- a/R/mo.R
+++ b/R/mo.R
@ -469,7 +469,7 @@ exec_as.mo <- function(x,
    x_backup_untouched <- x
    x <- strip_whitespace(x, dyslexia_mode)
    # translate 'unknown' names back to English
-    if (any(x %like% "unbekannt|onbekend|desconocid|sconosciut|iconnu|desconhecid", na.rm = TRUE)) {
+    if (any(tolower(x) %like_case% "unbekannt|onbekend|desconocid|sconosciut|iconnu|desconhecid", na.rm = TRUE)) {
      trns <- subset(TRANSLATIONS, pattern %like% "unknown")
      langs <- LANGUAGES_SUPPORTED[LANGUAGES_SUPPORTED != "en"]
      for (l in langs) {
@ -493,6 +493,11 @@ exec_as.mo <- function(x,
    x_backup[x %like_case% "^(fungus|fungi)$"] <- "(unknown fungus)" # will otherwise become the kingdom
    x_backup[x_backup_untouched == "Fungi"] <- "Fungi" # is literally the kingdom
    
+    # remove spp and species
+    x_backup <- gsub(" +(spp.?|ssp.?|sp.? |ss ?.?|subsp.?|subspecies|biovar |serovar |species)", " ", x_backup, perl = TRUE)
+    x_backup <- gsub("( spp?.?| ss |subsp.?|subspecies|biovar|serovar|species)", "", x_backup, perl = TRUE)
+    x_backup <- strip_whitespace(x_backup, dyslexia_mode)
+    
    # Fill in fullnames and MO codes directly
    known_names <- tolower(x_backup) %in% MO_lookup$fullname_lower
    x[known_names] <- MO_lookup[match(tolower(x_backup)[known_names], MO_lookup$fullname_lower), property, drop = TRUE]
@ -503,23 +508,19 @@ exec_as.mo <- function(x,
                                                                     microorganisms.codes$code), "mo", drop = TRUE],
                                          MO_lookup$mo), property, drop = TRUE]
    already_known <- known_names | known_codes_mo | known_codes_lis
-
+    
    # now only continue where the right taxonomic output is not already known
    if (any(!already_known)) {
      x_known <- x[already_known]
-
-      # remove spp and species
-      x <- gsub(" +(spp.?|ssp.?|sp.? |ss ?.?|subsp.?|subspecies|biovar |serovar |species)", " ", x)
-      x <- gsub("(spp.?|subsp.?|subspecies|biovar|serovar|species)", "", x)
-      x <- gsub("^([a-z]{2,4})(spe.?)$", "\\1", x, perl = TRUE) # when ending in SPE instead of SPP and preceded by 2-4 characters
-      x <- strip_whitespace(x, dyslexia_mode)
      
+      # when ending in SPE instead of SPP and preceded by 2-4 characters
+      x <- gsub("^([a-z]{2,4})(spe.?)$", "\\1", x, perl = TRUE)
+
      x_backup_without_spp <- x
-      x_species <- paste(x, "species")
      # translate to English for supported languages of mo_property
      x <- gsub("(gruppe|groep|grupo|gruppo|groupe)", "group", x, perl = TRUE)
      # no groups and complexes as ending
-      x <- gsub("(complex|group)$", "", x, perl = TRUE)
+      x <- gsub("(complex|group|serotype|serovar|serogroup)[^a-zA-Z]*$", "", x, perl = TRUE)
      x <- gsub("(^|[^a-z])((an)?aero+b)[a-z]*", "", x, perl = TRUE)
      x <- gsub("^atyp[a-z]*", "", x, perl = TRUE)
      x <- gsub("(vergroen)[a-z]*", "viridans", x, perl = TRUE)
@ -546,12 +547,12 @@ exec_as.mo <- function(x,
      # allow characters that resemble others = dyslexia_mode ----
      if (dyslexia_mode == TRUE) {
        x <- tolower(x)
-        x <- gsub("[iy]+", "[iy]+", x)
-        x <- gsub("(c|k|q|qu|s|z|x|ks)+", "(c|k|q|qu|s|z|x|ks)+", x)
-        x <- gsub("(ph|hp|f|v)+", "(ph|hp|f|v)+", x)
-        x <- gsub("(th|ht|t)+", "(th|ht|t)+", x)
-        x <- gsub("a+", "a+", x)
-        x <- gsub("u+", "u+", x)
+        x <- gsub("[iy]+", "[iy]+", x, perl = TRUE)
+        x <- gsub("(c|k|q|qu|s|z|x|ks)+", "(c|k|q|qu|s|z|x|ks)+", x, perl = TRUE)
+        x <- gsub("(ph|hp|f|v)+", "(ph|hp|f|v)+", x, perl = TRUE)
+        x <- gsub("(th|ht|t)+", "(th|ht|t)+", x, perl = TRUE)
+        x <- gsub("a+", "a+", x, perl = TRUE)
+        x <- gsub("u+", "u+", x, perl = TRUE)
        # allow any ending of -um, -us, -ium, -icum, -ius, -icus, -ica, -ia and -a (needs perl for the negative backward lookup):
        x <- gsub("(u\\+\\(c\\|k\\|q\\|qu\\+\\|s\\|z\\|x\\|ks\\)\\+)(?![a-z])",
                  "(u[s|m]|[iy][ck]?u[ms]|[iy]?[ck]?a)", x, perl = TRUE)
@ -561,9 +562,9 @@ exec_as.mo <- function(x,
                  "(u[s|m]|[iy][ck]?u[ms]|[iy]?[ck]?a)", x, perl = TRUE)
        x <- gsub("(\\[iy\\]\\+a\\+)(?![a-z])",
                  "([iy]*a+|[iy]+a*)", x, perl = TRUE)
-        x <- gsub("e+", "e+", x)
-        x <- gsub("o+", "o+", x)
-        x <- gsub("(.)\\1+", "\\1+", x)
+        x <- gsub("e+", "e+", x, perl = TRUE)
+        x <- gsub("o+", "o+", x, perl = TRUE)
+        x <- gsub("(.)\\1+", "\\1+", x, perl = TRUE)
        # allow multiplication of all other consonants
        x <- gsub("([bdgjlnrw]+)", "\\1+", x, perl = TRUE)
        # allow ending in -en or -us
@ -575,6 +576,8 @@ exec_as.mo <- function(x,
        # allow au and ou after all above regex implementations
        x <- gsub("a+[bcdfghjklmnpqrstvwxyz]?u+[bcdfghjklmnpqrstvwxyz]?", "(a+u+|o+u+)[bcdfghjklmnpqrstvwxyz]?", x, fixed = TRUE)
        x <- gsub("o+[bcdfghjklmnpqrstvwxyz]?u+[bcdfghjklmnpqrstvwxyz]?", "(a+u+|o+u+)[bcdfghjklmnpqrstvwxyz]?", x, fixed = TRUE)
+        # correct for a forgotten Latin ae instead of e
+        x <- gsub("e+", "a*e+", x, fixed = TRUE)
      }
      x <- strip_whitespace(x, dyslexia_mode)
      # make sure to remove regex overkill (will lead to errors)
@ -582,10 +585,9 @@ exec_as.mo <- function(x,
      x <- gsub("?+", "?", x, fixed = TRUE)
      
      x_trimmed <- x
-      x_trimmed_species <- paste(x_trimmed, "species")
      x_trimmed_without_group <- gsub(" gro.u.p$", "", x_trimmed, perl = TRUE)
      # remove last part from "-" or "/"
-      x_trimmed_without_group <- gsub("(.*)[-/].*", "\\1", x_trimmed_without_group)
+      x_trimmed_without_group <- gsub("(.*)[-/].*", "\\1", x_trimmed_without_group, perl = TRUE)
      # replace space and dot by regex sign
      x_withspaces <- gsub("[ .]+", ".* ", x, perl = TRUE)
      x <- gsub("[ .]+", ".*", x, perl = TRUE)
@ -598,14 +600,12 @@ exec_as.mo <- function(x,
      
      if (isTRUE(debug)) {
        cat(paste0(font_blue("x"), '                       "', x, '"\n'))
-        cat(paste0(font_blue("x_species"), '               "', x_species, '"\n'))
        cat(paste0(font_blue("x_withspaces_start_only"), ' "', x_withspaces_start_only, '"\n'))
        cat(paste0(font_blue("x_withspaces_end_only"), '   "', x_withspaces_end_only, '"\n'))
        cat(paste0(font_blue("x_withspaces_start_end"), '  "', x_withspaces_start_end, '"\n'))
        cat(paste0(font_blue("x_backup"), '                "', x_backup, '"\n'))
        cat(paste0(font_blue("x_backup_without_spp"), '    "', x_backup_without_spp, '"\n'))
        cat(paste0(font_blue("x_trimmed"), '               "', x_trimmed, '"\n'))
-        cat(paste0(font_blue("x_trimmed_species"), '       "', x_trimmed_species, '"\n'))
        cat(paste0(font_blue("x_trimmed_without_group"), ' "', x_trimmed_without_group, '"\n'))
      }
      
@ -914,20 +914,12 @@ exec_as.mo <- function(x,
                                         d.x_withspaces_start_end,
                                         e.x_withspaces_start_only,
                                         f.x_withspaces_end_only,
-                                         g.x_backup_without_spp,
-                                         h.x_species,
-                                         i.x_trimmed_species) {
+                                         g.x_backup_without_spp) {
          
          # FIRST TRY FULLNAMES AND CODES ----
          # if only genus is available, return only genus
          
          if (all(c(x[i], b.x_trimmed) %unlike_case% " ")) {
-            found <- lookup(fullname_lower %in% c(h.x_species, i.x_trimmed_species),
-                            haystack = data_to_check)
-            if (!is.na(found)) {
-              x[i] <- found[1L]
-              return(x[i])
-            }
            if (nchar(g.x_backup_without_spp) >= 6) {
              found <- lookup(fullname_lower %like_case% paste0("^", unregex(g.x_backup_without_spp), "[a-z]+"),
                              haystack = data_to_check)
@ -1425,14 +1417,11 @@ exec_as.mo <- function(x,
                                     d.x_withspaces_start_end = x_withspaces_start_end[i],
                                     e.x_withspaces_start_only = x_withspaces_start_only[i],
                                     f.x_withspaces_end_only = x_withspaces_end_only[i],
-                                     g.x_backup_without_spp = x_backup_without_spp[i],
-                                     h.x_species = x_species[i],
-                                     i.x_trimmed_species = x_trimmed_species[i])
+                                     g.x_backup_without_spp = x_backup_without_spp[i])
        if (!empty_result(x[i])) {
          next
        }
        
-        
        # no results found: make them UNKNOWN ----
        x[i] <- lookup(mo == "UNKNOWN", uncertainty = -1)
        if (initial_search == TRUE) {
--- a/R/plot.R
+++ b/R/plot.R
@ -146,6 +146,7 @@ plot.mic <- function(x,
      legend_txt <- c(legend_txt, "Resistant")
      legend_col <- c(legend_col, colours_RSI[1])
    }
+    
    legend("top",
           x.intersp = 0.5,
           legend = translate_AMR(legend_txt, language = language),
--- a/R/rsi.R
+++ b/R/rsi.R
@ -836,11 +836,11 @@ exec_as.rsi <- function(method,
      get_record <- get_record %pm>% 
        # be as specific as possible (i.e. prefer species over genus):
        # pm_desc(uti) = TRUE on top and FALSE on bottom
-        pm_arrange(pm_desc(uti), pm_desc(nchar(mo))) # 'uti' is a column in data set 'rsi_translation'
+        pm_arrange(pm_desc(uti), rank_index) # 'uti' is a column in data set 'rsi_translation'
    } else {
      get_record <- get_record %pm>% 
        pm_filter(uti == FALSE) %pm>% # 'uti' is a column in rsi_translation
-        pm_arrange(pm_desc(nchar(mo)))
+        pm_arrange(rank_index)
    }
    
    get_record <- get_record[1L, , drop = FALSE]
@ -851,21 +851,16 @@ exec_as.rsi <- function(method,
      } else if (method == "mic") {
        new_rsi[i] <- quick_case_when(isTRUE(conserve_capped_values) & x[i] %like% "^<[0-9]" ~ "S",
                                      isTRUE(conserve_capped_values) & x[i] %like% "^>[0-9]" ~ "R",
-                                      # start interpreting: EUCAST uses <= S and > R, CLSI uses <=S and >= R
+                                      # these basically call `<=.mic()` and `>=.mic()`:
                                      x[i] <= get_record$breakpoint_S ~ "S",
-                                      guideline_coerced %like% "EUCAST" & x[i] > get_record$breakpoint_R ~ "R",
-                                      guideline_coerced %like% "CLSI" & x[i] >= get_record$breakpoint_R ~ "R",
+                                      x[i] >= get_record$breakpoint_R ~ "R",
                                      # return "I" when not match the bottom or top
                                      !is.na(get_record$breakpoint_S) & !is.na(get_record$breakpoint_R) ~ "I",
                                      # and NA otherwise
                                      TRUE ~ NA_character_)
      } else if (method == "disk") {
        new_rsi[i] <- quick_case_when(isTRUE(as.double(x[i]) >= as.double(get_record$breakpoint_S)) ~ "S",
-                                      # start interpreting: EUCAST uses >= S and < R, CLSI uses >=S and <= R
-                                      guideline_coerced %like% "EUCAST" &
-                                        isTRUE(as.double(x[i]) < as.double(get_record$breakpoint_R)) ~ "R",
-                                      guideline_coerced %like% "CLSI" &
-                                        isTRUE(as.double(x[i]) <= as.double(get_record$breakpoint_R)) ~ "R",
+                                      isTRUE(as.double(x[i]) <= as.double(get_record$breakpoint_R)) ~ "R",
                                      # return "I" when not match the bottom or top
                                      !is.na(get_record$breakpoint_S) & !is.na(get_record$breakpoint_R) ~ "I",
                                      # and NA otherwise
--- a/R/sysdata.rda
+++ b/R/sysdata.rda