(v1.3.0.9029) eucast rules fix, unique()

2026-01-12 12:44:36 +01:00 · 2020-09-25 14:44:50 +02:00
parent 1d982a82b4
commit 9667c2994f
32 changed files with 234 additions and 160 deletions
--- a/R/ab.R
+++ b/R/ab.R
@@ -99,6 +99,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) {
  # remove diacritics
  x <- iconv(x, from = "UTF-8", to = "ASCII//TRANSLIT")
  x <- gsub('"', "", x, fixed = TRUE)
+  x <- gsub("(specimen|specimen date|specimen_date|spec_date)", "", x, ignore.case = TRUE, perl = TRUE)
  x_bak_clean <- x
  if (already_regex == FALSE) {
    x_bak_clean <- generalise_antibiotic_name(x_bak_clean)
@@ -212,26 +213,26 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) {
    }
    x_spelling <- x[i]
    if (already_regex == FALSE) {
-      x_spelling <- gsub("[IY]+", "[IY]+", x_spelling)
-      x_spelling <- gsub("(C|K|Q|QU|S|Z|X|KS)+", "(C|K|Q|QU|S|Z|X|KS)+", x_spelling)
-      x_spelling <- gsub("(PH|F|V)+", "(PH|F|V)+", x_spelling)
-      x_spelling <- gsub("(TH|T)+", "(TH|T)+", x_spelling)
-      x_spelling <- gsub("A+", "A+", x_spelling)
-      x_spelling <- gsub("E+", "E+", x_spelling)
-      x_spelling <- gsub("O+", "O+", x_spelling)
+      x_spelling <- gsub("[IY]+", "[IY]+", x_spelling, perl = TRUE)
+      x_spelling <- gsub("(C|K|Q|QU|S|Z|X|KS)+", "(C|K|Q|QU|S|Z|X|KS)+", x_spelling, perl = TRUE)
+      x_spelling <- gsub("(PH|F|V)+", "(PH|F|V)+", x_spelling, perl = TRUE)
+      x_spelling <- gsub("(TH|T)+", "(TH|T)+", x_spelling, perl = TRUE)
+      x_spelling <- gsub("A+", "A+", x_spelling, perl = TRUE)
+      x_spelling <- gsub("E+", "E+", x_spelling, perl = TRUE)
+      x_spelling <- gsub("O+", "O+", x_spelling, perl = TRUE)
      # allow any ending of -in/-ine and -im/-ime
-      x_spelling <- gsub("(\\[IY\\]\\+(N|M)|\\[IY\\]\\+(N|M)E\\+)$", "[IY]+(N|M)E*", x_spelling)
+      x_spelling <- gsub("(\\[IY\\]\\+(N|M)|\\[IY\\]\\+(N|M)E\\+)$", "[IY]+(N|M)E*", x_spelling, perl = TRUE)
      # allow any ending of -ol/-ole
-      x_spelling <- gsub("(O\\+L|O\\+LE\\+)$", "O+LE*", x_spelling)
+      x_spelling <- gsub("(O\\+L|O\\+LE\\+)$", "O+LE*", x_spelling, perl = TRUE)
      # allow any ending of -on/-one
-      x_spelling <- gsub("(O\\+N|O\\+NE\\+)$", "O+NE*", x_spelling)
+      x_spelling <- gsub("(O\\+N|O\\+NE\\+)$", "O+NE*", x_spelling, perl = TRUE)
      # replace multiple same characters to single one with '+', like "ll" -> "l+"
-      x_spelling <- gsub("(.)\\1+", "\\1+", x_spelling)
+      x_spelling <- gsub("(.)\\1+", "\\1+", x_spelling, perl = TRUE)
      # replace spaces and slashes with a possibility on both
-      x_spelling <- gsub("[ /]", "( .*|.*/)", x_spelling)
+      x_spelling <- gsub("[ /]", "( .*|.*/)", x_spelling, perl = TRUE)
      # correct for digital reading text (OCR)
-      x_spelling <- gsub("[NRD8B]", "[NRD8B]", x_spelling)
-      x_spelling <- gsub("(O|0)", "(O|0)+", x_spelling)
+      x_spelling <- gsub("[NRD8B]", "[NRD8B]", x_spelling, perl = TRUE)
+      x_spelling <- gsub("(O|0)", "(O|0)+", x_spelling, perl = TRUE)
      x_spelling <- gsub("++", "+", x_spelling, fixed = TRUE)
    }
    
@@ -264,7 +265,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) {
      
      # try by removing all spaces
      if (x[i] %like% " ") {
-        found <- suppressWarnings(as.ab(gsub(" +", "", x[i]), initial_search = FALSE))
+        found <- suppressWarnings(as.ab(gsub(" +", "", x[i], perl = TRUE), initial_search = FALSE))
        if (length(found) > 0 & !is.na(found)) {
          x_new[i] <- note_if_more_than_one_found(found, i, from_text)
          next
@@ -273,7 +274,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) {
      
      # try by removing all spaces and numbers
      if (x[i] %like% " " | x[i] %like% "[0-9]") {
-        found <- suppressWarnings(as.ab(gsub("[ 0-9]", "", x[i]), initial_search = FALSE))
+        found <- suppressWarnings(as.ab(gsub("[ 0-9]", "", x[i], perl = TRUE), initial_search = FALSE))
        if (length(found) > 0 & !is.na(found)) {
          x_new[i] <- note_if_more_than_one_found(found, i, from_text)
          next
@@ -318,7 +319,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) {
      
      # try by removing all trailing capitals
      if (x[i] %like_case% "[a-z]+[A-Z]+$") {
-        found <- suppressWarnings(as.ab(gsub("[A-Z]+$", "", x[i]), initial_search = FALSE))
+        found <- suppressWarnings(as.ab(gsub("[A-Z]+$", "", x[i], perl = TRUE), initial_search = FALSE))
        if (!is.na(found)) {
          x_new[i] <- note_if_more_than_one_found(found, i, from_text)
          next
@@ -326,7 +327,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) {
      }
      
      # keep only letters
-      found <- suppressWarnings(as.ab(gsub("[^A-Z]", "", x[i]), initial_search = FALSE))
+      found <- suppressWarnings(as.ab(gsub("[^A-Z]", "", x[i], perl = TRUE), initial_search = FALSE))
      if (!is.na(found)) {
        x_new[i] <- note_if_more_than_one_found(found, i, from_text)
        next
@@ -357,10 +358,10 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) {
      }
      
      # make all consonants facultative
-      search_str <- gsub("([BCDFGHJKLMNPQRSTVWXZ])", "\\1*", x[i])
+      search_str <- gsub("([BCDFGHJKLMNPQRSTVWXZ])", "\\1*", x[i], perl = TRUE)
      found <- suppressWarnings(as.ab(search_str, initial_search = FALSE, already_regex = TRUE))
      # keep at least 4 normal characters
-      if (nchar(gsub(".\\*", "", search_str)) < 4) {
+      if (nchar(gsub(".\\*", "", search_str, perl = TRUE)) < 4) {
        found <- NA
      }
      if (!is.na(found)) {
@@ -369,10 +370,10 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) {
      }
      
      # make all vowels facultative
-      search_str <- gsub("([AEIOUY])", "\\1*", x[i])
+      search_str <- gsub("([AEIOUY])", "\\1*", x[i], perl = TRUE)
      found <- suppressWarnings(as.ab(search_str, initial_search = FALSE, already_regex = TRUE))
      # keep at least 5 normal characters
-      if (nchar(gsub(".\\*", "", search_str)) < 5) {
+      if (nchar(gsub(".\\*", "", search_str, perl = TRUE)) < 5) {
        found <- NA
      }
      if (!is.na(found)) {
@@ -529,24 +530,33 @@ c.ab <- function(x, ...) {
  class_integrity_check(y, "antimicrobial code", antibiotics$ab)
 }

+#' @method unique ab
+#' @export
+#' @noRd
+unique.ab <- function(x, incomparables = FALSE, ...) {
+  y <- NextMethod()
+  attributes(y) <- attributes(x)
+  y
+}
+
 generalise_antibiotic_name <- function(x) {
  x <- toupper(x)
  # remove suffices
-  x <- gsub("_(MIC|RSI|DIS[CK])$", "", x)
+  x <- gsub("_(MIC|RSI|DIS[CK])$", "", x, perl = TRUE)
  # remove disk concentrations, like LVX_NM -> LVX
-  x <- gsub("_[A-Z]{2}[0-9_.]{0,3}$", "", x)
+  x <- gsub("_[A-Z]{2}[0-9_.]{0,3}$", "", x, perl = TRUE)
  # remove part between brackets if that's followed by another string
  x <- gsub("(.*)+ [(].*[)]", "\\1", x)
  # keep only max 1 space
-  x <- trimws2(gsub(" +", " ", x))
+  x <- trimws2(gsub(" +", " ", x, perl = TRUE))
  # non-character, space or number should be a slash
-  x <- gsub("[^A-Z0-9 -]", "/", x)
+  x <- gsub("[^A-Z0-9 -]", "/", x, perl = TRUE)
  # spaces around non-characters must be removed: amox + clav -> amox/clav
-  x <- gsub("(.*[A-Z0-9]) ([^A-Z0-9].*)", "\\1\\2", x)
-  x <- gsub("(.*[^A-Z0-9]) ([A-Z0-9].*)", "\\1\\2", x)
+  x <- gsub("(.*[A-Z0-9]) ([^A-Z0-9].*)", "\\1\\2", x, perl = TRUE)
+  x <- gsub("(.*[^A-Z0-9]) ([A-Z0-9].*)", "\\1\\2", x, perl = TRUE)
  # remove hyphen after a starting "co"
-  x <- gsub("^CO-", "CO", x)
+  x <- gsub("^CO-", "CO", x, perl = TRUE)
  # replace operators with a space
-  x <- gsub("(/| AND | WITH | W/|[+]|[-])+", " ", x)
+  x <- gsub("(/| AND | WITH | W/|[+]|[-])+", " ", x, perl = TRUE)
  x
 }
--- a/R/disk.R
+++ b/R/disk.R
@@ -177,3 +177,12 @@ c.disk <- function(x, ...) {
  attributes(y) <- attributes(x)
  y
 }
+
+#' @method unique disk
+#' @export
+#' @noRd
+unique.disk <- function(x, incomparables = FALSE, ...) {
+  y <- NextMethod()
+  attributes(y) <- attributes(x)
+  y
+}
--- a/R/eucast_rules.R
+++ b/R/eucast_rules.R
@@ -137,7 +137,7 @@ eucast_rules <- function(x,
                         ...) {
  
  x_deparsed <- deparse(substitute(x))
-  if (!x_deparsed %like% "[a-z]") {
+  if (length(x_deparsed) > 0 || !all(x_deparsed %like% "[a-z]")) {
    x_deparsed <- "your_data"
  }
  
--- a/R/mic.R
+++ b/R/mic.R
@@ -287,3 +287,12 @@ c.mic <- function(x, ...) {
  x <- as.character(x)
  as.mic(c(x, y))
 }
+
+#' @method unique mic
+#' @export
+#' @noRd
+unique.mic <- function(x, incomparables = FALSE, ...) {
+  y <- NextMethod()
+  attributes(y) <- attributes(x)
+  y
+}
--- a/R/mo.R
+++ b/R/mo.R
@@ -204,11 +204,11 @@ as.mo <- function(x,
    return(to_class_mo(x))
  }
  
-  if (tryCatch(all(tolower(x) %in% MO_lookup$fullname_lower, na.rm = TRUE)
+  if (tryCatch(all(x == "" | gsub(".*(unknown ).*", "unknown name", tolower(x), perl = TRUE) %in% MO_lookup$fullname_lower, na.rm = TRUE)
               & isFALSE(Becker)
               & isFALSE(Lancefield), error = function(e) FALSE)) {
    # to improve speed, special case for taxonomically correct full names (case-insensitive)
-    return(MO_lookup[match(tolower(x), MO_lookup$fullname_lower), "mo", drop = TRUE])
+    return(MO_lookup[match(gsub(".*(unknown ).*", "unknown name", tolower(x), perl = TRUE), MO_lookup$fullname_lower), "mo", drop = TRUE])
  }
  
  # start off with replaced language-specific non-ASCII characters with ASCII characters
@@ -481,8 +481,8 @@ exec_as.mo <- function(x,
      # also, make sure the trailing and leading characters are a-z or 0-9
      # in case of non-regex
      if (dyslexia_mode == FALSE) {
-        trimmed <- gsub("^[^a-zA-Z0-9)(]+", "", trimmed)
-        trimmed <- gsub("[^a-zA-Z0-9)(]+$", "", trimmed)
+        trimmed <- gsub("^[^a-zA-Z0-9)(]+", "", trimmed, perl = TRUE)
+        trimmed <- gsub("[^a-zA-Z0-9)(]+$", "", trimmed, perl = TRUE)
      }
      trimmed
    }
@@ -497,49 +497,49 @@ exec_as.mo <- function(x,
    x_backup[grepl("^(fungus|fungi)$", x)] <- "F_FUNGUS" # will otherwise become the kingdom
    
    # remove spp and species
-    x <- gsub(" +(spp.?|ssp.?|sp.? |ss ?.?|subsp.?|subspecies|biovar |serovar |species)", " ", x)
-    x <- gsub("(spp.?|subsp.?|subspecies|biovar|serovar|species)", "", x)
-    x <- gsub("^([a-z]{2,4})(spe.?)$", "\\1", x) # when ending in SPE instead of SPP and preceded by 2-4 characters
+    x <- gsub(" +(spp.?|ssp.?|sp.? |ss ?.?|subsp.?|subspecies|biovar |serovar |species)", " ", x, perl = TRUE)
+    x <- gsub("(spp.?|subsp.?|subspecies|biovar|serovar|species)", "", x, perl = TRUE)
+    x <- gsub("^([a-z]{2,4})(spe.?)$", "\\1", x, perl = TRUE) # when ending in SPE instead of SPP and preceded by 2-4 characters
    x <- strip_whitespace(x, dyslexia_mode)
    
    x_backup_without_spp <- x
    x_species <- paste(x, "species")
    # translate to English for supported languages of mo_property
-    x <- gsub("(gruppe|groep|grupo|gruppo|groupe)", "group", x)
+    x <- gsub("(gruppe|groep|grupo|gruppo|groupe)", "group", x, perl = TRUE)
    # no groups and complexes as ending
-    x <- gsub("(complex|group)$", "", x)
-    x <- gsub("((an)?aero+b)[a-z]*", "", x)
-    x <- gsub("^atyp[a-z]*", "", x)
-    x <- gsub("(vergroen)[a-z]*", "viridans", x)
-    x <- gsub("[a-z]*diff?erent[a-z]*", "", x)
-    x <- gsub("(hefe|gist|gisten|levadura|lievito|fermento|levure)[a-z]*", "yeast", x)
-    x <- gsub("(schimmels?|mofo|molde|stampo|moisissure|fungi)[a-z]*", "fungus", x)
-    x <- gsub("fungus[ph|f]rya", "fungiphrya", x)
+    x <- gsub("(complex|group)$", "", x, perl = TRUE)
+    x <- gsub("((an)?aero+b)[a-z]*", "", x, perl = TRUE)
+    x <- gsub("^atyp[a-z]*", "", x, perl = TRUE)
+    x <- gsub("(vergroen)[a-z]*", "viridans", x, perl = TRUE)
+    x <- gsub("[a-z]*diff?erent[a-z]*", "", x, perl = TRUE)
+    x <- gsub("(hefe|gist|gisten|levadura|lievito|fermento|levure)[a-z]*", "yeast", x, perl = TRUE)
+    x <- gsub("(schimmels?|mofo|molde|stampo|moisissure|fungi)[a-z]*", "fungus", x, perl = TRUE)
+    x <- gsub("fungus[ph|f]rya", "fungiphrya", x, perl = TRUE)
    # no contamination
-    x <- gsub("(contamination|kontamination|mengflora|contaminaci.n|contamina..o)", "", x)
+    x <- gsub("(contamination|kontamination|mengflora|contaminaci.n|contamina..o)", "", x, perl = TRUE)
    # remove non-text in case of "E. coli" except dots and spaces
-    x <- trimws(gsub("[^.a-zA-Z0-9/ \\-]+", " ", x))
+    x <- trimws(gsub("[^.a-zA-Z0-9/ \\-]+", " ", x, perl = TRUE))
    # but make sure that dots are followed by a space
-    x <- gsub("[.] ?", ". ", x)
+    x <- gsub("[.] ?", ". ", x, perl = TRUE)
    # replace minus by a space
-    x <- gsub("-+", " ", x)
+    x <- gsub("-+", " ", x, perl = TRUE)
    # replace hemolytic by haemolytic
-    x <- gsub("ha?emoly", "haemoly", x)
+    x <- gsub("ha?emoly", "haemoly", x, perl = TRUE)
    # place minus back in streptococci
-    x <- gsub("(alpha|beta|gamma).?ha?emoly", "\\1-haemoly", x)
+    x <- gsub("(alpha|beta|gamma).?ha?emoly", "\\1-haemoly", x, perl = TRUE)
    # remove genus as first word
-    x <- gsub("^genus ", "", x)
+    x <- gsub("^genus ", "", x, perl = TRUE)
    # remove 'uncertain'-like texts
-    x <- trimws(gsub("(uncertain|susp[ie]c[a-z]+|verdacht)", "", x))
+    x <- trimws(gsub("(uncertain|susp[ie]c[a-z]+|verdacht)", "", x, perl = TRUE))
    # allow characters that resemble others = dyslexia_mode ----
    if (dyslexia_mode == TRUE) {
      x <- tolower(x)
-      x <- gsub("[iy]+", "[iy]+", x)
-      x <- gsub("(c|k|q|qu|s|z|x|ks)+", "(c|k|q|qu|s|z|x|ks)+", x)
-      x <- gsub("(ph|hp|f|v)+", "(ph|hp|f|v)+", x)
-      x <- gsub("(th|ht|t)+", "(th|ht|t)+", x)
-      x <- gsub("a+", "a+", x)
-      x <- gsub("u+", "u+", x)
+      x <- gsub("[iy]+", "[iy]+", x, perl = TRUE)
+      x <- gsub("(c|k|q|qu|s|z|x|ks)+", "(c|k|q|qu|s|z|x|ks)+", x, perl = TRUE)
+      x <- gsub("(ph|hp|f|v)+", "(ph|hp|f|v)+", x, perl = TRUE)
+      x <- gsub("(th|ht|t)+", "(th|ht|t)+", x, perl = TRUE)
+      x <- gsub("a+", "a+", x, perl = TRUE)
+      x <- gsub("u+", "u+", x, perl = TRUE)
      # allow any ending of -um, -us, -ium, -icum, -ius, -icus, -ica, -ia and -a (needs perl for the negative backward lookup):
      x <- gsub("(u\\+\\(c\\|k\\|q\\|qu\\+\\|s\\|z\\|x\\|ks\\)\\+)(?![a-z])",
                "(u[s|m]|[iy][ck]?u[ms]|[iy]?[ck]?a)", x, perl = TRUE)
@@ -549,11 +549,11 @@ exec_as.mo <- function(x,
                "(u[s|m]|[iy][ck]?u[ms]|[iy]?[ck]?a)", x, perl = TRUE)
      x <- gsub("(\\[iy\\]\\+a\\+)(?![a-z])",
                "([iy]*a+|[iy]+a*)", x, perl = TRUE)
-      x <- gsub("e+", "e+", x)
-      x <- gsub("o+", "o+", x)
-      x <- gsub("(.)\\1+", "\\1+", x)
+      x <- gsub("e+", "e+", x, perl = TRUE)
+      x <- gsub("o+", "o+", x, perl = TRUE)
+      x <- gsub("(.)\\1+", "\\1+", x, perl = TRUE)
      # allow multiplication of all other consonants
-      x <- gsub("([bdgjlnrw]+)", "\\1+", x)
+      x <- gsub("([bdgjlnrw]+)", "\\1+", x, perl = TRUE)
      # allow ending in -en or -us
      x <- gsub("e\\+n(?![a-z[])", "(e+n|u+(c|k|q|qu|s|z|x|ks)+)", x, perl = TRUE)
      # if the input is longer than 10 characters, allow any forgotten consonant between all characters, as some might just have forgotten one...
@@ -571,12 +571,12 @@ exec_as.mo <- function(x,
    
    x_trimmed <- x
    x_trimmed_species <- paste(x_trimmed, "species")
-    x_trimmed_without_group <- gsub(" gro.u.p$", "", x_trimmed)
+    x_trimmed_without_group <- gsub(" gro.u.p$", "", x_trimmed, perl = TRUE)
    # remove last part from "-" or "/"
    x_trimmed_without_group <- gsub("(.*)[-/].*", "\\1", x_trimmed_without_group)
    # replace space and dot by regex sign
-    x_withspaces <- gsub("[ .]+", ".* ", x)
-    x <- gsub("[ .]+", ".*", x)
+    x_withspaces <- gsub("[ .]+", ".* ", x, perl = TRUE)
+    x <- gsub("[ .]+", ".*", x, perl = TRUE)
    # add start en stop regex
    x <- paste0("^", x, "$")
    
@@ -616,7 +616,7 @@ exec_as.mo <- function(x,
      }
      
      # valid fullname ----
-      found <- lookup(fullname_lower %in% gsub("[^a-zA-Z0-9_. -]", "", tolower(c(x_backup[i], x_backup_without_spp[i]))))
+      found <- lookup(fullname_lower %in% gsub("[^a-zA-Z0-9_. -]", "", tolower(c(x_backup[i], x_backup_without_spp[i])), perl = TRUE))
      # added the gsub() for "(unknown fungus)", since fullname_lower does not contain brackets
      if (!is.na(found)) {
        x[i] <- found[1L]
@@ -670,7 +670,7 @@ exec_as.mo <- function(x,
      
      # very probable: is G. species ----
      found <- lookup(g_species %in% gsub("[^a-z0-9/ \\-]+", "", 
-                                          tolower(c(x_backup[i], x_backup_without_spp[i]))))
+                                          tolower(c(x_backup[i], x_backup_without_spp[i])), perl = TRUE))
      if (!is.na(found)) {
        x[i] <- found[1L]
        next
@@ -1121,8 +1121,8 @@ exec_as.mo <- function(x,
            if (isTRUE(debug)) {
              cat(font_bold("\n[ UNCERTAINTY LEVEL", now_checks_for_uncertainty_level, "] (4) strip values between brackets\n"))
            }
-            a.x_backup_stripped <- gsub("( *[(].*[)] *)", " ", a.x_backup)
-            a.x_backup_stripped <- trimws(gsub(" +", " ", a.x_backup_stripped))
+            a.x_backup_stripped <- gsub("( *[(].*[)] *)", " ", a.x_backup, perl = TRUE)
+            a.x_backup_stripped <- trimws(gsub(" +", " ", a.x_backup_stripped, perl = TRUE))
            if (isTRUE(debug)) {
              message("Running '", a.x_backup_stripped, "'")
            }
@@ -1612,7 +1612,7 @@ format_uncertainty_as_df <- function(uncertainty_level,
 pillar_shaft.mo <- function(x, ...) {
  out <- format(x)
  # grey out the kingdom (part until first "_")
-  out[!is.na(x)] <- gsub("^([A-Z]+_)(.*)", paste0(font_subtle("\\1"), "\\2"), out[!is.na(x)])
+  out[!is.na(x)] <- gsub("^([A-Z]+_)(.*)", paste0(font_subtle("\\1"), "\\2"), out[!is.na(x)], perl = TRUE)
  # and grey out every _
  out[!is.na(x)] <- gsub("_", font_subtle("_"), out[!is.na(x)])
  
@@ -1747,6 +1747,15 @@ c.mo <- function(x, ...) {
                                                   as.character(microorganisms.translation$mo_old)))
 }

+#' @method unique mo
+#' @export
+#' @noRd
+unique.mo <- function(x, incomparables = FALSE, ...) {
+  y <- NextMethod()
+  attributes(y) <- attributes(x)
+  y
+}
+
 #' @rdname as.mo
 #' @export
 mo_failures <- function() {
--- a/R/resistance_predict.R
+++ b/R/resistance_predict.R
@@ -294,7 +294,7 @@ rsi_predict <- resistance_predict

 #' @method plot resistance_predict
 #' @export
-#' @importFrom graphics axis arrows points
+#' @importFrom graphics plot axis arrows points
 #' @rdname resistance_predict
 plot.resistance_predict <- function(x, main = paste("Resistance Prediction of", x_name), ...) {
  x_name <- paste0(ab_name(attributes(x)$ab), " (", attributes(x)$ab, ")")
@@ -304,12 +304,7 @@ plot.resistance_predict <- function(x, main = paste("Resistance Prediction of",
  } else {
    ylab <- "%IR"
  }
-  # get plot() generic; this was moved from the 'graphics' pkg to the 'base' pkg in R 4.0.0
-  if (as.integer(R.Version()$major) >= 4) {
-    plot <- import_fn("plot", "base")
-  } else {
-    plot <- import_fn("plot", "graphics")
-  }
+
  plot(x = x$year,
       y = x$value,
       ylim = c(0, 1),
--- a/R/rsi.R
+++ b/R/rsi.R
@@ -19,7 +19,7 @@
 # Visit our website for more info: https://msberends.github.io/AMR.    #
 # ==================================================================== #

-#' Interpret MIC and disk, or clean raw R/SI data
+#' Interpret MIC and disk values, or clean raw R/SI data
 #'
 #' Interpret minimum inhibitory concentration (MIC) values and disk diffusion diameters according to EUCAST or CLSI, or clean up existing R/SI values. This transforms the input to a new class [`rsi`], which is an ordered factor with levels `S < I < R`. Values that cannot be interpreted will be returned as `NA` with a warning.
 #' @inheritSection lifecycle Stable lifecycle
@@ -339,6 +339,7 @@ as.rsi.mic <- function(x,
                           ab_name(ab_coerced, tolower = TRUE), ")", mo_var_found, 
                           " according to ", font_bold(guideline_coerced), " ... ")),
          appendLF = FALSE)
+  
  result <- exec_as.rsi(method = "mic",
                        x = x,
                        mo = mo_coerced,
@@ -482,7 +483,7 @@ as.rsi.data.frame <- function(x,
    ab <- colnames(x)[i]
    if (length(sel) == 0 || (length(sel) > 0 && ab %in% sel)) {
      ab_coerced <- suppressWarnings(as.ab(ab))
-      if (is.na(ab_coerced) | !ab %in% sel) {
+      if (is.na(ab_coerced) || (length(sel) > 0 & !ab %in% sel)) {
        # not even a valid AB code
        return(FALSE)
      } else {
@@ -494,7 +495,7 @@ as.rsi.data.frame <- function(x,
          message(font_blue(paste0("NOTE: Assuming column `", ab, "` (",
                                   ifelse(ab_coerced != ab, paste0(ab_coerced, ", "), ""),
                                   ab_name(ab_coerced, tolower = TRUE), ") contains disk zones.")))
-        } else if (!is.rsi(y)) {
+        } else if (!check & !is.rsi(y)) {
          message(font_blue(paste0("NOTE: Assuming column `", ab, "` (",
                                   ifelse(ab_coerced != ab, paste0(ab_coerced, ", "), ""),
                                   ab_name(ab_coerced, tolower = TRUE), ") must be cleaned to valid R/SI values.")))
@@ -581,11 +582,20 @@ exec_as.rsi <- function(method,
  warned <- FALSE
  method_param <- toupper(method)
  
-  mo_genus <- as.mo(mo_genus(mo))
+  genera <- mo_genus(mo)
+  mo_genus <- as.mo(genera)
  mo_family <- as.mo(mo_family(mo))
  mo_order <- as.mo(mo_order(mo))
-  mo_becker <- as.mo(mo, Becker = TRUE)
-  mo_lancefield <- as.mo(mo, Lancefield = TRUE)
+  if (any(genera == "Staphylococcus", na.rm = TRUE)) {
+    mo_becker <- as.mo(mo, Becker = TRUE)
+  } else {
+    mo_becker <- mo
+  }
+  if (any(genera == "Streptococcus", na.rm = TRUE)) {
+    mo_lancefield <- as.mo(mo, Lancefield = TRUE)
+  } else {
+    mo_lancefield <- mo
+  }
  mo_other <- as.mo(rep("UNKNOWN", length(mo)))
  
  guideline_coerced <- get_guideline(guideline)
@@ -781,7 +791,7 @@ summary.rsi <- function(object, ...) {

 #' @method plot rsi
 #' @export
-#' @importFrom graphics text axis
+#' @importFrom graphics plot text axis
 #' @rdname plot
 plot.rsi <- function(x,
                     lwd = 2,
@@ -810,12 +820,6 @@ plot.rsi <- function(x,
  
  ymax <- pm_if_else(max(data$s) > 95, 105, 100)
  
-  # get plot() generic; this was moved from the 'graphics' pkg to the 'base' pkg in R 4.0.0
-  if (as.integer(R.Version()$major) >= 4) {
-    plot <- import_fn("plot", "base")
-  } else {
-    plot <- import_fn("plot", "graphics")
-  }
  plot(x = data$x,
       y = data$s,
       lwd = lwd,
@@ -896,3 +900,12 @@ c.rsi <- function(x, ...) {
  x <- as.character(x)
  as.rsi(c(x, y))
 }
+
+#' @method unique rsi
+#' @export
+#' @noRd
+unique.rsi <- function(x, incomparables = FALSE, ...) {
+  y <- NextMethod()
+  attributes(y) <- attributes(x)
+  y
+}
--- a/R/sysdata.rda
+++ b/R/sysdata.rda
--- a/R/zzz.R
+++ b/R/zzz.R
@@ -90,10 +90,10 @@ create_MO_lookup <- function() {
                                                   MO_lookup$subspecies)))
  ind <- MO_lookup$genus == "" | grepl("^[(]unknown ", MO_lookup$fullname)
  MO_lookup[ind, "fullname_lower"] <- tolower(MO_lookup[ind, "fullname"])
-  MO_lookup$fullname_lower <- trimws(gsub("[^.a-z0-9/ \\-]+", "", MO_lookup$fullname_lower))
+  MO_lookup$fullname_lower <- trimws(gsub("[^.a-z0-9/ \\-]+", "", MO_lookup$fullname_lower, perl = TRUE))
  
  # add a column with only "e coli" like combinations
-  MO_lookup$g_species <- gsub("^([a-z])[a-z]+ ([a-z]+) ?.*", "\\1 \\2", MO_lookup$fullname_lower)
+  MO_lookup$g_species <- gsub("^([a-z])[a-z]+ ([a-z]+) ?.*", "\\1 \\2", MO_lookup$fullname_lower, perl = TRUE)
  
  # so arrange data on prevalence first, then kingdom, then full name
  MO_lookup[order(MO_lookup$prevalence, MO_lookup$kingdom_index, MO_lookup$fullname_lower), ]