last unit tests fix?

2026-07-13 05:10:58 +02:00 · 2022-10-04 21:33:04 +02:00
parent 37f6db5ccd
commit aa06aad4ea
22 changed files with 275 additions and 256 deletions
--- a/2
+++ b/2
@@ -1,5 +1,5 @@
 Package: AMR
-Version: 1.8.2.9027
+Version: 1.8.2.9028
 Date: 2022-10-04
 Title: Antimicrobial Resistance Data Analysis
 Description: Functions to simplify and standardise antimicrobial resistance (AMR)
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,4 +1,4 @@
-# AMR 1.8.2.9027
+# AMR 1.8.2.9028

 This version will eventually become v2.0! We're happy to reach a new major milestone soon!

@@ -9,11 +9,12 @@ This version will eventually become v2.0! We're happy to reach a new major miles
  * Chromista are almost never clinically relevant, thus lacking the secondary scope of this package
 * The `microorganisms` no longer relies on the Catalogue of Life, but now primarily on the List of Prokaryotic names with Standing in Nomenclature (LPSN) and is supplemented with the Global Biodiversity Information Facility (GBIF). The structure of this data set has changed to include separate LPSN and GBIF identifiers. Almost all previous MO codes were retained. It contains over 1,000 taxonomic names from 2022 already.
 * The `microorganisms.old` data set was removed, and all previously accepted names are now included in the `microorganisms` data set. A new column `status` contains `"accepted"` for currently accepted names and `"synonym"` for taxonomic synonyms; currently invalid names. All previously accepted names now have a microorganisms ID and - if available - an LPSN, GBIF and SNOMED CT identifier.
+* The `mo_matching_score()` now count deletions and substitutions as 2 instead of 1, which impacts the outcome of `as.mo()` and any `mo_*()` function

 ### New
 * EUCAST 2022 and CLSI 2022 guidelines have been added for `as.rsi()`. EUCAST 2022 is now the new default guideline for all MIC and disks diffusion interpretations.
 * All new algorithm for `as.mo()` (and thus internally all `mo_*()` functions) while still following our original set-up as described in our paper (DOI 10.18637/jss.v104.i03).
-  * A new argument `keep_synonyms` allows to *not* correct for updated taxonomy
+  * A new argument `keep_synonyms` allows to *not* correct for updated taxonomy, in favour of the now deleted argument `allow_uncertain`
  * It has increased tremendously in speed and returns generally more consequent results
  * Sequential coercion is now extremely fast as results are stored to the package environment, although coercion of unknown values must be run once per session. Previous results can be reset/removed with the new `mo_reset_session()` function.
 * Function `mean_amr_distance()` to calculate the mean AMR distance. The mean AMR distance is a normalised numeric value to compare AMR test results and can help to identify similar isolates, without comparing antibiograms by hand.
--- a/R/aa_helper_functions.R
+++ b/R/aa_helper_functions.R
@@ -41,12 +41,12 @@ pm_left_join <- function(x, y, by = NULL, suffix = c(".x", ".y")) {
  if (length(by) == 1) {
    by <- rep(by, 2)
  }
-  
+
  int_x <- colnames(x) %in% colnames(y) & colnames(x) != by[1]
  int_y <- colnames(y) %in% colnames(x) & colnames(y) != by[2]
  colnames(x)[int_x] <- paste0(colnames(x)[int_x], suffix[1L])
  colnames(y)[int_y] <- paste0(colnames(y)[int_y], suffix[2L])
-  
+
  merged <- cbind(
    x,
    y[match(
@@ -57,7 +57,7 @@ pm_left_join <- function(x, y, by = NULL, suffix = c(".x", ".y")) {
    drop = FALSE
    ]
  )
-  
+
  rownames(merged) <- NULL
  merged
 }
@@ -93,7 +93,7 @@ quick_case_when <- function(...) {
  if (n == 0L) {
    stop("No cases provided.")
  }
-  
+
  validate_case_when_length <- function(query, value, fs) {
    lhs_lengths <- lengths(query)
    rhs_lengths <- lengths(value)
@@ -112,13 +112,13 @@ quick_case_when <- function(...) {
    problems <- lhs_problems | rhs_problems
    if (any(problems)) {
      stop("The following formulas must be length ", len, " or 1, not ",
-           paste(inconsistent_lengths, collapse = ", "), ".\n    ",
-           paste(fs[problems], collapse = "\n    "),
-           call. = FALSE
+        paste(inconsistent_lengths, collapse = ", "), ".\n    ",
+        paste(fs[problems], collapse = "\n    "),
+        call. = FALSE
      )
    }
  }
-  
+
  replace_with <- function(x, i, val, arg_name) {
    if (is.null(val)) {
      return(x)
@@ -131,7 +131,7 @@ quick_case_when <- function(...) {
    }
    x
  }
-  
+
  query <- vector("list", n)
  value <- vector("list", n)
  default_env <- parent.frame()
@@ -163,13 +163,13 @@ addin_insert_in <- function() {
 # No export, no Rd
 addin_insert_like <- function() {
  # we want Shift + Ctrl/Cmd + L to iterate over %like%, %unlike%, %like_case%, and %unlike_case%
-  
+
  getActiveDocumentContext <- import_fn("getActiveDocumentContext", "rstudioapi")
  insertText <- import_fn("insertText", "rstudioapi")
  modifyRange <- import_fn("modifyRange", "rstudioapi")
  document_range <- import_fn("document_range", "rstudioapi")
  document_position <- import_fn("document_position", "rstudioapi")
-  
+
  context <- getActiveDocumentContext()
  current_row <- context$selection[[1]]$range$end[1]
  current_col <- context$selection[[1]]$range$end[2]
@@ -178,15 +178,15 @@ addin_insert_like <- function() {
    insertText(" %like% ")
    return(invisible())
  }
-  
+
  pos_preceded_by <- function(txt) {
    if (tryCatch(substr(current_row_txt, current_col - nchar(trimws(txt, which = "right")), current_col) == trimws(txt, which = "right"),
-                 error = function(e) FALSE
+      error = function(e) FALSE
    )) {
      return(TRUE)
    }
    tryCatch(substr(current_row_txt, current_col - nchar(txt), current_col) %like% paste0("^", txt),
-             error = function(e) FALSE
+      error = function(e) FALSE
    )
  }
  replace_pos <- function(old, with) {
@@ -198,7 +198,7 @@ addin_insert_like <- function() {
    id = context$id
    )
  }
-  
+
  if (pos_preceded_by(" %like% ")) {
    replace_pos(" %like% ", with = " %unlike% ")
  } else if (pos_preceded_by(" %unlike% ")) {
@@ -215,21 +215,21 @@ addin_insert_like <- function() {
 search_type_in_df <- function(x, type, info = TRUE) {
  meet_criteria(x, allow_class = "data.frame")
  meet_criteria(type, allow_class = "character", has_length = 1)
-  
+
  # try to find columns based on type
  found <- NULL
-  
+
  # remove attributes from other packages
  x <- as.data.frame(x, stringsAsFactors = FALSE)
  colnames_formatted <- tolower(generalise_antibiotic_name(colnames(x)))
-  
+
  # -- mo
  if (type == "mo") {
    if (any(vapply(FUN.VALUE = logical(1), x, is.mo))) {
      # take first <mo> column
      found <- colnames(x)[vapply(FUN.VALUE = logical(1), x, is.mo)]
    } else if ("mo" %in% colnames_formatted &&
-               suppressWarnings(all(x$mo %in% c(NA, AMR::microorganisms$mo)))) {
+      suppressWarnings(all(x$mo %in% c(NA, AMR::microorganisms$mo)))) {
      found <- "mo"
    } else if (any(colnames_formatted %like_case% "^(mo|microorganism|organism|bacteria|ba[ck]terie)s?$")) {
      found <- sort(colnames(x)[colnames_formatted %like_case% "^(mo|microorganism|organism|bacteria|ba[ck]terie)s?$"])
@@ -294,16 +294,16 @@ search_type_in_df <- function(x, type, info = TRUE) {
      # this column should contain logicals
      if (!is.logical(x[, found, drop = TRUE])) {
        message_("Column '", font_bold(found), "' found as input for `col_", type,
-                 "`, but this column does not contain 'logical' values (TRUE/FALSE) and was ignored.",
-                 add_fn = font_red
+          "`, but this column does not contain 'logical' values (TRUE/FALSE) and was ignored.",
+          add_fn = font_red
        )
        found <- NULL
      }
    }
  }
-  
+
  found <- found[1]
-  
+
  if (!is.null(found) && info == TRUE) {
    if (message_not_thrown_before("search_in_type", type)) {
      msg <- paste0("Using column '", font_bold(found), "' as input for `col_", type, "`.")
@@ -349,16 +349,16 @@ stop_ifnot_installed <- function(package) {
  # https://developer.r-project.org/Blog/public/2019/02/14/staged-install/index.html
  vapply(FUN.VALUE = character(1), package, function(pkg) {
    tryCatch(get(".packageName", envir = asNamespace(pkg)),
-             error = function(e) {
-               if (pkg == "rstudioapi") {
-                 stop("This function only works in RStudio when using R >= 3.2.", call. = FALSE)
-               } else if (pkg != "base") {
-                 stop("This requires the '", pkg, "' package.",
-                      "\nTry to install it with: install.packages(\"", pkg, "\")",
-                      call. = FALSE
-                 )
-               }
-             }
+      error = function(e) {
+        if (pkg == "rstudioapi") {
+          stop("This function only works in RStudio when using R >= 3.2.", call. = FALSE)
+        } else if (pkg != "base") {
+          stop("This requires the '", pkg, "' package.",
+            "\nTry to install it with: install.packages(\"", pkg, "\")",
+            call. = FALSE
+          )
+        }
+      }
    )
  })
  return(invisible())
@@ -386,8 +386,8 @@ import_fn <- function(name, pkg, error_on_fail = TRUE) {
    error = function(e) {
      if (isTRUE(error_on_fail)) {
        stop_("function ", name, "() is not an exported object from package '", pkg,
-              "'. Please create an issue at https://github.com/msberends/AMR/issues. Many thanks!",
-              call = FALSE
+          "'. Please create an issue at https://github.com/msberends/AMR/issues. Many thanks!",
+          call = FALSE
        )
      } else {
        return(NULL)
@@ -407,11 +407,11 @@ word_wrap <- function(...,
                      width = 0.95 * getOption("width"),
                      extra_indent = 0) {
  msg <- paste0(c(...), collapse = "")
-  
+
  if (isTRUE(as_note)) {
    msg <- paste0(AMR_env$info_icon, " ", gsub("^note:? ?", "", msg, ignore.case = TRUE))
  }
-  
+
  if (msg %like% "\n") {
    # run word_wraps() over every line here, bind them and return again
    return(paste0(vapply(
@@ -426,7 +426,7 @@ word_wrap <- function(...,
    collapse = "\n"
    ))
  }
-  
+
  # correct for operators (will add the space later on)
  ops <- "([,./><\\]\\[])"
  msg <- gsub(paste0(ops, " ", ops), "\\1\\2", msg, perl = TRUE)
@@ -434,13 +434,13 @@ word_wrap <- function(...,
  msg_stripped <- font_stripstyle(msg)
  # where are the spaces now?
  msg_stripped_wrapped <- paste0(strwrap(msg_stripped,
-                                         simplify = TRUE,
-                                         width = width
+    simplify = TRUE,
+    width = width
  ),
  collapse = "\n"
  )
  msg_stripped_wrapped <- paste0(unlist(strsplit(msg_stripped_wrapped, "(\n|\\*\\|\\*)")),
-                                 collapse = "\n"
+    collapse = "\n"
  )
  msg_stripped_spaces <- which(unlist(strsplit(msg_stripped, "", fixed = TRUE)) == " ")
  msg_stripped_wrapped_spaces <- which(unlist(strsplit(msg_stripped_wrapped, "", fixed = TRUE)) != "\n")
@@ -453,7 +453,7 @@ word_wrap <- function(...,
  msg <- gsub(paste0(ops, ops), "\\1 \\2", msg, perl = TRUE)
  msg <- paste0(msg, collapse = " ")
  msg <- gsub("\n ", "\n", msg, fixed = TRUE)
-  
+
  if (msg_stripped %like% "\u2139 ") {
    indentation <- 2 + extra_indent
  } else if (msg_stripped %like% "^=> ") {
@@ -464,7 +464,7 @@ word_wrap <- function(...,
  msg <- gsub("\n", paste0("\n", strrep(" ", indentation)), msg, fixed = TRUE)
  # remove trailing empty characters
  msg <- gsub("(\n| )+$", "", msg)
-  
+
  if (length(add_fn) > 0) {
    if (!is.list(add_fn)) {
      add_fn <- list(add_fn)
@@ -473,15 +473,15 @@ word_wrap <- function(...,
      msg <- add_fn[[i]](msg)
    }
  }
-  
+
  # format backticks
  msg <- gsub("(`.+?`)", font_grey_bg("\\1"), msg)
-  
+
  # clean introduced whitespace between fullstops
  msg <- gsub("[.] +[.]", "..", msg)
  # remove extra space that was introduced (case: "Smith et al., 2022")
  msg <- gsub(". ,", ".,", msg, fixed = TRUE)
-  
+
  msg
 }

@@ -490,8 +490,8 @@ message_ <- function(...,
                     add_fn = list(font_blue),
                     as_note = TRUE) {
  message(word_wrap(...,
-                    add_fn = add_fn,
-                    as_note = as_note
+    add_fn = add_fn,
+    as_note = as_note
  ),
  appendLF = appendLF
  )
@@ -502,8 +502,8 @@ warning_ <- function(...,
                     immediate = FALSE,
                     call = FALSE) {
  warning(word_wrap(...,
-                    add_fn = add_fn,
-                    as_note = FALSE
+    add_fn = add_fn,
+    as_note = FALSE
  ),
  immediate. = immediate,
  call. = call
@@ -736,13 +736,13 @@ meet_criteria <- function(object,
                          allow_NA = FALSE,
                          ignore.case = FALSE,
                          .call_depth = 0) { # depth in calling
-  
+
  obj_name <- deparse(substitute(object))
  call_depth <- -2 - abs(.call_depth)
-  
+
  # if object is missing, or another error:
  tryCatch(invisible(object),
-           error = function(e) AMR_env$meet_criteria_error_txt <- e$message
+    error = function(e) AMR_env$meet_criteria_error_txt <- e$message
  )
  if (!is.null(AMR_env$meet_criteria_error_txt)) {
    error_txt <- AMR_env$meet_criteria_error_txt
@@ -750,7 +750,7 @@ meet_criteria <- function(object,
    stop(error_txt, call. = FALSE) # don't use stop_() here, our pkg may not be loaded yet
  }
  AMR_env$meet_criteria_error_txt <- NULL
-  
+
  if (is.null(object)) {
    stop_if(allow_NULL == FALSE, "argument `", obj_name, "` must not be NULL", call = call_depth)
    return(invisible())
@@ -759,36 +759,36 @@ meet_criteria <- function(object,
    stop_if(allow_NA == FALSE, "argument `", obj_name, "` must not be NA", call = call_depth)
    return(invisible())
  }
-  
+
  if (!is.null(allow_class)) {
    stop_ifnot(inherits(object, allow_class), "argument `", obj_name,
-               "` must be ", format_class(allow_class, plural = isTRUE(has_length > 1)),
-               ", i.e. not be ", format_class(class(object), plural = isTRUE(has_length > 1)),
-               call = call_depth
+      "` must be ", format_class(allow_class, plural = isTRUE(has_length > 1)),
+      ", i.e. not be ", format_class(class(object), plural = isTRUE(has_length > 1)),
+      call = call_depth
    )
    # check data.frames for data
    if (inherits(object, "data.frame")) {
      stop_if(any(dim(object) == 0),
-              "the data provided in argument `", obj_name,
-              "` must contain rows and columns (current dimensions: ",
-              paste(dim(object), collapse = "x"), ")",
-              call = call_depth
+        "the data provided in argument `", obj_name,
+        "` must contain rows and columns (current dimensions: ",
+        paste(dim(object), collapse = "x"), ")",
+        call = call_depth
      )
    }
  }
  if (!is.null(has_length)) {
    stop_ifnot(length(object) %in% has_length, "argument `", obj_name,
-               "` must ", # ifelse(allow_NULL, "be NULL or must ", ""),
-               "be of length ", vector_or(has_length, quotes = FALSE),
-               ", not ", length(object),
-               call = call_depth
+      "` must ", # ifelse(allow_NULL, "be NULL or must ", ""),
+      "be of length ", vector_or(has_length, quotes = FALSE),
+      ", not ", length(object),
+      call = call_depth
    )
  }
  if (!is.null(looks_like)) {
    stop_ifnot(object %like% looks_like, "argument `", obj_name,
-               "` must ", # ifelse(allow_NULL, "be NULL or must ", ""),
-               "resemble the regular expression \"", looks_like, "\"",
-               call = call_depth
+      "` must ", # ifelse(allow_NULL, "be NULL or must ", ""),
+      "resemble the regular expression \"", looks_like, "\"",
+      call = call_depth
    )
  }
  if (!is.null(is_in)) {
@@ -797,44 +797,44 @@ meet_criteria <- function(object,
      is_in <- tolower(is_in)
    }
    stop_ifnot(all(object %in% is_in, na.rm = TRUE), "argument `", obj_name, "` ",
-               ifelse(!is.null(has_length) && length(has_length) == 1 && has_length == 1,
-                      "must be either ",
-                      "must only contain values "
-               ),
-               vector_or(is_in, quotes = !isTRUE(any(c("double", "numeric", "integer") %in% allow_class))),
-               ifelse(allow_NA == TRUE, ", or NA", ""),
-               call = call_depth
+      ifelse(!is.null(has_length) && length(has_length) == 1 && has_length == 1,
+        "must be either ",
+        "must only contain values "
+      ),
+      vector_or(is_in, quotes = !isTRUE(any(c("double", "numeric", "integer") %in% allow_class))),
+      ifelse(allow_NA == TRUE, ", or NA", ""),
+      call = call_depth
    )
  }
  if (isTRUE(is_positive)) {
    stop_if(is.numeric(object) && !all(object > 0, na.rm = TRUE), "argument `", obj_name,
-            "` must ",
-            ifelse(!is.null(has_length) && length(has_length) == 1 && has_length == 1,
-                   "be a number higher than zero",
-                   "all be numbers higher than zero"
-            ),
-            call = call_depth
+      "` must ",
+      ifelse(!is.null(has_length) && length(has_length) == 1 && has_length == 1,
+        "be a number higher than zero",
+        "all be numbers higher than zero"
+      ),
+      call = call_depth
    )
  }
  if (isTRUE(is_positive_or_zero)) {
    stop_if(is.numeric(object) && !all(object >= 0, na.rm = TRUE), "argument `", obj_name,
-            "` must ",
-            ifelse(!is.null(has_length) && length(has_length) == 1 && has_length == 1,
-                   "be zero or a positive number",
-                   "all be zero or numbers higher than zero"
-            ),
-            call = call_depth
+      "` must ",
+      ifelse(!is.null(has_length) && length(has_length) == 1 && has_length == 1,
+        "be zero or a positive number",
+        "all be zero or numbers higher than zero"
+      ),
+      call = call_depth
    )
  }
  if (isTRUE(is_finite)) {
    stop_if(is.numeric(object) && !all(is.finite(object[!is.na(object)]), na.rm = TRUE), "argument `", obj_name,
-            "` must ",
-            ifelse(!is.null(has_length) && length(has_length) == 1 && has_length == 1,
-                   "be a finite number",
-                   "all be finite numbers"
-            ),
-            " (i.e. not be infinite)",
-            call = call_depth
+      "` must ",
+      ifelse(!is.null(has_length) && length(has_length) == 1 && has_length == 1,
+        "be a finite number",
+        "all be finite numbers"
+      ),
+      " (i.e. not be infinite)",
+      call = call_depth
    )
  }
  if (!is.null(contains_column_class)) {
@@ -868,12 +868,12 @@ get_current_data <- function(arg_name, call) {
      return(out)
    }
  }
-  
+
  # try a manual (base R) method, by going over all underlying environments with sys.frames()
  for (env in sys.frames()) {
    if (!is.null(env$`.Generic`)) {
      # don't check `".Generic" %in% names(env)`, because in R < 3.2, `names(env)` is always NULL
-      
+
      if (valid_df(env$`.data`)) {
        # an element `.data` will be in the environment when using `dplyr::select()`
        # (but not when using `dplyr::filter()`, `dplyr::mutate()` or `dplyr::summarise()`)
@@ -887,7 +887,7 @@ get_current_data <- function(arg_name, call) {
      }
    }
  }
-  
+
  # no data.frame found, so an error  must be returned:
  if (is.na(arg_name)) {
    if (isTRUE(is.numeric(call))) {
@@ -903,8 +903,8 @@ get_current_data <- function(arg_name, call) {
      examples <- ""
    }
    stop_("this function must be used inside a `dplyr` verb or `data.frame` call",
-          examples,
-          call = call
+      examples,
+      call = call
    )
  } else {
    # mimic a base R error that the argument is missing
@@ -921,7 +921,7 @@ get_current_column <- function() {
      return(out)
    }
  }
-  
+
  # cur_column() doesn't always work (only allowed for certain conditions set by dplyr), but it's probably still possible:
  frms <- lapply(sys.frames(), function(env) {
    if (!is.null(env$i)) {
@@ -941,7 +941,7 @@ get_current_column <- function() {
      NULL
    }
  })
-  
+
  vars <- unlist(frms)
  if (length(vars) > 0) {
    vars[length(vars)]
@@ -960,7 +960,7 @@ unique_call_id <- function(entire_session = FALSE, match_fn = NULL) {
  if (entire_session == TRUE) {
    return(c(envir = "session", call = "session"))
  }
-  
+
  # combination of environment ID (such as "0x7fed4ee8c848")
  # and relevant system call (where 'match_fn' is being called in)
  calls <- sys.calls()
@@ -1011,7 +1011,7 @@ message_not_thrown_before <- function(fn, ..., entire_session = FALSE) {

 has_colour <- function() {
  # this is a base R version of crayon::has_color, but disables colours on emacs
-  
+
  if (Sys.getenv("EMACS") != "" || Sys.getenv("INSIDE_EMACS") != "") {
    # disable on emacs, which only supports 8 colours
    return(FALSE)
@@ -1293,7 +1293,7 @@ round2 <- function(x, digits = 1, force_zero = TRUE) {

 # percentage from our other package: 'cleaner'
 percentage <- function(x, digits = NULL, ...) {
-  
+
  # getdecimalplaces() function
  getdecimalplaces <- function(x, minimum = 0, maximum = 3) {
    if (maximum < minimum) {
@@ -1310,14 +1310,14 @@ percentage <- function(x, digits = NULL, ...) {
      function(y) ifelse(length(y) == 2, nchar(y[2]), 0)
    )), na.rm = TRUE)
    max(min(max_places,
-            maximum,
-            na.rm = TRUE
+      maximum,
+      na.rm = TRUE
    ),
    minimum,
    na.rm = TRUE
    )
  }
-  
+
  # format_percentage() function
  format_percentage <- function(x, digits = NULL, ...) {
    if (is.null(digits)) {
@@ -1326,19 +1326,19 @@ percentage <- function(x, digits = NULL, ...) {
    if (is.null(digits) || is.na(digits) || !is.numeric(digits)) {
      digits <- 2
    }
-    
+
    # round right: percentage(0.4455) and format(as.percentage(0.4455), 1) should return "44.6%", not "44.5%"
    x_formatted <- format(round2(as.double(x), digits = digits + 2) * 100,
-                          scientific = FALSE,
-                          digits = max(1, digits),
-                          nsmall = digits,
-                          ...
+      scientific = FALSE,
+      digits = max(1, digits),
+      nsmall = digits,
+      ...
    )
    x_formatted <- paste0(x_formatted, "%")
    x_formatted[!grepl(pattern = "^[0-9.,e-]+$", x = x)] <- NA_character_
    x_formatted
  }
-  
+
  # the actual working part
  x <- as.double(x)
  if (is.null(digits)) {
@@ -1495,9 +1495,9 @@ if (getRversion() < "3.5.0") {
    which <- match.arg(which)
    mysub <- function(re, x) sub(re, "", x, perl = TRUE)
    switch(which,
-           left = mysub(paste0("^", whitespace, "+"), x),
-           right = mysub(paste0(whitespace, "+$"), x),
-           both = mysub(paste0(whitespace, "+$"), mysub(paste0("^", whitespace, "+"), x))
+      left = mysub(paste0("^", whitespace, "+"), x),
+      right = mysub(paste0(whitespace, "+$"), x),
+      both = mysub(paste0(whitespace, "+$"), mysub(paste0("^", whitespace, "+"), x))
    )
  }
 }
--- a/R/ab_selectors.R
+++ b/R/ab_selectors.R
@@ -566,7 +566,7 @@ ab_select_exec <- function(function_name,
    message_("No antimicrobial agents found in the data.")
    return(NULL)
  }
-  
+
  if (is.null(ab_class_args) || isTRUE(function_name %in% c("antifungals", "antimycobacterials"))) {
    ab_group <- NULL
    if (isTRUE(function_name == "antifungals")) {
@@ -580,8 +580,8 @@ ab_select_exec <- function(function_name,
      ab_group <- function_name
    }
    examples <- paste0(" (such as ", vector_or(ab_name(sample(abx, size = min(2, length(abx)), replace = FALSE),
-                                                       tolower = TRUE,
-                                                       language = NULL
+      tolower = TRUE,
+      language = NULL
    ),
    quotes = FALSE
    ), ")")
--- a/R/amr.R
+++ b/R/amr.R
@@ -31,9 +31,9 @@
 #'
 #' @description
 #' Welcome to the `AMR` package.
-#' 
+#'
 #' `AMR` is a free, open-source and independent \R package to simplify the analysis and prediction of Antimicrobial Resistance (AMR) and to work with microbial and antimicrobial data and properties, by using evidence-based methods. Our aim is to provide a standard for clean and reproducible antimicrobial resistance data analysis, that can therefore empower epidemiological analyses to continuously enable surveillance and treatment evaluation in any setting.
-#' 
+#'
 #' This work was published in the Journal of Statistical Software (Volume 104(3); \doi{10.18637/jss.v104.i03}) and formed the basis of two PhD theses (\doi{10.33612/diss.177417131} and \doi{10.33612/diss.192486375}).
 #'
 #' After installing this package, \R knows `r format_included_data_number(microorganisms)` distinct microbial species and all `r format_included_data_number(rbind(antibiotics[, "atc", drop = FALSE], antivirals[, "atc", drop = FALSE]))` antibiotic, antimycotic and antiviral drugs by name and code (including ATC, EARS-NET, LOINC and SNOMED CT), and knows all about valid R/SI and MIC values. It supports any data format, including WHONET/EARS-Net data.
@@ -62,11 +62,11 @@
 #' All data sets in this `AMR` package (about microorganisms, antibiotics, R/SI interpretation, EUCAST rules, etc.) are publicly and freely available for download in the following formats: R, MS Excel, Apache Feather, Apache Parquet, SPSS, SAS, and Stata. We also provide tab-separated plain text files that are machine-readable and suitable for input in any software program, such as laboratory information systems. Please visit [our website for the download links](https://msberends.github.io/AMR/articles/datasets.html). The actual files are of course available on [our GitHub repository](https://github.com/msberends/AMR/tree/main/data-raw).
 #' @source
 #' To cite AMR in publications use:
-#' 
+#'
 #' Berends MS, Luz CF, Friedrich AW, Sinha BNM, Albers CJ, Glasner C (2022). "AMR: An R Package for Working with Antimicrobial Resistance Data." _Journal of Statistical Software_, *104*(3), 1-31. \doi{10.18637/jss.v104.i03}.
-#' 
+#'
 #' A BibTeX entry for LaTeX users is:
-#' 
+#'
 #' \preformatted{
 #' `r format(citation("AMR"), style = "bib")`
 #' }
--- a/R/custom_eucast_rules.R
+++ b/R/custom_eucast_rules.R
@@ -49,10 +49,10 @@
 #' ```r
 #' x
 #' #> A set of custom EUCAST rules:
-#' #> 
+#' #>
 #' #>   1. If TZP is "S" then set to  S :
 #' #>      amoxicillin (AMX), ampicillin (AMP)
-#' #> 
+#' #>
 #' #>   2. If TZP is "R" then set to  R :
 #' #>      amoxicillin (AMX), ampicillin (AMP)
 #' ```
@@ -68,7 +68,7 @@
 #' #>                      mo TZP ampi cipro
 #' #> 1      Escherichia coli   R    S     S
 #' #> 2 Klebsiella pneumoniae   R    S     S
-#' 
+#'
 #' eucast_rules(df, rules = "custom", custom_rules = x, info = FALSE)
 #' #>                      mo TZP ampi cipro
 #' #> 1      Escherichia coli   R    R     S
--- a/R/mic.R
+++ b/R/mic.R
@@ -167,7 +167,7 @@ valid_mic_levels <- c(
 as.mic <- function(x, na.rm = FALSE) {
  meet_criteria(x, allow_class = c("mic", "character", "numeric", "integer", "factor"), allow_NA = TRUE)
  meet_criteria(na.rm, allow_class = "logical", has_length = 1)
-  
+
  if (is.mic(x)) {
    x
  } else {
--- a/R/mo.R
+++ b/R/mo.R
@@ -67,13 +67,13 @@
 #' Values that cannot be coerced will be considered 'unknown' and will be returned as the MO code `UNKNOWN` with a warning.
 #'
 #' Use the [`mo_*`][mo_property()] functions to get properties based on the returned code, see *Examples*.
-#' 
+#'
 #' The [as.mo()] function uses a novel [matching score algorithm][mo_matching_score()] (see *Matching Score for Microorganisms* below) to match input against the [available microbial taxonomy][microoganisms] in this package. This will lead to the effect that e.g. `"E. coli"` (a microorganism highly prevalent in humans) will return the microbial ID of *Escherichia coli* and not *Entamoeba coli* (a microorganism less prevalent in humans), although the latter would alphabetically come first. The algorithm uses data from the List of Prokaryotic names with Standing in Nomenclature (LPSN) and the Global Biodiversity Information Facility (GBIF) (see [microorganisms]).
 #'
 #' ### Coping with Uncertain Results
 #'
 #' Results of non-exact taxonomic input are based on their [matching score][mo_matching_score()]. The lowest allowed score can be set with the `minimum_matching_score` argument. At default this will be determined based on the character length of the input, and the [taxonomic kingdom][microorganisms] and [human pathogenicity][mo_matching_score()] of the taxonomic outcome. If values are matched with uncertainty, a message will be shown to suggest the user to evaluate the results with [mo_uncertainties()], which returns a [data.frame] with all specifications.
-#' 
+#'
 #' To increase the quality of matching, the `remove_from_input` argument can be used to clean the input (i.e., `x`). This must be a [regular expression][base::regex] that matches parts of the input that should be removed before the input is matched against the [available microbial taxonomy][microoganisms]. It will be matched Perl-compatible and case-insensitive. The default value of `remove_from_input` is the outcome of the helper function [mo_cleaning_regex()].
 #'
 #' There are three helper functions that can be run after using the [as.mo()] function:
@@ -85,7 +85,7 @@
 #'
 #' The coercion rules consider the prevalence of microorganisms in humans grouped into three groups, which is available as the `prevalence` columns in the [microorganisms] data set. The grouping into human pathogenic prevalence is explained in the section *Matching Score for Microorganisms* below.
 #' @inheritSection mo_matching_score Matching Score for Microorganisms
-#' 
+#'
 #  (source as a section here, so it can be inherited by other man pages)
 #' @section Source:
 #' 1. Berends MS *et al.* (2022). **AMR: An R Package for Working with Antimicrobial Resistance Data**. *Journal of Statistical Software*, 104(3), 1-31; \doi{10.18637/jss.v104.i03}
@@ -120,7 +120,7 @@
 #'   "VISA", # Vancomycin Intermediate S. aureus
 #'   "VRSA", # Vancomycin Resistant S. aureus
 #'   115329001 # SNOMED CT code
-#' )) 
+#' ))
 #'
 #' # Dyslexia is no problem - these all work:
 #' as.mo(c(
@@ -163,7 +163,7 @@ as.mo <- function(x,
  meet_criteria(ignore_pattern, allow_class = "character", has_length = 1, allow_NULL = TRUE)
  language <- validate_language(language)
  meet_criteria(info, allow_class = "logical", has_length = 1)
-  
+
  if (tryCatch(all(x %in% c(MO_lookup$mo, NA)) &&
    isFALSE(Becker) &&
    isFALSE(Lancefield), error = function(e) FALSE)) {
@@ -171,7 +171,7 @@ as.mo <- function(x,
    # is.mo() won't work - MO codes might change between package versions
    return(set_clean_class(x, new_class = c("mo", "character")))
  }
-  
+

  # start off with replaced language-specific non-ASCII characters with ASCII characters
  x <- parse_and_convert(x)
@@ -179,14 +179,14 @@ as.mo <- function(x,
  x <- replace_old_mo_codes(x, property = "mo")
  # ignore cases that match the ignore pattern
  x <- replace_ignore_pattern(x, ignore_pattern)
-  
+
  x_lower <- tolower(x)
-  
+
  # WHONET: xxx = no growth
  x[x_lower %in% c("", "xxx", "na", "nan")] <- NA_character_

  out <- rep(NA_character_, length(x))
-  
+
  # below we use base R's match(), known for powering '%in%', and incredibly fast!

  # From reference_df ----
@@ -220,7 +220,7 @@ as.mo <- function(x,
      " for ", vector_and(x[is.na(old) & !is.na(new)]), ". Run `mo_reset_session()` to reset this."
    )
  }
-  
+
  # For all other input ----
  if (any(is.na(out) & !is.na(x))) {
    # reset uncertainties
@@ -249,16 +249,16 @@ as.mo <- function(x,
      x_out <- trimws2(gsub(" +", " ", x_out, perl = TRUE))
      x_search_cleaned <- x_out
      x_out <- tolower(x_out)
-      
+
      # input must not be too short
      if (nchar(x_out) < 3) {
        return("UNKNOWN")
      }
-      
-      
+
+
      # take out the parts, split by space
      x_parts <- strsplit(gsub("-", " ", x_out, fixed = TRUE), " ", fixed = TRUE)[[1]]
-      
+
      # do a pre-match on first character (and if it contains a space, first chars of first two terms)
      if (length(x_parts) %in% c(2, 3)) {
        # for genus + species + subspecies
@@ -301,7 +301,7 @@ as.mo <- function(x,
        minimum_matching_score_current <- minimum_matching_score
      }
      m[m < minimum_matching_score_current] <- NA_real_
-      
+
      top_hits <- mo_to_search[order(m, decreasing = TRUE, na.last = NA)] # na.last = NA will remove the NAs
      if (length(top_hits) == 0) {
        warning_("No hits found for \"", x_search, "\" with minimum_matching_score = ", ifelse(is.null(minimum_matching_score), paste0("NULL (=", round(min(minimum_matching_score_current, na.rm = TRUE), 3), ")"), minimum_matching_score), ". Try setting this value lower or even to 0.")
@@ -371,9 +371,11 @@ as.mo <- function(x,
  gbif_matches[!gbif_matches %in% AMR::microorganisms$gbif] <- NA
  lpsn_matches <- AMR::microorganisms$lpsn_renamed_to[match(out, AMR::microorganisms$mo)]
  lpsn_matches[!lpsn_matches %in% AMR::microorganisms$lpsn] <- NA
-  AMR_env$mo_renamed <- list(old = out[!is.na(gbif_matches) | !is.na(lpsn_matches)],
-                             gbif_matches = gbif_matches[!is.na(gbif_matches) | !is.na(lpsn_matches)],
-                             lpsn_matches = lpsn_matches[!is.na(gbif_matches) | !is.na(lpsn_matches)])
+  AMR_env$mo_renamed <- list(
+    old = out[!is.na(gbif_matches) | !is.na(lpsn_matches)],
+    gbif_matches = gbif_matches[!is.na(gbif_matches) | !is.na(lpsn_matches)],
+    lpsn_matches = lpsn_matches[!is.na(gbif_matches) | !is.na(lpsn_matches)]
+  )
  if (isFALSE(keep_synonyms)) {
    out[which(!is.na(gbif_matches))] <- AMR::microorganisms$mo[match(gbif_matches[which(!is.na(gbif_matches))], AMR::microorganisms$gbif)]
    out[which(!is.na(lpsn_matches))] <- AMR::microorganisms$mo[match(lpsn_matches[which(!is.na(lpsn_matches))], AMR::microorganisms$lpsn)]
@@ -384,7 +386,7 @@ as.mo <- function(x,
    # keep synonyms is TRUE, so check if any do have synonyms
    warning_("Function `as.mo()` returned ", nr2char(length(unique(AMR_env$mo_renamed$old))), " old taxonomic name", ifelse(length(unique(AMR_env$mo_renamed$old)) > 1, "s", ""), ". Use `as.mo(..., keep_synonyms = FALSE)` to clean the input to currently accepted taxonomic names, or set the R option `AMR_keep_synonyms` to `FALSE`. This warning will be shown once per session.")
  }
-  
+
  # Apply Becker ----
  if (isTRUE(Becker) || Becker == "all") {
    # warn when species found that are not in:
@@ -436,7 +438,7 @@ as.mo <- function(x,
    out[out %like_case% "^B_STRPT_SLVR(_|$)"] <- "B_STRPT_GRPK"
    # group L - only S. dysgalactiae which is also group C, so ignore it here
  }
-  
+
  # All unknowns ----
  out[is.na(out) & !is.na(x)] <- "UNKNOWN"
  AMR_env$mo_failures <- unique(x[out == "UNKNOWN" & x != "UNKNOWN" & !is.na(x)])
@@ -468,18 +470,20 @@ mo_uncertainties <- function() {
 #' @export
 mo_renamed <- function() {
  x <- AMR_env$mo_renamed
-  
+
  x$new <- synonym_mo_to_accepted_mo(x$old)
  mo_old <- AMR::microorganisms$fullname[match(x$old, AMR::microorganisms$mo)]
  mo_new <- AMR::microorganisms$fullname[match(x$new, AMR::microorganisms$mo)]
  ref_old <- AMR::microorganisms$ref[match(x$old, AMR::microorganisms$mo)]
  ref_new <- AMR::microorganisms$ref[match(x$new, AMR::microorganisms$mo)]
-  
-  df_renamed <- data.frame(old = mo_old,
-                           new = mo_new,
-                           ref_old = ref_old,
-                           ref_new = ref_new,
-                           stringsAsFactors = FALSE)
+
+  df_renamed <- data.frame(
+    old = mo_old,
+    new = mo_new,
+    ref_old = ref_old,
+    ref_new = ref_new,
+    stringsAsFactors = FALSE
+  )
  df_renamed <- unique(df_renamed)
  df_renamed <- df_renamed[order(df_renamed$old), , drop = FALSE]
  set_clean_class(df_renamed, new_class = c("mo_renamed", "data.frame"))
@@ -512,7 +516,8 @@ mo_cleaning_regex <- function() {
    "|",
    "([({]|\\[).+([})]|\\])",
    "|",
-    "(^| )(e?spp|e?ssp|e?ss|e?sp|e?subsp|sube?species|biovar|biotype|serovar|e?species)[.]*( |$))")
+    "(^| )(e?spp|e?ssp|e?ss|e?sp|e?subsp|sube?species|biovar|biotype|serovar|e?species)[.]*( |$))"
+  )
 }

 # UNDOCUMENTED METHODS ----------------------------------------------------
@@ -754,7 +759,7 @@ print.mo_uncertainties <- function(x, ...) {
    cat(word_wrap("No uncertainties to show. Only uncertainties of the last call of `as.mo()` or any `mo_*()` function are stored.\n", add_fn = font_blue))
    return(invisible(NULL))
  }
-  
+
  cat(word_wrap("Matching scores are based on the resemblance between the input and the full taxonomic name, and the pathogenicity in humans. See `?mo_matching_score`.\n\n", add_fn = font_blue))
  if (has_colour()) {
    cat(word_wrap("Colour keys: ",
@@ -815,27 +820,30 @@ print.mo_uncertainties <- function(x, ...) {
    )
    score_formatted <- trimws(formatC(round(score, 3), format = "f", digits = 3))
    txt <- paste(txt,
-                 paste0(
-                   paste0(
-                     '"', x[i, ]$original_input, '"',
-                     " -> ",
-                     paste0(
-                       font_bold(font_italic(x[i, ]$fullname)),
-                       ifelse(!is.na(x[i, ]$renamed_to), paste(", renamed to", font_italic(x[i, ]$renamed_to)), ""),
-                       " (", x[i, ]$mo, ", ", score_set_colour(score_formatted, score), ")"
-                     )
-                   ),
-                   collapse = "\n"
-                 ),
-                 # Add "Based on {input}" text if it differs from the original input
-                 ifelse(x[i, ]$original_input != x[i, ]$input, paste0(strrep(" ", nchar(x[i, ]$original_input) + 6), "Based on input \"", x[i, ]$input, "\""), ""),
-                 # Add note if result was coerced to accepted taxonomic name
-                 ifelse(x[i, ]$keep_synonyms == FALSE & x[i, ]$mo %in% AMR::microorganisms$mo[which(AMR::microorganisms$status == "synonym")],
-                        paste0(strrep(" ", nchar(x[i, ]$original_input) + 6),
-                               font_red(paste0("This old taxonomic name was converted to ", font_italic(AMR::microorganisms$fullname[match(synonym_mo_to_accepted_mo(x[i, ]$mo), AMR::microorganisms$mo)], collapse = NULL), " (", synonym_mo_to_accepted_mo(x[i, ]$mo), ")."), collapse = NULL)),
-                        ""),
-                 candidates,
-                 sep = "\n"
+      paste0(
+        paste0(
+          '"', x[i, ]$original_input, '"',
+          " -> ",
+          paste0(
+            font_bold(font_italic(x[i, ]$fullname)),
+            ifelse(!is.na(x[i, ]$renamed_to), paste(", renamed to", font_italic(x[i, ]$renamed_to)), ""),
+            " (", x[i, ]$mo, ", ", score_set_colour(score_formatted, score), ")"
+          )
+        ),
+        collapse = "\n"
+      ),
+      # Add "Based on {input}" text if it differs from the original input
+      ifelse(x[i, ]$original_input != x[i, ]$input, paste0(strrep(" ", nchar(x[i, ]$original_input) + 6), "Based on input \"", x[i, ]$input, "\""), ""),
+      # Add note if result was coerced to accepted taxonomic name
+      ifelse(x[i, ]$keep_synonyms == FALSE & x[i, ]$mo %in% AMR::microorganisms$mo[which(AMR::microorganisms$status == "synonym")],
+        paste0(
+          strrep(" ", nchar(x[i, ]$original_input) + 6),
+          font_red(paste0("This old taxonomic name was converted to ", font_italic(AMR::microorganisms$fullname[match(synonym_mo_to_accepted_mo(x[i, ]$mo), AMR::microorganisms$mo)], collapse = NULL), " (", synonym_mo_to_accepted_mo(x[i, ]$mo), ")."), collapse = NULL)
+        ),
+        ""
+      ),
+      candidates,
+      sep = "\n"
    )
    txt <- paste0(gsub("\n\n", "\n", txt), "\n\n")
  }
@@ -850,19 +858,19 @@ print.mo_renamed <- function(x, extra_txt = "", n = 25, ...) {
    cat(word_wrap("No renamed taxonomy to show. Only renamed taxonomy of the last call of `as.mo()` or any `mo_*()` function are stored.\n", add_fn = font_blue))
    return(invisible(NULL))
  }
-  
+
  x$ref_old[!is.na(x$ref_old)] <- paste0(" (", gsub("et al.", font_italic("et al."), x$ref_old[!is.na(x$ref_old)], fixed = TRUE), ")")
  x$ref_new[!is.na(x$ref_new)] <- paste0(" (", gsub("et al.", font_italic("et al."), x$ref_new[!is.na(x$ref_new)], fixed = TRUE), ")")
  x$ref_old[is.na(x$ref_old)] <- " (author unknown)"
  x$ref_new[is.na(x$ref_new)] <- " (author unknown)"
-  
+
  rows <- seq_len(min(NROW(x), n))
-  
+
  message_(
    "The following microorganism", ifelse(NROW(x) > 1, "s were", " was"), " taxonomically renamed", extra_txt, ":\n",
    paste0("  \u2022 ", font_italic(x$old[rows], collapse = NULL), x$ref_old[rows],
-           "  ->  ", font_italic(x$new[rows], collapse = NULL), x$ref_new[rows],
-           collapse = "\n"
+      "  ->  ", font_italic(x$new[rows], collapse = NULL), x$ref_new[rows],
+      collapse = "\n"
    ),
    ifelse(NROW(x) > n, paste0("\n\nOnly the first ", n, " (out of ", NROW(x), ") are shown. Run `print(mo_renamed(), n = ...)` to view more entries (might be slow), or save `mo_renamed()` to an object."), "")
  )
@@ -874,50 +882,54 @@ convert_colloquial_input <- function(x) {
  x.bak <- trimws2(x)
  x <- trimws2(tolower(x))
  out <- rep(NA_character_, length(x))
-  
+
  # Streptococci, like GBS = Group B Streptococci (B_STRPT_GRPB)
  out[x %like_case% "^g[abcdfghkl]s$"] <- gsub("g([abcdfghkl])s",
-                                               "B_STRPT_GRP\\U\\1",
-                                               x[x %like_case% "^g[abcdfghkl]s$"],
-                                               perl = TRUE)
+    "B_STRPT_GRP\\U\\1",
+    x[x %like_case% "^g[abcdfghkl]s$"],
+    perl = TRUE
+  )
  # Streptococci in different languages, like "estreptococos grupo B"
  out[x %like_case% "strepto[ck]o[ck].* [abcdfghkl]$"] <- gsub(".*e?strepto[ck]o[ck].* ([abcdfghkl])$",
-                                                               "B_STRPT_GRP\\U\\1",
-                                                               x[x %like_case% "strepto[ck]o[ck].* [abcdfghkl]$"],
-                                                               perl = TRUE)
+    "B_STRPT_GRP\\U\\1",
+    x[x %like_case% "strepto[ck]o[ck].* [abcdfghkl]$"],
+    perl = TRUE
+  )
  out[x %like_case% "group [abcdfghkl] strepto[ck]o[ck]"] <- gsub(".*group ([abcdfghkl]) strepto[ck]o[ck].*",
-                                                                  "B_STRPT_GRP\\U\\1",
-                                                                  x[x %like_case% "group [abcdfghkl] strepto[ck]o[ck]"],
-                                                                  perl = TRUE)
+    "B_STRPT_GRP\\U\\1",
+    x[x %like_case% "group [abcdfghkl] strepto[ck]o[ck]"],
+    perl = TRUE
+  )
  out[x %like_case% "ha?emoly.*strep"] <- "B_STRPT_HAEM"
  out[x %like_case% "(strepto.* mil+er+i|^mgs[^a-z]*$)"] <- "B_STRPT_MILL"
  out[x %like_case% "mil+er+i gr"] <- "B_STRPT_MILL"
  out[x %like_case% "((strepto|^s).* viridans|^vgs[^a-z]*$)"] <- "B_STRPT_VIRI"
-  
+  out[x %like_case% "(viridans.* (strepto|^s).*|^vgs[^a-z]*$)"] <- "B_STRPT_VIRI"
+
  # CoNS/CoPS in different languages (support for German, Dutch, Spanish, Portuguese)
  out[x %like_case% "([ck]oagulas[ea].negatie?[vf]|^[ck]o?ns[^a-z]*$)"] <- "B_STPHY_CONS"
  out[x %like_case% "([ck]oagulas[ea].positie?[vf]|^[ck]o?ps[^a-z]*$)"] <- "B_STPHY_COPS"
-  
+
  # Gram stains
  out[x %like_case% "gram[ -]?neg.*|negatie?[vf]"] <- "B_GRAMN"
  out[x %like_case% "gram[ -]?pos.*|positie?[vf]"] <- "B_GRAMP"
-  
+
  # yeasts and fungi
  out[x %like_case% "^yeast?"] <- "F_YEAST"
  out[x %like_case% "^fung(us|i)"] <- "F_FUNGUS"
-  
+
  # Salmonella city names, starting with capital species name - they are all S. enterica
  out[x.bak %like_case% "[sS]almonella [A-Z][a-z]+ ?.*" & x %unlike% "typhi"] <- "B_SLMNL_ENTR"
  out[x %like_case% "salmonella group"] <- "B_SLMNL"
-  
+
  # trivial names known to the field
  out[x %like_case% "meningo[ck]o[ck]"] <- "B_NESSR_MNNG"
  out[x %like_case% "gono[ck]o[ck]"] <- "B_NESSR_GNRR"
  out[x %like_case% "pneumo[ck]o[ck]"] <- "B_STRPT_PNMN"
-  
+
  # unexisting names (xxx and con are WHONET codes)
  out[x %in% c("con", "other", "none", "unknown") | x %like_case% "virus"] <- "UNKNOWN"
-  
+
  out
 }

@@ -1085,8 +1097,9 @@ synonym_mo_to_accepted_mo <- function(x) {
  x_lpsn <- AMR::microorganisms$lpsn_renamed_to[match(x, AMR::microorganisms$mo)]
  x_gbif[!x_gbif %in% AMR::microorganisms$gbif] <- NA
  x_lpsn[!x_lpsn %in% AMR::microorganisms$lpsn] <- NA
-  
+
  ifelse(is.na(x_lpsn),
-         AMR::microorganisms$mo[match(x_gbif, AMR::microorganisms$gbif)],
-         AMR::microorganisms$mo[match(x_lpsn, AMR::microorganisms$lpsn)])
+    AMR::microorganisms$mo[match(x_gbif, AMR::microorganisms$gbif)],
+    AMR::microorganisms$mo[match(x_lpsn, AMR::microorganisms$lpsn)]
+  )
 }
--- a/R/mo_matching_score.R
+++ b/R/mo_matching_score.R
@@ -79,10 +79,10 @@ mo_matching_score <- function(x, n) {

  # only keep one space
  x <- gsub(" +", " ", x)
-  
-  # start with a capital letter
+
+  # force a capital letter, so this conversion will not count as a substitution
  substr(x, 1, 1) <- toupper(substr(x, 1, 1))
-  
+
  # n is always a taxonomically valid full name
  if (length(n) == 1) {
    n <- rep(n, length(x))
@@ -90,19 +90,20 @@ mo_matching_score <- function(x, n) {
  if (length(x) == 1) {
    x <- rep(x, length(n))
  }
-  
+
  # length of fullname
  l_n <- nchar(n)
  lev <- double(length = length(x))
  l_n.lev <- double(length = length(x))
  lev <- unlist(Map(f = function(a, b) {
-    as.double(utils::adist(a, b, 
-                           ignore.case = FALSE,
-                           fixed = TRUE,
-                           costs = c(insertions = 1, deletions = 2, substitutions = 2),
-                           counts = FALSE))
+    as.double(utils::adist(a, b,
+      ignore.case = FALSE,
+      fixed = TRUE,
+      costs = c(insertions = 1, deletions = 2, substitutions = 2),
+      counts = FALSE
+    ))
  }, x, n, USE.NAMES = FALSE))
-  
+
  l_n.lev[l_n < lev] <- l_n[l_n < lev]
  l_n.lev[lev < l_n] <- lev[lev < l_n]
  l_n.lev[lev == l_n] <- lev[lev == l_n]
--- a/R/mo_property.R
+++ b/R/mo_property.R
@@ -376,7 +376,7 @@ mo_status <- function(x, language = get_AMR_locale(), keep_synonyms = getOption(
  meet_criteria(x, allow_NA = TRUE)
  language <- validate_language(language)
  meet_criteria(keep_synonyms, allow_class = "logical", has_length = 1)
-  
+
  translate_into_language(mo_validate(x = x, property = "status", language = language, keep_synonyms = keep_synonyms, ...), language = language, only_unknown = TRUE)
 }

@@ -603,7 +603,7 @@ mo_gbif <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("A
  meet_criteria(x, allow_NA = TRUE)
  language <- validate_language(language)
  meet_criteria(keep_synonyms, allow_class = "logical", has_length = 1)
-  
+
  mo_validate(x = x, property = "gbif", language = language, keep_synonyms = keep_synonyms, ...)
 }

@@ -796,7 +796,7 @@ mo_validate <- function(x, property, language, keep_synonyms = keep_synonyms, ..
    Lancefield <- FALSE
  }
  has_Becker_or_Lancefield <- Becker %in% c(TRUE, "all") || Lancefield %in% c(TRUE, "all")
-  
+
  # get microorganisms data set, but remove synonyms if keep_synonyms is FALSE
  mo_data_check <- AMR::microorganisms[which(AMR::microorganisms$status %in% if (isTRUE(keep_synonyms)) c("synonym", "accepted") else "accepted"), , drop = FALSE]

--- a/R/rsi.R
+++ b/R/rsi.R
@@ -344,7 +344,7 @@ as.rsi.default <- function(x, ...) {
    x <- gsub("^R+$", "R", x)
    x[!x %in% c("S", "I", "R")] <- NA_character_
    na_after <- length(x[is.na(x) | x == ""])
-    
+
    if (!isFALSE(list(...)$warn)) { # so as.rsi(..., warn = FALSE) will never throw a warning
      if (na_before != na_after) {
        list_missing <- x.bak[is.na(x) & !is.na(x.bak) & x.bak != ""] %pm>%
@@ -353,13 +353,13 @@ as.rsi.default <- function(x, ...) {
          vector_and(quotes = TRUE)
        cur_col <- get_current_column()
        warning_("in `as.rsi()`: ", na_after - na_before, " result",
-                 ifelse(na_after - na_before > 1, "s", ""),
-                 ifelse(is.null(cur_col), "", paste0(" in column '", cur_col, "'")),
-                 " truncated (",
-                 round(((na_after - na_before) / length(x)) * 100),
-                 "%) that were invalid antimicrobial interpretations: ",
-                 list_missing,
-                 call = FALSE
+          ifelse(na_after - na_before > 1, "s", ""),
+          ifelse(is.null(cur_col), "", paste0(" in column '", cur_col, "'")),
+          " truncated (",
+          round(((na_after - na_before) / length(x)) * 100),
+          "%) that were invalid antimicrobial interpretations: ",
+          list_missing,
+          call = FALSE
        )
      }
      if (any(toupper(x.bak[!is.na(x.bak)]) == "U") && message_not_thrown_before("as.rsi", "U")) {
--- a/R/sysdata.rda
+++ b/R/sysdata.rda
--- a/R/translate.R
+++ b/R/translate.R
@@ -124,11 +124,14 @@ set_AMR_locale <- function(language) {
  options(AMR_locale = language)
  if (interactive() || identical(Sys.getenv("IN_PKGDOWN"), "true")) {
    # show which language to use now
-    message_("Using ", LANGUAGES_SUPPORTED_NAMES[[language]]$exonym,
-             ifelse(language != "en",
-                    paste0(" (", LANGUAGES_SUPPORTED_NAMES[[language]]$endonym, ")"),
-                    ""), 
-             " for the AMR package for this session.")
+    message_(
+      "Using ", LANGUAGES_SUPPORTED_NAMES[[language]]$exonym,
+      ifelse(language != "en",
+        paste0(" (", LANGUAGES_SUPPORTED_NAMES[[language]]$endonym, ")"),
+        ""
+      ),
+      " for the AMR package for this session."
+    )
  }
 }

--- a/R/zzz.R
+++ b/R/zzz.R
@@ -128,12 +128,12 @@ if (utf8_supported && !is_latex) {
  s3_register("vctrs::vec_cast", "character.mic")
  s3_register("vctrs::vec_cast", "double.mic")
  s3_register("vctrs::vec_math", "mic")
-  
+
  # if mo source exists, fire it up (see mo_source())
  if (tryCatch(file.exists(getOption("AMR_mo_source", "~/mo_source.rds")), error = function(e) FALSE)) {
    invisible(get_mo_source())
  }
-  
+
  # be sure to print tibbles as tibbles
  if (pkg_is_available("tibble", also_load = FALSE)) {
    loadNamespace("tibble")
--- a/data-raw/_pre_commit_hook.R
+++ b/data-raw/_pre_commit_hook.R
@@ -104,7 +104,7 @@ create_species_cons_cops <- function(type = c("CoNS", "CoPS")) {
    MO_staph[which(MO_staph$species %in% c(
      "coagulase-negative", "argensis", "arlettae",
      "auricularis", "borealis", "caeli", "capitis", "caprae",
-      "carnosus", "casei", "chromogenes", "cohnii", "condimenti",
+      "carnosus", "casei", "caseolyticus", "chromogenes", "cohnii", "condimenti",
      "croceilyticus",
      "debuckii", "devriesei", "edaphicus", "epidermidis",
      "equorum", "felis", "fleurettii", "gallinarum",
@@ -118,7 +118,7 @@ create_species_cons_cops <- function(type = c("CoNS", "CoPS")) {
      "vitulinus", "vitulus", "warneri", "xylosus",
      "caledonicus", "canis",
      "durrellii", "lloydii",
-      "ratti", "taiwanensis"
+      "ratti", "taiwanensis", "veratri", "urealyticus"
    ) |
      # old, now renamed to S. schleiferi (but still as synonym in our data of course):
      (MO_staph$species == "schleiferi" & MO_staph$subspecies %in% c("schleiferi", ""))),
--- a/data-raw/reproduction_of_microorganisms.R
+++ b/data-raw/reproduction_of_microorganisms.R
@@ -1005,7 +1005,7 @@ taxonomy <- taxonomy %>%
 # Remove unwanted taxonomic entries from Protoza/Fungi --------------------

 # this must be done after the microbial ID generation, since it will otherwise generate a lot of different IDs
-taxonomy <- taxonomy %>% 
+taxonomy <- taxonomy %>%
  filter(
    # Protozoa:
    !(phylum %in% c("Choanozoa", "Mycetozoa") & prevalence == 3),
@@ -1016,7 +1016,8 @@ taxonomy <- taxonomy %>%
    # Animalia:
    !genus %in% c("Lucilia", "Lumbricus"),
    !(genus %in% c("Aedes", "Anopheles") & rank %in% c("species", "subspecies")), # only genus of the many hundreds of mosquitoes species
-    kingdom != "Plantae") # this kingdom only contained Curvularia and Hymenolepis, which have coincidental twin names with Fungi
+    kingdom != "Plantae"
+  ) # this kingdom only contained Curvularia and Hymenolepis, which have coincidental twin names with Fungi

 message("\nCongratulations! The new taxonomic table will contain ", format(nrow(taxonomy), big.mark = ","), " rows.\n")

@@ -1071,7 +1072,7 @@ taxonomy$lpsn_renamed_to[which(taxonomy$fullname == "Moraxella catarrhalis")]
 taxonomy$status[which(taxonomy$fullname == "Moraxella catarrhalis")] <- "accepted"
 taxonomy$lpsn_renamed_to[which(taxonomy$fullname == "Moraxella catarrhalis")] <- NA_character_

-taxonomy <- taxonomy %>% 
+taxonomy <- taxonomy %>%
  AMR:::dataset_UTF8_to_ASCII()


--- a/inst/tinytest/test-data.R
+++ b/inst/tinytest/test-data.R
@@ -68,10 +68,10 @@ df <- AMR:::MO_lookup
 expect_true(nrow(df[which(df$prevalence == 1), , drop = FALSE]) < nrow(df[which(df$prevalence == 2), , drop = FALSE]))
 expect_true(nrow(df[which(df$prevalence == 2), , drop = FALSE]) < nrow(df[which(df$prevalence == 3), , drop = FALSE]))
 expect_true(all(c(
-  "mo", "fullname",
-  "kingdom", "phylum", "class", "order", "family", "genus", "species", "subspecies",
-  "rank", "ref", "lpsn", "gbif", "status", "source", "prevalence", "snomed",
-  "kingdom_index", "fullname_lower", "g_species"
+  "mo", "fullname", "status", "kingdom", "phylum", "class", "order", 
+  "family", "genus", "species", "subspecies", "rank", "ref", "source", 
+  "lpsn", "lpsn_parent", "lpsn_renamed_to", "gbif", "gbif_parent", "gbif_renamed_to", "prevalence", 
+  "snomed", "kingdom_index", "fullname_lower", "full_first", "species_first"
 ) %in% colnames(df)))

 expect_inherits(AMR:::MO_CONS, "mo")
@@ -87,7 +87,8 @@ expect_true(NROW(uncategorised) == 0,
    "All staphylococcal species categorised as CoNS/CoPS.",
    paste0(
      "Staphylococcal species not categorised as CoNS/CoPS: S. ",
-      uncategorised$species, " (", uncategorised$mo, ")"
+      uncategorised$species, " (", uncategorised$mo, ")",
+      collapse = "\n"
    )
  )
 )
--- a/inst/tinytest/test-eucast_rules.R
+++ b/inst/tinytest/test-eucast_rules.R
@@ -41,7 +41,7 @@ expect_equal(
 )
 MOs_mentioned <- unique(AMR:::EUCAST_RULES_DF$this_value)
 MOs_mentioned <- sort(trimws(unlist(strsplit(MOs_mentioned[!AMR:::is_valid_regex(MOs_mentioned)], ",", fixed = TRUE))))
-MOs_test <- suppressWarnings(suppressMessages(mo_name(MOs_mentioned)))
+MOs_test <- suppressWarnings(suppressMessages(mo_name(MOs_mentioned, keep_synonyms = TRUE, language = NULL)))
 expect_true(length(MOs_mentioned[MOs_test != MOs_mentioned]) == 0)

 expect_error(suppressWarnings(eucast_rules(example_isolates, col_mo = "Non-existing")))
--- a/inst/tinytest/test-mo_property.R
+++ b/inst/tinytest/test-mo_property.R
@@ -59,7 +59,7 @@ expect_inherits(mo_synonyms(c("Candida albicans", "Escherichia coli")), "list")
 expect_equal(names(mo_info("Escherichia coli")), c(
  "kingdom", "phylum", "class", "order",
  "family", "genus", "species", "subspecies",
-  "synonyms", "gramstain", "url", "ref",
+  "status", "synonyms", "gramstain", "url", "ref",
  "snomed"
 ))
 expect_inherits(mo_info(c("Escherichia coli", "Staphylococcus aureus")), "list")
@@ -73,7 +73,7 @@ expect_true(mo_url("Escherichia coli") %like% "lpsn.dsmz.de")

 # test integrity
 MOs <- microorganisms
-expect_identical(MOs$fullname, mo_fullname(MOs$fullname, language = "en"))
+expect_identical(MOs$fullname, mo_fullname(MOs$fullname, language = "en", keep_synonyms = TRUE))

 # check languages
 expect_equal(mo_type("Escherichia coli", language = "de"), "Bakterien")
@@ -81,13 +81,13 @@ expect_equal(mo_gramstain("Escherichia coli", language = "nl"), "Gram-negatief")

 gr <- mo_gramstain("Escherichia coli", language = NULL)
 for (l in AMR:::LANGUAGES_SUPPORTED[-1]) {
-  expect_false(mo_gramstain("Escherichia coli", language = l) == gr, info = paste("Gram-stain in langauge", l))
+  expect_false(mo_gramstain("Escherichia coli", language = l) == gr, info = paste("Gram-stain in language", l))
 }

 expect_error(mo_gramstain("Escherichia coli", language = "UNKNOWN"))
-dutch <- mo_name(microorganisms$fullname[which(microorganisms$fullname %unlike% "unknown|coagulase")], language = "nl") # should be transformable to English again
-expect_identical(mo_name(dutch, language = NULL),
-                 microorganisms$fullname[which(microorganisms$fullname %unlike% "unknown|coagulase")]) # gigantic test - will run ALL names
+dutch <- mo_name(microorganisms$fullname[which(microorganisms$fullname %unlike% "unknown|coagulase|Fungi")], language = "nl", keep_synonyms = TRUE) # should be transformable to English again
+expect_identical(mo_name(dutch, language = NULL, keep_synonyms = TRUE),
+                 microorganisms$fullname[which(microorganisms$fullname %unlike% "unknown|coagulase|Fungi")]) # gigantic test - will run ALL names

 # manual property function
 expect_error(mo_property("Escherichia coli", property = c("genus", "fullname")))
--- a/man/as.mo.Rd
+++ b/man/as.mo.Rd
@@ -177,7 +177,7 @@ as.mo(c(
  "VISA", # Vancomycin Intermediate S. aureus
  "VRSA", # Vancomycin Resistant S. aureus
  115329001 # SNOMED CT code
-)) 
+))

 # Dyslexia is no problem - these all work:
 as.mo(c(
--- a/man/custom_eucast_rules.Rd
+++ b/man/custom_eucast_rules.Rd
@@ -32,10 +32,10 @@ These are two custom EUCAST rules: if TZP (piperacillin/tazobactam) is "S", all

 \if{html}{\out{<div class="sourceCode r">}}\preformatted{x
 #> A set of custom EUCAST rules:
-#> 
+#>
 #>   1. If TZP is "S" then set to  S :
 #>      amoxicillin (AMX), ampicillin (AMP)
-#> 
+#>
 #>   2. If TZP is "R" then set to  R :
 #>      amoxicillin (AMX), ampicillin (AMP)
 }\if{html}{\out{</div>}}
--- a/tests/tinytest.R
+++ b/tests/tinytest.R
@@ -76,4 +76,3 @@ if (identical(Sys.getenv("R_RUN_TINYTEST"), "true")) {
    print(summary(out))
  }
 }
-