remove MO_lookup from sysdata.rda

2026-03-14 16:47:48 +01:00 · 2023-01-21 19:55:19 +01:00
parent 79c8415d3e
commit 8a92c5a869
16 changed files with 89 additions and 236 deletions
--- a/R/aa_amr-package.R
+++ b/R/aa_amr-package.R
@@ -32,7 +32,7 @@
 #' @description
 #' Welcome to the `AMR` package.
 #'
-#' The `AMR` package is a [free and open-source](https://msberends.github.io/AMR#copyright) R package with [zero dependencies](https://en.wikipedia.org/wiki/Dependency_hell) to simplify the analysis and prediction of Antimicrobial Resistance (AMR) and to work with microbial and antimicrobial data and properties, by using evidence-based methods. **Our aim is to provide a standard** for clean and reproducible AMR data analysis, that can therefore empower epidemiological analyses to continuously enable surveillance and treatment evaluation in any setting. [Many different researchers](https://msberends.github.io/AMR/authors.html) from around the globe are continually helping us to make this a successful and durable project!
+#' The `AMR` package is a [free and open-source](https://msberends.github.io/AMR/#copyright) R package with [zero dependencies](https://en.wikipedia.org/wiki/Dependency_hell) to simplify the analysis and prediction of Antimicrobial Resistance (AMR) and to work with microbial and antimicrobial data and properties, by using evidence-based methods. **Our aim is to provide a standard** for clean and reproducible AMR data analysis, that can therefore empower epidemiological analyses to continuously enable surveillance and treatment evaluation in any setting. [Many different researchers](https://msberends.github.io/AMR/authors.html) from around the globe are continually helping us to make this a successful and durable project!
 #' 
 #' This work was published in the Journal of Statistical Software (Volume 104(3); [DOI 10.18637/jss.v104.i03](https://doi.org/10.18637/jss.v104.i03)) and formed the basis of two PhD theses ([DOI 10.33612/diss.177417131](https://doi.org/10.33612/diss.177417131) and [DOI 10.33612/diss.192486375](https://doi.org/10.33612/diss.192486375)).
 #' 
--- a/R/aa_helper_functions.R
+++ b/R/aa_helper_functions.R
@@ -225,6 +225,8 @@ search_type_in_df <- function(x, type, info = TRUE) {

  # -- mo
  if (type == "mo") {
+    add_MO_lookup_to_AMR_env()
+    
    if (any(vapply(FUN.VALUE = logical(1), x, is.mo))) {
      # take first 'mo' column
      found <- colnames(x)[vapply(FUN.VALUE = logical(1), x, is.mo)]
@@ -1379,6 +1381,38 @@ add_intrinsic_resistance_to_AMR_env <- function() {
  }
 }

+add_MO_lookup_to_AMR_env <- function() {
+  # for all MO functions, saves a lot of time on package load and in package size
+  if (is.null(AMR_env$MO_lookup)) {
+    MO_lookup <- AMR::microorganisms
+    
+    MO_lookup$kingdom_index <- NA_real_
+    MO_lookup[which(MO_lookup$kingdom == "Bacteria" | MO_lookup$mo == "UNKNOWN"), "kingdom_index"] <- 1
+    MO_lookup[which(MO_lookup$kingdom == "Fungi"), "kingdom_index"] <- 2
+    MO_lookup[which(MO_lookup$kingdom == "Protozoa"), "kingdom_index"] <- 3
+    MO_lookup[which(MO_lookup$kingdom == "Archaea"), "kingdom_index"] <- 4
+    # all the rest
+    MO_lookup[which(is.na(MO_lookup$kingdom_index)), "kingdom_index"] <- 5
+    
+    # the fullname lowercase, important for the internal algorithms in as.mo()
+    MO_lookup$fullname_lower <- tolower(trimws(paste(
+      MO_lookup$genus,
+      MO_lookup$species,
+      MO_lookup$subspecies
+    )))
+    ind <- MO_lookup$genus == "" | grepl("^[(]unknown ", MO_lookup$fullname, perl = TRUE)
+    MO_lookup[ind, "fullname_lower"] <- tolower(MO_lookup[ind, "fullname", drop = TRUE])
+    MO_lookup$fullname_lower <- trimws(gsub("[^.a-z0-9/ \\-]+", "", MO_lookup$fullname_lower, perl = TRUE))
+    # special for Salmonella - they have cities as subspecies but not the species (enterica) in the fullname:
+    MO_lookup$fullname_lower[which(MO_lookup$subspecies %like_case% "^[A-Z]")] <- gsub(" enterica ", " ", MO_lookup$fullname_lower[which(MO_lookup$subspecies %like_case% "^[A-Z]")], fixed = TRUE)
+    
+    MO_lookup$full_first <- substr(MO_lookup$fullname_lower, 1, 1)
+    MO_lookup$species_first <- tolower(substr(MO_lookup$species, 1, 1)) # tolower for groups (Streptococcus, Salmonella)
+    MO_lookup$subspecies_first <- tolower(substr(MO_lookup$subspecies, 1, 1)) # tolower for Salmonella serovars
+    AMR_env$MO_lookup <- MO_lookup
+  }
+}
+
 trimws2 <- function(..., whitespace = "[\u0009\u000A\u000B\u000C\u000D\u0020\u0085\u00A0\u1680\u180E\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u200B\u200C\u200D\u2028\u2029\u202F\u205F\u2060\u3000\uFEFF]") {
  # this is even faster than trimws() itself which sets " \t\n\r".
  trimws(..., whitespace = whitespace)
--- a/R/custom_microorganisms.R
+++ b/R/custom_microorganisms.R
@@ -124,6 +124,8 @@ add_custom_microorganisms <- function(x) {
  meet_criteria(x, allow_class = "data.frame")
  stop_ifnot("genus" %in% tolower(colnames(x)), paste0("`x` must contain column 'genus'."))
  
+  add_MO_lookup_to_AMR_env()
+  
  # remove any extra class/type, such as grouped tbl, or data.table:
  x <- as.data.frame(x, stringsAsFactors = FALSE)
  colnames(x) <- tolower(colnames(x))
@@ -269,7 +271,11 @@ add_custom_microorganisms <- function(x) {
 #' @export
 clear_custom_microorganisms <- function() {
  n <- nrow(AMR_env$MO_lookup)
-  AMR_env$MO_lookup <- create_MO_lookup()
+  
+  # reset
+  AMR_env$MO_lookup <- NULL
+  add_MO_lookup_to_AMR_env()
+  
  n2 <- nrow(AMR_env$MO_lookup)
  AMR_env$custom_mo_codes <- character(0)
  AMR_env$mo_previously_coerced <- AMR_env$mo_previously_coerced[which(AMR_env$mo_previously_coerced$mo %in% AMR_env$MO_lookup$mo), , drop = FALSE]
--- a/R/eucast_rules.R
+++ b/R/eucast_rules.R
@@ -182,6 +182,8 @@ eucast_rules <- function(x,
  meet_criteria(only_sir_columns, allow_class = "logical", has_length = 1)
  meet_criteria(custom_rules, allow_class = "custom_eucast_rules", allow_NULL = TRUE)

+  add_MO_lookup_to_AMR_env()
+
  if ("custom" %in% rules && is.null(custom_rules)) {
    warning_("in `eucast_rules()`: no custom rules were set with the `custom_rules` argument",
      immediate = TRUE
--- a/R/italicise_taxonomy.R
+++ b/R/italicise_taxonomy.R
@@ -51,6 +51,8 @@ italicise_taxonomy <- function(string, type = c("markdown", "ansi")) {
  meet_criteria(string, allow_class = "character")
  meet_criteria(type, allow_class = "character", has_length = 1, is_in = c("markdown", "ansi"))

+  add_MO_lookup_to_AMR_env()
+
  if (type == "markdown") {
    before <- "*"
    after <- "*"
--- a/R/join_microorganisms.R
+++ b/R/join_microorganisms.R
@@ -127,6 +127,8 @@ anti_join_microorganisms <- function(x, by = NULL, ...) {
 }

 join_microorganisms <- function(type, x, by, suffix, ...) {
+  add_MO_lookup_to_AMR_env()
+  
  if (!is.data.frame(x)) {
    if (pkg_is_available("tibble", also_load = FALSE)) {
      x <- import_fn("tibble", "tibble")(mo = x)
--- a/R/mdro.R
+++ b/R/mdro.R
@@ -325,7 +325,7 @@ mdro <- function(x = NULL,
      "No column found as input for `col_mo`, ",
      font_bold(paste0("assuming all rows contain ", font_italic("Mycobacterium tuberculosis"), "."))
    )
-    x$mo <- as.mo("Mycobacterium tuberculosis") # consider overkill at all times: AMR_env$MO_lookup[which(AMR_env$MO_lookup$fullname == "Mycobacterium tuberculosis"), "mo", drop = TRUE]
+    x$mo <- as.mo("Mycobacterium tuberculosis", keep_synonyms = TRUE)
    col_mo <- "mo"
  }
  stop_if(is.null(col_mo), "`col_mo` must be set")
@@ -382,120 +382,16 @@ mdro <- function(x = NULL,
    cols_ab <- get_column_abx(
      x = x,
      soft_dependencies = c(
-        # [table] 1 (S aureus):
-        "GEN",
-        "RIF",
-        "CPT",
-        "OXA",
-        "CIP",
-        "MFX",
-        "SXT",
-        "FUS",
-        "VAN",
-        "TEC",
-        "TLV",
-        "TGC",
-        "CLI",
-        "DAP",
-        "ERY",
-        "LNZ",
-        "CHL",
-        "FOS",
-        "QDA",
-        "TCY",
-        "DOX",
-        "MNO",
+        # [table] 1 (S aureus)
+        "GEN", "RIF", "CPT", "OXA", "CIP", "MFX", "SXT", "FUS", "VAN", "TEC", "TLV", "TGC", "CLI", "DAP", "ERY", "LNZ", "CHL", "FOS", "QDA", "TCY", "DOX", "MNO",
        # [table] 2 (Enterococcus)
-        "GEH",
-        "STH",
-        "IPM",
-        "MEM",
-        "DOR",
-        "CIP",
-        "LVX",
-        "MFX",
-        "VAN",
-        "TEC",
-        "TGC",
-        "DAP",
-        "LNZ",
-        "AMP",
-        "QDA",
-        "DOX",
-        "MNO",
+        "GEH", "STH", "IPM", "MEM", "DOR", "CIP", "LVX", "MFX", "VAN", "TEC", "TGC", "DAP", "LNZ", "AMP", "QDA", "DOX", "MNO",
        # [table] 3 (Enterobacteriaceae)
-        "GEN",
-        "TOB",
-        "AMK",
-        "NET",
-        "CPT",
-        "TCC",
-        "TZP",
-        "ETP",
-        "IPM",
-        "MEM",
-        "DOR",
-        "CZO",
-        "CXM",
-        "CTX",
-        "CAZ",
-        "FEP",
-        "FOX",
-        "CTT",
-        "CIP",
-        "SXT",
-        "TGC",
-        "ATM",
-        "AMP",
-        "AMC",
-        "SAM",
-        "CHL",
-        "FOS",
-        "COL",
-        "TCY",
-        "DOX",
-        "MNO",
+        "GEN", "TOB", "AMK", "NET", "CPT", "TCC", "TZP", "ETP", "IPM", "MEM", "DOR", "CZO", "CXM", "CTX", "CAZ", "FEP", "FOX", "CTT", "CIP", "SXT", "TGC", "ATM", "AMP", "AMC", "SAM", "CHL", "FOS", "COL", "TCY", "DOX", "MNO",
        # [table] 4 (Pseudomonas)
-        "GEN",
-        "TOB",
-        "AMK",
-        "NET",
-        "IPM",
-        "MEM",
-        "DOR",
-        "CAZ",
-        "FEP",
-        "CIP",
-        "LVX",
-        "TCC",
-        "TZP",
-        "ATM",
-        "FOS",
-        "COL",
-        "PLB",
+        "GEN", "TOB", "AMK", "NET", "IPM", "MEM", "DOR", "CAZ", "FEP", "CIP", "LVX", "TCC", "TZP", "ATM", "FOS", "COL", "PLB",
        # [table] 5 (Acinetobacter)
-        "GEN",
-        "TOB",
-        "AMK",
-        "NET",
-        "IPM",
-        "MEM",
-        "DOR",
-        "CIP",
-        "LVX",
-        "TZP",
-        "TCC",
-        "CTX",
-        "CRO",
-        "CAZ",
-        "FEP",
-        "SXT",
-        "SAM",
-        "COL",
-        "PLB",
-        "TCY",
-        "DOX",
-        "MNO"
+        "GEN", "TOB", "AMK", "NET", "IPM", "MEM", "DOR", "CIP", "LVX", "TZP", "TCC", "CTX", "CRO", "CAZ", "FEP", "SXT", "SAM", "COL", "PLB", "TCY", "DOX", "MNO"
      ),
      verbose = verbose,
      info = info,
@@ -506,30 +402,7 @@ mdro <- function(x = NULL,
  } else if (guideline$code == "eucast3.2") {
    cols_ab <- get_column_abx(
      x = x,
-      soft_dependencies = c(
-        "AMP",
-        "AMX",
-        "CIP",
-        "DAL",
-        "DAP",
-        "ERV",
-        "FDX",
-        "GEN",
-        "LNZ",
-        "MEM",
-        "MTR",
-        "OMC",
-        "ORI",
-        "PEN",
-        "QDA",
-        "RIF",
-        "TEC",
-        "TGC",
-        "TLV",
-        "TOB",
-        "TZD",
-        "VAN"
-      ),
+      soft_dependencies = c("AMP", "AMX", "CIP", "DAL", "DAP", "ERV", "FDX", "GEN", "LNZ", "MEM", "MTR", "OMC", "ORI", "PEN", "QDA", "RIF", "TEC", "TGC", "TLV", "TOB", "TZD", "VAN"),
      verbose = verbose,
      info = info,
      only_sir_columns = only_sir_columns,
@@ -539,30 +412,7 @@ mdro <- function(x = NULL,
  } else if (guideline$code == "eucast3.3") {
    cols_ab <- get_column_abx(
      x = x,
-      soft_dependencies = c(
-        "AMP",
-        "AMX",
-        "CIP",
-        "DAL",
-        "DAP",
-        "ERV",
-        "FDX",
-        "GEN",
-        "LNZ",
-        "MEM",
-        "MTR",
-        "OMC",
-        "ORI",
-        "PEN",
-        "QDA",
-        "RIF",
-        "TEC",
-        "TGC",
-        "TLV",
-        "TOB",
-        "TZD",
-        "VAN"
-      ),
+      soft_dependencies = c("AMP", "AMX", "CIP", "DAL", "DAP", "ERV", "FDX", "GEN", "LNZ", "MEM", "MTR", "OMC", "ORI", "PEN", "QDA", "RIF", "TEC", "TGC", "TLV", "TOB", "TZD", "VAN"),
      verbose = verbose,
      info = info,
      only_sir_columns = only_sir_columns,
@@ -572,16 +422,7 @@ mdro <- function(x = NULL,
  } else if (guideline$code == "tb") {
    cols_ab <- get_column_abx(
      x = x,
-      soft_dependencies = c(
-        "CAP",
-        "ETH",
-        "GAT",
-        "INH",
-        "PZA",
-        "RIF",
-        "RIB",
-        "RFP"
-      ),
+      soft_dependencies = c("CAP", "ETH", "GAT", "INH", "PZA", "RIF", "RIB", "RFP"),
      verbose = verbose,
      info = info,
      only_sir_columns = only_sir_columns,
@@ -591,14 +432,7 @@ mdro <- function(x = NULL,
  } else if (guideline$code == "mrgn") {
    cols_ab <- get_column_abx(
      x = x,
-      soft_dependencies = c(
-        "PIP",
-        "CTX",
-        "CAZ",
-        "IPM",
-        "MEM",
-        "CIP"
-      ),
+      soft_dependencies = c("PIP", "CTX", "CAZ", "IPM", "MEM", "CIP"),
      verbose = verbose,
      info = info,
      only_sir_columns = only_sir_columns,
--- a/R/mo.R
+++ b/R/mo.R
@@ -164,6 +164,8 @@ as.mo <- function(x,
  language <- validate_language(language)
  meet_criteria(info, allow_class = "logical", has_length = 1)

+  add_MO_lookup_to_AMR_env()
+
  if (tryCatch(all(x %in% c(AMR_env$MO_lookup$mo, NA)) &&
    isFALSE(Becker) &&
    isFALSE(Lancefield), error = function(e) FALSE)) {
@@ -492,6 +494,7 @@ mo_uncertainties <- function() {
 #' @rdname as.mo
 #' @export
 mo_renamed <- function() {
+  add_MO_lookup_to_AMR_env()
  x <- AMR_env$mo_renamed

  x$new <- synonym_mo_to_accepted_mo(x$old)
@@ -547,6 +550,7 @@ mo_cleaning_regex <- function() {

 # will be exported using s3_register() in R/zzz.R
 pillar_shaft.mo <- function(x, ...) {
+  add_MO_lookup_to_AMR_env()
  out <- format(x)
  # grey out the kingdom (part until first "_")
  out[!is.na(x)] <- gsub("^([A-Z]+_)(.*)", paste0(font_subtle("\\1"), "\\2"), out[!is.na(x)], perl = TRUE)
@@ -664,6 +668,7 @@ get_skimmers.mo <- function(column) {
 #' @export
 #' @noRd
 print.mo <- function(x, print.shortnames = FALSE, ...) {
+  add_MO_lookup_to_AMR_env()
  cat("Class 'mo'\n")
  x_names <- names(x)
  if (is.null(x_names) & print.shortnames == TRUE) {
@@ -704,6 +709,7 @@ summary.mo <- function(object, ...) {
 #' @export
 #' @noRd
 as.data.frame.mo <- function(x, ...) {
+  add_MO_lookup_to_AMR_env()
  if (!all(x %in% c(AMR_env$MO_lookup$mo, NA))) {
    warning_(
      "The data contains old MO codes (from a previous AMR package version). ",
@@ -741,6 +747,7 @@ as.data.frame.mo <- function(x, ...) {
  y <- NextMethod()
  attributes(y) <- attributes(i)
  # must only contain valid MOs
+  add_MO_lookup_to_AMR_env()
  return_after_integrity_check(y, "microorganism code", as.character(AMR_env$MO_lookup$mo))
 }
 #' @method [[<- mo
@@ -750,6 +757,7 @@ as.data.frame.mo <- function(x, ...) {
  y <- NextMethod()
  attributes(y) <- attributes(i)
  # must only contain valid MOs
+  add_MO_lookup_to_AMR_env()
  return_after_integrity_check(y, "microorganism code", as.character(AMR_env$MO_lookup$mo))
 }
 #' @method c mo
@@ -759,6 +767,7 @@ c.mo <- function(...) {
  x <- list(...)[[1L]]
  y <- NextMethod()
  attributes(y) <- attributes(x)
+  add_MO_lookup_to_AMR_env()
  return_after_integrity_check(y, "microorganism code", as.character(AMR_env$MO_lookup$mo))
 }

@@ -788,6 +797,8 @@ print.mo_uncertainties <- function(x, ...) {
    cat(word_wrap("No uncertainties to show. Only uncertainties of the last call of `as.mo()` or any `mo_*()` function are stored.\n\n", add_fn = font_blue))
    return(invisible(NULL))
  }
+  
+  add_MO_lookup_to_AMR_env()

  cat(word_wrap("Matching scores are based on the resemblance between the input and the full taxonomic name, and the pathogenicity in humans. See `?mo_matching_score`.\n\n", add_fn = font_blue))
  if (has_colour()) {
@@ -1049,6 +1060,7 @@ replace_old_mo_codes <- function(x, property) {
  # B_ESCH_COL (AMR v0.5.0) -> B_ESCHR_COLI
  ind <- x %like_case% "^[A-Z]_[A-Z_]+$" & !x %in% AMR_env$MO_lookup$mo
  if (any(ind, na.rm = TRUE)) {
+    add_MO_lookup_to_AMR_env()
    # get the ones that match
    affected <- x[ind]
    affected_unique <- unique(affected)
--- a/R/mo_matching_score.R
+++ b/R/mo_matching_score.R
@@ -84,6 +84,8 @@ mo_matching_score <- function(x, n) {
  meet_criteria(x, allow_class = c("character", "data.frame", "list"))
  meet_criteria(n, allow_class = "character")

+  add_MO_lookup_to_AMR_env()
+
  x <- parse_and_convert(x)
  # no dots and other non-whitespace characters
  x <- gsub("[^a-zA-Z0-9 \\(\\)]+", "", x)
--- a/R/mo_property.R
+++ b/R/mo_property.R
@@ -417,6 +417,8 @@ mo_pathogenicity <- function(x, language = get_AMR_locale(), keep_synonyms = get
  language <- validate_language(language)
  meet_criteria(keep_synonyms, allow_class = "logical", has_length = 1)

+  add_MO_lookup_to_AMR_env()
+
  x.mo <- as.mo(x, language = language, keep_synonyms = keep_synonyms, ...)
  metadata <- get_mo_uncertainties()

@@ -725,6 +727,8 @@ mo_synonyms <- function(x, language = get_AMR_locale(), keep_synonyms = getOptio
  meet_criteria(x, allow_NA = TRUE)
  language <- validate_language(language)
  meet_criteria(keep_synonyms, allow_class = "logical", has_length = 1)
+  
+  add_MO_lookup_to_AMR_env()

  x.mo <- as.mo(x, language = language, keep_synonyms = keep_synonyms, ...)
  metadata <- get_mo_uncertainties()
@@ -811,6 +815,8 @@ mo_url <- function(x, open = FALSE, language = get_AMR_locale(), keep_synonyms =
  meet_criteria(open, allow_class = "logical", has_length = 1)
  language <- validate_language(language)
  meet_criteria(keep_synonyms, allow_class = "logical", has_length = 1)
+  
+  add_MO_lookup_to_AMR_env()

  x.mo <- as.mo(x = x, language = language, keep_synonyms = keep_synonyms, ... = ...)
  metadata <- get_mo_uncertainties()
@@ -847,7 +853,7 @@ mo_property <- function(x, property = "fullname", language = get_AMR_locale(), k
    x <- find_mo_col(fn = "mo_property")
  }
  meet_criteria(x, allow_NA = TRUE)
-  meet_criteria(property, allow_class = "character", has_length = 1, is_in = colnames(AMR_env$MO_lookup))
+  meet_criteria(property, allow_class = "character", has_length = 1, is_in = colnames(AMR::microorganisms))
  language <- validate_language(language)
  meet_criteria(keep_synonyms, allow_class = "logical", has_length = 1)

@@ -855,7 +861,8 @@ mo_property <- function(x, property = "fullname", language = get_AMR_locale(), k
 }

 mo_validate <- function(x, property, language, keep_synonyms = keep_synonyms, ...) {
-
+  add_MO_lookup_to_AMR_env()
+  
  # try to catch an error when inputting an invalid argument
  # so the 'call.' can be set to FALSE
  tryCatch(x[1L] %in% unlist(AMR_env$MO_lookup[1, property, drop = TRUE]),
--- a/R/mo_source.R
+++ b/R/mo_source.R
@@ -261,6 +261,8 @@ get_mo_source <- function(destination = getOption("AMR_mo_source", "~/mo_source.
 }

 check_validity_mo_source <- function(x, refer_to_name = "`reference_df`", stop_on_error = TRUE) {
+  add_MO_lookup_to_AMR_env()
+  
  if (paste(deparse(substitute(x)), collapse = "") == "get_mo_source()") {
    return(TRUE)
  }
--- a/R/sysdata.rda
+++ b/R/sysdata.rda
--- a/R/vctrs.R
+++ b/R/vctrs.R
@@ -95,6 +95,7 @@ vec_cast.character.mo <- function(x, to, ...) {
  as.character(x)
 }
 vec_cast.mo.character <- function(x, to, ...) {
+  add_MO_lookup_to_AMR_env()
  return_after_integrity_check(x, "microorganism code", as.character(AMR_env$MO_lookup$mo))
 }

--- a/R/zzz.R
+++ b/R/zzz.R
@@ -181,11 +181,10 @@ if (utf8_supported && !is_latex) {
    try(loadNamespace("tibble"), silent = TRUE)
  }

-  # reference data - they have additional columns compared to `antibiotics` and `microorganisms` to improve speed
+  # reference data - they have additional to improve algorithm speed
  # they cannot be part of R/sysdata.rda since CRAN thinks it would make the package too large (+3 MB)
-  AMR_env$AB_lookup <- create_AB_lookup()
-  AMR_env$AV_lookup <- create_AV_lookup()
-  AMR_env$MO_lookup <- create_MO_lookup()
+  AMR_env$AB_lookup <- cbind(AMR::antibiotics, AB_LOOKUP)
+  AMR_env$AV_lookup <- cbind(AMR::antivirals, AV_LOOKUP)
 }

 .onAttach <- function(lib, pkg) {
@@ -208,36 +207,3 @@ if (utf8_supported && !is_latex) {
    }, error = function(e) packageStartupMessage("Failed: ", e$message))
  }
 }
-
-# Helper functions --------------------------------------------------------
-
-create_AB_lookup <- function() {
-  cbind(AMR::antibiotics, AB_LOOKUP)
-}
-
-create_AV_lookup <- function() {
-  cbind(AMR::antivirals, AV_LOOKUP)
-}
-
-create_MO_lookup <- function() {
-  MO_lookup <- AMR::microorganisms
-
-  MO_lookup$kingdom_index <- NA_real_
-  MO_lookup[which(MO_lookup$kingdom == "Bacteria" | MO_lookup$mo == "UNKNOWN"), "kingdom_index"] <- 1
-  MO_lookup[which(MO_lookup$kingdom == "Fungi"), "kingdom_index"] <- 2
-  MO_lookup[which(MO_lookup$kingdom == "Protozoa"), "kingdom_index"] <- 3
-  MO_lookup[which(MO_lookup$kingdom == "Archaea"), "kingdom_index"] <- 4
-  # all the rest
-  MO_lookup[which(is.na(MO_lookup$kingdom_index)), "kingdom_index"] <- 5
-  
-  if (length(MO_FULLNAME_LOWER) != nrow(MO_lookup)) {
-    packageStartupMessage("fullname_lower not same size - applied tolower(), update sysdata.rda!")
-    MO_lookup$fullname_lower <- tolower(MO_lookup$fullname)
-  } else {
-    MO_lookup$fullname_lower <- MO_FULLNAME_LOWER
-  }
-  MO_lookup$full_first <- substr(MO_lookup$fullname_lower, 1, 1)
-  MO_lookup$species_first <- tolower(substr(MO_lookup$species, 1, 1)) # tolower for groups (Streptococcus, Salmonella)
-  MO_lookup$subspecies_first <- tolower(substr(MO_lookup$subspecies, 1, 1)) # tolower for Salmonella serovars
-  MO_lookup
-}
--- a/data-raw/_pre_commit_hook.R
+++ b/data-raw/_pre_commit_hook.R
@@ -144,21 +144,6 @@ create_species_cons_cops <- function(type = c("CoNS", "CoPS")) {
    ]
  }
 }
-create_MO_fullname_lower <- function() {
-  AMR_env$MO_lookup <- AMR::microorganisms
-  # use this paste instead of `fullname` to work with Viridans Group Streptococci, etc.
-  AMR_env$MO_lookup$fullname_lower <- tolower(trimws(paste(
-    AMR_env$MO_lookup$genus,
-    AMR_env$MO_lookup$species,
-    AMR_env$MO_lookup$subspecies
-  )))
-  ind <- AMR_env$MO_lookup$genus == "" | grepl("^[(]unknown ", AMR_env$MO_lookup$fullname, perl = TRUE)
-  AMR_env$MO_lookup[ind, "fullname_lower"] <- tolower(AMR_env$MO_lookup[ind, "fullname", drop = TRUE])
-  AMR_env$MO_lookup$fullname_lower <- trimws(gsub("[^.a-z0-9/ \\-]+", "", AMR_env$MO_lookup$fullname_lower, perl = TRUE))
-  # special for Salmonella - they have cities as subspecies but not the species (enterica) in the fullname:
-  AMR_env$MO_lookup$fullname_lower[which(AMR_env$MO_lookup$subspecies %like_case% "^[A-Z]")] <- gsub(" enterica ", " ", AMR_env$MO_lookup$fullname_lower[which(AMR_env$MO_lookup$subspecies %like_case% "^[A-Z]")], fixed = TRUE)
-  AMR_env$MO_lookup$fullname_lower
-}
 MO_CONS <- create_species_cons_cops("CoNS")
 MO_COPS <- create_species_cons_cops("CoPS")
 MO_STREP_ABCG <- AMR_env$MO_lookup$mo[which(AMR_env$MO_lookup$genus == "Streptococcus" &
@@ -166,7 +151,6 @@ MO_STREP_ABCG <- AMR_env$MO_lookup$mo[which(AMR_env$MO_lookup$genus == "Streptoc
    "pyogenes", "agalactiae", "dysgalactiae", "equi", "canis",
    "group A", "group B", "group C", "group G"
  ))]
-MO_FULLNAME_LOWER <- create_MO_fullname_lower()
 MO_PREVALENT_GENERA <- c(
  "Absidia", "Acanthamoeba", "Acremonium", "Aedes", "Alternaria", "Amoeba", "Ancylostoma", "Angiostrongylus",
  "Anisakis", "Anopheles", "Apophysomyces", "Aspergillus", "Aureobasidium", "Basidiobolus", "Beauveria",
@@ -298,7 +282,6 @@ suppressMessages(usethis::use_data(EUCAST_RULES_DF,
  MO_CONS,
  MO_COPS,
  MO_STREP_ABCG,
-  MO_FULLNAME_LOWER,
  MO_PREVALENT_GENERA,
  AB_LOOKUP,
  AV_LOOKUP,
--- a/man/AMR.Rd
+++ b/man/AMR.Rd
@@ -28,7 +28,7 @@ A BibTeX entry for LaTeX users is:
 \description{
 Welcome to the \code{AMR} package.

-The \code{AMR} package is a \href{https://msberends.github.io/AMR#copyright}{free and open-source} R package with \href{https://en.wikipedia.org/wiki/Dependency_hell}{zero dependencies} to simplify the analysis and prediction of Antimicrobial Resistance (AMR) and to work with microbial and antimicrobial data and properties, by using evidence-based methods. \strong{Our aim is to provide a standard} for clean and reproducible AMR data analysis, that can therefore empower epidemiological analyses to continuously enable surveillance and treatment evaluation in any setting. \href{https://msberends.github.io/AMR/authors.html}{Many different researchers} from around the globe are continually helping us to make this a successful and durable project!
+The \code{AMR} package is a \href{https://msberends.github.io/AMR/#copyright}{free and open-source} R package with \href{https://en.wikipedia.org/wiki/Dependency_hell}{zero dependencies} to simplify the analysis and prediction of Antimicrobial Resistance (AMR) and to work with microbial and antimicrobial data and properties, by using evidence-based methods. \strong{Our aim is to provide a standard} for clean and reproducible AMR data analysis, that can therefore empower epidemiological analyses to continuously enable surveillance and treatment evaluation in any setting. \href{https://msberends.github.io/AMR/authors.html}{Many different researchers} from around the globe are continually helping us to make this a successful and durable project!

 This work was published in the Journal of Statistical Software (Volume 104(3); \href{https://doi.org/10.18637/jss.v104.i03}{DOI 10.18637/jss.v104.i03}) and formed the basis of two PhD theses (\href{https://doi.org/10.33612/diss.177417131}{DOI 10.33612/diss.177417131} and \href{https://doi.org/10.33612/diss.192486375}{DOI 10.33612/diss.192486375}).