speed improvement as.mo, freq title

2025-07-08 16:02:02 +02:00 · 2018-10-31 12:10:49 +01:00
parent 3d4c4c678b
commit 9cd4ab928a
27 changed files with 289 additions and 224 deletions
--- a/R/data.R
+++ b/R/data.R
@ -19,7 +19,7 @@
 #' Data set with 423 antibiotics
 #'
 #' A data set containing all antibiotics with a J0 code and some other antimicrobial agents, with their DDDs. Except for trade names and abbreviations, all properties were downloaded from the WHO, see Source.
-#' @format A \code{\link{tibble}} with 423 observations and 18 variables:
+#' @format A \code{\link{data.frame}} with 423 observations and 18 variables:
 #' \describe{
 #'   \item{\code{atc}}{ATC code, like \code{J01CR02}}
 #'   \item{\code{certe}}{Certe code, like \code{amcl}}
@ -139,7 +139,7 @@
 #'   \item{\code{subkingdom}}{Taxonomic subkingdom of the microorganism as found in ITIS, see Source}
 #'   \item{\code{gramstain}}{Gram of microorganism, like \code{"Gram negative"}}
 #'   \item{\code{type}}{Type of microorganism, like \code{"Bacteria"} and \code{"Fungi"}}
-#'   \item{\code{prevalence}}{A rounded integer based on prevalence of the microorganism. Used internally by \code{\link{as.mo}}, otherwise quite meaningless.}
+#'   \item{\code{prevalence}}{An integer based on estimated prevalence of the microorganism in humans. Used internally by \code{\link{as.mo}}, otherwise quite meaningless. It has a value of 25 for manually added items and a value of 1000 for all unprevalent microorganisms whose genus was somewhere in the top 250 (with another species).}
 #'   \item{\code{ref}}{Author(s) and year of concerning publication as found in ITIS, see Source}
 #' }
 #' @source [3] Integrated Taxonomic Information System (ITIS) on-line database, \url{https://www.itis.gov}.
@ -164,7 +164,7 @@
 #' Translation table for UMCG
 #'
 #' A data set containing all bacteria codes of UMCG MMB. These codes can be joined to data with an ID from \code{\link{microorganisms}$mo} (using \code{\link{left_join_microorganisms}}). GLIMS codes can also be translated to valid \code{MO}s with \code{\link{guess_mo}}.
-#' @format A \code{\link{tibble}} with 1,095 observations and 2 variables:
+#' @format A \code{\link{data.frame}} with 1,095 observations and 2 variables:
 #' \describe{
 #'   \item{\code{umcg}}{Code of microorganism according to UMCG MMB}
 #'   \item{\code{certe}}{Code of microorganism according to Certe MMB}
@ -175,7 +175,7 @@
 #' Translation table for Certe
 #'
 #' A data set containing all bacteria codes of Certe MMB. These codes can be joined to data with an ID from \code{\link{microorganisms}$mo} (using \code{\link{left_join_microorganisms}}). GLIMS codes can also be translated to valid \code{MO}s with \code{\link{guess_mo}}.
-#' @format A \code{\link{tibble}} with 2,665 observations and 2 variables:
+#' @format A \code{\link{data.frame}} with 2,665 observations and 2 variables:
 #' \describe{
 #'   \item{\code{certe}}{Code of microorganism according to Certe MMB}
 #'   \item{\code{mo}}{Code of microorganism in \code{\link{microorganisms}}}
@ -239,3 +239,31 @@
 #'   summarise(n = n_rsi(amcl),
 #'             p = portion_IR(amcl, minimum = 20))
 "septic_patients"
+
+#' Supplementary Data
+#'
+#' These \code{\link{data.table}s} are transformed from the \code{\link{microorganisms}} and \code{\link{microorganisms}} data sets to improve speed of \code{\link{as.mo}}. They are meant for internal use only, and are only mentioned here for reference.
+#' @rdname supplementary_data
+#' @name supplementary_data
+# # Renew data:
+# microorganismsDT <- data.table::as.data.table(AMR::microorganisms)
+# # sort on (1) bacteria, (2) fungi, (3) protozoa and then human pathogenic prevalence and then TSN:
+# data.table::setkey(microorganismsDT, type, prevalence, fullname)
+# microorganisms.prevDT <- microorganismsDT[prevalence == 9999,]
+# microorganisms.unprevDT <- microorganismsDT[prevalence != 9999,]
+# microorganisms.oldDT <- data.table::as.data.table(AMR::microorganisms.old)
+# data.table::setkey(microorganisms.oldDT, tsn, name)
+# devtools::use_data(microorganismsDT, overwrite = TRUE)
+# devtools::use_data(microorganisms.prevDT, overwrite = TRUE)
+# devtools::use_data(microorganisms.unprevDT, overwrite = TRUE)
+# devtools::use_data(microorganisms.oldDT, overwrite = TRUE)
+"microorganismsDT"
+
+#' @rdname supplementary_data
+"microorganisms.prevDT"
+
+#' @rdname supplementary_data
+"microorganisms.unprevDT"
+
+#' @rdname supplementary_data
+"microorganisms.oldDT"
--- a/R/freq.R
+++ b/R/freq.R
@ -29,6 +29,7 @@
 #' @param digits how many significant digits are to be used for numeric values in the header (not for the items themselves, that depends on \code{\link{getOption}("digits")})
 #' @param quote a logical value indicating whether or not strings should be printed with surrounding quotes
 #' @param header a logical value indicating whether an informative header should be printed
+#' @param title text to show above frequency table, at default to tries to coerce from the variables passed to \code{x}
 #' @param na a character string to should be used to show empty (\code{NA}) values (only useful when \code{na.rm = FALSE})
 #' @param sep a character string to separate the terms when selecting multiple columns
 #' @param f a frequency table
@ -151,6 +152,7 @@ frequency_tbl <- function(x,
                          digits = 2,
                          quote = FALSE,
                          header = !markdown,
+                          title = NULL,
                          na = "<NA>",
                          sep = " ") {

@ -395,6 +397,11 @@ frequency_tbl <- function(x,
    tbl_format <- 'pandoc'
  }

+  if (!is.null(title)) {
+    x.name <- trimws(gsub("^Frequency table of", "", title[1L], ignore.case = TRUE))
+    cols <- NULL
+  }
+
  structure(.Data = df,
            class = c('frequency_tbl', class(df)),
            opt = list(data = x.name,
@ -522,7 +529,7 @@ print.frequency_tbl <- function(x, nmax = getOption("max.print.freq", default =
    }
  }

-  title <- paste("Frequency table", title)
+  title <- paste("Frequency table", trimws(title))

  # bold title
  if (opt$tbl_format == "pandoc") {
--- a/R/globals.R
+++ b/R/globals.R
@ -49,6 +49,10 @@ globalVariables(c(".",
                  "mic",
                  "microorganisms",
                  "microorganisms.old",
+                  "microorganismsDT",
+                  "microorganisms.prevDT",
+                  "microorganisms.unprevDT",
+                  "microorganisms.oldDT",
                  "mo",
                  "mo.old",
                  "n",
--- a/R/mo.R
+++ b/R/mo.R
@ -49,7 +49,15 @@
 #'
 #' Use the \code{\link{mo_property}} functions to get properties based on the returned code, see Examples.
 #'
-#' This function uses Artificial Intelligence (AI) to help getting more logical results, based on type of input and known prevalence of human pathogens. For example:
+#' This function uses Artificial Intelligence (AI) to help getting fast and logical results. It tries to find matches in this order:
+#' \itemize{
+#'   \item{Taxonomic kingdom: it first searches in bacteria, then fungi, then protozoa}
+#'   \item{Human pathogenic prevalence: it first searches in more prevalent microorganisms, then less prevalent ones}
+#'   \item{Valid MO codes and full names: it first searches in already valid MO code and genus/species combinations}
+#'   \item{Breakdown of input values: from here it starts to breakdown input values to find possible matches}
+#' }
+#'
+#' A couple of effects because of these rules
 #' \itemize{
 #'   \item{\code{"E. coli"} will return the ID of \emph{Escherichia coli} and not \emph{Entamoeba coli}, although the latter would alphabetically come first}
 #'   \item{\code{"H. influenzae"} will return the ID of \emph{Haemophilus influenzae} and not \emph{Haematobacter influenzae} for the same reason}
@ -63,10 +71,11 @@
 #' \if{html}{\figure{itis_logo.jpg}{options: height=60px style=margin-bottom:5px} \cr}
 #' This package contains the \strong{complete microbial taxonomic data} (with all  seven taxonomic ranks - from subkingdom to subspecies) from the publicly available Integrated Taxonomic Information System (ITIS, \url{https://www.itis.gov}).
 #'
-#' All (sub)species from the taxonomic kingdoms Bacteria, Fungi and Protozoa are included in this package, as well as all previously accepted names known to ITIS. Furthermore, the responsible authors and year of publication are available. This allows users to use authoritative taxonomic information for their data analysis on any microorganism, not only human pathogens.
+#' All (sub)species from the \strong{taxonomic kingdoms Bacteria, Fungi and Protozoa are included in this package}, as well as all previously accepted names known to ITIS. Furthermore, the responsible authors and year of publication are available. This \strong{allows users to use authoritative taxonomic information} for their data analysis on any microorganism, not only human pathogens. It also helps to \strong{quickly determine the Gram stain of bacteria}, since all bacteria are classified into subkingdom Negibacteria or Posibacteria.
 #'
 #' ITIS is a partnership of U.S., Canadian, and Mexican agencies and taxonomic specialists [3].
-#  (source as a section, so it can be inherited by other man pages:)
+#'
+#  (source as a section, so it can be inherited by other man pages)
 #' @section Source:
 #' [1] Becker K \emph{et al.} \strong{Coagulase-Negative Staphylococci}. 2014. Clin Microbiol Rev. 27(4): 870–926. \url{https://dx.doi.org/10.1128/CMR.00109-13}
 #'
@ -129,9 +138,10 @@
 #'   mutate(mo = guess_mo(paste(genus, species)))
 #' }
 as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = FALSE, reference_df = NULL) {
-  exec_as.mo(x = x, Becker = Becker, Lancefield = Lancefield,
-             allow_uncertain = allow_uncertain, reference_df = reference_df,
-             property = "mo")
+  structure(mo_validate(x = x, property = "mo",
+                        Becker = Becker, Lancefield = Lancefield,
+                        allow_uncertain = allow_uncertain, reference_df = reference_df),
+            class = "mo")
 }

 #' @rdname as.mo
@ -147,8 +157,15 @@ is.mo <- function(x) {
 guess_mo <- as.mo

 #' @importFrom dplyr %>% pull left_join
-#' @importFrom data.table as.data.table setkey
+#' @importFrom data.table data.table as.data.table setkey
 exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = FALSE, reference_df = NULL, property = "mo") {
+
+  # These data.tables are available as data sets when the AMR package is loaded:
+  #   microorganismsDT        # this one is sorted by kingdom (B<F<P), prevalence, TSN
+  #   microorganisms.prevDT   # same as microorganismsDT, but with prevalence != 9999
+  #   microorganisms.unprevDT # same as microorganismsDT, but with prevalence == 9999
+  #   microorganisms.oldDT    # old taxonomic names, sorted by name (genus+species), TSN
+
  if (NCOL(x) == 2) {
    # support tidyverse selection like: df %>% select(colA, colB)
    # paste these columns together
@ -176,12 +193,6 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
  # remove empty values (to later fill them in again)
  x <- x[!is.na(x) & !is.null(x) & !identical(x, "")]

-  # These data.tables are available because of .onAttach:
-  #   MOs
-  #   MOs_mostprevalent
-  #   MOs_allothers
-  #   MOs_old
-
  # defined df to check for
  if (!is.null(reference_df)) {
    if (!is.data.frame(reference_df) | NCOL(reference_df) < 2) {
@ -193,18 +204,12 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
    )
  }

-  if (all(x %in% AMR::microorganisms[, property])) {
-    # already existing mo
-  } else if (all(x %in% AMR::microorganisms[, "mo"])) {
-    # existing mo codes when not looking for property "mo"
-    suppressWarnings(
-      x <- data.frame(mo = x, stringsAsFactors = FALSE) %>%
-        left_join(AMR::microorganisms, by = "mo") %>%
-        pull(property)
-    )
+  if (all(x %in% microorganismsDT[["mo"]])) {
+    # existing mo codes when not looking for property "mo", like mo_genus("B_ESCHR_COL")
+    x <- microorganismsDT[data.table(mo = x), on = "mo", ..property][[1]]
  } else if (!is.null(reference_df)
             & all(x %in% reference_df[, 1])
-             & all(reference_df[, 2] %in% AMR::microorganisms$mo)) {
+             & all(reference_df[, 2] %in% microorganismsDT[["mo"]])) {
    # manually defined reference
    colnames(reference_df)[1] <- "x"
    colnames(reference_df)[2] <- "mo"
@ -214,24 +219,12 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
        left_join(AMR::microorganisms, by = "mo") %>%
        pull(property)
    )
-  } else if (all(x %in% AMR::microorganisms.certe[, "certe"])) {
+  } else if (all(toupper(x) %in% AMR::microorganisms.certe[, "certe"])) {
    # old Certe codes
-    suppressWarnings(
-      x <- data.frame(certe = x, stringsAsFactors = FALSE) %>%
-        left_join(AMR::microorganisms.certe, by = "certe") %>%
-        left_join(AMR::microorganisms, by = "mo") %>%
-        pull(property)
-    )
-  } else if (all(x %in% AMR::microorganisms.umcg[, "umcg"])) {
-    # old UMCG codes
-    suppressWarnings(
-      x <- data.frame(umcg = x, stringsAsFactors = FALSE) %>%
-        left_join(AMR::microorganisms.umcg, by = "umcg") %>%
-        left_join(AMR::microorganisms.certe, by = "certe") %>%
-        left_join(AMR::microorganisms, by = "mo") %>%
-        pull(property)
-    )
-  } else {
+    y <- as.data.table(AMR::microorganisms.certe)[data.table(certe = toupper(x)), on = "certe", ]
+    x <- microorganismsDT[data.table(mo = y[["mo"]]), on = "mo", ..property][[1]]
+
+  } else if (!all(x %in% microorganismsDT[[property]])) {

    x_backup <- trimws(x, which = "both")
    x_species <- paste(x_backup, "species")
@ -280,36 +273,36 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
            | toupper(x_trimmed[i]) == 'MSSA'
            | toupper(x_trimmed[i]) == 'VISA'
            | toupper(x_trimmed[i]) == 'VRSA') {
-          x[i] <- MOs[mo == 'B_STPHY_AUR', ..property][[1]][1L]
+          x[i] <- microorganismsDT[mo == 'B_STPHY_AUR', ..property][[1]][1L]
          next
        }
        if (toupper(x_trimmed[i]) == 'MRSE'
            | toupper(x_trimmed[i]) == 'MSSE') {
-          x[i] <- MOs[mo == 'B_STPHY_EPI', ..property][[1]][1L]
+          x[i] <- microorganismsDT[mo == 'B_STPHY_EPI', ..property][[1]][1L]
          next
        }
        if (toupper(x_trimmed[i]) == 'VRE') {
-          x[i] <- MOs[mo == 'B_ENTRC', ..property][[1]][1L]
+          x[i] <- microorganismsDT[mo == 'B_ENTRC', ..property][[1]][1L]
          next
        }
        if (toupper(x_trimmed[i]) == 'MRPA') {
          # multi resistant P. aeruginosa
-          x[i] <- MOs[mo == 'B_PDMNS_AER', ..property][[1]][1L]
+          x[i] <- microorganismsDT[mo == 'B_PDMNS_AER', ..property][[1]][1L]
          next
        }
        if (toupper(x_trimmed[i]) == 'CRS'
            | toupper(x_trimmed[i]) == 'CRSM') {
          # co-trim resistant S. maltophilia
-          x[i] <- MOs[mo == 'B_STNTR_MAL', ..property][[1]][1L]
+          x[i] <- microorganismsDT[mo == 'B_STNTR_MAL', ..property][[1]][1L]
          next
        }
        if (toupper(x_trimmed[i]) %in% c('PISP', 'PRSP', 'VISP', 'VRSP')) {
          # peni I, peni R, vanco I, vanco R: S. pneumoniae
-          x[i] <- MOs[mo == 'B_STRPTC_PNE', ..property][[1]][1L]
+          x[i] <- microorganismsDT[mo == 'B_STRPTC_PNE', ..property][[1]][1L]
          next
        }
        if (toupper(x_trimmed[i]) %like% '^G[ABCDFGHK]S$') {
-          x[i] <- MOs[mo == gsub("G([ABCDFGHK])S", "B_STRPTC_GR\\1", x_trimmed[i]), ..property][[1]][1L]
+          x[i] <- microorganismsDT[mo == gsub("G([ABCDFGHK])S", "B_STRPTC_GR\\1", x_trimmed[i]), ..property][[1]][1L]
          next
        }
        # CoNS/CoPS in different languages (support for German, Dutch, Spanish, Portuguese) ----
@ -317,14 +310,14 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
            | tolower(x_trimmed[i]) %like% '[ck]oagulas[ea] negatie?[vf]'
            | tolower(x[i]) %like% '[ck]o?ns[^a-z]?$') {
          # coerce S. coagulase negative
-          x[i] <- MOs[mo == 'B_STPHY_CNS', ..property][[1]][1L]
+          x[i] <- microorganismsDT[mo == 'B_STPHY_CNS', ..property][[1]][1L]
          next
        }
        if (tolower(x[i]) %like% '[ck]oagulas[ea] positie?[vf]'
            | tolower(x_trimmed[i]) %like% '[ck]oagulas[ea] positie?[vf]'
            | tolower(x[i]) %like% '[ck]o?ps[^a-z]?$') {
          # coerce S. coagulase positive
-          x[i] <- MOs[mo == 'B_STPHY_CPS', ..property][[1]][1L]
+          x[i] <- microorganismsDT[mo == 'B_STPHY_CPS', ..property][[1]][1L]
          next
        }
      }
@ -332,14 +325,14 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
      # FIRST TRY FULLNAMES AND CODES
      # if only genus is available, return only genus
      if (all(!c(x[i], x_trimmed[i]) %like% " ")) {
-        found <- MOs[tolower(fullname) %in% tolower(c(x_species[i], x_trimmed_species[i])), ..property][[1]]
+        found <- microorganismsDT[tolower(fullname) %in% tolower(c(x_species[i], x_trimmed_species[i])), ..property][[1]]
        if (length(found) > 0) {
          x[i] <- found[1L]
          next
        }
        if (nchar(x_trimmed[i]) > 4) {
          # not when abbr is esco, stau, klpn, etc.
-          found <- MOs[tolower(fullname) %like% gsub(" ", ".*", x_trimmed_species[i], fixed = TRUE), ..property][[1]]
+          found <- microorganismsDT[tolower(fullname) %like% gsub(" ", ".*", x_trimmed_species[i], fixed = TRUE), ..property][[1]]
          if (length(found) > 0) {
            x[i] <- found[1L]
            next
@ -348,20 +341,22 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
      }

      # TRY OTHER SOURCES ----
-      if (x_backup[i] %in% AMR::microorganisms.certe[, 1]) {
-        x[i] <- MOs[mo == AMR::microorganisms.certe[AMR::microorganisms.certe[, 1] == x_backup[i], 2], ..property][[1]][1L]
-        next
+      if (x_backup[i] %in% AMR::microorganisms.certe$certe) {
+        x[i] <- microorganismsDT[mo == AMR::microorganisms.certe[AMR::microorganisms.certe[, 1] == x_backup[i], 2], ..property][[1]][1L]
+        # x[i] <- exec_as.mo(x = AMR::microorganisms.certe[AMR::microorganisms.certe$certe == x_backup[i], "mo"],
+        #                    property = property)
+        # next
      }
      if (x_backup[i] %in% AMR::microorganisms.umcg[, 1]) {
        ref_certe <- AMR::microorganisms.umcg[AMR::microorganisms.umcg[, 1] == x_backup[i], 2]
        ref_mo <- AMR::microorganisms.certe[AMR::microorganisms.certe[, 1] == ref_certe, 2]
-        x[i] <- MOs[mo == ref_mo, ..property][[1]][1L]
+        x[i] <- microorganismsDT[mo == ref_mo, ..property][[1]][1L]
        next
      }
      if (x_backup[i] %in% reference_df[, 1]) {
        ref_mo <- reference_df[reference_df[, 1] == x_backup[i], 2]
-        if (ref_mo %in% MOs[, mo]) {
-          x[i] <- MOs[mo == ref_mo, ..property][[1]][1L]
+        if (ref_mo %in% microorganismsDT[, mo]) {
+          x[i] <- microorganismsDT[mo == ref_mo, ..property][[1]][1L]
          next
        } else {
          warning("Value '", x_backup[i], "' was found in reference_df, but '", ref_mo, "' is not a valid MO code.", call. = FALSE)
@ -369,20 +364,19 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
      }

      # TRY FIRST THOUSAND MOST PREVALENT IN HUMAN INFECTIONS ----
-
-      found <- MOs_mostprevalent[tolower(fullname) %in% tolower(c(x_backup[i], x_trimmed[i])), ..property][[1]]
+      found <- microorganisms.prevDT[tolower(fullname) %in% tolower(c(x_backup[i], x_trimmed[i])), ..property][[1]]
      # most probable: is exact match in fullname
      if (length(found) > 0) {
        x[i] <- found[1L]
        next
      }
-      found <- MOs_mostprevalent[tsn == x_trimmed[i], ..property][[1]]
+      found <- microorganisms.prevDT[tsn == x_trimmed[i], ..property][[1]]
      # is a valid TSN
      if (length(found) > 0) {
        x[i] <- found[1L]
        next
      }
-      found <- MOs_mostprevalent[mo == toupper(x_backup[i]), ..property][[1]]
+      found <- microorganisms.prevDT[mo == toupper(x_backup[i]), ..property][[1]]
      # is a valid mo
      if (length(found) > 0) {
        x[i] <- found[1L]
@ -390,21 +384,21 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
      }

      # try any match keeping spaces ----
-      found <- MOs_mostprevalent[fullname %like% x_withspaces[i], ..property][[1]]
+      found <- microorganisms.prevDT[fullname %like% x_withspaces[i], ..property][[1]]
      if (length(found) > 0) {
        x[i] <- found[1L]
        next
      }

      # try any match keeping spaces, not ending with $ ----
-      found <- MOs_mostprevalent[fullname %like% x_withspaces_start[i], ..property][[1]]
+      found <- microorganisms.prevDT[fullname %like% x_withspaces_start[i], ..property][[1]]
      if (length(found) > 0) {
        x[i] <- found[1L]
        next
      }

      # try any match diregarding spaces ----
-      found <- MOs_mostprevalent[fullname %like% x[i], ..property][[1]]
+      found <- microorganisms.prevDT[fullname %like% x[i], ..property][[1]]
      if (length(found) > 0) {
        x[i] <- found[1L]
        next
@ -412,7 +406,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =

      # try fullname without start and stop regex, to also find subspecies ----
      # like "K. pneu rhino" -> "Klebsiella pneumoniae (rhinoscleromatis)" = KLEPNERH
-      found <- MOs_mostprevalent[fullname %like% x_withspaces_start[i], ..property][[1]]
+      found <- microorganisms.prevDT[fullname %like% x_withspaces_start[i], ..property][[1]]
      if (length(found) > 0) {
        x[i] <- found[1L]
        next
@ -427,7 +421,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
        x_split[i] <- paste0(x_trimmed[i] %>% substr(1, x_length / 2) %>% trimws(),
                             '.* ',
                             x_trimmed[i] %>% substr((x_length / 2) + 1, x_length) %>% trimws())
-        found <- MOs_mostprevalent[fullname %like% paste0('^', x_split[i]), ..property][[1]]
+        found <- microorganisms.prevDT[fullname %like% paste0('^', x_split[i]), ..property][[1]]
        if (length(found) > 0) {
          x[i] <- found[1L]
          next
@ -442,7 +436,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
      #   x_trimmed[i] <- trimws(x_trimmed[i], which = "both")
      # }
      # if (!is.na(x_trimmed[i])) {
-      #   found <- MOs_mostprevalent[fullname %like% x_trimmed[i], ..property][[1]]
+      #   found <- microorganisms.prevDT[fullname %like% x_trimmed[i], ..property][[1]]
      #   if (length(found) > 0) {
      #     x[i] <- found[1L]
      #     next
@ -450,25 +444,25 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
      # }

      # THEN TRY ALL OTHERS ----
-      found <- MOs_allothers[tolower(fullname) == tolower(x_backup[i]), ..property][[1]]
+      found <- microorganisms.unprevDT[tolower(fullname) == tolower(x_backup[i]), ..property][[1]]
      # most probable: is exact match in fullname
      if (length(found) > 0) {
        x[i] <- found[1L]
        next
      }
-      found <- MOs_allothers[tolower(fullname) == tolower(x_trimmed[i]), ..property][[1]]
+      found <- microorganisms.unprevDT[tolower(fullname) == tolower(x_trimmed[i]), ..property][[1]]
      # most probable: is exact match in fullname
      if (length(found) > 0) {
        x[i] <- found[1L]
        next
      }
-      found <- MOs_allothers[tsn == x_trimmed[i], ..property][[1]]
+      found <- microorganisms.unprevDT[tsn == x_trimmed[i], ..property][[1]]
      # is a valid TSN
      if (length(found) > 0) {
        x[i] <- found[1L]
        next
      }
-      found <- MOs_allothers[mo == toupper(x_backup[i]), ..property][[1]]
+      found <- microorganisms.unprevDT[mo == toupper(x_backup[i]), ..property][[1]]
      # is a valid mo
      if (length(found) > 0) {
        x[i] <- found[1L]
@ -476,21 +470,21 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
      }

      # try any match keeping spaces ----
-      found <- MOs_allothers[fullname %like% x_withspaces[i], ..property][[1]]
+      found <- microorganisms.unprevDT[fullname %like% x_withspaces[i], ..property][[1]]
      if (length(found) > 0) {
        x[i] <- found[1L]
        next
      }

      # try any match keeping spaces, not ending with $ ----
-      found <- MOs_allothers[fullname %like% x_withspaces_start[i], ..property][[1]]
+      found <- microorganisms.unprevDT[fullname %like% x_withspaces_start[i], ..property][[1]]
      if (length(found) > 0) {
        x[i] <- found[1L]
        next
      }

      # try any match diregarding spaces ----
-      found <- MOs_allothers[fullname %like% x[i], ..property][[1]]
+      found <- microorganisms.unprevDT[fullname %like% x[i], ..property][[1]]
      if (length(found) > 0) {
        x[i] <- found[1L]
        next
@ -498,7 +492,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =

      # try fullname without start and stop regex, to also find subspecies ----
      # like "K. pneu rhino" -> "Klebsiella pneumoniae (rhinoscleromatis)" = KLEPNERH
-      found <- MOs_allothers[fullname %like% x_withspaces_start[i], ..property][[1]]
+      found <- microorganisms.unprevDT[fullname %like% x_withspaces_start[i], ..property][[1]]
      if (length(found) > 0) {
        x[i] <- found[1L]
        next
@ -513,7 +507,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
        x_split[i] <- paste0(x_trimmed[i] %>% substr(1, x_length / 2) %>% trimws(),
                             '.* ',
                             x_trimmed[i] %>% substr((x_length / 2) + 1, x_length) %>% trimws())
-        found <- MOs_allothers[fullname %like% paste0('^', x_split[i]), ..property][[1]]
+        found <- microorganisms.unprevDT[fullname %like% paste0('^', x_split[i]), ..property][[1]]
        if (length(found) > 0) {
          x[i] <- found[1L]
          next
@ -528,7 +522,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
      #   x_trimmed[i] <- trimws(x_trimmed[i], which = "both")
      # }
      # if (!is.na(x_trimmed[i])) {
-      #   found <- MOs_allothers[fullname %like% x_trimmed[i], ..property][[1]]
+      #   found <- microorganisms.unprevDT[fullname %like% x_trimmed[i], ..property][[1]]
      #   if (length(found) > 0) {
      #     x[i] <- found[1L]
      #     next
@ -538,33 +532,33 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
      # MISCELLANEOUS ----

      # look for old taxonomic names ----
-      found <- MOs_old[tolower(name) == tolower(x_backup[i])
-                       | tsn == x_trimmed[i]
-                       | name %like% x_withspaces[i],]
+      found <- microorganisms.oldDT[tolower(name) == tolower(x_backup[i])
+                                    | tsn == x_trimmed[i]
+                                    | name %like% x_withspaces[i],]
      if (NROW(found) > 0) {
-        x[i] <- MOs[tsn == found[1, tsn_new], ..property][[1]]
+        x[i] <- microorganismsDT[tsn == found[1, tsn_new], ..property][[1]]
        renamed_note(name_old = found[1, name],
-                     name_new = MOs[tsn == found[1, tsn_new], fullname],
+                     name_new = microorganismsDT[tsn == found[1, tsn_new], fullname],
                     ref_old = found[1, ref],
-                     ref_new = MOs[tsn == found[1, tsn_new], ref])
+                     ref_new = microorganismsDT[tsn == found[1, tsn_new], ref])
        next
      }

      # check for uncertain results ----
      if (allow_uncertain == TRUE) {
        # (1) look again for old taxonomic names, now for G. species ----
-        found <- MOs_old[name %like% x_withspaces[i]
-                         | name %like% x_withspaces_start[i]
-                         | name %like% x[i],]
+        found <- microorganisms.oldDT[name %like% x_withspaces[i]
+                                      | name %like% x_withspaces_start[i]
+                                      | name %like% x[i],]
        if (NROW(found) > 0) {
-          x[i] <- MOs[tsn == found[1, tsn_new], ..property][[1]]
+          x[i] <- microorganismsDT[tsn == found[1, tsn_new], ..property][[1]]
          warning("Uncertain interpretation: '",
                  x_backup[i], "' -> '", found[1, name], "'",
                  call. = FALSE, immediate. = TRUE)
          renamed_note(name_old = found[1, name],
-                       name_new = MOs[tsn == found[1, tsn_new], fullname],
+                       name_new = microorganismsDT[tsn == found[1, tsn_new], fullname],
                       ref_old = found[1, ref],
-                       ref_new = MOs[tsn == found[1, tsn_new], ref])
+                       ref_new = microorganismsDT[tsn == found[1, tsn_new], ref])
          next
        }

@ -574,7 +568,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
        x[i] <- suppressWarnings(suppressMessages(as.mo(x_strip)))
        if (!is.na(x[i])) {
          warning("Uncertain interpretation: '",
-                  x_backup[i], "' -> '", MOs[mo == x[i], fullname], "' (", x[i], ")",
+                  x_backup[i], "' -> '", microorganismsDT[mo == x[i], fullname], "' (", x[i], ")",
                  call. = FALSE, immediate. = TRUE)
          next
        }
@ -599,7 +593,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
  if (Becker == TRUE | Becker == "all") {
    # See Source. It's this figure:
    # https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4187637/figure/F3/
-    MOs_staph <- MOs[genus == "Staphylococcus"]
+    MOs_staph <- microorganismsDT[genus == "Staphylococcus"]
    setkey(MOs_staph, species)
    CoNS <- MOs_staph[species %in% c("arlettae", "auricularis", "capitis",
                                     "caprae", "carnosus", "cohnii", "condimenti",
@ -617,35 +611,35 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
                                     "hyicus", "intermedius",
                                     "pseudintermedius", "pseudointermedius",
                                     "schleiferi"), ..property][[1]]
-    x[x %in% CoNS] <- MOs[mo == 'B_STPHY_CNS', ..property][[1]][1L]
-    x[x %in% CoPS] <- MOs[mo == 'B_STPHY_CPS', ..property][[1]][1L]
+    x[x %in% CoNS] <- microorganismsDT[mo == 'B_STPHY_CNS', ..property][[1]][1L]
+    x[x %in% CoPS] <- microorganismsDT[mo == 'B_STPHY_CPS', ..property][[1]][1L]
    if (Becker == "all") {
-      x[x == MOs[mo == 'B_STPHY_AUR', ..property][[1]][1L]] <- MOs[mo == 'B_STPHY_CPS', ..property][[1]][1L]
+      x[x == microorganismsDT[mo == 'B_STPHY_AUR', ..property][[1]][1L]] <- microorganismsDT[mo == 'B_STPHY_CPS', ..property][[1]][1L]
    }
  }

  # Lancefield ----
  if (Lancefield == TRUE | Lancefield == "all") {
    # group A - S. pyogenes
-    x[x == MOs[mo == 'B_STRPTC_PYO', ..property][[1]][1L]] <- MOs[mo == 'B_STRPTC_GRA', ..property][[1]][1L]
+    x[x == microorganismsDT[mo == 'B_STRPTC_PYO', ..property][[1]][1L]] <- microorganismsDT[mo == 'B_STRPTC_GRA', ..property][[1]][1L]
    # group B - S. agalactiae
-    x[x == MOs[mo == 'B_STRPTC_AGA', ..property][[1]][1L]] <- MOs[mo == 'B_STRPTC_GRB', ..property][[1]][1L]
+    x[x == microorganismsDT[mo == 'B_STRPTC_AGA', ..property][[1]][1L]] <- microorganismsDT[mo == 'B_STRPTC_GRB', ..property][[1]][1L]
    # group C
-    S_groupC <- MOs %>% filter(genus == "Streptococcus",
-                               species %in% c("equisimilis", "equi",
-                                              "zooepidemicus", "dysgalactiae")) %>%
+    S_groupC <- microorganismsDT %>% filter(genus == "Streptococcus",
+                                            species %in% c("equisimilis", "equi",
+                                                           "zooepidemicus", "dysgalactiae")) %>%
      pull(property)
-    x[x %in% S_groupC] <- MOs[mo == 'B_STRPTC_GRC', ..property][[1]][1L]
+    x[x %in% S_groupC] <- microorganismsDT[mo == 'B_STRPTC_GRC', ..property][[1]][1L]
    if (Lancefield == "all") {
      # all Enterococci
-      x[x %like% "^(Enterococcus|B_ENTRC)"] <- MOs[mo == 'B_STRPTC_GRD', ..property][[1]][1L]
+      x[x %like% "^(Enterococcus|B_ENTRC)"] <- microorganismsDT[mo == 'B_STRPTC_GRD', ..property][[1]][1L]
    }
    # group F - S. anginosus
-    x[x == MOs[mo == 'B_STRPTC_ANG', ..property][[1]][1L]] <- MOs[mo == 'B_STRPTC_GRF', ..property][[1]][1L]
+    x[x == microorganismsDT[mo == 'B_STRPTC_ANG', ..property][[1]][1L]] <- microorganismsDT[mo == 'B_STRPTC_GRF', ..property][[1]][1L]
    # group H - S. sanguinis
-    x[x == MOs[mo == 'B_STRPTC_SAN', ..property][[1]][1L]] <- MOs[mo == 'B_STRPTC_GRH', ..property][[1]][1L]
+    x[x == microorganismsDT[mo == 'B_STRPTC_SAN', ..property][[1]][1L]] <- microorganismsDT[mo == 'B_STRPTC_GRH', ..property][[1]][1L]
    # group K - S. salivarius
-    x[x == MOs[mo == 'B_STRPTC_SAL', ..property][[1]][1L]] <- MOs[mo == 'B_STRPTC_GRK', ..property][[1]][1L]
+    x[x == microorganismsDT[mo == 'B_STRPTC_SAL', ..property][[1]][1L]] <- microorganismsDT[mo == 'B_STRPTC_GRK', ..property][[1]][1L]
  }

  # comply to x, which is also unique and without empty values
@ -700,7 +694,7 @@ print.mo <- function(x, ...) {
 #' @export
 #' @noRd
 as.data.frame.mo <- function (x, ...) {
-  # same as as.data.frame.character but with removed stringsAsFactors
+  # same as as.data.frame.character but with removed stringsAsFactors, since it will be class "mo"
  nm <- paste(deparse(substitute(x), width.cutoff = 500L),
              collapse = " ")
  if (!"nm" %in% names(list(...))) {
--- a/R/mo_property.R
+++ b/R/mo_property.R
@ -383,7 +383,9 @@ mo_validate <- function(x, property, ...) {
    Lancefield <- FALSE
  }

-  if (!all(x %in% AMR::microorganisms[, property]) | Becker %in% c(TRUE, "all") | Lancefield == TRUE) {
+  if (!all(x %in% microorganismsDT[[property]])
+      | Becker %in% c(TRUE, "all")
+      | Lancefield %in% c(TRUE, "all")) {
    exec_as.mo(x, property = property, ...)
  } else {
    x
--- a/R/zzz.R
+++ b/R/zzz.R
@ -50,21 +50,25 @@ NULL

 .onAttach <- function(libname, pkgname) {
  # save data.tables to improve speed of as.mo:
-  MOs <- data.table::as.data.table(AMR::microorganisms)
-  data.table::setkey(MOs, prevalence, tsn)

-  base::assign(x = "MOs",
-         value = MOs,
-         envir = base::as.environment("package:AMR"))
-  base::assign(x = "MOs_mostprevalent",
-         value = MOs[prevalence != 9999,],
-         envir = base::as.environment("package:AMR"))
-  base::assign(x = "MOs_allothers",
-         value = MOs[prevalence == 9999,],
-         envir = base::as.environment("package:AMR"))
+  # microorganismsDT <- data.table::as.data.table(AMR::microorganisms)
+  # microorganisms.oldDT <- data.table::as.data.table(AMR::microorganisms.old)
+  #
+  # data.table::setkey(microorganismsDT, prevalence, tsn)
+  # data.table::setkey(microorganisms.oldDT, tsn, name)

-  base::assign(x = "MOs_old",
-         value = data.table::as.data.table(AMR::microorganisms.old),
-         envir = base::as.environment("package:AMR"))
+  base::assign(x = "microorganismsDT",
+               value = microorganismsDT,
+               envir = base::as.environment("package:AMR"))
+  base::assign(x = "microorganisms.prevDT",
+               value = microorganismsDT[prevalence != 9999,],
+               envir = base::as.environment("package:AMR"))
+  base::assign(x = "microorganisms.unprevDT",
+               value = microorganismsDT[prevalence == 9999,],
+               envir = base::as.environment("package:AMR"))
+
+  base::assign(x = "microorganisms.oldDT",
+               value = microorganisms.oldDT,
+               envir = base::as.environment("package:AMR"))

 }