speed improvement as.mo, freq title

2025-07-19 01:03:17 +02:00 · 2018-10-31 12:10:49 +01:00
parent 3d4c4c678b
commit 9cd4ab928a
27 changed files with 289 additions and 224 deletions
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@ -8,17 +8,20 @@ stages:
    - build
    - deploy

+variables:
+  WARNINGS_ARE_ERRORS: 1
+
 R 3:
    image: rocker/r-ver:3 # rocker/r-base
    stage: build
    allow_failure: false
    script:
+    - Rscript -e "options()" # to check for variables to circumvent testthat::skip_on_cran
    - apt-get update
    # install dependencies for package
    - apt-get install --yes --no-install-recommends libxml2-dev libssl-dev libcurl4-openssl-dev zlib1g-dev
    - R -e 'install.packages(c("devtools", "rlang"))'
    - R -e 'devtools::install_deps(dependencies = c("Depends", "Imports", "Suggests"), repos = "https://cran.rstudio.com")'
-    - R -e 'print(as.data.frame(installed.packages(), row.names = FALSE)[, c("Package", "Version")])'
    # remove vignettes folder and get VignetteBuilder field out of DESCRIPTION file
    - rm -rf vignettes
    - R -e 'd <- read.dcf("DESCRIPTION"); d[, colnames(d) == "VignetteBuilder"] <- NA; write.dcf(d, "DESCRIPTION")'
@ -27,13 +30,17 @@ R 3:
    - R CMD check "${PKG_FILE_NAME}" --no-build-vignettes --no-manual --as-cran
    # code coverage
    - apt-get install --yes git
-    - R -e 'covr::codecov(token = "50ffa0aa-fee0-4f8b-a11d-8c7edc6d32ca")'
+    - Rscript -e 'cc <- covr::covr::package_coverage(); covr::codecov(coverage = cc, token = "50ffa0aa-fee0-4f8b-a11d-8c7edc6d32ca"); cat("Code coverage:", covr::percent_coverage(cc))'
+    coverage: '/Code coverage: \d+\.\d+/'
    artifacts:
      paths:
-      - '*.Rcheck/*'
-      name: 'Latest Rcheck log'
+      - '*.Rcheck\**\*.log'
+      - '*.Rcheck\**\*.out'
+      - '*.Rcheck\**\*.fail'
+      - '*.Rcheck\**\*.Rout'
+      name: 'Rcheck log'
      expire_in: '1 month'
-      
+
 R 3.4:
    image: rocker/r-ver:3.4 # rocker/r-base
    stage: build
@ -44,14 +51,13 @@ R 3.4:
    - apt-get install --yes --no-install-recommends libxml2-dev libssl-dev libcurl4-openssl-dev zlib1g-dev
    - R -e 'install.packages("devtools")'
    - R -e 'devtools::install_deps(dependencies = c("Depends", "Imports", "Suggests"))'
-    - R -e 'print(as.data.frame(installed.packages(), row.names = FALSE)[, c("Package", "Version")])'
    # remove vignettes folder and get VignetteBuilder field out of DESCRIPTION file
    - rm -rf vignettes
    - R -e 'd <- read.dcf("DESCRIPTION"); d[, colnames(d) == "VignetteBuilder"] <- NA; write.dcf(d, "DESCRIPTION")'
    - R CMD build . --no-build-vignettes --no-manual
    - PKG_FILE_NAME=$(ls -1t *.tar.gz | head -n 1)
    - R CMD check "${PKG_FILE_NAME}" --no-build-vignettes --no-manual --as-cran
-    
+
 R 3.5:
    image: rocker/r-ver:3.5 # rocker/r-base
    stage: build
@ -62,49 +68,15 @@ R 3.5:
    - apt-get install --yes --no-install-recommends libxml2-dev libssl-dev libcurl4-openssl-dev zlib1g-dev
    - R -e 'install.packages("devtools")'
    - R -e 'devtools::install_deps(dependencies = c("Depends", "Imports", "Suggests"))'
-    - R -e 'print(as.data.frame(installed.packages(), row.names = FALSE)[, c("Package", "Version")])'
    # remove vignettes folder and get VignetteBuilder field out of DESCRIPTION file
    - rm -rf vignettes
    - R -e 'd <- read.dcf("DESCRIPTION"); d[, colnames(d) == "VignetteBuilder"] <- NA; write.dcf(d, "DESCRIPTION")'
    - R CMD build . --no-build-vignettes --no-manual
    - PKG_FILE_NAME=$(ls -1t *.tar.gz | head -n 1)
    - R CMD check "${PKG_FILE_NAME}" --no-build-vignettes --no-manual --as-cran
-    
-#R latest:
- #   image: rocker/r-ver:latest # rocker/r-base
-  #  stage: build
- #   allow_failure: true
-  #  script:
-  #  - apt-get update
-    # install dependencies for package
-#    - apt-get install --yes --no-install-recommends libxml2-dev libssl-dev libcurl4-openssl-dev zlib1g-dev
-   # - R -e 'install.packages("devtools")'
-  #  - R -e 'devtools::install_deps(dependencies = c("Depends", "Imports", "Suggests"))'
-   # - R -e 'print(as.data.frame(installed.packages(), row.names = FALSE)[, c("Package", "Version")])'
-    # remove vignettes folder and get VignetteBuilder field out of DESCRIPTION file
- #   - rm -rf vignettes
-   # - R -e 'd <- read.dcf("DESCRIPTION"); d[, colnames(d) == "VignetteBuilder"] <- NA; write.dcf(d, "DESCRIPTION")'
-#    - R CMD build . --no-build-vignettes --no-manual
-   # - PKG_FILE_NAME=$(ls -1t *.tar.gz | head -n 1)
-   # - R CMD check "${PKG_FILE_NAME}" --no-build-vignettes --no-manual --as-cran
-    
-# R devel:
-  #  image: rocker/r-ver:devel # rocker/r-base
-  #  stage: build
-   # allow_failure: true
-  #  script:
-   # - apt-get update
-    # install dependencies for package
-    #- apt-get install --yes --no-install-recommends libxml2-dev libssl-dev libcurl4-openssl-dev zlib1g-dev
-    #- R -e 'install.packages("devtools")'
- #   - R -e 'devtools::install_deps(dependencies = c("Depends", "Imports", "Suggests"))'
-    #- R -e 'print(as.data.frame(installed.packages(), row.names = FALSE)[, c("Package", "Version")])'
-    # remove vignettes folder and get VignetteBuilder field out of DESCRIPTION file
-    #- rm -rf vignettes
-  #  - R -e 'd <- read.dcf("DESCRIPTION"); d[, colnames(d) == "VignetteBuilder"] <- NA; write.dcf(d, "DESCRIPTION")'
-    
+
 coverage_job:
    # image: rocker/r-ver:3
    stage: deploy
    script:
-    - echo "future packaging part"
+    - ls
--- a/4
+++ b/4
@ -1,6 +1,6 @@
 Package: AMR
-Version: 0.4.0.9007
-Date: 2018-10-29
+Version: 0.4.0.9008
+Date: 2018-10-31
 Title: Antimicrobial Resistance Analysis
 Authors@R: c(
    person(
--- a/NEWS.md
+++ b/NEWS.md
@ -3,6 +3,7 @@
 #### New
 * Repository moved to GitLab: https://gitlab.com/msberends/AMR
 * Function `count_all` to get all available isolates (that like all `portion_*` and `count_*` functions also supports `summarise` and `group_by`), the old `n_rsi` is now an alias of `count_all`
+* Data sets `microorganismsDT`, `microorganisms.prevDT`, `microorganisms.unprevDT` and `microorganisms.oldDT` to improve the speed of `as.mo`. They are for reference only, since they are primarily for internal use of `as.mo`.

 #### Changed
 * Big changes to the `EUCAST_rules` function:
@ -10,12 +11,12 @@
  * New parameter `rules` to specify which rules should be applied (expert rules, breakpoints, others or all)
  * New parameter `verbose` which can be set to `TRUE` to get very specific messages about which columns and rows were affected
  * Better error handling when rules cannot be applied (i.e. new values could not be inserted)
-  * The amount of affected values will now only be measured once per row/column combination
+  * The number of affected values will now only be measured once per row/column combination
  * Data set `septic_patients` now reflects these changes
-* Tremendous speed improvement for `as.mo` (and consequently all `mo_*` functions), as empty values wil be ignored a priori
+* Tremendous speed improvement for `as.mo` (and subsequently all `mo_*` functions), as empty values wil be ignored *a priori*
 * Fewer than 3 characters as input for `as.mo` will return NA
 * Added parameter `combine_IR` (TRUE/FALSE) to functions `portion_df` and `count_df`, to indicate that all values of I and R must be merged into one, so the output only consists of S vs. IR (susceptible vs. non-susceptible)
-* Fix for `portion_*(..., as_percent = TRUE)` when minimal amount of isolates would not be met
+* Fix for `portion_*(..., as_percent = TRUE)` when minimal number of isolates would not be met
 * Added parameter `also_single_tested` for `portion_*` and `count_*` functions to also include cases where not all antibiotics were tested but at least one of the tested antibiotics includes the target antimicribial interpretation, see `?portion`
 * Using `portion_*` functions now throws a warning when total available isolate is below parameter `minimum`
 * Functions `as.mo`, `as.rsi`, `as.mic`, `as.atc` and `freq` will not set package name as attribute anymore
@ -26,6 +27,7 @@
  * Gained `na` parameter, to choose with character to print for empty values
  * Support for class `difftime`
  * New parameter `header` to turn it off (default when `markdown = TRUE`)
+  * New parameter `title` to replace the automatically set title
 * `first_isolate` now tries to find columns to use as input when parameters are left blank
 * Improvement for MDRO algorithm
 * Data set `septic_patients` is now a `data.frame`, not a tibble anymore
--- a/R/data.R
+++ b/R/data.R
@ -19,7 +19,7 @@
 #' Data set with 423 antibiotics
 #'
 #' A data set containing all antibiotics with a J0 code and some other antimicrobial agents, with their DDDs. Except for trade names and abbreviations, all properties were downloaded from the WHO, see Source.
-#' @format A \code{\link{tibble}} with 423 observations and 18 variables:
+#' @format A \code{\link{data.frame}} with 423 observations and 18 variables:
 #' \describe{
 #'   \item{\code{atc}}{ATC code, like \code{J01CR02}}
 #'   \item{\code{certe}}{Certe code, like \code{amcl}}
@ -139,7 +139,7 @@
 #'   \item{\code{subkingdom}}{Taxonomic subkingdom of the microorganism as found in ITIS, see Source}
 #'   \item{\code{gramstain}}{Gram of microorganism, like \code{"Gram negative"}}
 #'   \item{\code{type}}{Type of microorganism, like \code{"Bacteria"} and \code{"Fungi"}}
-#'   \item{\code{prevalence}}{A rounded integer based on prevalence of the microorganism. Used internally by \code{\link{as.mo}}, otherwise quite meaningless.}
+#'   \item{\code{prevalence}}{An integer based on estimated prevalence of the microorganism in humans. Used internally by \code{\link{as.mo}}, otherwise quite meaningless. It has a value of 25 for manually added items and a value of 1000 for all unprevalent microorganisms whose genus was somewhere in the top 250 (with another species).}
 #'   \item{\code{ref}}{Author(s) and year of concerning publication as found in ITIS, see Source}
 #' }
 #' @source [3] Integrated Taxonomic Information System (ITIS) on-line database, \url{https://www.itis.gov}.
@ -164,7 +164,7 @@
 #' Translation table for UMCG
 #'
 #' A data set containing all bacteria codes of UMCG MMB. These codes can be joined to data with an ID from \code{\link{microorganisms}$mo} (using \code{\link{left_join_microorganisms}}). GLIMS codes can also be translated to valid \code{MO}s with \code{\link{guess_mo}}.
-#' @format A \code{\link{tibble}} with 1,095 observations and 2 variables:
+#' @format A \code{\link{data.frame}} with 1,095 observations and 2 variables:
 #' \describe{
 #'   \item{\code{umcg}}{Code of microorganism according to UMCG MMB}
 #'   \item{\code{certe}}{Code of microorganism according to Certe MMB}
@ -175,7 +175,7 @@
 #' Translation table for Certe
 #'
 #' A data set containing all bacteria codes of Certe MMB. These codes can be joined to data with an ID from \code{\link{microorganisms}$mo} (using \code{\link{left_join_microorganisms}}). GLIMS codes can also be translated to valid \code{MO}s with \code{\link{guess_mo}}.
-#' @format A \code{\link{tibble}} with 2,665 observations and 2 variables:
+#' @format A \code{\link{data.frame}} with 2,665 observations and 2 variables:
 #' \describe{
 #'   \item{\code{certe}}{Code of microorganism according to Certe MMB}
 #'   \item{\code{mo}}{Code of microorganism in \code{\link{microorganisms}}}
@ -239,3 +239,31 @@
 #'   summarise(n = n_rsi(amcl),
 #'             p = portion_IR(amcl, minimum = 20))
 "septic_patients"
+
+#' Supplementary Data
+#'
+#' These \code{\link{data.table}s} are transformed from the \code{\link{microorganisms}} and \code{\link{microorganisms}} data sets to improve speed of \code{\link{as.mo}}. They are meant for internal use only, and are only mentioned here for reference.
+#' @rdname supplementary_data
+#' @name supplementary_data
+# # Renew data:
+# microorganismsDT <- data.table::as.data.table(AMR::microorganisms)
+# # sort on (1) bacteria, (2) fungi, (3) protozoa and then human pathogenic prevalence and then TSN:
+# data.table::setkey(microorganismsDT, type, prevalence, fullname)
+# microorganisms.prevDT <- microorganismsDT[prevalence == 9999,]
+# microorganisms.unprevDT <- microorganismsDT[prevalence != 9999,]
+# microorganisms.oldDT <- data.table::as.data.table(AMR::microorganisms.old)
+# data.table::setkey(microorganisms.oldDT, tsn, name)
+# devtools::use_data(microorganismsDT, overwrite = TRUE)
+# devtools::use_data(microorganisms.prevDT, overwrite = TRUE)
+# devtools::use_data(microorganisms.unprevDT, overwrite = TRUE)
+# devtools::use_data(microorganisms.oldDT, overwrite = TRUE)
+"microorganismsDT"
+
+#' @rdname supplementary_data
+"microorganisms.prevDT"
+
+#' @rdname supplementary_data
+"microorganisms.unprevDT"
+
+#' @rdname supplementary_data
+"microorganisms.oldDT"
--- a/R/freq.R
+++ b/R/freq.R
@ -29,6 +29,7 @@
 #' @param digits how many significant digits are to be used for numeric values in the header (not for the items themselves, that depends on \code{\link{getOption}("digits")})
 #' @param quote a logical value indicating whether or not strings should be printed with surrounding quotes
 #' @param header a logical value indicating whether an informative header should be printed
+#' @param title text to show above frequency table, at default to tries to coerce from the variables passed to \code{x}
 #' @param na a character string to should be used to show empty (\code{NA}) values (only useful when \code{na.rm = FALSE})
 #' @param sep a character string to separate the terms when selecting multiple columns
 #' @param f a frequency table
@ -151,6 +152,7 @@ frequency_tbl <- function(x,
                          digits = 2,
                          quote = FALSE,
                          header = !markdown,
+                          title = NULL,
                          na = "<NA>",
                          sep = " ") {

@ -395,6 +397,11 @@ frequency_tbl <- function(x,
    tbl_format <- 'pandoc'
  }

+  if (!is.null(title)) {
+    x.name <- trimws(gsub("^Frequency table of", "", title[1L], ignore.case = TRUE))
+    cols <- NULL
+  }
+
  structure(.Data = df,
            class = c('frequency_tbl', class(df)),
            opt = list(data = x.name,
@ -522,7 +529,7 @@ print.frequency_tbl <- function(x, nmax = getOption("max.print.freq", default =
    }
  }

-  title <- paste("Frequency table", title)
+  title <- paste("Frequency table", trimws(title))

  # bold title
  if (opt$tbl_format == "pandoc") {
--- a/R/globals.R
+++ b/R/globals.R
@ -49,6 +49,10 @@ globalVariables(c(".",
                  "mic",
                  "microorganisms",
                  "microorganisms.old",
+                  "microorganismsDT",
+                  "microorganisms.prevDT",
+                  "microorganisms.unprevDT",
+                  "microorganisms.oldDT",
                  "mo",
                  "mo.old",
                  "n",
--- a/R/mo.R
+++ b/R/mo.R
@ -49,7 +49,15 @@
 #'
 #' Use the \code{\link{mo_property}} functions to get properties based on the returned code, see Examples.
 #'
-#' This function uses Artificial Intelligence (AI) to help getting more logical results, based on type of input and known prevalence of human pathogens. For example:
+#' This function uses Artificial Intelligence (AI) to help getting fast and logical results. It tries to find matches in this order:
+#' \itemize{
+#'   \item{Taxonomic kingdom: it first searches in bacteria, then fungi, then protozoa}
+#'   \item{Human pathogenic prevalence: it first searches in more prevalent microorganisms, then less prevalent ones}
+#'   \item{Valid MO codes and full names: it first searches in already valid MO code and genus/species combinations}
+#'   \item{Breakdown of input values: from here it starts to breakdown input values to find possible matches}
+#' }
+#'
+#' A couple of effects because of these rules
 #' \itemize{
 #'   \item{\code{"E. coli"} will return the ID of \emph{Escherichia coli} and not \emph{Entamoeba coli}, although the latter would alphabetically come first}
 #'   \item{\code{"H. influenzae"} will return the ID of \emph{Haemophilus influenzae} and not \emph{Haematobacter influenzae} for the same reason}
@ -63,10 +71,11 @@
 #' \if{html}{\figure{itis_logo.jpg}{options: height=60px style=margin-bottom:5px} \cr}
 #' This package contains the \strong{complete microbial taxonomic data} (with all  seven taxonomic ranks - from subkingdom to subspecies) from the publicly available Integrated Taxonomic Information System (ITIS, \url{https://www.itis.gov}).
 #'
-#' All (sub)species from the taxonomic kingdoms Bacteria, Fungi and Protozoa are included in this package, as well as all previously accepted names known to ITIS. Furthermore, the responsible authors and year of publication are available. This allows users to use authoritative taxonomic information for their data analysis on any microorganism, not only human pathogens.
+#' All (sub)species from the \strong{taxonomic kingdoms Bacteria, Fungi and Protozoa are included in this package}, as well as all previously accepted names known to ITIS. Furthermore, the responsible authors and year of publication are available. This \strong{allows users to use authoritative taxonomic information} for their data analysis on any microorganism, not only human pathogens. It also helps to \strong{quickly determine the Gram stain of bacteria}, since all bacteria are classified into subkingdom Negibacteria or Posibacteria.
 #'
 #' ITIS is a partnership of U.S., Canadian, and Mexican agencies and taxonomic specialists [3].
-#  (source as a section, so it can be inherited by other man pages:)
+#'
+#  (source as a section, so it can be inherited by other man pages)
 #' @section Source:
 #' [1] Becker K \emph{et al.} \strong{Coagulase-Negative Staphylococci}. 2014. Clin Microbiol Rev. 27(4): 870–926. \url{https://dx.doi.org/10.1128/CMR.00109-13}
 #'
@ -129,9 +138,10 @@
 #'   mutate(mo = guess_mo(paste(genus, species)))
 #' }
 as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = FALSE, reference_df = NULL) {
-  exec_as.mo(x = x, Becker = Becker, Lancefield = Lancefield,
-             allow_uncertain = allow_uncertain, reference_df = reference_df,
-             property = "mo")
+  structure(mo_validate(x = x, property = "mo",
+                        Becker = Becker, Lancefield = Lancefield,
+                        allow_uncertain = allow_uncertain, reference_df = reference_df),
+            class = "mo")
 }

 #' @rdname as.mo
@ -147,8 +157,15 @@ is.mo <- function(x) {
 guess_mo <- as.mo

 #' @importFrom dplyr %>% pull left_join
-#' @importFrom data.table as.data.table setkey
+#' @importFrom data.table data.table as.data.table setkey
 exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = FALSE, reference_df = NULL, property = "mo") {
+
+  # These data.tables are available as data sets when the AMR package is loaded:
+  #   microorganismsDT        # this one is sorted by kingdom (B<F<P), prevalence, TSN
+  #   microorganisms.prevDT   # same as microorganismsDT, but with prevalence != 9999
+  #   microorganisms.unprevDT # same as microorganismsDT, but with prevalence == 9999
+  #   microorganisms.oldDT    # old taxonomic names, sorted by name (genus+species), TSN
+
  if (NCOL(x) == 2) {
    # support tidyverse selection like: df %>% select(colA, colB)
    # paste these columns together
@ -176,12 +193,6 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
  # remove empty values (to later fill them in again)
  x <- x[!is.na(x) & !is.null(x) & !identical(x, "")]

-  # These data.tables are available because of .onAttach:
-  #   MOs
-  #   MOs_mostprevalent
-  #   MOs_allothers
-  #   MOs_old
-
  # defined df to check for
  if (!is.null(reference_df)) {
    if (!is.data.frame(reference_df) | NCOL(reference_df) < 2) {
@ -193,18 +204,12 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
    )
  }

-  if (all(x %in% AMR::microorganisms[, property])) {
-    # already existing mo
-  } else if (all(x %in% AMR::microorganisms[, "mo"])) {
-    # existing mo codes when not looking for property "mo"
-    suppressWarnings(
-      x <- data.frame(mo = x, stringsAsFactors = FALSE) %>%
-        left_join(AMR::microorganisms, by = "mo") %>%
-        pull(property)
-    )
+  if (all(x %in% microorganismsDT[["mo"]])) {
+    # existing mo codes when not looking for property "mo", like mo_genus("B_ESCHR_COL")
+    x <- microorganismsDT[data.table(mo = x), on = "mo", ..property][[1]]
  } else if (!is.null(reference_df)
             & all(x %in% reference_df[, 1])
-             & all(reference_df[, 2] %in% AMR::microorganisms$mo)) {
+             & all(reference_df[, 2] %in% microorganismsDT[["mo"]])) {
    # manually defined reference
    colnames(reference_df)[1] <- "x"
    colnames(reference_df)[2] <- "mo"
@ -214,24 +219,12 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
        left_join(AMR::microorganisms, by = "mo") %>%
        pull(property)
    )
-  } else if (all(x %in% AMR::microorganisms.certe[, "certe"])) {
+  } else if (all(toupper(x) %in% AMR::microorganisms.certe[, "certe"])) {
    # old Certe codes
-    suppressWarnings(
-      x <- data.frame(certe = x, stringsAsFactors = FALSE) %>%
-        left_join(AMR::microorganisms.certe, by = "certe") %>%
-        left_join(AMR::microorganisms, by = "mo") %>%
-        pull(property)
-    )
-  } else if (all(x %in% AMR::microorganisms.umcg[, "umcg"])) {
-    # old UMCG codes
-    suppressWarnings(
-      x <- data.frame(umcg = x, stringsAsFactors = FALSE) %>%
-        left_join(AMR::microorganisms.umcg, by = "umcg") %>%
-        left_join(AMR::microorganisms.certe, by = "certe") %>%
-        left_join(AMR::microorganisms, by = "mo") %>%
-        pull(property)
-    )
-  } else {
+    y <- as.data.table(AMR::microorganisms.certe)[data.table(certe = toupper(x)), on = "certe", ]
+    x <- microorganismsDT[data.table(mo = y[["mo"]]), on = "mo", ..property][[1]]
+
+  } else if (!all(x %in% microorganismsDT[[property]])) {

    x_backup <- trimws(x, which = "both")
    x_species <- paste(x_backup, "species")
@ -280,36 +273,36 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
            | toupper(x_trimmed[i]) == 'MSSA'
            | toupper(x_trimmed[i]) == 'VISA'
            | toupper(x_trimmed[i]) == 'VRSA') {
-          x[i] <- MOs[mo == 'B_STPHY_AUR', ..property][[1]][1L]
+          x[i] <- microorganismsDT[mo == 'B_STPHY_AUR', ..property][[1]][1L]
          next
        }
        if (toupper(x_trimmed[i]) == 'MRSE'
            | toupper(x_trimmed[i]) == 'MSSE') {
-          x[i] <- MOs[mo == 'B_STPHY_EPI', ..property][[1]][1L]
+          x[i] <- microorganismsDT[mo == 'B_STPHY_EPI', ..property][[1]][1L]
          next
        }
        if (toupper(x_trimmed[i]) == 'VRE') {
-          x[i] <- MOs[mo == 'B_ENTRC', ..property][[1]][1L]
+          x[i] <- microorganismsDT[mo == 'B_ENTRC', ..property][[1]][1L]
          next
        }
        if (toupper(x_trimmed[i]) == 'MRPA') {
          # multi resistant P. aeruginosa
-          x[i] <- MOs[mo == 'B_PDMNS_AER', ..property][[1]][1L]
+          x[i] <- microorganismsDT[mo == 'B_PDMNS_AER', ..property][[1]][1L]
          next
        }
        if (toupper(x_trimmed[i]) == 'CRS'
            | toupper(x_trimmed[i]) == 'CRSM') {
          # co-trim resistant S. maltophilia
-          x[i] <- MOs[mo == 'B_STNTR_MAL', ..property][[1]][1L]
+          x[i] <- microorganismsDT[mo == 'B_STNTR_MAL', ..property][[1]][1L]
          next
        }
        if (toupper(x_trimmed[i]) %in% c('PISP', 'PRSP', 'VISP', 'VRSP')) {
          # peni I, peni R, vanco I, vanco R: S. pneumoniae
-          x[i] <- MOs[mo == 'B_STRPTC_PNE', ..property][[1]][1L]
+          x[i] <- microorganismsDT[mo == 'B_STRPTC_PNE', ..property][[1]][1L]
          next
        }
        if (toupper(x_trimmed[i]) %like% '^G[ABCDFGHK]S$') {
-          x[i] <- MOs[mo == gsub("G([ABCDFGHK])S", "B_STRPTC_GR\\1", x_trimmed[i]), ..property][[1]][1L]
+          x[i] <- microorganismsDT[mo == gsub("G([ABCDFGHK])S", "B_STRPTC_GR\\1", x_trimmed[i]), ..property][[1]][1L]
          next
        }
        # CoNS/CoPS in different languages (support for German, Dutch, Spanish, Portuguese) ----
@ -317,14 +310,14 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
            | tolower(x_trimmed[i]) %like% '[ck]oagulas[ea] negatie?[vf]'
            | tolower(x[i]) %like% '[ck]o?ns[^a-z]?$') {
          # coerce S. coagulase negative
-          x[i] <- MOs[mo == 'B_STPHY_CNS', ..property][[1]][1L]
+          x[i] <- microorganismsDT[mo == 'B_STPHY_CNS', ..property][[1]][1L]
          next
        }
        if (tolower(x[i]) %like% '[ck]oagulas[ea] positie?[vf]'
            | tolower(x_trimmed[i]) %like% '[ck]oagulas[ea] positie?[vf]'
            | tolower(x[i]) %like% '[ck]o?ps[^a-z]?$') {
          # coerce S. coagulase positive
-          x[i] <- MOs[mo == 'B_STPHY_CPS', ..property][[1]][1L]
+          x[i] <- microorganismsDT[mo == 'B_STPHY_CPS', ..property][[1]][1L]
          next
        }
      }
@ -332,14 +325,14 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
      # FIRST TRY FULLNAMES AND CODES
      # if only genus is available, return only genus
      if (all(!c(x[i], x_trimmed[i]) %like% " ")) {
-        found <- MOs[tolower(fullname) %in% tolower(c(x_species[i], x_trimmed_species[i])), ..property][[1]]
+        found <- microorganismsDT[tolower(fullname) %in% tolower(c(x_species[i], x_trimmed_species[i])), ..property][[1]]
        if (length(found) > 0) {
          x[i] <- found[1L]
          next
        }
        if (nchar(x_trimmed[i]) > 4) {
          # not when abbr is esco, stau, klpn, etc.
-          found <- MOs[tolower(fullname) %like% gsub(" ", ".*", x_trimmed_species[i], fixed = TRUE), ..property][[1]]
+          found <- microorganismsDT[tolower(fullname) %like% gsub(" ", ".*", x_trimmed_species[i], fixed = TRUE), ..property][[1]]
          if (length(found) > 0) {
            x[i] <- found[1L]
            next
@ -348,20 +341,22 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
      }

      # TRY OTHER SOURCES ----
-      if (x_backup[i] %in% AMR::microorganisms.certe[, 1]) {
-        x[i] <- MOs[mo == AMR::microorganisms.certe[AMR::microorganisms.certe[, 1] == x_backup[i], 2], ..property][[1]][1L]
-        next
+      if (x_backup[i] %in% AMR::microorganisms.certe$certe) {
+        x[i] <- microorganismsDT[mo == AMR::microorganisms.certe[AMR::microorganisms.certe[, 1] == x_backup[i], 2], ..property][[1]][1L]
+        # x[i] <- exec_as.mo(x = AMR::microorganisms.certe[AMR::microorganisms.certe$certe == x_backup[i], "mo"],
+        #                    property = property)
+        # next
      }
      if (x_backup[i] %in% AMR::microorganisms.umcg[, 1]) {
        ref_certe <- AMR::microorganisms.umcg[AMR::microorganisms.umcg[, 1] == x_backup[i], 2]
        ref_mo <- AMR::microorganisms.certe[AMR::microorganisms.certe[, 1] == ref_certe, 2]
-        x[i] <- MOs[mo == ref_mo, ..property][[1]][1L]
+        x[i] <- microorganismsDT[mo == ref_mo, ..property][[1]][1L]
        next
      }
      if (x_backup[i] %in% reference_df[, 1]) {
        ref_mo <- reference_df[reference_df[, 1] == x_backup[i], 2]
-        if (ref_mo %in% MOs[, mo]) {
-          x[i] <- MOs[mo == ref_mo, ..property][[1]][1L]
+        if (ref_mo %in% microorganismsDT[, mo]) {
+          x[i] <- microorganismsDT[mo == ref_mo, ..property][[1]][1L]
          next
        } else {
          warning("Value '", x_backup[i], "' was found in reference_df, but '", ref_mo, "' is not a valid MO code.", call. = FALSE)
@ -369,20 +364,19 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
      }

      # TRY FIRST THOUSAND MOST PREVALENT IN HUMAN INFECTIONS ----
-
-      found <- MOs_mostprevalent[tolower(fullname) %in% tolower(c(x_backup[i], x_trimmed[i])), ..property][[1]]
+      found <- microorganisms.prevDT[tolower(fullname) %in% tolower(c(x_backup[i], x_trimmed[i])), ..property][[1]]
      # most probable: is exact match in fullname
      if (length(found) > 0) {
        x[i] <- found[1L]
        next
      }
-      found <- MOs_mostprevalent[tsn == x_trimmed[i], ..property][[1]]
+      found <- microorganisms.prevDT[tsn == x_trimmed[i], ..property][[1]]
      # is a valid TSN
      if (length(found) > 0) {
        x[i] <- found[1L]
        next
      }
-      found <- MOs_mostprevalent[mo == toupper(x_backup[i]), ..property][[1]]
+      found <- microorganisms.prevDT[mo == toupper(x_backup[i]), ..property][[1]]
      # is a valid mo
      if (length(found) > 0) {
        x[i] <- found[1L]
@ -390,21 +384,21 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
      }

      # try any match keeping spaces ----
-      found <- MOs_mostprevalent[fullname %like% x_withspaces[i], ..property][[1]]
+      found <- microorganisms.prevDT[fullname %like% x_withspaces[i], ..property][[1]]
      if (length(found) > 0) {
        x[i] <- found[1L]
        next
      }

      # try any match keeping spaces, not ending with $ ----
-      found <- MOs_mostprevalent[fullname %like% x_withspaces_start[i], ..property][[1]]
+      found <- microorganisms.prevDT[fullname %like% x_withspaces_start[i], ..property][[1]]
      if (length(found) > 0) {
        x[i] <- found[1L]
        next
      }

      # try any match diregarding spaces ----
-      found <- MOs_mostprevalent[fullname %like% x[i], ..property][[1]]
+      found <- microorganisms.prevDT[fullname %like% x[i], ..property][[1]]
      if (length(found) > 0) {
        x[i] <- found[1L]
        next
@ -412,7 +406,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =

      # try fullname without start and stop regex, to also find subspecies ----
      # like "K. pneu rhino" -> "Klebsiella pneumoniae (rhinoscleromatis)" = KLEPNERH
-      found <- MOs_mostprevalent[fullname %like% x_withspaces_start[i], ..property][[1]]
+      found <- microorganisms.prevDT[fullname %like% x_withspaces_start[i], ..property][[1]]
      if (length(found) > 0) {
        x[i] <- found[1L]
        next
@ -427,7 +421,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
        x_split[i] <- paste0(x_trimmed[i] %>% substr(1, x_length / 2) %>% trimws(),
                             '.* ',
                             x_trimmed[i] %>% substr((x_length / 2) + 1, x_length) %>% trimws())
-        found <- MOs_mostprevalent[fullname %like% paste0('^', x_split[i]), ..property][[1]]
+        found <- microorganisms.prevDT[fullname %like% paste0('^', x_split[i]), ..property][[1]]
        if (length(found) > 0) {
          x[i] <- found[1L]
          next
@ -442,7 +436,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
      #   x_trimmed[i] <- trimws(x_trimmed[i], which = "both")
      # }
      # if (!is.na(x_trimmed[i])) {
-      #   found <- MOs_mostprevalent[fullname %like% x_trimmed[i], ..property][[1]]
+      #   found <- microorganisms.prevDT[fullname %like% x_trimmed[i], ..property][[1]]
      #   if (length(found) > 0) {
      #     x[i] <- found[1L]
      #     next
@ -450,25 +444,25 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
      # }

      # THEN TRY ALL OTHERS ----
-      found <- MOs_allothers[tolower(fullname) == tolower(x_backup[i]), ..property][[1]]
+      found <- microorganisms.unprevDT[tolower(fullname) == tolower(x_backup[i]), ..property][[1]]
      # most probable: is exact match in fullname
      if (length(found) > 0) {
        x[i] <- found[1L]
        next
      }
-      found <- MOs_allothers[tolower(fullname) == tolower(x_trimmed[i]), ..property][[1]]
+      found <- microorganisms.unprevDT[tolower(fullname) == tolower(x_trimmed[i]), ..property][[1]]
      # most probable: is exact match in fullname
      if (length(found) > 0) {
        x[i] <- found[1L]
        next
      }
-      found <- MOs_allothers[tsn == x_trimmed[i], ..property][[1]]
+      found <- microorganisms.unprevDT[tsn == x_trimmed[i], ..property][[1]]
      # is a valid TSN
      if (length(found) > 0) {
        x[i] <- found[1L]
        next
      }
-      found <- MOs_allothers[mo == toupper(x_backup[i]), ..property][[1]]
+      found <- microorganisms.unprevDT[mo == toupper(x_backup[i]), ..property][[1]]
      # is a valid mo
      if (length(found) > 0) {
        x[i] <- found[1L]
@ -476,21 +470,21 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
      }

      # try any match keeping spaces ----
-      found <- MOs_allothers[fullname %like% x_withspaces[i], ..property][[1]]
+      found <- microorganisms.unprevDT[fullname %like% x_withspaces[i], ..property][[1]]
      if (length(found) > 0) {
        x[i] <- found[1L]
        next
      }

      # try any match keeping spaces, not ending with $ ----
-      found <- MOs_allothers[fullname %like% x_withspaces_start[i], ..property][[1]]
+      found <- microorganisms.unprevDT[fullname %like% x_withspaces_start[i], ..property][[1]]
      if (length(found) > 0) {
        x[i] <- found[1L]
        next
      }

      # try any match diregarding spaces ----
-      found <- MOs_allothers[fullname %like% x[i], ..property][[1]]
+      found <- microorganisms.unprevDT[fullname %like% x[i], ..property][[1]]
      if (length(found) > 0) {
        x[i] <- found[1L]
        next
@ -498,7 +492,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =

      # try fullname without start and stop regex, to also find subspecies ----
      # like "K. pneu rhino" -> "Klebsiella pneumoniae (rhinoscleromatis)" = KLEPNERH
-      found <- MOs_allothers[fullname %like% x_withspaces_start[i], ..property][[1]]
+      found <- microorganisms.unprevDT[fullname %like% x_withspaces_start[i], ..property][[1]]
      if (length(found) > 0) {
        x[i] <- found[1L]
        next
@ -513,7 +507,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
        x_split[i] <- paste0(x_trimmed[i] %>% substr(1, x_length / 2) %>% trimws(),
                             '.* ',
                             x_trimmed[i] %>% substr((x_length / 2) + 1, x_length) %>% trimws())
-        found <- MOs_allothers[fullname %like% paste0('^', x_split[i]), ..property][[1]]
+        found <- microorganisms.unprevDT[fullname %like% paste0('^', x_split[i]), ..property][[1]]
        if (length(found) > 0) {
          x[i] <- found[1L]
          next
@ -528,7 +522,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
      #   x_trimmed[i] <- trimws(x_trimmed[i], which = "both")
      # }
      # if (!is.na(x_trimmed[i])) {
-      #   found <- MOs_allothers[fullname %like% x_trimmed[i], ..property][[1]]
+      #   found <- microorganisms.unprevDT[fullname %like% x_trimmed[i], ..property][[1]]
      #   if (length(found) > 0) {
      #     x[i] <- found[1L]
      #     next
@ -538,33 +532,33 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
      # MISCELLANEOUS ----

      # look for old taxonomic names ----
-      found <- MOs_old[tolower(name) == tolower(x_backup[i])
-                       | tsn == x_trimmed[i]
-                       | name %like% x_withspaces[i],]
+      found <- microorganisms.oldDT[tolower(name) == tolower(x_backup[i])
+                                    | tsn == x_trimmed[i]
+                                    | name %like% x_withspaces[i],]
      if (NROW(found) > 0) {
-        x[i] <- MOs[tsn == found[1, tsn_new], ..property][[1]]
+        x[i] <- microorganismsDT[tsn == found[1, tsn_new], ..property][[1]]
        renamed_note(name_old = found[1, name],
-                     name_new = MOs[tsn == found[1, tsn_new], fullname],
+                     name_new = microorganismsDT[tsn == found[1, tsn_new], fullname],
                     ref_old = found[1, ref],
-                     ref_new = MOs[tsn == found[1, tsn_new], ref])
+                     ref_new = microorganismsDT[tsn == found[1, tsn_new], ref])
        next
      }

      # check for uncertain results ----
      if (allow_uncertain == TRUE) {
        # (1) look again for old taxonomic names, now for G. species ----
-        found <- MOs_old[name %like% x_withspaces[i]
-                         | name %like% x_withspaces_start[i]
-                         | name %like% x[i],]
+        found <- microorganisms.oldDT[name %like% x_withspaces[i]
+                                      | name %like% x_withspaces_start[i]
+                                      | name %like% x[i],]
        if (NROW(found) > 0) {
-          x[i] <- MOs[tsn == found[1, tsn_new], ..property][[1]]
+          x[i] <- microorganismsDT[tsn == found[1, tsn_new], ..property][[1]]
          warning("Uncertain interpretation: '",
                  x_backup[i], "' -> '", found[1, name], "'",
                  call. = FALSE, immediate. = TRUE)
          renamed_note(name_old = found[1, name],
-                       name_new = MOs[tsn == found[1, tsn_new], fullname],
+                       name_new = microorganismsDT[tsn == found[1, tsn_new], fullname],
                       ref_old = found[1, ref],
-                       ref_new = MOs[tsn == found[1, tsn_new], ref])
+                       ref_new = microorganismsDT[tsn == found[1, tsn_new], ref])
          next
        }

@ -574,7 +568,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
        x[i] <- suppressWarnings(suppressMessages(as.mo(x_strip)))
        if (!is.na(x[i])) {
          warning("Uncertain interpretation: '",
-                  x_backup[i], "' -> '", MOs[mo == x[i], fullname], "' (", x[i], ")",
+                  x_backup[i], "' -> '", microorganismsDT[mo == x[i], fullname], "' (", x[i], ")",
                  call. = FALSE, immediate. = TRUE)
          next
        }
@ -599,7 +593,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
  if (Becker == TRUE | Becker == "all") {
    # See Source. It's this figure:
    # https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4187637/figure/F3/
-    MOs_staph <- MOs[genus == "Staphylococcus"]
+    MOs_staph <- microorganismsDT[genus == "Staphylococcus"]
    setkey(MOs_staph, species)
    CoNS <- MOs_staph[species %in% c("arlettae", "auricularis", "capitis",
                                     "caprae", "carnosus", "cohnii", "condimenti",
@ -617,35 +611,35 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
                                     "hyicus", "intermedius",
                                     "pseudintermedius", "pseudointermedius",
                                     "schleiferi"), ..property][[1]]
-    x[x %in% CoNS] <- MOs[mo == 'B_STPHY_CNS', ..property][[1]][1L]
-    x[x %in% CoPS] <- MOs[mo == 'B_STPHY_CPS', ..property][[1]][1L]
+    x[x %in% CoNS] <- microorganismsDT[mo == 'B_STPHY_CNS', ..property][[1]][1L]
+    x[x %in% CoPS] <- microorganismsDT[mo == 'B_STPHY_CPS', ..property][[1]][1L]
    if (Becker == "all") {
-      x[x == MOs[mo == 'B_STPHY_AUR', ..property][[1]][1L]] <- MOs[mo == 'B_STPHY_CPS', ..property][[1]][1L]
+      x[x == microorganismsDT[mo == 'B_STPHY_AUR', ..property][[1]][1L]] <- microorganismsDT[mo == 'B_STPHY_CPS', ..property][[1]][1L]
    }
  }

  # Lancefield ----
  if (Lancefield == TRUE | Lancefield == "all") {
    # group A - S. pyogenes
-    x[x == MOs[mo == 'B_STRPTC_PYO', ..property][[1]][1L]] <- MOs[mo == 'B_STRPTC_GRA', ..property][[1]][1L]
+    x[x == microorganismsDT[mo == 'B_STRPTC_PYO', ..property][[1]][1L]] <- microorganismsDT[mo == 'B_STRPTC_GRA', ..property][[1]][1L]
    # group B - S. agalactiae
-    x[x == MOs[mo == 'B_STRPTC_AGA', ..property][[1]][1L]] <- MOs[mo == 'B_STRPTC_GRB', ..property][[1]][1L]
+    x[x == microorganismsDT[mo == 'B_STRPTC_AGA', ..property][[1]][1L]] <- microorganismsDT[mo == 'B_STRPTC_GRB', ..property][[1]][1L]
    # group C
-    S_groupC <- MOs %>% filter(genus == "Streptococcus",
-                               species %in% c("equisimilis", "equi",
-                                              "zooepidemicus", "dysgalactiae")) %>%
+    S_groupC <- microorganismsDT %>% filter(genus == "Streptococcus",
+                                            species %in% c("equisimilis", "equi",
+                                                           "zooepidemicus", "dysgalactiae")) %>%
      pull(property)
-    x[x %in% S_groupC] <- MOs[mo == 'B_STRPTC_GRC', ..property][[1]][1L]
+    x[x %in% S_groupC] <- microorganismsDT[mo == 'B_STRPTC_GRC', ..property][[1]][1L]
    if (Lancefield == "all") {
      # all Enterococci
-      x[x %like% "^(Enterococcus|B_ENTRC)"] <- MOs[mo == 'B_STRPTC_GRD', ..property][[1]][1L]
+      x[x %like% "^(Enterococcus|B_ENTRC)"] <- microorganismsDT[mo == 'B_STRPTC_GRD', ..property][[1]][1L]
    }
    # group F - S. anginosus
-    x[x == MOs[mo == 'B_STRPTC_ANG', ..property][[1]][1L]] <- MOs[mo == 'B_STRPTC_GRF', ..property][[1]][1L]
+    x[x == microorganismsDT[mo == 'B_STRPTC_ANG', ..property][[1]][1L]] <- microorganismsDT[mo == 'B_STRPTC_GRF', ..property][[1]][1L]
    # group H - S. sanguinis
-    x[x == MOs[mo == 'B_STRPTC_SAN', ..property][[1]][1L]] <- MOs[mo == 'B_STRPTC_GRH', ..property][[1]][1L]
+    x[x == microorganismsDT[mo == 'B_STRPTC_SAN', ..property][[1]][1L]] <- microorganismsDT[mo == 'B_STRPTC_GRH', ..property][[1]][1L]
    # group K - S. salivarius
-    x[x == MOs[mo == 'B_STRPTC_SAL', ..property][[1]][1L]] <- MOs[mo == 'B_STRPTC_GRK', ..property][[1]][1L]
+    x[x == microorganismsDT[mo == 'B_STRPTC_SAL', ..property][[1]][1L]] <- microorganismsDT[mo == 'B_STRPTC_GRK', ..property][[1]][1L]
  }

  # comply to x, which is also unique and without empty values
@ -700,7 +694,7 @@ print.mo <- function(x, ...) {
 #' @export
 #' @noRd
 as.data.frame.mo <- function (x, ...) {
-  # same as as.data.frame.character but with removed stringsAsFactors
+  # same as as.data.frame.character but with removed stringsAsFactors, since it will be class "mo"
  nm <- paste(deparse(substitute(x), width.cutoff = 500L),
              collapse = " ")
  if (!"nm" %in% names(list(...))) {
--- a/R/mo_property.R
+++ b/R/mo_property.R
@ -383,7 +383,9 @@ mo_validate <- function(x, property, ...) {
    Lancefield <- FALSE
  }

-  if (!all(x %in% AMR::microorganisms[, property]) | Becker %in% c(TRUE, "all") | Lancefield == TRUE) {
+  if (!all(x %in% microorganismsDT[[property]])
+      | Becker %in% c(TRUE, "all")
+      | Lancefield %in% c(TRUE, "all")) {
    exec_as.mo(x, property = property, ...)
  } else {
    x
--- a/R/zzz.R
+++ b/R/zzz.R
@ -50,21 +50,25 @@ NULL

 .onAttach <- function(libname, pkgname) {
  # save data.tables to improve speed of as.mo:
-  MOs <- data.table::as.data.table(AMR::microorganisms)
-  data.table::setkey(MOs, prevalence, tsn)

-  base::assign(x = "MOs",
-         value = MOs,
-         envir = base::as.environment("package:AMR"))
-  base::assign(x = "MOs_mostprevalent",
-         value = MOs[prevalence != 9999,],
-         envir = base::as.environment("package:AMR"))
-  base::assign(x = "MOs_allothers",
-         value = MOs[prevalence == 9999,],
-         envir = base::as.environment("package:AMR"))
+  # microorganismsDT <- data.table::as.data.table(AMR::microorganisms)
+  # microorganisms.oldDT <- data.table::as.data.table(AMR::microorganisms.old)
+  #
+  # data.table::setkey(microorganismsDT, prevalence, tsn)
+  # data.table::setkey(microorganisms.oldDT, tsn, name)

-  base::assign(x = "MOs_old",
-         value = data.table::as.data.table(AMR::microorganisms.old),
-         envir = base::as.environment("package:AMR"))
+  base::assign(x = "microorganismsDT",
+               value = microorganismsDT,
+               envir = base::as.environment("package:AMR"))
+  base::assign(x = "microorganisms.prevDT",
+               value = microorganismsDT[prevalence != 9999,],
+               envir = base::as.environment("package:AMR"))
+  base::assign(x = "microorganisms.unprevDT",
+               value = microorganismsDT[prevalence == 9999,],
+               envir = base::as.environment("package:AMR"))
+
+  base::assign(x = "microorganisms.oldDT",
+               value = microorganisms.oldDT,
+               envir = base::as.environment("package:AMR"))

 }
--- a/README.md
+++ b/README.md
@ -65,7 +65,7 @@ The `AMR` package basically does four important things:

 3. It **analyses the data** with convenient functions that use well-known methods.

-   * Calculate the resistance (and even co-resistance) of microbial isolates with the `portion_R`, `portion_IR`, `portion_I`, `portion_SI` and `portion_S` functions. Similarly, the *amount* of isolates can be determined with the `count_R`, `count_IR`, `count_I`, `count_SI` and `count_S` functions. All these functions can be used [with the `dplyr` package](https://dplyr.tidyverse.org/#usage) (e.g. in conjunction with [`summarise`](https://dplyr.tidyverse.org/reference/summarise.html))
+   * Calculate the resistance (and even co-resistance) of microbial isolates with the `portion_R`, `portion_IR`, `portion_I`, `portion_SI` and `portion_S` functions. Similarly, the *number* of isolates can be determined with the `count_R`, `count_IR`, `count_I`, `count_SI` and `count_S` functions. All these functions can be used [with the `dplyr` package](https://dplyr.tidyverse.org/#usage) (e.g. in conjunction with [`summarise`](https://dplyr.tidyverse.org/reference/summarise.html))
   * Plot AMR results with `geom_rsi`, a function made for the `ggplot2` package
   * Predict antimicrobial resistance for the nextcoming years using logistic regression models with the `resistance_predict` function
   * Conduct descriptive statistics to enhance base R: calculate kurtosis, skewness and create frequency tables
@ -83,7 +83,7 @@ The `AMR` package basically does four important things:

 This package contains the **complete microbial taxonomic data** (with all  seven taxonomic ranks - from subkingdom to subspecies) from the publicly available Integrated Taxonomic Information System (ITIS, https://www.itis.gov). 

-All (sub)species from the taxonomic kingdoms Bacteria, Fungi and Protozoa are included in this package, as well as all previously accepted names known to ITIS. Furthermore, the responsible authors and year of publication are available. This allows users to use authoritative taxonomic information for their data analysis on any microorganism, not only human pathogens.
+All (sub)species from the **taxonomic kingdoms Bacteria, Fungi and Protozoa are included in this package**, as well as all previously accepted names known to ITIS. Furthermore, the responsible authors and year of publication are available. This **allows users to use authoritative taxonomic information** for their data analysis on any microorganism, not only human pathogens. It also helps to **quickly determine the Gram stain of bacteria**, since all bacteria are classified into subkingdom Negibacteria or Posibacteria.

 ITIS is a partnership of U.S., Canadian, and Mexican agencies and taxonomic specialists.

@ -102,6 +102,12 @@ mo_class("E. coli")
 mo_family("E. coli")
 # [1] "Enterobacteriaceae"

+mo_subkingdom("E. coli")
+# [1] "Negibacteria"
+
+mo_gramstain("E. coli") # based on subkingdom
+# [1] "Gram negative"
+
 mo_ref("E. coli")
 # [1] "Castellani and Chalmers, 1919"
 ```
@ -453,7 +459,7 @@ Using the `microbenchmark` package, we can review the calculation performance of
 library(microbenchmark)
 ```

-In the next test, we try to 'coerce' different input values for *Staphylococcus aureus*. The actual result is the same every time: it returns its MO code `B_STAPHY_AUR` (*B* stands for *Bacteria*, the taxonomic kingdom). 
+In the next test, we try to 'coerce' different input values for *Staphylococcus aureus*. The actual result is the same every time: it returns its MO code `B_STPHY_AUR` (*B* stands for *Bacteria*, the taxonomic kingdom). 

 But the calculation time differs a lot. Here, the AI effect can be reviewed best:

@ -464,56 +470,61 @@ microbenchmark(A = as.mo("stau"),
               D = as.mo("S.  aureus"),
               E = as.mo("STAAUR"),
               F = as.mo("Staphylococcus aureus"),
+               G = as.mo("B_STPHY_AUR"),
               times = 10,
               unit = "ms")
 # Unit: milliseconds
-#  expr      min       lq     mean   median       uq      max neval
-#     A 36.05088 36.14782 36.65635 36.24466 36.43075 39.78544    10
-#     B 16.43575 16.46885 16.67816 16.66053 16.84858 16.95299    10
-#     C 14.44150 14.52182 16.81197 14.59173 14.67854 36.75244    10
-#     D 14.49765 14.58153 16.71666 14.59414 14.61094 35.50731    10
-#     E 14.45212 14.75146 14.82033 14.85559 14.96433 15.03438    10
-#     F 10.69445 10.73852 10.80334 10.79596 10.86856 10.97465    10
+#  expr       min        lq       mean    median        uq        max neval
+#     A 38.864859 38.923316 42.5410391 39.172790 39.394955  70.512389    10
+#     B 13.912175 14.002899 14.1044062 14.084962 14.254467  14.281845    10
+#     C 11.492663 11.555520 76.6953055 11.652670 11.864149 662.026786    10
+#     D 11.616702 11.683261 12.1807189 11.873159 12.142327  14.761724    10
+#     E 13.761108 14.012048 14.1360584 14.106509 14.293229  14.547522    10
+#     F  6.743735  6.785151  6.8962407  6.871335  7.000961   7.158383    10
+#     G  0.119220  0.137030  0.1411503  0.142512  0.145061   0.176909    10
 ```

-The more an input value resembles a full name, the faster the result will be found. In the table above, all measurements are in milliseconds, tested on a quite regular Linux server from 2007 with 2 GB RAM. A value of 10.8 milliseconds means it will roughly determine 93 different (unique) input values per second. It case of 36.2 milliseconds, this is only 28 input values per second.
+In the table above, all measurements are shown in milliseconds (thousands of seconds), tested on a quite regular Linux server from 2007 (Core 2 Duo 2.7 GHz, 2 GB DDR2 RAM). A value of 6.9 milliseconds means it will roughly determine 144 different (unique) input values per second. It case of 39.2 milliseconds, this is only 26 input values per second. The more an input value resembles a full name (like C, D and F), the faster the result will be found. In case of G, the input is already a valid MO code, so it only almost takes no time at all (0.0001 seconds on our server).

-To improve speed, the `as.mo` function also takes into account the prevalence of human pathogenic microorganisms. The downside is of course that less prevalent microorganisms will be determined far less faster. See this example for the ID of *Burkholderia nodosa* (`B_BRKHL_NOD`):
+To achieve this speed, the `as.mo` function also takes into account the prevalence of human pathogenic microorganisms. The downside is of course that less prevalent microorganisms will be determined far less faster. See this example for the ID of *Burkholderia nodosa* (`B_BRKHL_NOD`):

 ```r
-microbenchmark(B = as.mo("burnod"),
+microbenchmark(A = as.mo("buno"),
+               B = as.mo("burnod"),
               C = as.mo("B. nodosa"),
               D = as.mo("B.  nodosa"),
               E = as.mo("BURNOD"),
               F = as.mo("Burkholderia nodosa"),
+               G = as.mo("B_BRKHL_NOD"),
               times = 10,
               unit = "ms")
 # Unit: milliseconds
-#  expr      min        lq      mean    median        uq       max neval
-#     B 175.9446 176.80440 179.18240 177.00131 177.62021 198.86286    10
-#     C  88.1902  88.57705  89.08851  88.84293  89.15498  91.76621    10
-#     D 110.2641 110.67497 113.66290 111.20534 111.80744 134.44699    10
-#     E 175.0728 177.04235 207.83542 190.38109 200.33448 388.12177    10
-#     F  45.0778  45.31617  52.72430  45.62962  67.85262  70.42250    10
+#  expr        min         lq        mean      median         uq        max neval
+#     A 124.175427 124.474837 125.8610536 125.3750560 126.160945 131.485994    10
+#     B 154.249713 155.364729 160.9077032 156.8738940 157.136183 197.315105    10
+#     C  66.066571  66.162393  66.5538611  66.4488130  66.698077  67.623404    10
+#     D  86.747693  86.918665  90.7831016  87.8149725  89.440982 116.767991    10
+#     E 154.863827 155.208563 162.6535954 158.4062465 168.593785 187.378088    10
+#     F  32.427028  32.638648  32.9929454  32.7860475  32.992813  34.674241    10
+#     G   0.213155   0.216578   0.2369226   0.2338985   0.253734   0.285581    10
 ```
-(Note: `A` is missing here, because `as.mo("buno")` returns `F_BUELL_NOT`: the ID of the fungus *Buellia notabilis*)

-That takes up to 12 times as much time! A value of 190.4 milliseconds means it can only determine ~5 different input values per second. We can conclude that looking up arbitrary codes of less prevalent microorganisms is the worst way to go, in terms of calculation performance.
+That takes up to 11 times as much time! A value of 158.4 milliseconds means it can only determine ~6 different input values per second. We can conclude that looking up arbitrary codes of less prevalent microorganisms is the worst way to go, in terms of calculation performance.

 To relieve this pitfall and further improve performance, two important calculations take almost no time at all: **repetive results** and **already precalculated results**.

 Let's set up 25,000 entries of `"Staphylococcus aureus"` and check its speed:
 ```r
 repetive_results <- rep("Staphylococcus aureus", 25000)
-microbenchmark(A = as.mo(repetive_results),
+microbenchmark(F = as.mo(repetive_results),
               times = 10,
               unit = "ms")
 # Unit: milliseconds
 #  expr      min       lq     mean   median       uq      max neval
-#     A 14.61282  14.6372 14.70817 14.72597 14.76124 14.78498    10
+#     F 12.24381 12.34707 13.84736 12.37689 12.43266 40.36833   100
 ```

-So transforming 25,000 times (!) `"Staphylococcus aureus"` only takes 4 ms (0.004 seconds) more than transforming it once. You only lose time on your unique input values.
+So transforming 25,000 times (!) `"Staphylococcus aureus"` only takes 6 ms (0.006 seconds) more than transforming it once. You only lose time on your unique input values.

 What about precalculated results? This package also contains helper functions for specific microbial properties, for example `mo_fullname`. It returns the full microbial name (genus, species and possibly subspecies) and uses `as.mo` internally. If the input is however an already precalculated result, it almost doesn't take any time at all (see 'C' below):

@ -524,13 +535,13 @@ microbenchmark(A = mo_fullname("B_STPHY_AUR"),
               times = 10,
               unit = "ms")
 # Unit: milliseconds
-#  expr       min       lq       mean    median        uq       max neval
-#     A 13.548652 13.74588 13.8052969 13.813594 13.881165 14.090969    10
-#     B 15.079781 15.16785 15.3835842 15.374477 15.395115 16.072995    10
-#     C  0.171182  0.18563  0.2306307  0.203413  0.224610  0.492312    10
+#  expr       min        lq       mean     median        uq       max neval
+#     A 11.364086 11.460537 11.5104799 11.4795330 11.524860 11.818263    10
+#     B 11.976454 12.012352 12.1704592 12.0853020 12.210004 12.881737    10
+#     C  0.095823  0.102528  0.1167754  0.1153785  0.132629  0.140661    10
 ```

-So going from `mo_fullname("Staphylococcus aureus")` to `"Staphylococcus aureus"` takes 0.0002 seconds - it doesn't even start calculating *if the result would be the same as the expected resulting value*. That goes for all helper functions:
+So going from `mo_fullname("Staphylococcus aureus")` to `"Staphylococcus aureus"` takes 0.0001 seconds - it doesn't even start calculating *if the result would be the same as the expected resulting value*. That goes for all helper functions:

 ```r
 microbenchmark(A = mo_species("aureus"),
@ -545,17 +556,17 @@ microbenchmark(A = mo_species("aureus"),
               unit = "ms")
 # Unit: milliseconds
 #  expr      min       lq      mean    median       uq      max neval
-#     A 0.145270 0.158750 0.1908419 0.1693655 0.218255 0.300528    10
-#     B 0.182985 0.184522 0.2025408 0.1970235 0.209944 0.243328    10
-#     C 0.176280 0.201632 0.2618147 0.2303025 0.339499 0.388249    10
-#     D 0.136890 0.139054 0.1552231 0.1518010 0.168738 0.193042    10
-#     E 0.100921 0.116496 0.1321823 0.1222930 0.129976 0.230477    10
-#     F 0.103017 0.110281 0.1214480 0.1199880 0.124319 0.147506    10
-#     G 0.099246 0.110280 0.1195553 0.1188705 0.125436 0.149741    10
-#     H 0.114331 0.117264 0.1249819 0.1220830 0.129557 0.143385    10
+#     A 0.096801 0.120966 0.1264836 0.1262045 0.135773 0.158192    10
+#     B 0.102807 0.123899 0.1258339 0.1286835 0.132420 0.143245    10
+#     C 0.122503 0.128299 0.1374623 0.1292070 0.139683 0.187315    10
+#     D 0.087372 0.093239 0.1053774 0.1026330 0.113633 0.128299    10
+#     E 0.084020 0.098617 0.1124383 0.1094420 0.113423 0.178515    10
+#     F 0.080667 0.085346 0.1068579 0.1128295 0.115030 0.133537    10
+#     G 0.087443 0.090026 0.1030171 0.0995250 0.106369 0.152325    10
+#     H 0.084648 0.103156 0.1058313 0.1095120 0.112864 0.117265    10
 ```

-Of course, when running `mo_phylum("Firmicutes")` the function has zero knowledge about the actual microorganism, namely *S. aureus*. But since the result would be `"Firmicutes"` too, there is no point in calculating the result. And since this package 'knows' all phyla of all known microorganisms (according to ITIS), it can just return the initial value immediately.
+Of course, when running `mo_phylum("Firmicutes")` the function has zero knowledge about the actual microorganism, namely *S. aureus*. But since the result would be `"Firmicutes"` too, there is no point in calculating the result. And because this package 'knows' all phyla of all known microorganisms (according to ITIS), it can just return the initial value immediately.

 ## Copyright

--- a/data/microorganisms.oldDT.rda
+++ b/data/microorganisms.oldDT.rda
--- a/data/microorganisms.prevDT.rda
+++ b/data/microorganisms.prevDT.rda
--- a/data/microorganisms.rda
+++ b/data/microorganisms.rda
--- a/data/microorganisms.unprevDT.rda
+++ b/data/microorganisms.unprevDT.rda
--- a/data/microorganismsDT.rda
+++ b/data/microorganismsDT.rda
--- a/man/antibiotics.Rd
+++ b/man/antibiotics.Rd
@ -4,7 +4,7 @@
 \name{antibiotics}
 \alias{antibiotics}
 \title{Data set with 423 antibiotics}
-\format{A \code{\link{tibble}} with 423 observations and 18 variables:
+\format{A \code{\link{data.frame}} with 423 observations and 18 variables:
 \describe{
  \item{\code{atc}}{ATC code, like \code{J01CR02}}
  \item{\code{certe}}{Certe code, like \code{amcl}}
--- a/man/as.mo.Rd
+++ b/man/as.mo.Rd
@ -54,7 +54,15 @@ A microbial ID from this package (class: \code{mo}) typically looks like these e

 Use the \code{\link{mo_property}} functions to get properties based on the returned code, see Examples.

-This function uses Artificial Intelligence (AI) to help getting more logical results, based on type of input and known prevalence of human pathogens. For example:
+This function uses Artificial Intelligence (AI) to help getting fast and logical results. It tries to find matches in this order:
+\itemize{
+  \item{Taxonomic kingdom: it first searches in bacteria, then fungi, then protozoa}
+  \item{Human pathogenic prevalence: it first searches in more prevalent microorganisms, then less prevalent ones}
+  \item{Valid MO codes and full names: it first searches in already valid MO code and genus/species combinations}
+  \item{Breakdown of input values: from here it starts to breakdown input values to find possible matches}
+}
+
+A couple of effects because of these rules
 \itemize{
  \item{\code{"E. coli"} will return the ID of \emph{Escherichia coli} and not \emph{Entamoeba coli}, although the latter would alphabetically come first}
  \item{\code{"H. influenzae"} will return the ID of \emph{Haemophilus influenzae} and not \emph{Haematobacter influenzae} for the same reason}
@ -70,7 +78,7 @@ This means that looking up human pathogenic microorganisms takes less time than
 \if{html}{\figure{itis_logo.jpg}{options: height=60px style=margin-bottom:5px} \cr}
 This package contains the \strong{complete microbial taxonomic data} (with all  seven taxonomic ranks - from subkingdom to subspecies) from the publicly available Integrated Taxonomic Information System (ITIS, \url{https://www.itis.gov}).

-All (sub)species from the taxonomic kingdoms Bacteria, Fungi and Protozoa are included in this package, as well as all previously accepted names known to ITIS. Furthermore, the responsible authors and year of publication are available. This allows users to use authoritative taxonomic information for their data analysis on any microorganism, not only human pathogens.
+All (sub)species from the \strong{taxonomic kingdoms Bacteria, Fungi and Protozoa are included in this package}, as well as all previously accepted names known to ITIS. Furthermore, the responsible authors and year of publication are available. This \strong{allows users to use authoritative taxonomic information} for their data analysis on any microorganism, not only human pathogens. It also helps to \strong{quickly determine the Gram stain of bacteria}, since all bacteria are classified into subkingdom Negibacteria or Posibacteria.

 ITIS is a partnership of U.S., Canadian, and Mexican agencies and taxonomic specialists [3].
 }
--- a/man/freq.Rd
+++ b/man/freq.Rd
@ -10,12 +10,12 @@
 frequency_tbl(x, ..., sort.count = TRUE,
  nmax = getOption("max.print.freq"), na.rm = TRUE, row.names = TRUE,
  markdown = !interactive(), digits = 2, quote = FALSE,
-  header = !markdown, na = "<NA>", sep = " ")
+  header = !markdown, title = NULL, na = "<NA>", sep = " ")

 freq(x, ..., sort.count = TRUE, nmax = getOption("max.print.freq"),
  na.rm = TRUE, row.names = TRUE, markdown = !interactive(),
-  digits = 2, quote = FALSE, header = !markdown, na = "<NA>",
-  sep = " ")
+  digits = 2, quote = FALSE, header = !markdown, title = NULL,
+  na = "<NA>", sep = " ")

 top_freq(f, n)

@ -43,6 +43,8 @@ top_freq(f, n)

 \item{header}{a logical value indicating whether an informative header should be printed}

+\item{title}{text to show above frequency table, at default to tries to coerce from the variables passed to \code{x}}
+
 \item{na}{a character string to should be used to show empty (\code{NA}) values (only useful when \code{na.rm = FALSE})}

 \item{sep}{a character string to separate the terms when selecting multiple columns}
--- a/man/microorganisms.Rd
+++ b/man/microorganisms.Rd
@ -19,7 +19,7 @@
  \item{\code{subkingdom}}{Taxonomic subkingdom of the microorganism as found in ITIS, see Source}
  \item{\code{gramstain}}{Gram of microorganism, like \code{"Gram negative"}}
  \item{\code{type}}{Type of microorganism, like \code{"Bacteria"} and \code{"Fungi"}}
-  \item{\code{prevalence}}{A rounded integer based on prevalence of the microorganism. Used internally by \code{\link{as.mo}}, otherwise quite meaningless.}
+  \item{\code{prevalence}}{An integer based on estimated prevalence of the microorganism in humans. Used internally by \code{\link{as.mo}}, otherwise quite meaningless. It has a value of 25 for manually added items and a value of 1000 for all unprevalent microorganisms whose genus was somewhere in the top 250 (with another species).}
  \item{\code{ref}}{Author(s) and year of concerning publication as found in ITIS, see Source}
 }}
 \source{
@ -36,7 +36,7 @@ A data set containing the complete microbial taxonomy of the kingdoms Bacteria,
 \if{html}{\figure{itis_logo.jpg}{options: height=60px style=margin-bottom:5px} \cr}
 This package contains the \strong{complete microbial taxonomic data} (with all  seven taxonomic ranks - from subkingdom to subspecies) from the publicly available Integrated Taxonomic Information System (ITIS, \url{https://www.itis.gov}).

-All (sub)species from the taxonomic kingdoms Bacteria, Fungi and Protozoa are included in this package, as well as all previously accepted names known to ITIS. Furthermore, the responsible authors and year of publication are available. This allows users to use authoritative taxonomic information for their data analysis on any microorganism, not only human pathogens.
+All (sub)species from the \strong{taxonomic kingdoms Bacteria, Fungi and Protozoa are included in this package}, as well as all previously accepted names known to ITIS. Furthermore, the responsible authors and year of publication are available. This \strong{allows users to use authoritative taxonomic information} for their data analysis on any microorganism, not only human pathogens. It also helps to \strong{quickly determine the Gram stain of bacteria}, since all bacteria are classified into subkingdom Negibacteria or Posibacteria.

 ITIS is a partnership of U.S., Canadian, and Mexican agencies and taxonomic specialists [3].
 }
--- a/man/microorganisms.certe.Rd
+++ b/man/microorganisms.certe.Rd
@ -4,7 +4,7 @@
 \name{microorganisms.certe}
 \alias{microorganisms.certe}
 \title{Translation table for Certe}
-\format{A \code{\link{tibble}} with 2,665 observations and 2 variables:
+\format{A \code{\link{data.frame}} with 2,665 observations and 2 variables:
 \describe{
  \item{\code{certe}}{Code of microorganism according to Certe MMB}
  \item{\code{mo}}{Code of microorganism in \code{\link{microorganisms}}}
--- a/man/microorganisms.old.Rd
+++ b/man/microorganisms.old.Rd
@ -25,7 +25,7 @@ A data set containing old (previously valid or accepted) taxonomic names accordi
 \if{html}{\figure{itis_logo.jpg}{options: height=60px style=margin-bottom:5px} \cr}
 This package contains the \strong{complete microbial taxonomic data} (with all  seven taxonomic ranks - from subkingdom to subspecies) from the publicly available Integrated Taxonomic Information System (ITIS, \url{https://www.itis.gov}).

-All (sub)species from the taxonomic kingdoms Bacteria, Fungi and Protozoa are included in this package, as well as all previously accepted names known to ITIS. Furthermore, the responsible authors and year of publication are available. This allows users to use authoritative taxonomic information for their data analysis on any microorganism, not only human pathogens.
+All (sub)species from the \strong{taxonomic kingdoms Bacteria, Fungi and Protozoa are included in this package}, as well as all previously accepted names known to ITIS. Furthermore, the responsible authors and year of publication are available. This \strong{allows users to use authoritative taxonomic information} for their data analysis on any microorganism, not only human pathogens. It also helps to \strong{quickly determine the Gram stain of bacteria}, since all bacteria are classified into subkingdom Negibacteria or Posibacteria.

 ITIS is a partnership of U.S., Canadian, and Mexican agencies and taxonomic specialists [3].
 }
--- a/man/microorganisms.umcg.Rd
+++ b/man/microorganisms.umcg.Rd
@ -4,7 +4,7 @@
 \name{microorganisms.umcg}
 \alias{microorganisms.umcg}
 \title{Translation table for UMCG}
-\format{A \code{\link{tibble}} with 1,095 observations and 2 variables:
+\format{A \code{\link{data.frame}} with 1,095 observations and 2 variables:
 \describe{
  \item{\code{umcg}}{Code of microorganism according to UMCG MMB}
  \item{\code{certe}}{Code of microorganism according to Certe MMB}
--- a/man/mo_property.Rd
+++ b/man/mo_property.Rd
@ -71,7 +71,7 @@ Use these functions to return a specific property of a microorganism from the \c
 \if{html}{\figure{itis_logo.jpg}{options: height=60px style=margin-bottom:5px} \cr}
 This package contains the \strong{complete microbial taxonomic data} (with all  seven taxonomic ranks - from subkingdom to subspecies) from the publicly available Integrated Taxonomic Information System (ITIS, \url{https://www.itis.gov}).

-All (sub)species from the taxonomic kingdoms Bacteria, Fungi and Protozoa are included in this package, as well as all previously accepted names known to ITIS. Furthermore, the responsible authors and year of publication are available. This allows users to use authoritative taxonomic information for their data analysis on any microorganism, not only human pathogens.
+All (sub)species from the \strong{taxonomic kingdoms Bacteria, Fungi and Protozoa are included in this package}, as well as all previously accepted names known to ITIS. Furthermore, the responsible authors and year of publication are available. This \strong{allows users to use authoritative taxonomic information} for their data analysis on any microorganism, not only human pathogens. It also helps to \strong{quickly determine the Gram stain of bacteria}, since all bacteria are classified into subkingdom Negibacteria or Posibacteria.

 ITIS is a partnership of U.S., Canadian, and Mexican agencies and taxonomic specialists [3].
 }
--- a/man/supplementary_data.Rd
+++ b/man/supplementary_data.Rd
@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/data.R
+\docType{data}
+\name{supplementary_data}
+\alias{supplementary_data}
+\alias{microorganismsDT}
+\alias{microorganisms.prevDT}
+\alias{microorganisms.unprevDT}
+\alias{microorganisms.oldDT}
+\title{Supplementary Data}
+\format{An object of class \code{data.table} (inherits from \code{data.frame}) with 18833 rows and 15 columns.}
+\usage{
+microorganismsDT
+
+microorganisms.prevDT
+
+microorganisms.unprevDT
+
+microorganisms.oldDT
+}
+\description{
+These \code{\link{data.table}s} are transformed from the \code{\link{microorganisms}} and \code{\link{microorganisms}} data sets to improve speed of \code{\link{as.mo}}. They are meant for internal use only, and are only mentioned here for reference.
+}
+\keyword{datasets}
--- a/tests/testthat/test-freq.R
+++ b/tests/testthat/test-freq.R
@ -21,6 +21,7 @@ test_that("frequency table works", {
  expect_output(print(freq(septic_patients$age, markdown = TRUE), markdown = TRUE))
  expect_output(print(freq(septic_patients$age[0])))
  expect_output(print(freq(septic_patients$age, quote = TRUE)))
+  expect_output(print(freq(septic_patients$age, markdown = TRUE, title = "TITLE")))

  # character
  expect_output(print(freq(septic_patients$mo)))
--- a/tests/testthat/test-mo_property.R
+++ b/tests/testthat/test-mo_property.R
@ -53,4 +53,10 @@ test_that("mo_property works", {
  expect_identical(mo_property("E. coli", property = "species"),
                   mo_species("E. coli"))

+  # check vector with random values
+  library(dplyr)
+  df_sample <- AMR::microorganisms %>% sample_n(100)
+  expect_identical(df_sample %>% pull(mo) %>% mo_fullname(),
+                   df_sample %>% pull(fullname))
+
 })
--- a/vignettes/AMR.Rmd
+++ b/vignettes/AMR.Rmd
@ -39,7 +39,7 @@ The `AMR` package basically does four important things:

 3. It **analyses the data** with convenient functions that use well-known methods.

-   * Calculate the resistance (and even co-resistance) of microbial isolates with the `portion_R`, `portion_IR`, `portion_I`, `portion_SI` and `portion_S` functions. Similarly, the *amount* of isolates can be determined with the `count_R`, `count_IR`, `count_I`, `count_SI` and `count_S` functions. All these functions can be used [with the `dplyr` package](https://dplyr.tidyverse.org/#usage) (e.g. in conjunction with [`summarise`](https://dplyr.tidyverse.org/reference/summarise.html))
+   * Calculate the resistance (and even co-resistance) of microbial isolates with the `portion_R`, `portion_IR`, `portion_I`, `portion_SI` and `portion_S` functions. Similarly, the *number* of isolates can be determined with the `count_R`, `count_IR`, `count_I`, `count_SI` and `count_S` functions. All these functions can be used [with the `dplyr` package](https://dplyr.tidyverse.org/#usage) (e.g. in conjunction with [`summarise`](https://dplyr.tidyverse.org/reference/summarise.html))
   * Plot AMR results with `geom_rsi`, a function made for the `ggplot2` package
   * Predict antimicrobial resistance for the nextcoming years using logistic regression models with the `resistance_predict` function
   * Conduct descriptive statistics to enhance base R: calculate kurtosis, skewness and create frequency tables