From 1bf8dc298391e91ae634dec8607e85238636028d Mon Sep 17 00:00:00 2001
From: "Matthijs S. Berends" <m.s.berends@umcg.nl>
Date: Mon, 29 Oct 2018 17:26:17 +0100
Subject: [PATCH] as.mo speedup: assigned to namespace

---
 DESCRIPTION  |  2 +-
 R/mo.R       | 81 ++++++++++++++++++++++------------------------------
 R/zzz.R      | 25 ++++++++++++++--
 man/AMR.Rd   |  4 +--
 man/as.mo.Rd |  2 +-
 5 files changed, 61 insertions(+), 53 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index d3d844f9..637b9da1 100755
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: AMR
 Version: 0.4.0.9007
-Date: 2018-10-23
+Date: 2018-10-29
 Title: Antimicrobial Resistance Analysis
 Authors@R: c(
     person(
diff --git a/R/mo.R b/R/mo.R
index 492b5dd8..e65bc7dd 100644
--- a/R/mo.R
+++ b/R/mo.R
@@ -44,7 +44,7 @@
 #'   |   |    |    ----> subspecies, a 3-4 letter acronym
 #'   |   |     ----> species, a 3-4 letter acronym
 #'   |    ----> genus, a 5-7 letter acronym, mostly without vowels
-#'    ----> taxonomic kingdom, either Bacteria (B), Fungi (F) or Protozoa (P)
+#'    ----> taxonomic kingdom, either B (Bacteria), F (Fungi) or P (Protozoa)
 #' }
 #'
 #' Use the \code{\link{mo_property}} functions to get properties based on the returned code, see Examples.
@@ -176,10 +176,11 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
   # remove empty values (to later fill them in again)
   x <- x[!is.na(x) & !is.null(x) & !identical(x, "")]
 
-  MOs <- NULL # will be set later, if needed
-  MOs_mostprevalent <- NULL # will be set later, if needed
-  MOs_allothers <- NULL # will be set later, if needed
-  MOs_old <- NULL # will be set later, if needed
+  # These data.tables are created by the package start-up hook in R/zzz.R:
+  #   MOs
+  #   MOs_mostprevalent
+  #   MOs_allothers
+  #   MOs_old
 
   # defined df to check for
   if (!is.null(reference_df)) {
@@ -232,10 +233,6 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
     )
   } else {
 
-    MOs <- as.data.table(AMR::microorganisms)
-    setkey(MOs, prevalence, tsn)
-    MOs_mostprevalent <- MOs[prevalence != 9999,]
-
     x_backup <- trimws(x, which = "both")
     x_species <- paste(x_backup, "species")
     # translate to English for supported languages of mo_property
@@ -421,17 +418,20 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
         next
       }
 
-      # try splitting of characters and then find ID ----
-      # like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus
-      x_split <- x
-      x_length <- nchar(x_trimmed[i])
-      x_split[i] <- paste0(x_trimmed[i] %>% substr(1, x_length / 2) %>% trimws(),
-                           '.* ',
-                           x_trimmed[i] %>% substr((x_length / 2) + 1, x_length) %>% trimws())
-      found <- MOs_mostprevalent[fullname %like% paste0('^', x_split[i]), ..property][[1]]
-      if (length(found) > 0) {
-        x[i] <- found[1L]
-        next
+      # try splitting of characters in the middle and then find ID ----
+      # only when text length is 6 or lower
+      # like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus, staaur = S. aureus
+      if (nchar(x_trimmed[i]) <= 6) {
+        x_split <- x
+        x_length <- nchar(x_trimmed[i])
+        x_split[i] <- paste0(x_trimmed[i] %>% substr(1, x_length / 2) %>% trimws(),
+                             '.* ',
+                             x_trimmed[i] %>% substr((x_length / 2) + 1, x_length) %>% trimws())
+        found <- MOs_mostprevalent[fullname %like% paste0('^', x_split[i]), ..property][[1]]
+        if (length(found) > 0) {
+          x[i] <- found[1L]
+          next
+        }
       }
 
       # try any match with text before and after original search string ----
@@ -450,10 +450,6 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
       # }
 
       # THEN TRY ALL OTHERS ----
-      if (is.null(MOs_allothers)) {
-        MOs_allothers <- MOs[prevalence == 9999,]
-      }
-
       found <- MOs_allothers[tolower(fullname) == tolower(x_backup[i]), ..property][[1]]
       # most probable: is exact match in fullname
       if (length(found) > 0) {
@@ -508,17 +504,20 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
         next
       }
 
-      # try splitting of characters and then find ID ----
-      # like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus
-      x_split <- x
-      x_length <- nchar(x_trimmed[i])
-      x_split[i] <- paste0(x_trimmed[i] %>% substr(1, x_length / 2) %>% trimws(),
-                           '.* ',
-                           x_trimmed[i] %>% substr((x_length / 2) + 1, x_length) %>% trimws())
-      found <- MOs_allothers[fullname %like% paste0('^', x_split[i]), ..property][[1]]
-      if (length(found) > 0) {
-        x[i] <- found[1L]
-        next
+      # try splitting of characters in the middle and then find ID ----
+      # only when text length is 6 or lower
+      # like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus, staaur = S. aureus
+      if (nchar(x_trimmed[i]) <= 6) {
+        x_split <- x
+        x_length <- nchar(x_trimmed[i])
+        x_split[i] <- paste0(x_trimmed[i] %>% substr(1, x_length / 2) %>% trimws(),
+                             '.* ',
+                             x_trimmed[i] %>% substr((x_length / 2) + 1, x_length) %>% trimws())
+        found <- MOs_allothers[fullname %like% paste0('^', x_split[i]), ..property][[1]]
+        if (length(found) > 0) {
+          x[i] <- found[1L]
+          next
+        }
       }
 
       # # try any match with text before and after original search string ----
@@ -539,10 +538,6 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
       # MISCELLANEOUS ----
 
       # look for old taxonomic names ----
-      if (is.null(MOs_old)) {
-        MOs_old <- as.data.table(AMR::microorganisms.old)
-        setkey(MOs_old, name, tsn_new)
-      }
       found <- MOs_old[tolower(name) == tolower(x_backup[i])
                        | tsn == x_trimmed[i]
                        | name %like% x_withspaces[i],]
@@ -604,10 +599,6 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
   if (Becker == TRUE | Becker == "all") {
     # See Source. It's this figure:
     # https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4187637/figure/F3/
-    if (is.null(MOs)) {
-      MOs <- as.data.table(AMR::microorganisms)
-      setkey(MOs, prevalence, tsn)
-    }
     MOs_staph <- MOs[genus == "Staphylococcus"]
     setkey(MOs_staph, species)
     CoNS <- MOs_staph[species %in% c("arlettae", "auricularis", "capitis",
@@ -635,10 +626,6 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
 
   # Lancefield ----
   if (Lancefield == TRUE | Lancefield == "all") {
-    if (is.null(MOs)) {
-      MOs <- as.data.table(AMR::microorganisms)
-      setkey(MOs, prevalence, tsn)
-    }
     # group A - S. pyogenes
     x[x == MOs[mo == 'B_STRPTC_PYO', ..property][[1]][1L]] <- MOs[mo == 'B_STRPTC_GRA', ..property][[1]][1L]
     # group B - S. agalactiae
diff --git a/R/zzz.R b/R/zzz.R
index 6c6ccac4..5e2e79f9 100755
--- a/R/zzz.R
+++ b/R/zzz.R
@@ -18,7 +18,7 @@
 
 #' The \code{AMR} Package
 #'
-#' Welcome to the \code{AMR} package. This page gives some additional contact information abount the authors.
+#' Welcome to the \code{AMR} package. This page gives some additional contact information about the authors.
 #' @details
 #' This package was intended to simplify the analysis and prediction of Antimicrobial Resistance (AMR) and work with antibiotic properties by using evidence-based methods.
 #'
@@ -39,7 +39,7 @@
 #' 9700 RB Groningen
 #'
 #' If you have found a bug, please file a new issue at: \cr
-#' \url{https://github.com/msberends/AMR/issues}
+#' \url{https://gitlab.com/msberends/AMR/issues}
 #' @name AMR
 #' @rdname AMR
 NULL
@@ -47,3 +47,24 @@ NULL
 .onLoad <- function(libname, pkgname) {
   backports::import(pkgname)
+
+  # Cache the reference data as data.tables to improve the speed of as.mo().
+  # They are assigned to the package namespace at load time rather than to
+  # "package:AMR" at attach time, so that exec_as.mo() also finds them when
+  # AMR is only loaded (e.g. AMR::as.mo(x)) and never attached by library().
+  ns <- base::asNamespace(pkgname)
+
+  MOs <- data.table::as.data.table(AMR::microorganisms)
+  data.table::setkey(MOs, prevalence, tsn)
+  base::assign(x = "MOs", value = MOs, envir = ns)
+  base::assign(x = "MOs_mostprevalent",
+               value = MOs[prevalence != 9999,],
+               envir = ns)
+  base::assign(x = "MOs_allothers",
+               value = MOs[prevalence == 9999,],
+               envir = ns)
+
+  # keep the key that exec_as.mo() used to set on this table
+  MOs_old <- data.table::as.data.table(AMR::microorganisms.old)
+  data.table::setkey(MOs_old, name, tsn_new)
+  base::assign(x = "MOs_old", value = MOs_old, envir = ns)
 }
diff --git a/man/AMR.Rd b/man/AMR.Rd
index 58e38208..920df520 100644
--- a/man/AMR.Rd
+++ b/man/AMR.Rd
@@ -4,7 +4,7 @@
 \alias{AMR}
 \title{The \code{AMR} Package}
 \description{
-Welcome to the \code{AMR} package. This page gives some additional contact information abount the authors.
+Welcome to the \code{AMR} package. This page gives some additional contact information about the authors.
 }
 \details{
 This package was intended to simplify the analysis and prediction of Antimicrobial Resistance (AMR) and work with antibiotic properties by using evidence-based methods.
@@ -31,6 +31,6 @@ Post Office Box 30001 \cr
 9700 RB Groningen
 
 If you have found a bug, please file a new issue at: \cr
-\url{https://github.com/msberends/AMR/issues}
+\url{https://gitlab.com/msberends/AMR/issues}
 }
 
diff --git a/man/as.mo.Rd b/man/as.mo.Rd
index 1d787930..39f011c7 100644
--- a/man/as.mo.Rd
+++ b/man/as.mo.Rd
@@ -49,7 +49,7 @@ A microbial ID from this package (class: \code{mo}) typically looks like these e
   |   |    |    ----> subspecies, a 3-4 letter acronym
   |   |     ----> species, a 3-4 letter acronym
   |    ----> genus, a 5-7 letter acronym, mostly without vowels
-   ----> taxonomic kingdom, either Bacteria (B), Fungi (F) or Protozoa (P)
+   ----> taxonomic kingdom, either B (Bacteria), F (Fungi) or P (Protozoa)
 }
 
 Use the \code{\link{mo_property}} functions to get properties based on the returned code, see Examples.