From 1bf8dc298391e91ae634dec8607e85238636028d Mon Sep 17 00:00:00 2001 From: "Matthijs S. Berends" Date: Mon, 29 Oct 2018 17:26:17 +0100 Subject: [PATCH] as.mo speedup: assigned to namespace --- DESCRIPTION | 2 +- R/mo.R | 81 ++++++++++++++++++++++------------------------------ R/zzz.R | 25 ++++++++++++++-- man/AMR.Rd | 4 +-- man/as.mo.Rd | 2 +- 5 files changed, 61 insertions(+), 53 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index d3d844f9..637b9da1 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR Version: 0.4.0.9007 -Date: 2018-10-23 +Date: 2018-10-29 Title: Antimicrobial Resistance Analysis Authors@R: c( person( diff --git a/R/mo.R b/R/mo.R index 492b5dd8..e65bc7dd 100644 --- a/R/mo.R +++ b/R/mo.R @@ -44,7 +44,7 @@ #' | | | ----> subspecies, a 3-4 letter acronym #' | | ----> species, a 3-4 letter acronym #' | ----> genus, a 5-7 letter acronym, mostly without vowels -#' ----> taxonomic kingdom, either Bacteria (B), Fungi (F) or Protozoa (P) +#' ----> taxonomic kingdom, either B (Bacteria), F (Fungi) or P (Protozoa) #' } #' #' Use the \code{\link{mo_property}} functions to get properties based on the returned code, see Examples. @@ -176,10 +176,11 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = # remove empty values (to later fill them in again) x <- x[!is.na(x) & !is.null(x) & !identical(x, "")] - MOs <- NULL # will be set later, if needed - MOs_mostprevalent <- NULL # will be set later, if needed - MOs_allothers <- NULL # will be set later, if needed - MOs_old <- NULL # will be set later, if needed + # These data.tables are available because of .onAttach: + # MOs + # MOs_mostprevalent + # MOs_allothers + # MOs_old # defined df to check for if (!is.null(reference_df)) { @@ -232,10 +233,6 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = ) } else { - MOs <- as.data.table(AMR::microorganisms) - setkey(MOs, prevalence, tsn) - MOs_mostprevalent <- MOs[prevalence != 9999,] - x_backup <- trimws(x, which = "both") x_species <- paste(x_backup, "species") # translate to English for supported languages of mo_property @@ -421,17 +418,20 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = next } - # try splitting of characters and then find ID ---- - # like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus - x_split <- x - x_length <- nchar(x_trimmed[i]) - x_split[i] <- paste0(x_trimmed[i] %>% substr(1, x_length / 2) %>% trimws(), - '.* ', - x_trimmed[i] %>% substr((x_length / 2) + 1, x_length) %>% trimws()) - found <- MOs_mostprevalent[fullname %like% paste0('^', x_split[i]), ..property][[1]] - if (length(found) > 0) { - x[i] <- found[1L] - next + # try splitting of characters in the middle and then find ID ---- + # only when text length is 6 or lower + # like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus, staaur = S. aureus + if (nchar(x_trimmed[i]) <= 6) { + x_split <- x + x_length <- nchar(x_trimmed[i]) + x_split[i] <- paste0(x_trimmed[i] %>% substr(1, x_length / 2) %>% trimws(), + '.* ', + x_trimmed[i] %>% substr((x_length / 2) + 1, x_length) %>% trimws()) + found <- MOs_mostprevalent[fullname %like% paste0('^', x_split[i]), ..property][[1]] + if (length(found) > 0) { + x[i] <- found[1L] + next + } } # try any match with text before and after original search string ---- @@ -450,10 +450,6 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = # } # THEN TRY ALL OTHERS ---- - if (is.null(MOs_allothers)) { - MOs_allothers <- MOs[prevalence == 9999,] - } - found <- MOs_allothers[tolower(fullname) == tolower(x_backup[i]), ..property][[1]] # most probable: is exact match in fullname if (length(found) > 0) { @@ -508,17 +504,20 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = next } - # try splitting of characters and then find ID ---- - # like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus - x_split <- x - x_length <- nchar(x_trimmed[i]) - x_split[i] <- paste0(x_trimmed[i] %>% substr(1, x_length / 2) %>% trimws(), - '.* ', - x_trimmed[i] %>% substr((x_length / 2) + 1, x_length) %>% trimws()) - found <- MOs_allothers[fullname %like% paste0('^', x_split[i]), ..property][[1]] - if (length(found) > 0) { - x[i] <- found[1L] - next + # try splitting of characters in the middle and then find ID ---- + # only when text length is 6 or lower + # like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus, staaur = S. aureus + if (nchar(x_trimmed[i]) <= 6) { + x_split <- x + x_length <- nchar(x_trimmed[i]) + x_split[i] <- paste0(x_trimmed[i] %>% substr(1, x_length / 2) %>% trimws(), + '.* ', + x_trimmed[i] %>% substr((x_length / 2) + 1, x_length) %>% trimws()) + found <- MOs_allothers[fullname %like% paste0('^', x_split[i]), ..property][[1]] + if (length(found) > 0) { + x[i] <- found[1L] + next + } } # # try any match with text before and after original search string ---- @@ -539,10 +538,6 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = # MISCELLANEOUS ---- # look for old taxonomic names ---- - if (is.null(MOs_old)) { - MOs_old <- as.data.table(AMR::microorganisms.old) - setkey(MOs_old, name, tsn_new) - } found <- MOs_old[tolower(name) == tolower(x_backup[i]) | tsn == x_trimmed[i] | name %like% x_withspaces[i],] @@ -604,10 +599,6 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = if (Becker == TRUE | Becker == "all") { # See Source. It's this figure: # https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4187637/figure/F3/ - if (is.null(MOs)) { - MOs <- as.data.table(AMR::microorganisms) - setkey(MOs, prevalence, tsn) - } MOs_staph <- MOs[genus == "Staphylococcus"] setkey(MOs_staph, species) CoNS <- MOs_staph[species %in% c("arlettae", "auricularis", "capitis", @@ -635,10 +626,6 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = # Lancefield ---- if (Lancefield == TRUE | Lancefield == "all") { - if (is.null(MOs)) { - MOs <- as.data.table(AMR::microorganisms) - setkey(MOs, prevalence, tsn) - } # group A - S. pyogenes x[x == MOs[mo == 'B_STRPTC_PYO', ..property][[1]][1L]] <- MOs[mo == 'B_STRPTC_GRA', ..property][[1]][1L] # group B - S. agalactiae diff --git a/R/zzz.R b/R/zzz.R index 6c6ccac4..5e2e79f9 100755 --- a/R/zzz.R +++ b/R/zzz.R @@ -18,7 +18,7 @@ #' The \code{AMR} Package #' -#' Welcome to the \code{AMR} package. This page gives some additional contact information abount the authors. +#' Welcome to the \code{AMR} package. This page gives some additional contact information about the authors. #' @details #' This package was intended to simplify the analysis and prediction of Antimicrobial Resistance (AMR) and work with antibiotic properties by using evidence-based methods. #' @@ -39,7 +39,7 @@ #' 9700 RB Groningen #' #' If you have found a bug, please file a new issue at: \cr -#' \url{https://github.com/msberends/AMR/issues} +#' \url{https://gitlab.com/msberends/AMR/issues} #' @name AMR #' @rdname AMR NULL @@ -47,3 +47,24 @@ NULL .onLoad <- function(libname, pkgname) { backports::import(pkgname) } + +.onAttach <- function(libname, pkgname) { + # save data.tables to improve speed of as.mo: + MOs <- data.table::as.data.table(AMR::microorganisms) + data.table::setkey(MOs, prevalence, tsn) + + base::assign(x = "MOs", + value = MOs, + envir = base::as.environment("package:AMR")) + base::assign(x = "MOs_mostprevalent", + value = MOs[prevalence != 9999,], + envir = base::as.environment("package:AMR")) + base::assign(x = "MOs_allothers", + value = MOs[prevalence == 9999,], + envir = base::as.environment("package:AMR")) + + base::assign(x = "MOs_old", + value = data.table::as.data.table(AMR::microorganisms.old), + envir = base::as.environment("package:AMR")) + +} diff --git a/man/AMR.Rd b/man/AMR.Rd index 58e38208..920df520 100644 --- a/man/AMR.Rd +++ b/man/AMR.Rd @@ -4,7 +4,7 @@ \alias{AMR} \title{The \code{AMR} Package} \description{ -Welcome to the \code{AMR} package. This page gives some additional contact information abount the authors. +Welcome to the \code{AMR} package. This page gives some additional contact information about the authors. } \details{ This package was intended to simplify the analysis and prediction of Antimicrobial Resistance (AMR) and work with antibiotic properties by using evidence-based methods. @@ -31,6 +31,6 @@ Post Office Box 30001 \cr 9700 RB Groningen If you have found a bug, please file a new issue at: \cr -\url{https://github.com/msberends/AMR/issues} +\url{https://gitlab.com/msberends/AMR/issues} } diff --git a/man/as.mo.Rd b/man/as.mo.Rd index 1d787930..39f011c7 100644 --- a/man/as.mo.Rd +++ b/man/as.mo.Rd @@ -49,7 +49,7 @@ A microbial ID from this package (class: \code{mo}) typically looks like these e | | | ----> subspecies, a 3-4 letter acronym | | ----> species, a 3-4 letter acronym | ----> genus, a 5-7 letter acronym, mostly without vowels - ----> taxonomic kingdom, either Bacteria (B), Fungi (F) or Protozoa (P) + ----> taxonomic kingdom, either B (Bacteria), F (Fungi) or P (Protozoa) } Use the \code{\link{mo_property}} functions to get properties based on the returned code, see Examples.