1
0
mirror of https://github.com/msberends/AMR.git synced 2024-12-26 07:26:13 +01:00

as.mo speedup: assigned to namespace

This commit is contained in:
dr. M.S. (Matthijs) Berends 2018-10-29 17:26:17 +01:00
parent b4e71cdc4f
commit 1bf8dc2983
5 changed files with 61 additions and 53 deletions

View File

@ -1,6 +1,6 @@
Package: AMR Package: AMR
Version: 0.4.0.9007 Version: 0.4.0.9007
Date: 2018-10-23 Date: 2018-10-29
Title: Antimicrobial Resistance Analysis Title: Antimicrobial Resistance Analysis
Authors@R: c( Authors@R: c(
person( person(

81
R/mo.R
View File

@ -44,7 +44,7 @@
#' | | | ----> subspecies, a 3-4 letter acronym #' | | | ----> subspecies, a 3-4 letter acronym
#' | | ----> species, a 3-4 letter acronym #' | | ----> species, a 3-4 letter acronym
#' | ----> genus, a 5-7 letter acronym, mostly without vowels #' | ----> genus, a 5-7 letter acronym, mostly without vowels
#' ----> taxonomic kingdom, either Bacteria (B), Fungi (F) or Protozoa (P) #' ----> taxonomic kingdom, either B (Bacteria), F (Fungi) or P (Protozoa)
#' } #' }
#' #'
#' Use the \code{\link{mo_property}} functions to get properties based on the returned code, see Examples. #' Use the \code{\link{mo_property}} functions to get properties based on the returned code, see Examples.
@ -176,10 +176,11 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
# remove empty values (to later fill them in again) # remove empty values (to later fill them in again)
x <- x[!is.na(x) & !is.null(x) & !identical(x, "")] x <- x[!is.na(x) & !is.null(x) & !identical(x, "")]
MOs <- NULL # will be set later, if needed # These data.tables are available because of .onAttach:
MOs_mostprevalent <- NULL # will be set later, if needed # MOs
MOs_allothers <- NULL # will be set later, if needed # MOs_mostprevalent
MOs_old <- NULL # will be set later, if needed # MOs_allothers
# MOs_old
# defined df to check for # defined df to check for
if (!is.null(reference_df)) { if (!is.null(reference_df)) {
@ -232,10 +233,6 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
) )
} else { } else {
MOs <- as.data.table(AMR::microorganisms)
setkey(MOs, prevalence, tsn)
MOs_mostprevalent <- MOs[prevalence != 9999,]
x_backup <- trimws(x, which = "both") x_backup <- trimws(x, which = "both")
x_species <- paste(x_backup, "species") x_species <- paste(x_backup, "species")
# translate to English for supported languages of mo_property # translate to English for supported languages of mo_property
@ -421,17 +418,20 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
next next
} }
# try splitting of characters and then find ID ---- # try splitting of characters in the middle and then find ID ----
# like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus # only when text length is 6 or lower
x_split <- x # like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus, staaur = S. aureus
x_length <- nchar(x_trimmed[i]) if (nchar(x_trimmed[i]) <= 6) {
x_split[i] <- paste0(x_trimmed[i] %>% substr(1, x_length / 2) %>% trimws(), x_split <- x
'.* ', x_length <- nchar(x_trimmed[i])
x_trimmed[i] %>% substr((x_length / 2) + 1, x_length) %>% trimws()) x_split[i] <- paste0(x_trimmed[i] %>% substr(1, x_length / 2) %>% trimws(),
found <- MOs_mostprevalent[fullname %like% paste0('^', x_split[i]), ..property][[1]] '.* ',
if (length(found) > 0) { x_trimmed[i] %>% substr((x_length / 2) + 1, x_length) %>% trimws())
x[i] <- found[1L] found <- MOs_mostprevalent[fullname %like% paste0('^', x_split[i]), ..property][[1]]
next if (length(found) > 0) {
x[i] <- found[1L]
next
}
} }
# try any match with text before and after original search string ---- # try any match with text before and after original search string ----
@ -450,10 +450,6 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
# } # }
# THEN TRY ALL OTHERS ---- # THEN TRY ALL OTHERS ----
if (is.null(MOs_allothers)) {
MOs_allothers <- MOs[prevalence == 9999,]
}
found <- MOs_allothers[tolower(fullname) == tolower(x_backup[i]), ..property][[1]] found <- MOs_allothers[tolower(fullname) == tolower(x_backup[i]), ..property][[1]]
# most probable: is exact match in fullname # most probable: is exact match in fullname
if (length(found) > 0) { if (length(found) > 0) {
@ -508,17 +504,20 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
next next
} }
# try splitting of characters and then find ID ---- # try splitting of characters in the middle and then find ID ----
# like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus # only when text length is 6 or lower
x_split <- x # like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus, staaur = S. aureus
x_length <- nchar(x_trimmed[i]) if (nchar(x_trimmed[i]) <= 6) {
x_split[i] <- paste0(x_trimmed[i] %>% substr(1, x_length / 2) %>% trimws(), x_split <- x
'.* ', x_length <- nchar(x_trimmed[i])
x_trimmed[i] %>% substr((x_length / 2) + 1, x_length) %>% trimws()) x_split[i] <- paste0(x_trimmed[i] %>% substr(1, x_length / 2) %>% trimws(),
found <- MOs_allothers[fullname %like% paste0('^', x_split[i]), ..property][[1]] '.* ',
if (length(found) > 0) { x_trimmed[i] %>% substr((x_length / 2) + 1, x_length) %>% trimws())
x[i] <- found[1L] found <- MOs_allothers[fullname %like% paste0('^', x_split[i]), ..property][[1]]
next if (length(found) > 0) {
x[i] <- found[1L]
next
}
} }
# # try any match with text before and after original search string ---- # # try any match with text before and after original search string ----
@ -539,10 +538,6 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
# MISCELLANEOUS ---- # MISCELLANEOUS ----
# look for old taxonomic names ---- # look for old taxonomic names ----
if (is.null(MOs_old)) {
MOs_old <- as.data.table(AMR::microorganisms.old)
setkey(MOs_old, name, tsn_new)
}
found <- MOs_old[tolower(name) == tolower(x_backup[i]) found <- MOs_old[tolower(name) == tolower(x_backup[i])
| tsn == x_trimmed[i] | tsn == x_trimmed[i]
| name %like% x_withspaces[i],] | name %like% x_withspaces[i],]
@ -604,10 +599,6 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
if (Becker == TRUE | Becker == "all") { if (Becker == TRUE | Becker == "all") {
# See Source. It's this figure: # See Source. It's this figure:
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4187637/figure/F3/ # https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4187637/figure/F3/
if (is.null(MOs)) {
MOs <- as.data.table(AMR::microorganisms)
setkey(MOs, prevalence, tsn)
}
MOs_staph <- MOs[genus == "Staphylococcus"] MOs_staph <- MOs[genus == "Staphylococcus"]
setkey(MOs_staph, species) setkey(MOs_staph, species)
CoNS <- MOs_staph[species %in% c("arlettae", "auricularis", "capitis", CoNS <- MOs_staph[species %in% c("arlettae", "auricularis", "capitis",
@ -635,10 +626,6 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
# Lancefield ---- # Lancefield ----
if (Lancefield == TRUE | Lancefield == "all") { if (Lancefield == TRUE | Lancefield == "all") {
if (is.null(MOs)) {
MOs <- as.data.table(AMR::microorganisms)
setkey(MOs, prevalence, tsn)
}
# group A - S. pyogenes # group A - S. pyogenes
x[x == MOs[mo == 'B_STRPTC_PYO', ..property][[1]][1L]] <- MOs[mo == 'B_STRPTC_GRA', ..property][[1]][1L] x[x == MOs[mo == 'B_STRPTC_PYO', ..property][[1]][1L]] <- MOs[mo == 'B_STRPTC_GRA', ..property][[1]][1L]
# group B - S. agalactiae # group B - S. agalactiae

25
R/zzz.R
View File

@ -18,7 +18,7 @@
#' The \code{AMR} Package #' The \code{AMR} Package
#' #'
#' Welcome to the \code{AMR} package. This page gives some additional contact information abount the authors. #' Welcome to the \code{AMR} package. This page gives some additional contact information about the authors.
#' @details #' @details
#' This package was intended to simplify the analysis and prediction of Antimicrobial Resistance (AMR) and work with antibiotic properties by using evidence-based methods. #' This package was intended to simplify the analysis and prediction of Antimicrobial Resistance (AMR) and work with antibiotic properties by using evidence-based methods.
#' #'
@ -39,7 +39,7 @@
#' 9700 RB Groningen #' 9700 RB Groningen
#' #'
#' If you have found a bug, please file a new issue at: \cr #' If you have found a bug, please file a new issue at: \cr
#' \url{https://github.com/msberends/AMR/issues} #' \url{https://gitlab.com/msberends/AMR/issues}
#' @name AMR #' @name AMR
#' @rdname AMR #' @rdname AMR
NULL NULL
@ -47,3 +47,24 @@ NULL
.onLoad <- function(libname, pkgname) { .onLoad <- function(libname, pkgname) {
backports::import(pkgname) backports::import(pkgname)
} }
.onAttach <- function(libname, pkgname) {
# save data.tables to improve speed of as.mo:
MOs <- data.table::as.data.table(AMR::microorganisms)
data.table::setkey(MOs, prevalence, tsn)
base::assign(x = "MOs",
value = MOs,
envir = base::as.environment("package:AMR"))
base::assign(x = "MOs_mostprevalent",
value = MOs[prevalence != 9999,],
envir = base::as.environment("package:AMR"))
base::assign(x = "MOs_allothers",
value = MOs[prevalence == 9999,],
envir = base::as.environment("package:AMR"))
base::assign(x = "MOs_old",
value = data.table::as.data.table(AMR::microorganisms.old),
envir = base::as.environment("package:AMR"))
}

View File

@ -4,7 +4,7 @@
\alias{AMR} \alias{AMR}
\title{The \code{AMR} Package} \title{The \code{AMR} Package}
\description{ \description{
Welcome to the \code{AMR} package. This page gives some additional contact information abount the authors. Welcome to the \code{AMR} package. This page gives some additional contact information about the authors.
} }
\details{ \details{
This package was intended to simplify the analysis and prediction of Antimicrobial Resistance (AMR) and work with antibiotic properties by using evidence-based methods. This package was intended to simplify the analysis and prediction of Antimicrobial Resistance (AMR) and work with antibiotic properties by using evidence-based methods.
@ -31,6 +31,6 @@ Post Office Box 30001 \cr
9700 RB Groningen 9700 RB Groningen
If you have found a bug, please file a new issue at: \cr If you have found a bug, please file a new issue at: \cr
\url{https://github.com/msberends/AMR/issues} \url{https://gitlab.com/msberends/AMR/issues}
} }

View File

@ -49,7 +49,7 @@ A microbial ID from this package (class: \code{mo}) typically looks like these e
| | | ----> subspecies, a 3-4 letter acronym | | | ----> subspecies, a 3-4 letter acronym
| | ----> species, a 3-4 letter acronym | | ----> species, a 3-4 letter acronym
| ----> genus, a 5-7 letter acronym, mostly without vowels | ----> genus, a 5-7 letter acronym, mostly without vowels
----> taxonomic kingdom, either Bacteria (B), Fungi (F) or Protozoa (P) ----> taxonomic kingdom, either B (Bacteria), F (Fungi) or P (Protozoa)
} }
Use the \code{\link{mo_property}} functions to get properties based on the returned code, see Examples. Use the \code{\link{mo_property}} functions to get properties based on the returned code, see Examples.