mirror of
https://github.com/msberends/AMR.git
synced 2025-07-08 20:41:58 +02:00
Catalogue of Life, replaces ITIS
This commit is contained in:
60
R/data.R
60
R/data.R
@ -130,14 +130,14 @@
|
||||
#
|
||||
"antibiotics"
|
||||
|
||||
#' Data set with ~20,000 microorganisms
|
||||
#' Data set with ~60,000 microorganisms
|
||||
#'
|
||||
#' A data set containing the complete microbial taxonomy of the kingdoms Bacteria, Fungi and Protozoa from ITIS. MO codes can be looked up using \code{\link{as.mo}}.
|
||||
#' A data set containing the microbial taxonomy of six kingdoms from the Catalogue of Life. MO codes can be looked up using \code{\link{as.mo}}.
|
||||
#' @inheritSection ITIS ITIS
|
||||
#' @format A \code{\link{data.frame}} with 19,456 observations and 15 variables:
|
||||
#' @format A \code{\link{data.frame}} with 56,659 observations and 15 variables:
|
||||
#' \describe{
|
||||
#' \item{\code{mo}}{ID of microorganism}
|
||||
#' \item{\code{tsn}}{Taxonomic Serial Number (TSN), as defined by ITIS}
|
||||
#' \item{\code{col_id}}{Catalogue of Life ID}
|
||||
#' \item{\code{genus}}{Taxonomic genus of the microorganism as found in ITIS, see Source}
|
||||
#' \item{\code{species}}{Taxonomic species of the microorganism as found in ITIS, see Source}
|
||||
#' \item{\code{subspecies}}{Taxonomic subspecies of the microorganism as found in ITIS, see Source}
|
||||
@ -155,15 +155,10 @@
|
||||
#' @source Integrated Taxonomic Information System (ITIS) public online database, \url{https://www.itis.gov}.
|
||||
#' @details Manually added were:
|
||||
#' \itemize{
|
||||
#' \item{605 species of Aspergillus (as Aspergillus misses from ITIS, list from https://en.wikipedia.org/wiki/List_of_Aspergillus_species on 2019-02-05)}
|
||||
#' \item{23 species of Trichophyton (as Trichophyton misses from ITIS, list from https://en.wikipedia.org/wiki/Trichophyton on 2019-02-05)}
|
||||
#' \item{9 species of Streptococcus (beta haemolytic groups A, B, C, D, F, G, H, K and unspecified)}
|
||||
#' \item{2 species of Straphylococcus (coagulase-negative [CoNS] and coagulase-positive [CoPS])}
|
||||
#' \item{1 species of Candida (C. glabrata)}
|
||||
#' \item{9 species of \emph{Streptococcus} (beta haemolytic groups A, B, C, D, F, G, H, K and unspecified)}
|
||||
#' \item{2 species of \emph{Staphylococcus} (coagulase-negative [CoNS] and coagulase-positive [CoPS])}
|
||||
#' \item{2 other undefined (unknown Gram negatives and unknown Gram positives)}
|
||||
#' }
|
||||
#'
|
||||
#' These manual entries have no Taxonomic Serial Number (TSN), so can be looked up with \code{filter(microorganisms, is.na(tsn)}.
|
||||
#' @inheritSection AMR Read more on our website!
|
||||
#' @seealso \code{\link{as.mo}} \code{\link{mo_property}} \code{\link{microorganisms.codes}}
|
||||
"microorganisms"
|
||||
@ -172,12 +167,12 @@
|
||||
#'
|
||||
#' A data set containing old (previously valid or accepted) taxonomic names according to ITIS. This data set is used internally by \code{\link{as.mo}}.
|
||||
#' @inheritSection as.mo ITIS
|
||||
#' @format A \code{\link{data.frame}} with 2,383 observations and 4 variables:
|
||||
#' @format A \code{\link{data.frame}} with 14,506 observations and 4 variables:
|
||||
#' \describe{
|
||||
#' \item{\code{tsn}}{Old Taxonomic Serial Number (TSN), as defined by ITIS}
|
||||
#' \item{\code{name}}{Old taxonomic name of the microorganism as found in ITIS, see Source}
|
||||
#' \item{\code{tsn_new}}{New Taxonomic Serial Number (TSN), as defined by ITIS}
|
||||
#' \item{\code{ref}}{Author(s) and year of concerning publication as found in ITIS, see Source}
|
||||
#' \item{\code{col_id}}{Catalogue of Life ID}
|
||||
#' \item{\code{tsn_new}}{New Catalogue of Life ID}
|
||||
#' \item{\code{fullname}}{Old taxonomic name of the microorganism as found in the CoL, see Source}
|
||||
#' \item{\code{ref}}{Author(s) and year of concerning publication as found in the CoL, see Source}
|
||||
#' }
|
||||
#' @source [3] Integrated Taxonomic Information System (ITIS) on-line database, \url{https://www.itis.gov}.
|
||||
#' @inheritSection AMR Read more on our website!
|
||||
@ -250,36 +245,3 @@
|
||||
#' }
|
||||
#' @inheritSection AMR Read more on our website!
|
||||
"WHONET"
|
||||
|
||||
#' Supplementary Data
|
||||
#'
|
||||
#' These \code{\link{data.table}s} are transformed from the \code{\link{microorganisms}} and \code{\link{microorganisms}} data sets to improve speed of \code{\link{as.mo}}. They are meant for internal use only, and are only mentioned here for reference.
|
||||
#' @rdname supplementary_data
|
||||
#' @name supplementary_data
|
||||
#' @inheritSection AMR Read more on our website!
|
||||
# # Renew data:
|
||||
# # sorted on (1) bacteria, (2) fungi, (3) protozoa and then human pathogenic prevalence and then TSN:
|
||||
# microorganismsDT <- data.table::as.data.table(AMR::microorganisms)
|
||||
# data.table::setkey(microorganismsDT, kingdom, prevalence, fullname)
|
||||
# microorganisms.prevDT <- microorganismsDT[prevalence != 9999,]
|
||||
# microorganisms.unprevDT <- microorganismsDT[prevalence == 9999,]
|
||||
# microorganisms.oldDT <- data.table::as.data.table(AMR::microorganisms.old)
|
||||
# data.table::setkey(microorganisms.oldDT, tsn, name)
|
||||
# usethis::use_data(microorganismsDT, overwrite = TRUE)
|
||||
# usethis::use_data(microorganisms.prevDT, overwrite = TRUE)
|
||||
# usethis::use_data(microorganisms.unprevDT, overwrite = TRUE)
|
||||
# usethis::use_data(microorganisms.oldDT, overwrite = TRUE)
|
||||
# rm(microorganismsDT)
|
||||
# rm(microorganisms.prevDT)
|
||||
# rm(microorganisms.unprevDT)
|
||||
# rm(microorganisms.oldDT)
|
||||
"microorganismsDT"
|
||||
|
||||
#' @rdname supplementary_data
|
||||
"microorganisms.prevDT"
|
||||
|
||||
#' @rdname supplementary_data
|
||||
"microorganisms.unprevDT"
|
||||
|
||||
#' @rdname supplementary_data
|
||||
"microorganisms.oldDT"
|
||||
|
@ -548,6 +548,7 @@ eucast_rules <- function(tbl,
|
||||
tbl <- tbl %>%
|
||||
mutate_at(vars(col_mo), as.mo) %>%
|
||||
left_join_microorganisms(by = col_mo, suffix = c("_oldcols", "")) %>%
|
||||
mutate(gramstain = mo_gramstain(pull(., col_mo))) %>%
|
||||
as.data.frame(stringsAsFactors = FALSE)
|
||||
|
||||
if (info == TRUE) {
|
||||
|
@ -70,7 +70,7 @@
|
||||
#' @keywords isolate isolates first
|
||||
#' @seealso \code{\link{key_antibiotics}}
|
||||
#' @export
|
||||
#' @importFrom dplyr arrange_at lag between row_number filter mutate arrange
|
||||
#' @importFrom dplyr arrange_at lag between row_number filter mutate arrange pull
|
||||
#' @importFrom crayon blue bold silver
|
||||
#' @return Logical vector
|
||||
#' @source Methodology of this function is based on: \strong{M39 Analysis and Presentation of Cumulative Antimicrobial Susceptibility Test Data, 4th Edition}, 2014, \emph{Clinical and Laboratory Standards Institute (CLSI)}. \url{https://clsi.org/standards/products/microbiology/documents/m39/}.
|
||||
|
@ -43,7 +43,7 @@
|
||||
#' @inheritSection first_isolate Key antibiotics
|
||||
#' @rdname key_antibiotics
|
||||
#' @export
|
||||
#' @importFrom dplyr %>% mutate if_else
|
||||
#' @importFrom dplyr %>% mutate if_else pull
|
||||
#' @importFrom crayon blue bold
|
||||
#' @seealso \code{\link{first_isolate}}
|
||||
#' @inheritSection AMR Read more on our website!
|
||||
@ -149,7 +149,8 @@ key_antibiotics <- function(tbl,
|
||||
tbl <- tbl %>%
|
||||
mutate_at(vars(col_mo), as.mo) %>%
|
||||
left_join_microorganisms(by = col_mo) %>%
|
||||
mutate(key_ab = NA_character_)
|
||||
mutate(key_ab = NA_character_,
|
||||
gramstain = mo_gramstain(pull(., col_mo)))
|
||||
|
||||
# Gram +
|
||||
tbl <- tbl %>% mutate(key_ab =
|
||||
|
167
R/mo.R
167
R/mo.R
@ -47,7 +47,8 @@
|
||||
#' | | | ----> subspecies, a 3-4 letter acronym
|
||||
#' | | ----> species, a 3-4 letter acronym
|
||||
#' | ----> genus, a 5-7 letter acronym, mostly without vowels
|
||||
#' ----> taxonomic kingdom, either B (Bacteria), F (Fungi) or P (Protozoa)
|
||||
#' ----> taxonomic kingdom: A (Archaea), B (Bacteria), C (Chromista),
|
||||
#' F (Fungi), P (Protozoa) or V (Viruses)
|
||||
#' }
|
||||
#'
|
||||
#' Use the \code{\link{mo_property}} functions to get properties based on the returned code, see Examples.
|
||||
@ -81,7 +82,7 @@
|
||||
#'
|
||||
#' Examples:
|
||||
#' \itemize{
|
||||
#' \item{\code{"Streptococcus group B (known as S. agalactiae)"}. The text between brackets will be removed and a warning will be thrown that the result \emph{Streptococcus group B} (\code{B_STRPTC_GRB}) needs review.}
|
||||
#' \item{\code{"Streptococcus group B (known as S. agalactiae)"}. The text between brackets will be removed and a warning will be thrown that the result \emph{Streptococcus group B} (\code{B_STRPT_GRB}) needs review.}
|
||||
#' \item{\code{"S. aureus - please mind: MRSA"}. The last word will be stripped, after which the function will try to find a match. If it does not, the second last word will be stripped, etc. Again, a warning will be thrown that the result \emph{Staphylococcus aureus} (\code{B_STPHY_AUR}) needs review.}
|
||||
#' \item{\code{"D. spartina"}. This is the abbreviation of an old taxonomic name: \emph{Didymosphaeria spartinae} (the last "e" was missing from the input). This fungus was renamed to \emph{Leptosphaeria obiones}, so a warning will be thrown that this result (\code{F_LPTSP_OBI}) needs review.}
|
||||
#' \item{\code{"Fluoroquinolone-resistant Neisseria gonorrhoeae"}. The first word will be stripped, after which the function will try to find a match. A warning will be thrown that the result \emph{Neisseria gonorrhoeae} (\code{B_NESSR_GON}) needs review.}
|
||||
@ -118,8 +119,6 @@
|
||||
#' as.mo("MRSA") # Methicillin Resistant S. aureus
|
||||
#' as.mo("VISA") # Vancomycin Intermediate S. aureus
|
||||
#' as.mo("VRSA") # Vancomycin Resistant S. aureus
|
||||
#' as.mo(369) # Search on TSN (Taxonomic Serial Number), a unique identifier
|
||||
#' # for the Integrated Taxonomic Information System (ITIS)
|
||||
#'
|
||||
#' as.mo("Streptococcus group A")
|
||||
#' as.mo("GAS") # Group A Streptococci
|
||||
@ -128,8 +127,8 @@
|
||||
#' as.mo("S. epidermidis") # will remain species: B_STPHY_EPI
|
||||
#' as.mo("S. epidermidis", Becker = TRUE) # will not remain species: B_STPHY_CNS
|
||||
#'
|
||||
#' as.mo("S. pyogenes") # will remain species: B_STRPTC_PYO
|
||||
#' as.mo("S. pyogenes", Lancefield = TRUE) # will not remain species: B_STRPTC_GRA
|
||||
#' as.mo("S. pyogenes") # will remain species: B_STRPT_PYO
|
||||
#' as.mo("S. pyogenes", Lancefield = TRUE) # will not remain species: B_STRPT_GRA
|
||||
#'
|
||||
#' # Use mo_* functions to get a specific property based on `mo`
|
||||
#' Ecoli <- as.mo("E. coli") # returns `B_ESCHR_COL`
|
||||
@ -178,11 +177,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
|
||||
|
||||
if (!"AMR" %in% base::.packages()) {
|
||||
library("AMR")
|
||||
# These data.tables are available as data sets when the AMR package is loaded:
|
||||
# microorganismsDT # this one is sorted by kingdom (B<F<P), prevalence, TSN
|
||||
# microorganisms.prevDT # same as microorganismsDT, but with prevalence != 9999
|
||||
# microorganisms.unprevDT # same as microorganismsDT, but with prevalence == 9999
|
||||
# microorganisms.oldDT # old taxonomic names, sorted by name (genus+species), TSN
|
||||
# check onLoad() in R/zzz.R: data tables are created there.
|
||||
}
|
||||
|
||||
if (clear_options == TRUE) {
|
||||
@ -220,6 +215,17 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
|
||||
# remove empty values (to later fill them in again with NAs)
|
||||
x <- x[!is.na(x) & !is.null(x) & !identical(x, "")]
|
||||
|
||||
|
||||
# conversion v0.5.0 to v0.6.0, remove for v0.7.0
|
||||
x <- gsub("B_STRPTC", "B_STRPT", x)
|
||||
x <- gsub("B_STRPT_EQUI", "B_STRPT_EQU", x)
|
||||
x <- gsub("B_PDMNS", "B_PSDMN", x)
|
||||
x <- gsub("B_CTRDM", "B_CLSTR", x)
|
||||
x <- gsub("F_CANDD_GLB", "F_CANDD_GLA", x)
|
||||
x <- gsub("F_CANDD_LUS", "F_CANDD", x)
|
||||
x <- gsub("B_FCTRM", "B_FSBCT", x)
|
||||
|
||||
|
||||
# defined df to check for
|
||||
if (!is.null(reference_df)) {
|
||||
if (!is.data.frame(reference_df) | NCOL(reference_df) < 2) {
|
||||
@ -239,8 +245,6 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
|
||||
# all empty
|
||||
if (property == "mo") {
|
||||
return(structure(rep(NA_character_, length(x_input)), class = "mo"))
|
||||
} else if (property == "tsn") {
|
||||
return(rep(NA_integer_, length(x_input)))
|
||||
} else {
|
||||
return(rep(NA_character_, length(x_input)))
|
||||
}
|
||||
@ -283,7 +287,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
|
||||
# replace hemolytic by haemolytic
|
||||
x <- gsub("ha?emoly", "haemoly", x)
|
||||
# place minus back in streptococci
|
||||
x <- gsub("(alpha|beta|gamma) haemoly", "\\1-haemolytic", x)
|
||||
x <- gsub("(alpha|beta|gamma) ha?emoly", "\\1-haemoly", x)
|
||||
# remove genus as first word
|
||||
x <- gsub("^Genus ", "", x)
|
||||
|
||||
@ -372,7 +376,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
|
||||
}
|
||||
if (toupper(x_trimmed[i]) == 'MRPA') {
|
||||
# multi resistant P. aeruginosa
|
||||
x[i] <- microorganismsDT[mo == 'B_PDMNS_AER', ..property][[1]][1L]
|
||||
x[i] <- microorganismsDT[mo == 'B_PSDMN_AER', ..property][[1]][1L]
|
||||
next
|
||||
}
|
||||
if (toupper(x_trimmed[i]) == 'CRS'
|
||||
@ -383,22 +387,22 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
|
||||
}
|
||||
if (toupper(x_trimmed[i]) %in% c('PISP', 'PRSP', 'VISP', 'VRSP')) {
|
||||
# peni I, peni R, vanco I, vanco R: S. pneumoniae
|
||||
x[i] <- microorganismsDT[mo == 'B_STRPTC_PNE', ..property][[1]][1L]
|
||||
x[i] <- microorganismsDT[mo == 'B_STRPT_PNE', ..property][[1]][1L]
|
||||
next
|
||||
}
|
||||
if (toupper(x_trimmed[i]) %like% '^G[ABCDFGHK]S$') {
|
||||
# Streptococci, like GBS = Group B Streptococci (B_STRPTC_GRB)
|
||||
x[i] <- microorganismsDT[mo == gsub("G([ABCDFGHK])S", "B_STRPTC_GR\\1", x_trimmed[i], ignore.case = TRUE), ..property][[1]][1L]
|
||||
# Streptococci, like GBS = Group B Streptococci (B_STRPT_GRB)
|
||||
x[i] <- microorganismsDT[mo == gsub("G([ABCDFGHK])S", "B_STRPT_GR\\1", x_trimmed[i], ignore.case = TRUE), ..property][[1]][1L]
|
||||
next
|
||||
}
|
||||
if (toupper(x_trimmed[i]) %like% '(streptococc|streptokok).* [ABCDFGHK]$') {
|
||||
# Streptococci in different languages, like "estreptococos grupo B"
|
||||
x[i] <- microorganismsDT[mo == gsub(".*(streptococ|streptokok|estreptococ).* ([ABCDFGHK])$", "B_STRPTC_GR\\2", x_trimmed[i], ignore.case = TRUE), ..property][[1]][1L]
|
||||
x[i] <- microorganismsDT[mo == gsub(".*(streptococ|streptokok|estreptococ).* ([ABCDFGHK])$", "B_STRPT_GR\\2", x_trimmed[i], ignore.case = TRUE), ..property][[1]][1L]
|
||||
next
|
||||
}
|
||||
if (toupper(x_trimmed[i]) %like% 'group [ABCDFGHK] (streptococ|streptokok|estreptococ)') {
|
||||
# Streptococci in different languages, like "Group A Streptococci"
|
||||
x[i] <- microorganismsDT[mo == gsub(".*group ([ABCDFGHK]) (streptococ|streptokok|estreptococ).*", "B_STRPTC_GR\\1", x_trimmed[i], ignore.case = TRUE), ..property][[1]][1L]
|
||||
x[i] <- microorganismsDT[mo == gsub(".*group ([ABCDFGHK]) (streptococ|streptokok|estreptococ).*", "B_STRPT_GR\\1", x_trimmed[i], ignore.case = TRUE), ..property][[1]][1L]
|
||||
next
|
||||
}
|
||||
# CoNS/CoPS in different languages (support for German, Dutch, Spanish, Portuguese) ----
|
||||
@ -490,15 +494,72 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
|
||||
}
|
||||
}
|
||||
|
||||
# TRY FIRST THOUSAND MOST PREVALENT IN HUMAN INFECTIONS ----
|
||||
found <- microorganisms.prevDT[tolower(fullname) %in% tolower(c(x_backup[i], x_trimmed[i])), ..property][[1]]
|
||||
# FIRST TRY SUPERPREVALENT IN HUMAN INFECTIONS ----
|
||||
found <- microorganisms.superprevDT[tolower(fullname) %in% tolower(c(x_backup[i], x_trimmed[i])), ..property][[1]]
|
||||
# most probable: is exact match in fullname
|
||||
if (length(found) > 0) {
|
||||
x[i] <- found[1L]
|
||||
next
|
||||
}
|
||||
found <- microorganisms.prevDT[tsn == x_trimmed[i], ..property][[1]]
|
||||
# is a valid TSN
|
||||
found <- microorganisms.superprevDT[mo == toupper(x_backup[i]), ..property][[1]]
|
||||
# is a valid mo
|
||||
if (length(found) > 0) {
|
||||
x[i] <- found[1L]
|
||||
next
|
||||
}
|
||||
found <- microorganisms.superprevDT[tolower(fullname) == tolower(x_trimmed_without_group[i]), ..property][[1]]
|
||||
if (length(found) > 0) {
|
||||
x[i] <- found[1L]
|
||||
next
|
||||
}
|
||||
|
||||
# try any match keeping spaces ----
|
||||
found <- microorganisms.superprevDT[fullname %like% x_withspaces_start_end[i], ..property][[1]]
|
||||
if (length(found) > 0 & nchar(x_trimmed[i]) >= 6) {
|
||||
x[i] <- found[1L]
|
||||
next
|
||||
}
|
||||
|
||||
# try any match keeping spaces, not ending with $ ----
|
||||
found <- microorganisms.superprevDT[fullname %like% x_withspaces_start_only[i], ..property][[1]]
|
||||
if (length(found) > 0 & nchar(x_trimmed[i]) >= 6) {
|
||||
x[i] <- found[1L]
|
||||
next
|
||||
}
|
||||
|
||||
# try any match diregarding spaces ----
|
||||
found <- microorganisms.superprevDT[fullname %like% x[i], ..property][[1]]
|
||||
if (length(found) > 0 & nchar(x_trimmed[i]) >= 6) {
|
||||
x[i] <- found[1L]
|
||||
next
|
||||
}
|
||||
|
||||
# try splitting of characters in the middle and then find ID ----
|
||||
# only when text length is 6 or lower
|
||||
# like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus, staaur = S. aureus
|
||||
if (nchar(x_trimmed[i]) <= 6) {
|
||||
x_length <- nchar(x_trimmed[i])
|
||||
x[i] <- paste0(x_trimmed[i] %>% substr(1, x_length / 2),
|
||||
'.* ',
|
||||
x_trimmed[i] %>% substr((x_length / 2) + 1, x_length))
|
||||
found <- microorganisms.superprevDT[fullname %like% paste0('^', x[i]), ..property][[1]]
|
||||
if (length(found) > 0) {
|
||||
x[i] <- found[1L]
|
||||
next
|
||||
}
|
||||
}
|
||||
|
||||
# try fullname without start and stop regex, to also find subspecies ----
|
||||
# like "K. pneu rhino" >> "Klebsiella pneumoniae (rhinoscleromatis)" = KLEPNERH
|
||||
found <- microorganisms.superprevDT[fullname %like% x_withspaces_start_only[i], ..property][[1]]
|
||||
if (length(found) > 0) {
|
||||
x[i] <- found[1L]
|
||||
next
|
||||
}
|
||||
|
||||
# TRY PREVALENT IN HUMAN INFECTIONS ----
|
||||
found <- microorganisms.prevDT[tolower(fullname) %in% tolower(c(x_backup[i], x_trimmed[i])), ..property][[1]]
|
||||
# most probable: is exact match in fullname
|
||||
if (length(found) > 0) {
|
||||
x[i] <- found[1L]
|
||||
next
|
||||
@ -559,7 +620,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
|
||||
next
|
||||
}
|
||||
|
||||
# THEN TRY ALL OTHERS ----
|
||||
# THEN UNPREVALENT IN HUMAN INFECTIONS ----
|
||||
found <- microorganisms.unprevDT[tolower(fullname) == tolower(x_backup[i]), ..property][[1]]
|
||||
# most probable: is exact match in fullname
|
||||
if (length(found) > 0) {
|
||||
@ -572,12 +633,6 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
|
||||
x[i] <- found[1L]
|
||||
next
|
||||
}
|
||||
found <- microorganisms.unprevDT[tsn == x_trimmed[i], ..property][[1]]
|
||||
# is a valid TSN
|
||||
if (length(found) > 0) {
|
||||
x[i] <- found[1L]
|
||||
next
|
||||
}
|
||||
found <- microorganisms.unprevDT[mo == toupper(x_backup[i]), ..property][[1]]
|
||||
# is a valid mo
|
||||
if (length(found) > 0) {
|
||||
@ -633,23 +688,23 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
|
||||
# MISCELLANEOUS ----
|
||||
|
||||
# look for old taxonomic names ----
|
||||
found <- microorganisms.oldDT[tolower(name) == tolower(x_backup[i])
|
||||
| tsn == x_trimmed[i]
|
||||
| name %like% x_withspaces_start_end[i],]
|
||||
found <- microorganisms.oldDT[tolower(fullname) == tolower(x_backup[i])
|
||||
| fullname %like% x_withspaces_start_end[i],]
|
||||
if (NROW(found) > 0) {
|
||||
col_id_new <- found[1, col_id_new]
|
||||
# when property is "ref" (which is the case in mo_ref, mo_authors and mo_year), return the old value, so:
|
||||
# mo_ref("Chlamydia psittaci) = "Page, 1968" (with warning)
|
||||
# mo_ref("Chlamydophila psittaci) = "Everett et al., 1999"
|
||||
if (property == "ref") {
|
||||
x[i] <- found[1, ref]
|
||||
} else {
|
||||
x[i] <- microorganismsDT[tsn == found[1, tsn_new], ..property][[1]]
|
||||
x[i] <- microorganismsDT[col_id == found[1, col_id_new], ..property][[1]]
|
||||
}
|
||||
was_renamed(name_old = found[1, name],
|
||||
name_new = microorganismsDT[tsn == found[1, tsn_new], fullname],
|
||||
was_renamed(name_old = found[1, fullname],
|
||||
name_new = microorganismsDT[col_id == found[1, col_id_new], fullname],
|
||||
ref_old = found[1, ref],
|
||||
ref_new = microorganismsDT[tsn == found[1, tsn_new], ref],
|
||||
mo = microorganismsDT[tsn == found[1, tsn_new], mo])
|
||||
ref_new = microorganismsDT[col_id == found[1, col_id_new], ref],
|
||||
mo = microorganismsDT[col_id == found[1, col_id_new], mo])
|
||||
next
|
||||
}
|
||||
|
||||
@ -673,9 +728,9 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
|
||||
}
|
||||
|
||||
# (2) look again for old taxonomic names, now for G. species ----
|
||||
found <- microorganisms.oldDT[name %like% c.x_withspaces_start_end
|
||||
| name %like% d.x_withspaces_start_only
|
||||
| name %like% e.x,]
|
||||
found <- microorganisms.oldDT[fullname %like% c.x_withspaces_start_end
|
||||
| fullname %like% d.x_withspaces_start_only
|
||||
| fullname %like% e.x,]
|
||||
if (NROW(found) > 0 & nchar(b.x_trimmed) >= 6) {
|
||||
if (property == "ref") {
|
||||
# when property is "ref" (which is the case in mo_ref, mo_authors and mo_year), return the old value, so:
|
||||
@ -683,15 +738,15 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
|
||||
# mo_ref("Chlamydophila psittaci) = "Everett et al., 1999"
|
||||
x <- found[1, ref]
|
||||
} else {
|
||||
x <- microorganismsDT[tsn == found[1, tsn_new], ..property][[1]]
|
||||
x <- microorganismsDT[col_id == found[1, col_id_new], ..property][[1]]
|
||||
}
|
||||
was_renamed(name_old = found[1, name],
|
||||
name_new = microorganismsDT[tsn == found[1, tsn_new], fullname],
|
||||
was_renamed(name_old = found[1, fullname],
|
||||
name_new = microorganismsDT[col_id == found[1, col_id_new], fullname],
|
||||
ref_old = found[1, ref],
|
||||
ref_new = microorganismsDT[tsn == found[1, tsn_new], ref],
|
||||
mo = microorganismsDT[tsn == found[1, tsn_new], mo])
|
||||
ref_new = microorganismsDT[col_id == found[1, col_id_new], ref],
|
||||
mo = microorganismsDT[col_id == found[1, col_id_new], mo])
|
||||
uncertainties <<- c(uncertainties,
|
||||
paste0("'", a.x_backup, "' >> ", found[1, name], " (TSN ", found[1, tsn], ")"))
|
||||
paste0("'", a.x_backup, "' >> ", found[1, fullname], " (Catalogue of Life ID ", found[1, col_id], ")"))
|
||||
return(x)
|
||||
}
|
||||
|
||||
@ -835,25 +890,25 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
|
||||
# Lancefield ----
|
||||
if (Lancefield == TRUE | Lancefield == "all") {
|
||||
# group A - S. pyogenes
|
||||
x[x == microorganismsDT[mo == 'B_STRPTC_PYO', ..property][[1]][1L]] <- microorganismsDT[mo == 'B_STRPTC_GRA', ..property][[1]][1L]
|
||||
x[x == microorganismsDT[mo == 'B_STRPT_PYO', ..property][[1]][1L]] <- microorganismsDT[mo == 'B_STRPT_GRA', ..property][[1]][1L]
|
||||
# group B - S. agalactiae
|
||||
x[x == microorganismsDT[mo == 'B_STRPTC_AGA', ..property][[1]][1L]] <- microorganismsDT[mo == 'B_STRPTC_GRB', ..property][[1]][1L]
|
||||
x[x == microorganismsDT[mo == 'B_STRPT_AGA', ..property][[1]][1L]] <- microorganismsDT[mo == 'B_STRPT_GRB', ..property][[1]][1L]
|
||||
# group C
|
||||
S_groupC <- microorganismsDT %>% filter(genus == "Streptococcus",
|
||||
species %in% c("equisimilis", "equi",
|
||||
"zooepidemicus", "dysgalactiae")) %>%
|
||||
pull(property)
|
||||
x[x %in% S_groupC] <- microorganismsDT[mo == 'B_STRPTC_GRC', ..property][[1]][1L]
|
||||
x[x %in% S_groupC] <- microorganismsDT[mo == 'B_STRPT_GRC', ..property][[1]][1L]
|
||||
if (Lancefield == "all") {
|
||||
# all Enterococci
|
||||
x[x %like% "^(Enterococcus|B_ENTRC)"] <- microorganismsDT[mo == 'B_STRPTC_GRD', ..property][[1]][1L]
|
||||
x[x %like% "^(Enterococcus|B_ENTRC)"] <- microorganismsDT[mo == 'B_STRPT_GRD', ..property][[1]][1L]
|
||||
}
|
||||
# group F - S. anginosus
|
||||
x[x == microorganismsDT[mo == 'B_STRPTC_ANG', ..property][[1]][1L]] <- microorganismsDT[mo == 'B_STRPTC_GRF', ..property][[1]][1L]
|
||||
x[x == microorganismsDT[mo == 'B_STRPT_ANG', ..property][[1]][1L]] <- microorganismsDT[mo == 'B_STRPT_GRF', ..property][[1]][1L]
|
||||
# group H - S. sanguinis
|
||||
x[x == microorganismsDT[mo == 'B_STRPTC_SAN', ..property][[1]][1L]] <- microorganismsDT[mo == 'B_STRPTC_GRH', ..property][[1]][1L]
|
||||
x[x == microorganismsDT[mo == 'B_STRPT_SAN', ..property][[1]][1L]] <- microorganismsDT[mo == 'B_STRPT_GRH', ..property][[1]][1L]
|
||||
# group K - S. salivarius
|
||||
x[x == microorganismsDT[mo == 'B_STRPTC_SAL', ..property][[1]][1L]] <- microorganismsDT[mo == 'B_STRPTC_GRK', ..property][[1]][1L]
|
||||
x[x == microorganismsDT[mo == 'B_STRPT_SAL', ..property][[1]][1L]] <- microorganismsDT[mo == 'B_STRPT_GRK', ..property][[1]][1L]
|
||||
}
|
||||
|
||||
|
||||
@ -875,8 +930,6 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
|
||||
|
||||
if (property == "mo") {
|
||||
class(x) <- "mo"
|
||||
} else if (property == "tsn") {
|
||||
x <- as.integer(x)
|
||||
}
|
||||
|
||||
if (length(mo_renamed()) > 0) {
|
||||
|
@ -38,7 +38,7 @@
|
||||
#' @rdname mo_property
|
||||
#' @name mo_property
|
||||
#' @return \itemize{
|
||||
#' \item{An \code{integer} in case of \code{mo_TSN} and \code{mo_year}}
|
||||
#' \item{An \code{integer} in case of \code{mo_year}}
|
||||
#' \item{A \code{list} in case of \code{mo_taxonomy}}
|
||||
#' \item{A \code{character} in all other cases}
|
||||
#' }
|
||||
@ -57,7 +57,6 @@
|
||||
#' mo_genus("E. coli") # "Escherichia"
|
||||
#' mo_species("E. coli") # "coli"
|
||||
#' mo_subspecies("E. coli") # NA
|
||||
#' mo_TSN("E. coli") # 285 (Taxonomic Serial Number)
|
||||
#'
|
||||
#' ## colloquial properties
|
||||
#' mo_fullname("E. coli") # "Escherichia coli"
|
||||
@ -242,17 +241,15 @@ mo_type <- function(x, language = get_locale(), ...) {
|
||||
#' @rdname mo_property
|
||||
#' @export
|
||||
mo_gramstain <- function(x, language = get_locale(), ...) {
|
||||
mo_translate(mo_validate(x = x, property = "gramstain", ...), language = language)
|
||||
}
|
||||
|
||||
#' @rdname mo_property
|
||||
#' @export
|
||||
mo_TSN <- function(x, ...) {
|
||||
res <- mo_validate(x = x, property = "tsn", ...)
|
||||
if (any(is.na(res))) {
|
||||
warning("Some results do not have a TSN, because they are missing from ITIS and were added manually. See ?microorganisms.")
|
||||
}
|
||||
res
|
||||
x.bak <- x
|
||||
x <- mo_phylum(x, ...)
|
||||
x[x %in% c("Actinobacteria",
|
||||
"Chloroflexi",
|
||||
"Firmicutes",
|
||||
"Tenericutes")] <- "Gram positive"
|
||||
x[x != "Gram positive"] <- "Gram negative"
|
||||
x[mo_kingdom(x.bak) != "Bacteria"] <- NA_character_
|
||||
mo_translate(x, language = language)
|
||||
}
|
||||
|
||||
#' @rdname mo_property
|
||||
@ -284,7 +281,6 @@ mo_year <- function(x, ...) {
|
||||
mo_taxonomy <- function(x, ...) {
|
||||
x <- AMR::as.mo(x, ...)
|
||||
base::list(kingdom = mo_kingdom(x),
|
||||
subkingdom = mo_subkingdom(x),
|
||||
phylum = mo_phylum(x),
|
||||
class = mo_class(x),
|
||||
order = mo_order(x),
|
||||
@ -472,11 +468,7 @@ mo_validate <- function(x, property, ...) {
|
||||
|
||||
if (!"AMR" %in% base::.packages()) {
|
||||
library("AMR")
|
||||
# These data.tables are available as data sets when the AMR package is loaded:
|
||||
# microorganismsDT # this one is sorted by kingdom (B<F<P), prevalence, TSN
|
||||
# microorganisms.prevDT # same as microorganismsDT, but with prevalence != 9999
|
||||
# microorganisms.unprevDT # same as microorganismsDT, but with prevalence == 9999
|
||||
# microorganisms.oldDT # old taxonomic names, sorted by name (genus+species), TSN
|
||||
# check onLoad() in R/zzz.R: data tables are created there.
|
||||
}
|
||||
|
||||
if (!all(x %in% microorganismsDT[[property]])
|
||||
@ -486,8 +478,6 @@ mo_validate <- function(x, property, ...) {
|
||||
} else {
|
||||
if (property == "mo") {
|
||||
return(structure(x, class = "mo"))
|
||||
} else if (property == "tsn") {
|
||||
return(as.integer(x))
|
||||
} else {
|
||||
return(x)
|
||||
}
|
||||
|
63
R/zzz.R
63
R/zzz.R
@ -65,6 +65,69 @@
|
||||
#' @rdname AMR
|
||||
NULL
|
||||
|
||||
#' @importFrom dplyr mutate
|
||||
#' @importFrom data.table as.data.table setkey
|
||||
.onLoad <- function(libname, pkgname) {
|
||||
# get new functions not available in older versions of R
|
||||
backports::import(pkgname)
|
||||
|
||||
# register data
|
||||
if (!all(c("microorganismsDT",
|
||||
"microorganisms.prevDT",
|
||||
"microorganisms.unprevDT",
|
||||
"microorganisms.oldDT") %in% ls(envir = asNamespace("AMR")))) {
|
||||
|
||||
# packageStartupMessage("Loading taxonomic database...", appendLF = FALSE)
|
||||
|
||||
microorganismsDT <- AMR::microorganisms %>%
|
||||
mutate(prevalent = ifelse(phylum %in% c("Proteobacteria",
|
||||
"Firmicutes",
|
||||
"Actinobacteria",
|
||||
"Bacteroidetes")
|
||||
| genus %in% c("Candida",
|
||||
"Aspergillus",
|
||||
"Trichophyton",
|
||||
"Giardia",
|
||||
"Dientamoeba",
|
||||
"Entamoeba"),
|
||||
0,
|
||||
1),
|
||||
superprevalent = ifelse(
|
||||
# most important Gram negatives
|
||||
class == "Gammaproteobacteria"
|
||||
# Streptococci and Staphylococci
|
||||
| order %in% c("Lactobacillales",
|
||||
"Bacillales"),
|
||||
0,
|
||||
1)) %>%
|
||||
as.data.table()
|
||||
setkey(microorganismsDT, kingdom, superprevalent, prevalent, fullname)
|
||||
microorganisms.superprevDT <- microorganismsDT[superprevalent == 0,]
|
||||
microorganisms.prevDT <- microorganismsDT[superprevalent == 1 & prevalent == 0,]
|
||||
microorganisms.unprevDT <- microorganismsDT[superprevalent == 1 & prevalent == 1,]
|
||||
microorganisms.oldDT <- as.data.table(AMR::microorganisms.old)
|
||||
setkey(microorganisms.oldDT, col_id, fullname)
|
||||
|
||||
assign(x = "microorganismsDT",
|
||||
value = microorganismsDT,
|
||||
envir = asNamespace("AMR"))
|
||||
|
||||
assign(x = "microorganisms.superprevDT",
|
||||
value = microorganisms.superprevDT,
|
||||
envir = asNamespace("AMR"))
|
||||
|
||||
assign(x = "microorganisms.prevDT",
|
||||
value = microorganisms.prevDT,
|
||||
envir = asNamespace("AMR"))
|
||||
|
||||
assign(x = "microorganisms.unprevDT",
|
||||
value = microorganisms.unprevDT,
|
||||
envir = asNamespace("AMR"))
|
||||
|
||||
assign(x = "microorganisms.oldDT",
|
||||
value = microorganisms.oldDT,
|
||||
envir = asNamespace("AMR"))
|
||||
|
||||
# packageStartupMessage("OK.", appendLF = TRUE)
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user