diff --git a/.travis.yml b/.travis.yml index 71ceb601..4acc29fb 100755 --- a/.travis.yml +++ b/.travis.yml @@ -43,7 +43,7 @@ before_install: - if [ $TRAVIS_OS_NAME = osx ]; then brew install libgit2; fi install: - - Rscript -e "install.packages(c('devtools', 'backports', 'clipr', 'curl', 'data.table', 'dplyr', 'hms', 'knitr', 'readr', 'rlang', 'rvest', 'xml2'))" + - Rscript -e "install.packages(c('devtools', 'backports', 'clipr', 'curl', 'data.table', 'dplyr', 'hms', 'knitr', 'readr', 'rlang', 'rvest', 'xml2', 'covr', 'ggplot2', 'rmarkdown', 'testthat', 'tidyr'))" - if [ $TRAVIS_OS_NAME = osx ]; then Rscript -e "devtools::install_github('r-lib/rlang')"; fi # postrun diff --git a/DESCRIPTION b/DESCRIPTION index b28fc9b5..1f96b3bc 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR Version: 0.3.0.9009 -Date: 2018-09-24 +Date: 2018-09-27 Title: Antimicrobial Resistance Analysis Authors@R: c( person( diff --git a/R/data.R b/R/data.R index bb529c12..a098d626 100755 --- a/R/data.R +++ b/R/data.R @@ -148,9 +148,9 @@ #' Data set with old taxonomic data from ITIS #' -#' A data set containing old, previously valid, taxonomic names. This data set is used internally by \code{\link{as.mo}}. +#' A data set containing old (previously valid or accepted) taxonomic names according to ITIS. This data set is used internally by \code{\link{as.mo}}. #' @inheritSection as.mo ITIS -#' @format A \code{\link{data.frame}} with 1,682 observations and 5 variables: +#' @format A \code{\link{data.frame}} with 2,384 observations and 5 variables: #' \describe{ #' \item{\code{tsn}}{Old Taxonomic Serial Number (TSN), as defined by ITIS} #' \item{\code{name}}{Old taxonomic name of the microorganism as found in ITIS, see Source} @@ -168,10 +168,21 @@ #' @format A \code{\link{tibble}} with 1,095 observations and 2 variables: #' \describe{ #' \item{\code{umcg}}{Code of microorganism according to UMCG MMB} +#' \item{\code{certe}}{Code of microorganism according to Certe MMB} +#' } +#' @seealso \code{\link{as.mo}} \code{\link{microorganisms.certe}} \code{\link{microorganisms}} +"microorganisms.umcg" + +#' Translation table for Certe +#' +#' A data set containing all bacteria codes of Certe MMB. These codes can be joined to data with an ID from \code{\link{microorganisms}$mo} (using \code{\link{left_join_microorganisms}}). GLIMS codes can also be translated to valid \code{MO}s with \code{\link{guess_mo}}. +#' @format A \code{\link{tibble}} with 2,664 observations and 2 variables: +#' \describe{ +#' \item{\code{certe}}{Code of microorganism according to Certe MMB} #' \item{\code{mo}}{Code of microorganism in \code{\link{microorganisms}}} #' } #' @seealso \code{\link{as.mo}} \code{\link{microorganisms}} -"microorganisms.umcg" +"microorganisms.certe" #' Data set with 2000 blood culture isolates of septic patients #' diff --git a/R/globals.R b/R/globals.R index 9f19c1f8..9b0a608b 100755 --- a/R/globals.R +++ b/R/globals.R @@ -57,6 +57,7 @@ globalVariables(c(".", "real_first_isolate", "S", "septic_patients", + "shortname", "species", "tsn", "tsn_new", diff --git a/R/mo.R b/R/mo.R index 1cd4797e..30af6118 100644 --- a/R/mo.R +++ b/R/mo.R @@ -60,7 +60,7 @@ #' \code{guess_mo} is an alias of \code{as.mo}. #' @section ITIS: #' \if{html}{\figure{itis_logo.jpg}{options: height=60px style=margin-bottom:5px} \cr} -#' This \code{AMR} package contains the \strong{complete microbial taxonomic data} (with seven taxonomic ranks - from subkingdom to subspecies) from the publicly available Integrated Taxonomic Information System (ITIS, https://www.itis.gov). ITIS is a partnership of U.S., Canadian, and Mexican agencies and taxonomic specialists [3]. The complete taxonomic kingdoms Bacteria, Fungi and Protozoa are included in this package, as well as all previously accepted names known to ITIS. +#' This \code{AMR} package contains the \strong{complete microbial taxonomic data} (with seven taxonomic ranks - from subkingdom to subspecies) from the publicly available Integrated Taxonomic Information System (ITIS, \url{https://www.itis.gov}). ITIS is a partnership of U.S., Canadian, and Mexican agencies and taxonomic specialists [3]. The complete taxonomic kingdoms Bacteria, Fungi and Protozoa are included in this package, as well as all previously accepted names known to ITIS. # (source as section, so it can be inherited by mo_property:) #' @section Source: #' [1] Becker K \emph{et al.} \strong{Coagulase-Negative Staphylococci}. 2014. Clin Microbiol Rev. 27(4): 870–926. \url{https://dx.doi.org/10.1128/CMR.00109-13} @@ -69,8 +69,6 @@ #' #' [3] Integrated Taxonomic Information System (ITIS). Retrieved September 2018. \url{http://www.itis.gov} #' @export -#' @importFrom dplyr %>% pull left_join -#' @importFrom data.table as.data.table setkey #' @return Character (vector) with class \code{"mo"}. Unknown values will return \code{NA}. #' @seealso \code{\link{microorganisms}} for the \code{data.frame} with ITIS content that is being used to determine ID's. \cr #' The \code{\link{mo_property}} functions (like \code{\link{mo_genus}}, \code{\link{mo_gramstain}}) to get properties based on the returned code. @@ -126,7 +124,25 @@ #' mutate(mo = guess_mo(paste(genus, species))) #' } as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = FALSE) { + exec_as.mo(x = x, Becker = Becker, Lancefield = Lancefield, + allow_uncertain = allow_uncertain, property = "mo") +} +#' @rdname as.mo +#' @export +is.mo <- function(x) { + # bactid for older releases + # remove when is.bactid will be removed + identical(class(x), "mo") | identical(class(x), "bactid") +} + +#' @rdname as.mo +#' @export +guess_mo <- as.mo + +#' @importFrom dplyr %>% pull left_join +#' @importFrom data.table as.data.table setkey +exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = FALSE, property = "mo") { if (NCOL(x) == 2) { # support tidyverse selection like: df %>% select(colA, colB) # paste these columns together @@ -147,374 +163,362 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = FALSE } } - MOs <- as.data.table(AMR::microorganisms) - setkey(MOs, prevalence, tsn) - MOs_mostprevalent <- MOs[prevalence != 9999,] - MOs_allothers <- NULL # will be set later, if needed - MOs_old <- NULL # will be set later, if needed - - - if (all(unique(x) %in% MOs[,mo])) { - class(x) <- "mo" - attr(x, 'package') <- 'AMR' - attr(x, 'ITIS') <- TRUE - return(x) - } - if (AMR::is.mo(x) & isTRUE(attributes(x)$ITIS)) { - # check for new mo class, data coming from ITIS - return(x) - } - - failures <- character(0) x_input <- x - # only check the uniques, which is way faster x <- unique(x) - x_backup <- trimws(x, which = "both") - x_species <- paste(x_backup, "species") - # translate to English for supported languages of mo_property - x <- gsub("(Gruppe|gruppe|groep|grupo|gruppo|groupe)", "group", x) - # remove 'empty' genus and species values - x <- gsub("(no MO)", "", x, fixed = TRUE) - # remove dots and other non-text in case of "E. coli" except spaces - x <- gsub("[^a-zA-Z0-9/ \\-]+", "", x) - # but spaces before and after should be omitted - x <- trimws(x, which = "both") - x_trimmed <- x - x_trimmed_species <- paste(x_trimmed, "species") - # replace space by regex sign - x_withspaces <- gsub(" ", ".* ", x, fixed = TRUE) - x <- gsub(" ", ".*", x, fixed = TRUE) - # add start en stop regex - x <- paste0('^', x, '$') - x_withspaces_all <- x_withspaces - x_withspaces_start <- paste0('^', x_withspaces) - x_withspaces <- paste0('^', x_withspaces, '$') + MOs <- NULL # will be set later, if needed + MOs_mostprevalent <- NULL # will be set later, if needed + MOs_allothers <- NULL # will be set later, if needed + MOs_old <- NULL # will be set later, if needed - # cat(paste0('x "', x, '"\n')) - # cat(paste0('x_species "', x_species, '"\n')) - # cat(paste0('x_withspaces_all "', x_withspaces_all, '"\n')) - # cat(paste0('x_withspaces_start "', x_withspaces_start, '"\n')) - # cat(paste0('x_withspaces "', x_withspaces, '"\n')) - # cat(paste0('x_backup "', x_backup, '"\n')) - # cat(paste0('x_trimmed "', x_trimmed, '"\n')) - # cat(paste0('x_trimmed_species "', x_trimmed_species, '"\n')) + if (all(x %in% AMR::microorganisms[, property])) { + # already existing mo + } else if (all(x %in% AMR::microorganisms.certe[, "certe"])) { + # old Certe codes + suppressWarnings( + x <- data.frame(certe = x, stringsAsFactors = FALSE) %>% + left_join(AMR::microorganisms.certe, by = "certe") %>% + left_join(AMR::microorganisms, by = "mo") %>% + pull(property) + ) + } else if (all(x %in% AMR::microorganisms.umcg[, "umcg"])) { + # old UMCG codes + suppressWarnings( + x <- data.frame(umcg = x, stringsAsFactors = FALSE) %>% + left_join(AMR::microorganisms.umcg, by = "umcg") %>% + left_join(AMR::microorganisms.certe, by = "certe") %>% + left_join(AMR::microorganisms, by = "mo") %>% + pull(property) + ) + } else { - for (i in 1:length(x)) { - if (identical(x_trimmed[i], "") | is.na(x_trimmed[i])) { - # empty values - x[i] <- NA - next - } + MOs <- as.data.table(AMR::microorganisms) + setkey(MOs, prevalence, tsn) + MOs_mostprevalent <- MOs[prevalence != 9999,] - # translate known trivial abbreviations to genus + species ---- - if (!is.na(x_trimmed[i])) { - if (toupper(x_trimmed[i]) == 'MRSA' - | toupper(x_trimmed[i]) == 'VISA' - | toupper(x_trimmed[i]) == 'VRSA') { - x[i] <- 'B_STPHY_AUR' - next - } - if (toupper(x_trimmed[i]) == 'MRSE') { - x[i] <- 'B_STPHY_EPI' - next - } - if (toupper(x_trimmed[i]) == 'VRE') { - x[i] <- 'B_ENTRC' - next - } - if (toupper(x_trimmed[i]) == 'MRPA') { - # multi resistant P. aeruginosa - x[i] <- 'B_PDMNS_AER' - next - } - if (toupper(x_trimmed[i]) %in% c('PISP', 'PRSP', 'VISP', 'VRSP')) { - # peni I, peni R, vanco I, vanco R: S. pneumoniae - x[i] <- 'B_STRPTC_PNE' - next - } - if (toupper(x_trimmed[i]) %like% '^G[ABCDFGHK]S$') { - x[i] <- gsub("G([ABCDFGHK])S", "B_STRPTC_GR\\1", x_trimmed[i]) - next - } - # CoNS/CoPS in different languages (support for German, Dutch, Spanish, Portuguese) ---- - if (tolower(x[i]) %like% '[ck]oagulas[ea] negatie?[vf]' - | tolower(x_trimmed[i]) %like% '[ck]oagulas[ea] negatie?[vf]' - | tolower(x[i]) %like% '[ck]o?ns[^a-z]?$') { - # coerce S. coagulase negative - x[i] <- 'B_STPHY_CNS' - next - } - if (tolower(x[i]) %like% '[ck]oagulas[ea] positie?[vf]' - | tolower(x_trimmed[i]) %like% '[ck]oagulas[ea] positie?[vf]' - | tolower(x[i]) %like% '[ck]o?ps[^a-z]?$') { - # coerce S. coagulase positive - x[i] <- 'B_STPHY_CPS' - next - } - } + x_backup <- trimws(x, which = "both") + x_species <- paste(x_backup, "species") + # translate to English for supported languages of mo_property + x <- gsub("(Gruppe|gruppe|groep|grupo|gruppo|groupe)", "group", x) + # remove 'empty' genus and species values + x <- gsub("(no MO)", "", x, fixed = TRUE) + # remove dots and other non-text in case of "E. coli" except spaces + x <- gsub("[^a-zA-Z0-9/ \\-]+", "", x) + # but spaces before and after should be omitted + x <- trimws(x, which = "both") + x_trimmed <- x + x_trimmed_species <- paste(x_trimmed, "species") + # replace space by regex sign + x_withspaces <- gsub(" ", ".* ", x, fixed = TRUE) + x <- gsub(" ", ".*", x, fixed = TRUE) + # add start en stop regex + x <- paste0('^', x, '$') + x_withspaces_all <- x_withspaces + x_withspaces_start <- paste0('^', x_withspaces) + x_withspaces <- paste0('^', x_withspaces, '$') - # FIRST TRY FULLNAMES AND CODES - # if only genus is available, don't select species - if (all(!c(x[i], x_trimmed[i]) %like% " ")) { - found <- MOs[tolower(fullname) %in% tolower(c(x_species[i], x_trimmed_species[i])), mo] - if (length(found) > 0) { - x[i] <- found[1L] + # cat(paste0('x "', x, '"\n')) + # cat(paste0('x_species "', x_species, '"\n')) + # cat(paste0('x_withspaces_all "', x_withspaces_all, '"\n')) + # cat(paste0('x_withspaces_start "', x_withspaces_start, '"\n')) + # cat(paste0('x_withspaces "', x_withspaces, '"\n')) + # cat(paste0('x_backup "', x_backup, '"\n')) + # cat(paste0('x_trimmed "', x_trimmed, '"\n')) + # cat(paste0('x_trimmed_species "', x_trimmed_species, '"\n')) + + for (i in 1:length(x)) { + if (identical(x_trimmed[i], "") | is.na(x_trimmed[i])) { + # empty values + x[i] <- NA next } - if (nchar(x_trimmed[i]) > 4) { - # not when abbr is esco, stau, klpn, etc. - found <- MOs[tolower(fullname) %like% gsub(" ", ".*", x_trimmed_species[i], fixed = TRUE), mo] + + # translate known trivial abbreviations to genus + species ---- + if (!is.na(x_trimmed[i])) { + if (toupper(x_trimmed[i]) == 'MRSA' + | toupper(x_trimmed[i]) == 'VISA' + | toupper(x_trimmed[i]) == 'VRSA') { + x[i] <- MOs[mo == 'B_STPHY_AUR', ..property][[1]][1L] + next + } + if (toupper(x_trimmed[i]) == 'MRSE') { + x[i] <- MOs[mo == 'B_STPHY_EPI', ..property][[1]][1L] + next + } + if (toupper(x_trimmed[i]) == 'VRE') { + x[i] <- MOs[mo == 'B_ENTRC', ..property][[1]][1L] + next + } + if (toupper(x_trimmed[i]) == 'MRPA') { + # multi resistant P. aeruginosa + x[i] <- MOs[mo == 'B_PDMNS_AER', ..property][[1]][1L] + next + } + if (toupper(x_trimmed[i]) %in% c('PISP', 'PRSP', 'VISP', 'VRSP')) { + # peni I, peni R, vanco I, vanco R: S. pneumoniae + x[i] <- MOs[mo == 'B_STRPTC_PNE', ..property][[1]][1L] + next + } + if (toupper(x_trimmed[i]) %like% '^G[ABCDFGHK]S$') { + x[i] <- MOs[mo == gsub("G([ABCDFGHK])S", "B_STRPTC_GR\\1", x_trimmed[i]), ..property][[1]][1L] + next + } + # CoNS/CoPS in different languages (support for German, Dutch, Spanish, Portuguese) ---- + if (tolower(x[i]) %like% '[ck]oagulas[ea] negatie?[vf]' + | tolower(x_trimmed[i]) %like% '[ck]oagulas[ea] negatie?[vf]' + | tolower(x[i]) %like% '[ck]o?ns[^a-z]?$') { + # coerce S. coagulase negative + x[i] <- MOs[mo == 'B_STPHY_CNS', ..property][[1]][1L] + next + } + if (tolower(x[i]) %like% '[ck]oagulas[ea] positie?[vf]' + | tolower(x_trimmed[i]) %like% '[ck]oagulas[ea] positie?[vf]' + | tolower(x[i]) %like% '[ck]o?ps[^a-z]?$') { + # coerce S. coagulase positive + x[i] <- MOs[mo == 'B_STPHY_CPS', ..property][[1]][1L] + next + } + } + + # FIRST TRY FULLNAMES AND CODES + # if only genus is available, don't select species + if (all(!c(x[i], x_trimmed[i]) %like% " ")) { + found <- MOs[tolower(fullname) %in% tolower(c(x_species[i], x_trimmed_species[i])), ..property][[1]] if (length(found) > 0) { x[i] <- found[1L] next } + if (nchar(x_trimmed[i]) > 4) { + # not when abbr is esco, stau, klpn, etc. + found <- MOs[tolower(fullname) %like% gsub(" ", ".*", x_trimmed_species[i], fixed = TRUE), ..property][[1]] + if (length(found) > 0) { + x[i] <- found[1L] + next + } + } } - } - # search for GLIMS code ---- - found <- AMR::microorganisms.umcg[which(toupper(AMR::microorganisms.umcg$umcg) == toupper(x_trimmed[i])),]$mo - if (length(found) > 0) { - x[i] <- MOs[mo.old == found, mo][1L] - next - } + # TRY FIRST THOUSAND MOST PREVALENT IN HUMAN INFECTIONS ---- - # TRY FIRST THOUSAND MOST PREVALENT IN HUMAN INFECTIONS ---- + found <- MOs_mostprevalent[tolower(fullname) %in% tolower(c(x_backup[i], x_trimmed[i])), ..property][[1]] + # most probable: is exact match in fullname + if (length(found) > 0) { + x[i] <- found[1L] + next + } + found <- MOs_mostprevalent[tsn == x_trimmed[i], ..property][[1]] + # is a valid TSN + if (length(found) > 0) { + x[i] <- found[1L] + next + } + found <- MOs_mostprevalent[mo == toupper(x_backup[i]), ..property][[1]] + # is a valid mo + if (length(found) > 0) { + x[i] <- found[1L] + next + } - found <- MOs_mostprevalent[tolower(fullname) %in% tolower(c(x_backup[i], x_trimmed[i])), mo] - # most probable: is exact match in fullname - if (length(found) > 0) { - x[i] <- found[1L] - next - } - found <- MOs_mostprevalent[tsn == x_trimmed[i], mo] - # is a valid TSN - if (length(found) > 0) { - x[i] <- found[1L] - next - } - found <- MOs_mostprevalent[mo == toupper(x_backup[i]), mo] - # is a valid mo - if (length(found) > 0) { - x[i] <- found[1L] - next - } - found <- MOs_mostprevalent[mo.old == toupper(x_backup[i]) - | (substr(x_backup[i], 4, 6) == "SPP" & mo.old == substr(x_backup[i], 1, 3)) - | mo.old == substr(x_backup[i], 1, 3), mo] - # is a valid old mo - if (length(found) > 0) { - x[i] <- found[1L] - next - } + # try any match keeping spaces ---- + found <- MOs_mostprevalent[fullname %like% x_withspaces[i], ..property][[1]] + if (length(found) > 0) { + x[i] <- found[1L] + next + } - # try any match keeping spaces ---- - found <- MOs_mostprevalent[fullname %like% x_withspaces[i], mo] - if (length(found) > 0) { - x[i] <- found[1L] - next - } + # try any match keeping spaces, not ending with $ ---- + found <- MOs_mostprevalent[fullname %like% x_withspaces_start[i], ..property][[1]] + if (length(found) > 0) { + x[i] <- found[1L] + next + } - # try any match keeping spaces, not ending with $ ---- - found <- MOs_mostprevalent[fullname %like% x_withspaces_start[i], mo] - if (length(found) > 0) { - x[i] <- found[1L] - next - } + # try any match diregarding spaces ---- + found <- MOs_mostprevalent[fullname %like% x[i], ..property][[1]] + if (length(found) > 0) { + x[i] <- found[1L] + next + } - # try any match diregarding spaces ---- - found <- MOs_mostprevalent[fullname %like% x[i], mo] - if (length(found) > 0) { - x[i] <- found[1L] - next - } + # try fullname without start and stop regex, to also find subspecies ---- + # like "K. pneu rhino" -> "Klebsiella pneumoniae (rhinoscleromatis)" = KLEPNERH + found <- MOs_mostprevalent[fullname %like% x_withspaces_start[i], ..property][[1]] + if (length(found) > 0) { + x[i] <- found[1L] + next + } - # try fullname without start and stop regex, to also find subspecies ---- - # like "K. pneu rhino" -> "Klebsiella pneumoniae (rhinoscleromatis)" = KLEPNERH - found <- MOs_mostprevalent[fullname %like% x_withspaces_start[i], mo] - if (length(found) > 0) { - x[i] <- found[1L] - next - } + # try splitting of characters and then find ID ---- + # like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus + x_split <- x + x_length <- nchar(x_trimmed[i]) + x_split[i] <- paste0(x_trimmed[i] %>% substr(1, x_length / 2) %>% trimws(), + '.* ', + x_trimmed[i] %>% substr((x_length / 2) + 1, x_length) %>% trimws()) + found <- MOs_mostprevalent[fullname %like% paste0('^', x_split[i]), ..property][[1]] + if (length(found) > 0) { + x[i] <- found[1L] + next + } - # try splitting of characters and then find ID ---- - # like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus - x_split <- x - x_length <- nchar(x_trimmed[i]) - x_split[i] <- paste0(x_trimmed[i] %>% substr(1, x_length / 2) %>% trimws(), - '.* ', - x_trimmed[i] %>% substr((x_length / 2) + 1, x_length) %>% trimws()) - found <- MOs_mostprevalent[fullname %like% paste0('^', x_split[i]), mo] - if (length(found) > 0) { - x[i] <- found[1L] - next - } + # try any match with text before and after original search string ---- + # so "negative rods" will be "GNR" + # if (x_trimmed[i] %like% "^Gram") { + # x_trimmed[i] <- gsub("^Gram", "", x_trimmed[i], ignore.case = TRUE) + # # remove leading and trailing spaces again + # x_trimmed[i] <- trimws(x_trimmed[i], which = "both") + # } + # if (!is.na(x_trimmed[i])) { + # found <- MOs_mostprevalent[fullname %like% x_trimmed[i], ..property][[1]] + # if (length(found) > 0) { + # x[i] <- found[1L] + # next + # } + # } - # try any match with text before and after original search string ---- - # so "negative rods" will be "GNR" - # if (x_trimmed[i] %like% "^Gram") { - # x_trimmed[i] <- gsub("^Gram", "", x_trimmed[i], ignore.case = TRUE) - # # remove leading and trailing spaces again - # x_trimmed[i] <- trimws(x_trimmed[i], which = "both") - # } - # if (!is.na(x_trimmed[i])) { - # found <- MOs_mostprevalent[fullname %like% x_trimmed[i], mo] - # if (length(found) > 0) { - # x[i] <- found[1L] - # next - # } - # } + # THEN TRY ALL OTHERS ---- + if (is.null(MOs_allothers)) { + MOs_allothers <- MOs[prevalence == 9999,] + } - # THEN TRY ALL OTHERS ---- - if (is.null(MOs_allothers)) { - MOs_allothers <- MOs[prevalence == 9999,] - } + found <- MOs_allothers[tolower(fullname) == tolower(x_backup[i]), ..property][[1]] + # most probable: is exact match in fullname + if (length(found) > 0) { + x[i] <- found[1L] + next + } + found <- MOs_allothers[tolower(fullname) == tolower(x_trimmed[i]), ..property][[1]] + # most probable: is exact match in fullname + if (length(found) > 0) { + x[i] <- found[1L] + next + } + found <- MOs_allothers[tsn == x_trimmed[i], ..property][[1]] + # is a valid TSN + if (length(found) > 0) { + x[i] <- found[1L] + next + } + found <- MOs_allothers[mo == toupper(x_backup[i]), ..property][[1]] + # is a valid mo + if (length(found) > 0) { + x[i] <- found[1L] + next + } - found <- MOs_allothers[tolower(fullname) == tolower(x_backup[i]), mo] - # most probable: is exact match in fullname - if (length(found) > 0) { - x[i] <- found[1L] - next - } - found <- MOs_allothers[tolower(fullname) == tolower(x_trimmed[i]), mo] - # most probable: is exact match in fullname - if (length(found) > 0) { - x[i] <- found[1L] - next - } - found <- MOs_allothers[tsn == x_trimmed[i], mo] - # is a valid TSN - if (length(found) > 0) { - x[i] <- found[1L] - next - } - found <- MOs_allothers[mo == toupper(x_backup[i]), mo] - # is a valid mo - if (length(found) > 0) { - x[i] <- found[1L] - next - } - found <- MOs_allothers[mo.old == toupper(x_backup[i]) - | (substr(x_backup[i], 4, 6) == "SPP" & mo.old == substr(x_backup[i], 1, 3)) - | mo.old == substr(x_backup[i], 1, 3), mo] - # is a valid old mo - if (length(found) > 0) { - x[i] <- found[1L] - next - } + # try any match keeping spaces ---- + found <- MOs_allothers[fullname %like% x_withspaces[i], ..property][[1]] + if (length(found) > 0) { + x[i] <- found[1L] + next + } - # try any match keeping spaces ---- - found <- MOs_allothers[fullname %like% x_withspaces[i], mo] - if (length(found) > 0) { - x[i] <- found[1L] - next - } + # try any match keeping spaces, not ending with $ ---- + found <- MOs_allothers[fullname %like% x_withspaces_start[i], ..property][[1]] + if (length(found) > 0) { + x[i] <- found[1L] + next + } - # try any match keeping spaces, not ending with $ ---- - found <- MOs_allothers[fullname %like% x_withspaces_start[i], mo] - if (length(found) > 0) { - x[i] <- found[1L] - next - } + # try any match diregarding spaces ---- + found <- MOs_allothers[fullname %like% x[i], ..property][[1]] + if (length(found) > 0) { + x[i] <- found[1L] + next + } - # try any match diregarding spaces ---- - found <- MOs_allothers[fullname %like% x[i], mo] - if (length(found) > 0) { - x[i] <- found[1L] - next - } + # try fullname without start and stop regex, to also find subspecies ---- + # like "K. pneu rhino" -> "Klebsiella pneumoniae (rhinoscleromatis)" = KLEPNERH + found <- MOs_allothers[fullname %like% x_withspaces_start[i], ..property][[1]] + if (length(found) > 0) { + x[i] <- found[1L] + next + } - # try fullname without start and stop regex, to also find subspecies ---- - # like "K. pneu rhino" -> "Klebsiella pneumoniae (rhinoscleromatis)" = KLEPNERH - found <- MOs_allothers[fullname %like% x_withspaces_start[i], mo] - if (length(found) > 0) { - x[i] <- found[1L] - next - } + # try splitting of characters and then find ID ---- + # like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus + x_split <- x + x_length <- nchar(x_trimmed[i]) + x_split[i] <- paste0(x_trimmed[i] %>% substr(1, x_length / 2) %>% trimws(), + '.* ', + x_trimmed[i] %>% substr((x_length / 2) + 1, x_length) %>% trimws()) + found <- MOs_allothers[fullname %like% paste0('^', x_split[i]), ..property][[1]] + if (length(found) > 0) { + x[i] <- found[1L] + next + } - # try splitting of characters and then find ID ---- - # like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus - x_split <- x - x_length <- nchar(x_trimmed[i]) - x_split[i] <- paste0(x_trimmed[i] %>% substr(1, x_length / 2) %>% trimws(), - '.* ', - x_trimmed[i] %>% substr((x_length / 2) + 1, x_length) %>% trimws()) - found <- MOs_allothers[fullname %like% paste0('^', x_split[i]), mo] - if (length(found) > 0) { - x[i] <- found[1L] - next - } + # # try any match with text before and after original search string ---- + # # so "negative rods" will be "GNR" + # if (x_trimmed[i] %like% "^Gram") { + # x_trimmed[i] <- gsub("^Gram", "", x_trimmed[i], ignore.case = TRUE) + # # remove leading and trailing spaces again + # x_trimmed[i] <- trimws(x_trimmed[i], which = "both") + # } + # if (!is.na(x_trimmed[i])) { + # found <- MOs_allothers[fullname %like% x_trimmed[i], ..property][[1]] + # if (length(found) > 0) { + # x[i] <- found[1L] + # next + # } + # } - # # try any match with text before and after original search string ---- - # # so "negative rods" will be "GNR" - # if (x_trimmed[i] %like% "^Gram") { - # x_trimmed[i] <- gsub("^Gram", "", x_trimmed[i], ignore.case = TRUE) - # # remove leading and trailing spaces again - # x_trimmed[i] <- trimws(x_trimmed[i], which = "both") - # } - # if (!is.na(x_trimmed[i])) { - # found <- MOs_allothers[fullname %like% x_trimmed[i], mo] - # if (length(found) > 0) { - # x[i] <- found[1L] - # next - # } - # } + # MISCELLANEOUS ---- - # MISCELLANEOUS ---- - - # look for old taxonomic names ---- - if (is.null(MOs_old)) { - MOs_old <- as.data.table(AMR::microorganisms.old) - setkey(MOs_old, name, tsn_new) - } - found <- MOs_old[tolower(name) == tolower(x_backup[i]) | - tsn == x_trimmed[i],] - if (NROW(found) > 0) { - x[i] <- MOs[tsn == found[1, tsn_new], mo] - renamed_note(name_old = found[1, name], - name_new = MOs[tsn == found[1, tsn_new], fullname], - authors = found[1, authors], - year = found[1, year]) - next - } - - # check for uncertain results ---- - if (allow_uncertain == TRUE) { - # (1) look again for old taxonomic names, now for G. species ---- - found <- MOs_old[name %like% x_withspaces[i] - | name %like% x_withspaces_start[i] - | name %like% x[i],] + # look for old taxonomic names ---- + if (is.null(MOs_old)) { + MOs_old <- as.data.table(AMR::microorganisms.old) + setkey(MOs_old, name, tsn_new) + } + found <- MOs_old[tolower(name) == tolower(x_backup[i]) | + tsn == x_trimmed[i],] if (NROW(found) > 0) { - x[i] <- MOs[tsn == found[1, tsn_new], mo] - warning("Uncertain interpretation: '", - x_backup[i], "' -> '", found[1, name], "'", - call. = FALSE, immediate. = TRUE) + x[i] <- MOs[tsn == found[1, tsn_new], ..property][[1]] renamed_note(name_old = found[1, name], name_new = MOs[tsn == found[1, tsn_new], fullname], authors = found[1, authors], year = found[1, year]) - next } - # (2) try to strip off one element and check the remains - x_strip <- x_backup[i] %>% strsplit(" ") %>% unlist() - x_strip <- x_strip[1:length(x_strip) - 1] - x[i] <- suppressWarnings(suppressMessages(as.mo(x_strip))) - if (!is.na(x[i])) { - warning("Uncertain interpretation: '", - x_backup[i], "' -> '", MOs[mo == x[i], fullname], "' (", x[i], ")", - call. = FALSE, immediate. = TRUE) - next + # check for uncertain results ---- + if (allow_uncertain == TRUE) { + # (1) look again for old taxonomic names, now for G. species ---- + found <- MOs_old[name %like% x_withspaces[i] + | name %like% x_withspaces_start[i] + | name %like% x[i],] + if (NROW(found) > 0) { + x[i] <- MOs[tsn == found[1, tsn_new], ..property][[1]] + warning("Uncertain interpretation: '", + x_backup[i], "' -> '", found[1, name], "'", + call. = FALSE, immediate. = TRUE) + renamed_note(name_old = found[1, name], + name_new = MOs[tsn == found[1, tsn_new], fullname], + authors = found[1, authors], + year = found[1, year]) + + next + } + + # (2) try to strip off one element and check the remains + x_strip <- x_backup[i] %>% strsplit(" ") %>% unlist() + x_strip <- x_strip[1:length(x_strip) - 1] + x[i] <- suppressWarnings(suppressMessages(as.mo(x_strip))) + if (!is.na(x[i])) { + warning("Uncertain interpretation: '", + x_backup[i], "' -> '", MOs[mo == x[i], fullname], "' (", x[i], ")", + call. = FALSE, immediate. = TRUE) + next + } } + + # not found ---- + x[i] <- NA_character_ + failures <- c(failures, x_backup[i]) + } - - # not found ---- - x[i] <- NA_character_ - failures <- c(failures, x_backup[i]) - } failures <- failures[!failures %in% c(NA, NULL, NaN)] @@ -529,6 +533,10 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = FALSE if (Becker == TRUE | Becker == "all") { # See Source. It's this figure: # https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4187637/figure/F3/ + if (is.null(MOs)) { + MOs <- as.data.table(AMR::microorganisms) + setkey(MOs, prevalence, tsn) + } MOs_staph <- MOs[genus == "Staphylococcus"] setkey(MOs_staph, species) CoNS <- MOs_staph[species %in% c("arlettae", "auricularis", "capitis", @@ -541,65 +549,66 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = FALSE "pettenkoferi", "piscifermentans", "rostri", "saccharolyticus", "saprophyticus", "sciuri", "stepanovicii", "simulans", "succinus", - "vitulinus", "warneri", "xylosus"), mo] + "vitulinus", "warneri", "xylosus"), ..property][[1]] CoPS <- MOs_staph[species %in% c("simiae", "agnetis", "chromogenes", "delphini", "felis", "lutrae", "hyicus", "intermedius", "pseudintermedius", "pseudointermedius", - "schleiferi"), mo] - x[x %in% CoNS] <- "B_STPHY_CNS" - x[x %in% CoPS] <- "B_STPHY_CPS" + "schleiferi"), ..property][[1]] + x[x %in% CoNS] <- MOs[mo == 'B_STPHY_CNS', ..property][[1]][1L] + x[x %in% CoPS] <- MOs[mo == 'B_STPHY_CPS', ..property][[1]][1L] if (Becker == "all") { - x[x == "B_STPHY_AUR"] <- "B_STPHY_CPS" + x[x == MOs[mo == 'B_STPHY_AUR', ..property][[1]][1L]] <- MOs[mo == 'B_STPHY_CPS', ..property][[1]][1L] } } # Lancefield ---- if (Lancefield == TRUE | Lancefield == "all") { - # group A - x[x == "B_STRPTC_PYO"] <- "B_STRPTC_GRA" # S. pyogenes - # group B - x[x == "B_STRPTC_AGA"] <- "B_STRPTC_GRB" # S. agalactiae + if (is.null(MOs)) { + MOs <- as.data.table(AMR::microorganisms) + setkey(MOs, prevalence, tsn) + } + # group A - S. pyogenes + x[x == MOs[mo == 'B_STRPTC_PYO', ..property][[1]][1L]] <- MOs[mo == 'B_STRPTC_GRA', ..property][[1]][1L] + # group B - S. agalactiae + x[x == MOs[mo == 'B_STRPTC_AGA', ..property][[1]][1L]] <- MOs[mo == 'B_STRPTC_GRB', ..property][[1]][1L] # group C S_groupC <- MOs %>% filter(genus == "Streptococcus", species %in% c("equisimilis", "equi", "zooepidemicus", "dysgalactiae")) %>% - pull(mo) - x[x %in% S_groupC] <- "B_STRPTC_GRC" # S. agalactiae + pull(property) + x[x %in% S_groupC] <- MOs[mo == 'B_STRPTC_GRC', ..property][[1]][1L] if (Lancefield == "all") { - x[substr(x, 1, 7) == "B_ENTRC"] <- "B_STRPTC_GRD" # all Enterococci + # all Enterococci + x[x %like% "^(Enterococcus|B_ENTRC)"] <- MOs[mo == 'B_STRPTC_GRD', ..property][[1]][1L] } - # group F - x[x == "B_STRPTC_ANG"] <- "B_STRPTC_GRF" # S. anginosus - # group H - x[x == "B_STRPTC_SAN"] <- "B_STRPTC_GRH" # S. sanguinis - # group K - x[x == "B_STRPTC_SAL"] <- "B_STRPTC_GRK" # S. salivarius + # group F - S. anginosus + x[x == MOs[mo == 'B_STRPTC_ANG', ..property][[1]][1L]] <- MOs[mo == 'B_STRPTC_GRF', ..property][[1]][1L] + # group H - S. sanguinis + x[x == MOs[mo == 'B_STRPTC_SAN', ..property][[1]][1L]] <- MOs[mo == 'B_STRPTC_GRH', ..property][[1]][1L] + # group K - S. salivarius + x[x == MOs[mo == 'B_STRPTC_SAL', ..property][[1]][1L]] <- MOs[mo == 'B_STRPTC_GRK', ..property][[1]][1L] } # left join the found results to the original input values (x_input) - DT_found <- data.table(input = as.character(unique(x_input)), + df_found <- data.frame(input = as.character(unique(x_input)), found = x, - key = "input", stringsAsFactors = FALSE) - DT_input <- data.table(input = as.character(x_input), - key = "input", + df_input <- data.frame(input = as.character(x_input), stringsAsFactors = FALSE) - x <- DT_found[DT_input, on = "input", found] + x <- df_input %>% + left_join(df_found, + by = "input") %>% + pull(found) - # df_found <- data.frame(input = as.character(unique(x_input)), - # found = x, - # stringsAsFactors = FALSE) - # df_input <- data.frame(input = as.character(x_input), - # stringsAsFactors = FALSE) - # x <- df_input %>% - # left_join(df_found, - # by = "input") %>% - # pull(found) + if (property == "mo") { + class(x) <- "mo" + attr(x, 'package') <- 'AMR' + attr(x, 'ITIS') <- TRUE + } else if (property == "tsn") { + x <- as.integer(x) + } - class(x) <- "mo" - attr(x, 'package') <- 'AMR' - attr(x, 'ITIS') <- TRUE x } @@ -614,19 +623,6 @@ renamed_note <- function(name_old, name_new, authors, year) { base::message(msg) } - -#' @rdname as.mo -#' @export -is.mo <- function(x) { - # bactid for older releases - # remove when is.bactid will be removed - identical(class(x), "mo") | identical(class(x), "bactid") -} - -#' @rdname as.mo -#' @export -guess_mo <- as.mo - #' @exportMethod print.mo #' @export #' @noRd diff --git a/R/mo_property.R b/R/mo_property.R index 89f9061a..e6b6270e 100644 --- a/R/mo_property.R +++ b/R/mo_property.R @@ -27,7 +27,7 @@ #' @inheritSection as.mo Source #' @rdname mo_property #' @name mo_property -#' @return A logical (in case of \code{mo_aerobic}), a list (in case of \code{mo_taxonomy}), a character otherwise +#' @return A \code{list} (in case of \code{mo_taxonomy}) or a \code{character} otherwise #' @export #' @seealso \code{\link{microorganisms}} #' @examples @@ -39,7 +39,7 @@ #' mo_family("E. coli") # "Enterobacteriaceae" #' mo_genus("E. coli") # "Escherichia" #' mo_species("E. coli") # "coli" -#' mo_subspecies("E. coli") # "" +#' mo_subspecies("E. coli") # NA #' mo_fullname("E. coli") # "Escherichia coli" #' mo_shortname("E. coli") # "E. coli" #' mo_gramstain("E. coli") # "Gram negative" @@ -98,15 +98,17 @@ #' # Complete taxonomy up to Subkingdom, returns a list #' mo_taxonomy("E. coli") mo_fullname <- function(x, Becker = FALSE, Lancefield = FALSE, language = NULL) { - mo_property(x, "fullname", Becker = Becker, Lancefield = Lancefield, language = language) + x <- mo_validate(x = x, property = "fullname", Becker = Becker, Lancefield = Lancefield) + mo_translate(x, language = language) } #' @rdname mo_property +#' @importFrom dplyr %>% left_join mutate pull #' @export mo_shortname <- function(x, Becker = FALSE, Lancefield = FALSE, language = NULL) { if (Becker %in% c(TRUE, "all") | Lancefield == TRUE) { - res1 <- as.mo(x) - res2 <- suppressWarnings(as.mo(x, Becker = Becker, Lancefield = Lancefield)) + res1 <- AMR::as.mo(x) + res2 <- suppressWarnings(AMR::as.mo(res1, Becker = Becker, Lancefield = Lancefield)) res2_fullname <- mo_fullname(res2) res2_fullname[res2_fullname %like% "\\(CoNS\\)"] <- "CoNS" res2_fullname[res2_fullname %like% "\\(CoPS\\)"] <- "CoPS" @@ -115,8 +117,8 @@ mo_shortname <- function(x, Becker = FALSE, Lancefield = FALSE, language = NULL) res2_fullname) # turn "Streptococcus group A" and "Streptococcus grupo A" to "GAS" res2_fullname_vector <- res2_fullname[res2_fullname == mo_fullname(res1)] res2_fullname[res2_fullname == mo_fullname(res1)] <- paste0(substr(mo_genus(res2_fullname_vector), 1, 1), - ". ", - suppressWarnings(mo_species(res2_fullname_vector))) + ". ", + suppressWarnings(mo_species(res2_fullname_vector))) if (sum(res1 == res2, na.rm = TRUE) > 0) { res1[res1 == res2] <- paste0(substr(mo_genus(res1[res1 == res2]), 1, 1), ". ", @@ -126,112 +128,115 @@ mo_shortname <- function(x, Becker = FALSE, Lancefield = FALSE, language = NULL) result <- as.character(res1) } else { x <- AMR::as.mo(x) - # return G. species - result <- paste0(substr(mo_genus(x), 1, 1), ". ", suppressWarnings(mo_species(x))) + result <- data.frame(mo = x) %>% + left_join(AMR::microorganisms, by = "mo") %>% + mutate(shortname = ifelse(!is.na(genus) & !is.na(species), paste0(substr(genus, 1, 1), ". ", species), NA_character_)) %>% + pull(shortname) } - result[result %in% c(". ", "(. ")] <- "" mo_translate(result, language = language) } #' @rdname mo_property #' @export mo_subspecies <- function(x, Becker = FALSE, Lancefield = FALSE, language = NULL) { - mo_property(x, "subspecies", Becker = Becker, Lancefield = Lancefield, language = language) + mo_translate(exec_as.mo(x, + Becker = Becker, + Lancefield = Lancefield, + property = "subspecies"), + language = language) } #' @rdname mo_property #' @export mo_species <- function(x, Becker = FALSE, Lancefield = FALSE, language = NULL) { - mo_property(x, "species", Becker = Becker, Lancefield = Lancefield, language = language) + x <- mo_validate(x = x, property = "species", Becker = Becker, Lancefield = Lancefield) + mo_translate(x, language = language) } #' @rdname mo_property #' @export mo_genus <- function(x, language = NULL) { - mo_property(x, "genus", language = language) + x <- mo_validate(x = x, property = "genus") + mo_translate(x, language = language) } #' @rdname mo_property #' @export mo_family <- function(x) { - mo_property(x, "family") + mo_validate(x = x, property = "family") } #' @rdname mo_property #' @export mo_order <- function(x) { - mo_property(x, "order") + mo_validate(x = x, property = "order") } #' @rdname mo_property #' @export mo_class <- function(x) { - mo_property(x, "class") + mo_validate(x = x, property = "class") } #' @rdname mo_property #' @export mo_phylum <- function(x) { - mo_property(x, "phylum") + mo_validate(x = x, property = "phylum") } #' @rdname mo_property #' @export mo_subkingdom <- function(x) { - mo_property(x, "subkingdom") + mo_validate(x = x, property = "subkingdom") } #' @rdname mo_property #' @export mo_type <- function(x, language = NULL) { - mo_property(x, "type", language = language) + x <- mo_validate(x = x, property = "type") + mo_translate(x, language = language) } #' @rdname mo_property #' @export mo_TSN <- function(x) { - mo_property(x, "tsn") + mo_validate(x = x, property = "tsn") } #' @rdname mo_property #' @export mo_gramstain <- function(x, language = NULL) { - mo_property(x, "gramstain", language = language) + x <- mo_validate(x = x, property = "gramstain") + mo_translate(x, language = language) } #' @rdname mo_property #' @importFrom data.table data.table as.data.table setkey #' @export mo_property <- function(x, property = 'fullname', Becker = FALSE, Lancefield = FALSE, language = NULL) { - property <- tolower(property[1]) + if (length(property) != 1L) { + stop("'property' must be of length 1.") + } if (!property %in% colnames(AMR::microorganisms)) { - stop("invalid property: ", property, " - use a column name of the `microorganisms` data set") + stop("invalid property: '", property, "' - use a column name of the `microorganisms` data set") } - if (Becker == TRUE | Lancefield == TRUE | !is.mo(x)) { - # this will give a warning if x cannot be coerced - x <- AMR::as.mo(x = x, Becker = Becker, Lancefield = Lancefield) - } - A <- data.table(mo = x, stringsAsFactors = FALSE) - B <- as.data.table(AMR::microorganisms) - setkey(B, mo) - result2 <- B[A, on = 'mo', ..property][[1]] - if (property == "tsn") { - result2 <- as.integer(result2) - } else { - # will else not retain `logical` class - result2[x %in% c("", NA) | result2 %in% c("", NA, "(no MO)")] <- "" + # this will give a warning if x cannot be coerced + res <- exec_as.mo(x = x, Becker = Becker, Lancefield = Lancefield, property = property) + + if (property != "tsn") { + res[x %in% c("", NA) | res %in% c("", NA, "(no MO)")] <- "" if (property %in% c("fullname", "shortname", "genus", "species", "subspecies", "type", "gramstain")) { - result2 <- mo_translate(result2, language = language) + res <- mo_translate(res, language = language) } } - result2 + res } #' @rdname mo_property #' @export mo_taxonomy <- function(x) { - x <- as.mo(x) + x <- AMR::as.mo(x) base::list(subkingdom = mo_subkingdom(x), phylum = mo_phylum(x), class = mo_class(x), @@ -247,7 +252,7 @@ mo_translate <- function(x, language) { if (is.null(language)) { language <- getOption("AMR_locale", default = "en")[1L] } else { - language <- tolower(language[1]) + language <- tolower(language[1L]) } if (language %in% c("en", "")) { return(x) @@ -364,3 +369,14 @@ mo_translate <- function(x, language) { ) } + +mo_validate <- function(x, property, Becker = FALSE, Lancefield = FALSE) { + if (!all(x %in% AMR::microorganisms[, property]) | Becker %in% c(TRUE, "all") | Lancefield == TRUE) { + exec_as.mo(x, + Becker = Becker, + Lancefield = Lancefield, + property = property) + } else { + x + } +} diff --git a/data/microorganisms.certe.rda b/data/microorganisms.certe.rda new file mode 100644 index 00000000..955cd948 Binary files /dev/null and b/data/microorganisms.certe.rda differ diff --git a/data/microorganisms.old.rda b/data/microorganisms.old.rda index e6b9233b..26f2a436 100644 Binary files a/data/microorganisms.old.rda and b/data/microorganisms.old.rda differ diff --git a/data/microorganisms.rda b/data/microorganisms.rda index d3c7e2cf..3b1b2d33 100755 Binary files a/data/microorganisms.rda and b/data/microorganisms.rda differ diff --git a/data/microorganisms.umcg.rda b/data/microorganisms.umcg.rda index 9247cad6..a46e345b 100755 Binary files a/data/microorganisms.umcg.rda and b/data/microorganisms.umcg.rda differ diff --git a/man/as.mo.Rd b/man/as.mo.Rd index a0b65538..88f0ccfe 100644 --- a/man/as.mo.Rd +++ b/man/as.mo.Rd @@ -65,7 +65,7 @@ This means that looking up human non-pathogenic microorganisms takes a longer ti \section{ITIS}{ \if{html}{\figure{itis_logo.jpg}{options: height=60px style=margin-bottom:5px} \cr} -This \code{AMR} package contains the \strong{complete microbial taxonomic data} (with seven taxonomic ranks - from subkingdom to subspecies) from the publicly available Integrated Taxonomic Information System (ITIS, https://www.itis.gov). ITIS is a partnership of U.S., Canadian, and Mexican agencies and taxonomic specialists [3]. The complete taxonomic kingdoms Bacteria, Fungi and Protozoa are included in this package, as well as all previously accepted names known to ITIS. +This \code{AMR} package contains the \strong{complete microbial taxonomic data} (with seven taxonomic ranks - from subkingdom to subspecies) from the publicly available Integrated Taxonomic Information System (ITIS, \url{https://www.itis.gov}). ITIS is a partnership of U.S., Canadian, and Mexican agencies and taxonomic specialists [3]. The complete taxonomic kingdoms Bacteria, Fungi and Protozoa are included in this package, as well as all previously accepted names known to ITIS. } \section{Source}{ diff --git a/man/microorganisms.Rd b/man/microorganisms.Rd index 0658a5b6..8198651e 100755 --- a/man/microorganisms.Rd +++ b/man/microorganisms.Rd @@ -34,7 +34,7 @@ A data set containing the complete microbial taxonomy of the kingdoms Bacteria, \section{ITIS}{ \if{html}{\figure{itis_logo.jpg}{options: height=60px style=margin-bottom:5px} \cr} -This \code{AMR} package contains the \strong{complete microbial taxonomic data} (with seven taxonomic ranks - from subkingdom to subspecies) from the publicly available Integrated Taxonomic Information System (ITIS, https://www.itis.gov). ITIS is a partnership of U.S., Canadian, and Mexican agencies and taxonomic specialists [3]. The complete taxonomic kingdoms Bacteria, Fungi and Protozoa are included in this package, as well as all previously accepted names known to ITIS. +This \code{AMR} package contains the \strong{complete microbial taxonomic data} (with seven taxonomic ranks - from subkingdom to subspecies) from the publicly available Integrated Taxonomic Information System (ITIS, \url{https://www.itis.gov}). ITIS is a partnership of U.S., Canadian, and Mexican agencies and taxonomic specialists [3]. The complete taxonomic kingdoms Bacteria, Fungi and Protozoa are included in this package, as well as all previously accepted names known to ITIS. } \seealso{ diff --git a/man/microorganisms.certe.Rd b/man/microorganisms.certe.Rd new file mode 100644 index 00000000..7b8ff185 --- /dev/null +++ b/man/microorganisms.certe.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{microorganisms.certe} +\alias{microorganisms.certe} +\title{Translation table for Certe} +\format{A \code{\link{tibble}} with 2,664 observations and 2 variables: +\describe{ + \item{\code{certe}}{Code of microorganism according to Certe MMB} + \item{\code{mo}}{Code of microorganism in \code{\link{microorganisms}}} +}} +\usage{ +microorganisms.certe +} +\description{ +A data set containing all bacteria codes of Certe MMB. These codes can be joined to data with an ID from \code{\link{microorganisms}$mo} (using \code{\link{left_join_microorganisms}}). GLIMS codes can also be translated to valid \code{MO}s with \code{\link{guess_mo}}. +} +\seealso{ +\code{\link{as.mo}} \code{\link{microorganisms}} +} +\keyword{datasets} diff --git a/man/microorganisms.old.Rd b/man/microorganisms.old.Rd index 19693632..71ccd54d 100644 --- a/man/microorganisms.old.Rd +++ b/man/microorganisms.old.Rd @@ -4,7 +4,7 @@ \name{microorganisms.old} \alias{microorganisms.old} \title{Data set with old taxonomic data from ITIS} -\format{A \code{\link{data.frame}} with 1,682 observations and 5 variables: +\format{A \code{\link{data.frame}} with 2,384 observations and 5 variables: \describe{ \item{\code{tsn}}{Old Taxonomic Serial Number (TSN), as defined by ITIS} \item{\code{name}}{Old taxonomic name of the microorganism as found in ITIS, see Source} @@ -19,12 +19,12 @@ microorganisms.old } \description{ -A data set containing old, previously valid, taxonomic names. This data set is used internally by \code{\link{as.mo}}. +A data set containing old (previously valid or accepted) taxonomic names according to ITIS. This data set is used internally by \code{\link{as.mo}}. } \section{ITIS}{ \if{html}{\figure{itis_logo.jpg}{options: height=60px style=margin-bottom:5px} \cr} -This \code{AMR} package contains the \strong{complete microbial taxonomic data} (with seven taxonomic ranks - from subkingdom to subspecies) from the publicly available Integrated Taxonomic Information System (ITIS, https://www.itis.gov). ITIS is a partnership of U.S., Canadian, and Mexican agencies and taxonomic specialists [3]. The complete taxonomic kingdoms Bacteria, Fungi and Protozoa are included in this package, as well as all previously accepted names known to ITIS. +This \code{AMR} package contains the \strong{complete microbial taxonomic data} (with seven taxonomic ranks - from subkingdom to subspecies) from the publicly available Integrated Taxonomic Information System (ITIS, \url{https://www.itis.gov}). ITIS is a partnership of U.S., Canadian, and Mexican agencies and taxonomic specialists [3]. The complete taxonomic kingdoms Bacteria, Fungi and Protozoa are included in this package, as well as all previously accepted names known to ITIS. } \seealso{ diff --git a/man/microorganisms.umcg.Rd b/man/microorganisms.umcg.Rd index 7586e66f..9808280b 100755 --- a/man/microorganisms.umcg.Rd +++ b/man/microorganisms.umcg.Rd @@ -7,7 +7,7 @@ \format{A \code{\link{tibble}} with 1,095 observations and 2 variables: \describe{ \item{\code{umcg}}{Code of microorganism according to UMCG MMB} - \item{\code{mo}}{Code of microorganism in \code{\link{microorganisms}}} + \item{\code{certe}}{Code of microorganism according to Certe MMB} }} \usage{ microorganisms.umcg @@ -16,6 +16,6 @@ microorganisms.umcg A data set containing all bacteria codes of UMCG MMB. These codes can be joined to data with an ID from \code{\link{microorganisms}$mo} (using \code{\link{left_join_microorganisms}}). GLIMS codes can also be translated to valid \code{MO}s with \code{\link{guess_mo}}. } \seealso{ -\code{\link{as.mo}} \code{\link{microorganisms}} +\code{\link{as.mo}} \code{\link{microorganisms.certe}} \code{\link{microorganisms}} } \keyword{datasets} diff --git a/man/mo_property.Rd b/man/mo_property.Rd index 92537432..30f0e165 100644 --- a/man/mo_property.Rd +++ b/man/mo_property.Rd @@ -65,7 +65,7 @@ mo_taxonomy(x) \item{property}{one of the column names of one of the \code{\link{microorganisms}} data set or \code{"shortname"}} } \value{ -A logical (in case of \code{mo_aerobic}), a list (in case of \code{mo_taxonomy}), a character otherwise +A \code{list} (in case of \code{mo_taxonomy}) or a \code{character} otherwise } \description{ Use these functions to return a specific property of a microorganism from the \code{\link{microorganisms}} data set. All input values will be evaluated internally with \code{\link{as.mo}}. @@ -73,7 +73,7 @@ Use these functions to return a specific property of a microorganism from the \c \section{ITIS}{ \if{html}{\figure{itis_logo.jpg}{options: height=60px style=margin-bottom:5px} \cr} -This \code{AMR} package contains the \strong{complete microbial taxonomic data} (with seven taxonomic ranks - from subkingdom to subspecies) from the publicly available Integrated Taxonomic Information System (ITIS, https://www.itis.gov). ITIS is a partnership of U.S., Canadian, and Mexican agencies and taxonomic specialists [3]. The complete taxonomic kingdoms Bacteria, Fungi and Protozoa are included in this package, as well as all previously accepted names known to ITIS. +This \code{AMR} package contains the \strong{complete microbial taxonomic data} (with seven taxonomic ranks - from subkingdom to subspecies) from the publicly available Integrated Taxonomic Information System (ITIS, \url{https://www.itis.gov}). ITIS is a partnership of U.S., Canadian, and Mexican agencies and taxonomic specialists [3]. The complete taxonomic kingdoms Bacteria, Fungi and Protozoa are included in this package, as well as all previously accepted names known to ITIS. } \section{Source}{ @@ -94,7 +94,7 @@ mo_order("E. coli") # "Enterobacteriales" mo_family("E. coli") # "Enterobacteriaceae" mo_genus("E. coli") # "Escherichia" mo_species("E. coli") # "coli" -mo_subspecies("E. coli") # "" +mo_subspecies("E. coli") # NA mo_fullname("E. coli") # "Escherichia coli" mo_shortname("E. coli") # "E. coli" mo_gramstain("E. coli") # "Gram negative" diff --git a/tests/testthat/test-mo.R b/tests/testthat/test-mo.R index f853fdb2..30829711 100644 --- a/tests/testthat/test-mo.R +++ b/tests/testthat/test-mo.R @@ -15,7 +15,7 @@ test_that("as.mo works", { expect_equal(as.character(as.mo("Escherichia species")), "B_ESCHR") expect_equal(as.character(as.mo("Escherichia")), "B_ESCHR") expect_equal(as.character(as.mo(" B_ESCHR_COL ")), "B_ESCHR_COL") - #expect_equal(as.character(as.mo("coli")), "B_ESCHR_COL") # not Campylobacter + expect_equal(as.character(as.mo("e coli")), "B_ESCHR_COL") # not Campylobacter expect_equal(as.character(as.mo("klpn")), "B_KLBSL_PNE") expect_equal(as.character(as.mo("Klebsiella")), "B_KLBSL") expect_equal(as.character(as.mo("K. pneu rhino")), "B_KLBSL_PNE_RHI") # K. pneumoniae subspp. rhinoscleromatis @@ -34,9 +34,6 @@ test_that("as.mo works", { expect_equal(as.character(as.mo("P. aer")), "B_PDMNS_AER") # not Pasteurella aerogenes - # expect_equal(as.character(as.mo("Negative rods")), "GNR") - # expect_equal(as.character(as.mo("Gram negative rods")), "GNR") - # GLIMS expect_equal(as.character(as.mo("bctfgr")), "B_BCTRD_FRA") diff --git a/tests/testthat/test-mo_property.R b/tests/testthat/test-mo_property.R index 4b77eaa2..0604c4f5 100644 --- a/tests/testthat/test-mo_property.R +++ b/tests/testthat/test-mo_property.R @@ -8,11 +8,13 @@ test_that("mo_property works", { expect_equal(mo_family("E. coli"), "Enterobacteriaceae") expect_equal(mo_genus("E. coli"), "Escherichia") expect_equal(mo_species("E. coli"), "coli") - expect_equal(mo_subspecies("E. coli"), "") + expect_equal(mo_subspecies("E. coli"), NA_character_) expect_equal(mo_fullname("E. coli"), "Escherichia coli") expect_equal(mo_type("E. coli", language = "en"), "Bacteria") expect_equal(mo_gramstain("E. coli", language = "en"), "Gram negative") expect_equal(class(mo_taxonomy("E. coli")), "list") + expect_equal(names(mo_taxonomy("E. coli")), c("subkingdom", "phylum", "class", "order", + "family", "genus", "species", "subspecies")) expect_equal(mo_shortname("MRSA"), "S. aureus") expect_equal(mo_shortname("MRSA", Becker = TRUE), "S. aureus") @@ -21,12 +23,8 @@ test_that("mo_property works", { expect_equal(mo_shortname("S. aga", Lancefield = TRUE), "GBS") # test integrity - # library(dplyr) - # rnd <- sample(1:nrow(AMR::microorganisms), 500, replace = FALSE) # random 500 rows - # MOs <- AMR::microorganisms %>% filter(!is.na(mo), - # species != "species", - # dplyr::row_number() %in% rnd) - # expect_identical(MOs$fullname, mo_fullname(MOs$fullname, language = "en")) + MOs <- AMR::microorganisms + expect_identical(MOs$fullname, mo_fullname(MOs$fullname, language = "en")) # check languages expect_equal(mo_type("E. coli", language = "de"), "Bakterien")