mirror of
https://github.com/msberends/AMR.git
synced 2025-07-08 21:22:01 +02:00
new: 1680 old taxonomic names
This commit is contained in:
@ -21,7 +21,7 @@
|
||||
#' Use these functions to return a specific property of an antibiotic from the \code{\link{antibiotics}} data set, based on their ATC code. Get such a code with \code{\link{as.atc}}.
|
||||
#' @param x a (vector of a) valid \code{\link{atc}} code or any text that can be coerced to a valid atc with \code{\link{as.atc}}
|
||||
#' @param property one of the column names of one of the \code{\link{antibiotics}} data set, like \code{"atc"} and \code{"official"}
|
||||
#' @param language language of the returned text, defaults to the systems language. Either one of \code{"en"} (English) or \code{"nl"} (Dutch).
|
||||
#' @param language language of the returned text, defaults to English (\code{"en"}) and can be set with \code{\link{getOption}("AMR_locale")}. Either one of \code{"en"} (English) or \code{"nl"} (Dutch).
|
||||
#' @rdname ab_property
|
||||
#' @return A vector of values. In case of \code{ab_tradenames}, if \code{x} is of length one, a vector will be returned. Otherwise a \code{\link{list}}, with \code{x} as names.
|
||||
#' @export
|
||||
@ -60,7 +60,7 @@ ab_atc <- function(x) {
|
||||
ab_official <- function(x, language = NULL) {
|
||||
|
||||
if (is.null(language)) {
|
||||
language <- Sys.locale()
|
||||
language <- getOption("AMR_locale", default = "en")[1L]
|
||||
} else {
|
||||
language <- tolower(language[1])
|
||||
}
|
||||
|
2
R/data.R
2
R/data.R
@ -150,7 +150,7 @@
|
||||
#'
|
||||
#' A data set containing old, previously valid, taxonomic names. This data set is used internally by \code{\link{as.mo}}.
|
||||
#' @inheritSection as.mo ITIS
|
||||
#' @format A \code{\link{data.frame}} with 58 observations and 5 variables:
|
||||
#' @format A \code{\link{data.frame}} with 1,682 observations and 5 variables:
|
||||
#' \describe{
|
||||
#' \item{\code{tsn}}{Old Taxonomic Serial Number (TSN), as defined by ITIS}
|
||||
#' \item{\code{name}}{Old taxonomic name of the microorganism as found in ITIS, see Source}
|
||||
|
20
R/misc.R
20
R/misc.R
@ -155,26 +155,6 @@ tbl_parse_guess <- function(tbl,
|
||||
tbl
|
||||
}
|
||||
|
||||
#' @importFrom dplyr case_when
|
||||
Sys.locale <- function() {
|
||||
alreadyset <- getOption("AMR_locale")
|
||||
if (!is.null(alreadyset)) {
|
||||
if (tolower(alreadyset) %in% c("en", "de", "nl", "es", "fr", "pt", "it")) {
|
||||
return(tolower(alreadyset))
|
||||
}
|
||||
}
|
||||
sys <- base::Sys.getlocale()
|
||||
case_when(
|
||||
sys %like% '(Deutsch|German|de_)' ~ "de",
|
||||
sys %like% '(Nederlands|Dutch|nl_)' ~ "nl",
|
||||
sys %like% '(Espa.ol|Spanish|es_)' ~ "es",
|
||||
sys %like% '(Fran.ais|French|fr_)' ~ "fr",
|
||||
sys %like% '(Portugu.s|Portuguese|pt_)' ~ "pt",
|
||||
sys %like% '(Italiano|Italian|it_)' ~ "it",
|
||||
TRUE ~ "en"
|
||||
)
|
||||
}
|
||||
|
||||
# transforms date format like "dddd d mmmm yyyy" to "%A %e %B %Y"
|
||||
date_generic <- function(format) {
|
||||
if (!grepl('%', format, fixed = TRUE)) {
|
||||
|
112
R/mo.R
112
R/mo.R
@ -60,7 +60,7 @@
|
||||
#' \code{guess_mo} is an alias of \code{as.mo}.
|
||||
#' @section ITIS:
|
||||
#' \if{html}{\figure{itis_logo.jpg}{options: height=60px style=margin-bottom:5px} \cr}
|
||||
#' This \code{AMR} package contains the \strong{complete microbial taxonomic data} from the publicly available Integrated Taxonomic Information System (ITIS, https://www.itis.gov). ITIS is a partnership of U.S., Canadian, and Mexican agencies and taxonomic specialists [3]. The complete taxonomic kingdoms Bacteria, Fungi and Protozoa (from subkingdom to the subspecies level) are included in this package.
|
||||
#' This \code{AMR} package contains the \strong{complete microbial taxonomic data} (with seven taxonomic ranks - from subkingdom to subspecies) from the publicly available Integrated Taxonomic Information System (ITIS, https://www.itis.gov). ITIS is a partnership of U.S., Canadian, and Mexican agencies and taxonomic specialists [3]. The complete taxonomic kingdoms Bacteria, Fungi and Protozoa are included in this package, as well as all previously accepted names known to ITIS.
|
||||
# (source as section, so it can be inherited by mo_property:)
|
||||
#' @section Source:
|
||||
#' [1] Becker K \emph{et al.} \strong{Coagulase-Negative Staphylococci}. 2014. Clin Microbiol Rev. 27(4): 870–926. \url{https://dx.doi.org/10.1128/CMR.00109-13}
|
||||
@ -302,7 +302,8 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = FALSE
|
||||
next
|
||||
}
|
||||
found <- MOs_mostprevalent[mo.old == toupper(x_backup[i])
|
||||
| (substr(x_backup[i], 4, 6) == "SPP" & mo.old == substr(x_backup[i], 1, 3)), mo]
|
||||
| (substr(x_backup[i], 4, 6) == "SPP" & mo.old == substr(x_backup[i], 1, 3))
|
||||
| mo.old == substr(x_backup[i], 1, 3), mo]
|
||||
# is a valid old mo
|
||||
if (length(found) > 0) {
|
||||
x[i] <- found[1L]
|
||||
@ -395,7 +396,9 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = FALSE
|
||||
x[i] <- found[1L]
|
||||
next
|
||||
}
|
||||
found <- MOs_allothers[mo.old == toupper(x_backup[i]), mo]
|
||||
found <- MOs_allothers[mo.old == toupper(x_backup[i])
|
||||
| (substr(x_backup[i], 4, 6) == "SPP" & mo.old == substr(x_backup[i], 1, 3))
|
||||
| mo.old == substr(x_backup[i], 1, 3), mo]
|
||||
# is a valid old mo
|
||||
if (length(found) > 0) {
|
||||
x[i] <- found[1L]
|
||||
@ -463,27 +466,47 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = FALSE
|
||||
|
||||
# look for old taxonomic names ----
|
||||
if (is.null(MOs_old)) {
|
||||
MOs_old <- as.data.table(microorganisms.old)
|
||||
MOs_old <- as.data.table(AMR::microorganisms.old)
|
||||
setkey(MOs_old, name, tsn_new)
|
||||
}
|
||||
found <- MOs_old[tolower(name) == tolower(x_backup[i]) |
|
||||
tsn == x_trimmed[i],]
|
||||
if (NROW(found) > 0) {
|
||||
x[i] <- MOs[tsn == found[1, tsn_new], mo]
|
||||
message("Note: '", found[1, name], "' was renamed to '",
|
||||
MOs[tsn == found[1, tsn_new], fullname], "' by ",
|
||||
found[1, authors], " in ", found[1, year])
|
||||
renamed_note(name_old = found[1, name],
|
||||
name_new = MOs[tsn == found[1, tsn_new], fullname],
|
||||
authors = found[1, authors],
|
||||
year = found[1, year])
|
||||
next
|
||||
}
|
||||
|
||||
# check for uncertain results ----
|
||||
# (1) try to strip off one element and check the remains
|
||||
if (allow_uncertain == TRUE) {
|
||||
# (1) look again for old taxonomic names, now for G. species ----
|
||||
found <- MOs_old[name %like% x_withspaces[i]
|
||||
| name %like% x_withspaces_start[i]
|
||||
| name %like% x[i],]
|
||||
if (NROW(found) > 0) {
|
||||
x[i] <- MOs[tsn == found[1, tsn_new], mo]
|
||||
warning("Uncertain interpretation: '",
|
||||
x_backup[i], "' -> '", found[1, name], "'",
|
||||
call. = FALSE, immediate. = TRUE)
|
||||
renamed_note(name_old = found[1, name],
|
||||
name_new = MOs[tsn == found[1, tsn_new], fullname],
|
||||
authors = found[1, authors],
|
||||
year = found[1, year])
|
||||
|
||||
next
|
||||
}
|
||||
|
||||
# (2) try to strip off one element and check the remains
|
||||
x_strip <- x_backup[i] %>% strsplit(" ") %>% unlist()
|
||||
x_strip <- x_strip[1:length(x_strip) - 1]
|
||||
x[i] <- suppressWarnings(suppressMessages(as.mo(x_strip)))
|
||||
if (!is.na(x[i])) {
|
||||
warning("Uncertain result: '", x_backup[i], "' -> '", MOs[mo == x[i], fullname], "' (", x[i], ")")
|
||||
warning("Uncertain interpretation: '",
|
||||
x_backup[i], "' -> '", MOs[mo == x[i], fullname], "' (", x[i], ")",
|
||||
call. = FALSE, immediate. = TRUE)
|
||||
next
|
||||
}
|
||||
}
|
||||
@ -506,28 +529,24 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = FALSE
|
||||
if (Becker == TRUE | Becker == "all") {
|
||||
# See Source. It's this figure:
|
||||
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4187637/figure/F3/
|
||||
CoNS <- MOs %>%
|
||||
filter(genus == "Staphylococcus",
|
||||
species %in% c("arlettae", "auricularis", "capitis",
|
||||
"caprae", "carnosus", "cohnii", "condimenti",
|
||||
"devriesei", "epidermidis", "equorum",
|
||||
"fleurettii", "gallinarum", "haemolyticus",
|
||||
"hominis", "jettensis", "kloosii", "lentus",
|
||||
"lugdunensis", "massiliensis", "microti",
|
||||
"muscae", "nepalensis", "pasteuri", "petrasii",
|
||||
"pettenkoferi", "piscifermentans", "rostri",
|
||||
"saccharolyticus", "saprophyticus", "sciuri",
|
||||
"stepanovicii", "simulans", "succinus",
|
||||
"vitulinus", "warneri", "xylosus")) %>%
|
||||
pull(mo)
|
||||
CoPS <- MOs %>%
|
||||
filter(genus == "Staphylococcus",
|
||||
species %in% c("simiae", "agnetis", "chromogenes",
|
||||
"delphini", "felis", "lutrae",
|
||||
"hyicus", "intermedius",
|
||||
"pseudintermedius", "pseudointermedius",
|
||||
"schleiferi")) %>%
|
||||
pull(mo)
|
||||
MOs_staph <- MOs[genus == "Staphylococcus"]
|
||||
setkey(MOs_staph, species)
|
||||
CoNS <- MOs_staph[species %in% c("arlettae", "auricularis", "capitis",
|
||||
"caprae", "carnosus", "cohnii", "condimenti",
|
||||
"devriesei", "epidermidis", "equorum",
|
||||
"fleurettii", "gallinarum", "haemolyticus",
|
||||
"hominis", "jettensis", "kloosii", "lentus",
|
||||
"lugdunensis", "massiliensis", "microti",
|
||||
"muscae", "nepalensis", "pasteuri", "petrasii",
|
||||
"pettenkoferi", "piscifermentans", "rostri",
|
||||
"saccharolyticus", "saprophyticus", "sciuri",
|
||||
"stepanovicii", "simulans", "succinus",
|
||||
"vitulinus", "warneri", "xylosus"), mo]
|
||||
CoPS <- MOs_staph[species %in% c("simiae", "agnetis", "chromogenes",
|
||||
"delphini", "felis", "lutrae",
|
||||
"hyicus", "intermedius",
|
||||
"pseudintermedius", "pseudointermedius",
|
||||
"schleiferi"), mo]
|
||||
x[x %in% CoNS] <- "B_STPHY_CNS"
|
||||
x[x %in% CoPS] <- "B_STPHY_CPS"
|
||||
if (Becker == "all") {
|
||||
@ -559,16 +578,24 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = FALSE
|
||||
}
|
||||
|
||||
# left join the found results to the original input values (x_input)
|
||||
df_found <- data.frame(input = as.character(unique(x_input)),
|
||||
DT_found <- data.table(input = as.character(unique(x_input)),
|
||||
found = x,
|
||||
key = "input",
|
||||
stringsAsFactors = FALSE)
|
||||
df_input <- data.frame(input = as.character(x_input),
|
||||
DT_input <- data.table(input = as.character(x_input),
|
||||
key = "input",
|
||||
stringsAsFactors = FALSE)
|
||||
x <- DT_found[DT_input, on = "input", found]
|
||||
|
||||
x <- df_input %>%
|
||||
left_join(df_found,
|
||||
by = "input") %>%
|
||||
pull(found)
|
||||
# df_found <- data.frame(input = as.character(unique(x_input)),
|
||||
# found = x,
|
||||
# stringsAsFactors = FALSE)
|
||||
# df_input <- data.frame(input = as.character(x_input),
|
||||
# stringsAsFactors = FALSE)
|
||||
# x <- df_input %>%
|
||||
# left_join(df_found,
|
||||
# by = "input") %>%
|
||||
# pull(found)
|
||||
|
||||
class(x) <- "mo"
|
||||
attr(x, 'package') <- 'AMR'
|
||||
@ -576,6 +603,17 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = FALSE
|
||||
x
|
||||
}
|
||||
|
||||
renamed_note <- function(name_old, name_new, authors, year) {
|
||||
msg <- paste0("Note: '", name_old, "' was renamed to '", name_new, "'")
|
||||
if (!authors %in% c("", NA)) {
|
||||
msg <- paste0(msg, " by ", authors)
|
||||
}
|
||||
if (!year %in% c("", NA)) {
|
||||
msg <- paste0(msg, " in ", year)
|
||||
}
|
||||
base::message(msg)
|
||||
}
|
||||
|
||||
|
||||
#' @rdname as.mo
|
||||
#' @export
|
||||
|
@ -20,9 +20,9 @@
|
||||
#'
|
||||
#' Use these functions to return a specific property of a microorganism from the \code{\link{microorganisms}} data set. All input values will be evaluated internally with \code{\link{as.mo}}.
|
||||
#' @param x any (vector of) text that can be coerced to a valid microorganism code with \code{\link{as.mo}}
|
||||
#' @param property one of the column names of one of the \code{\link{microorganisms}} data set, like \code{"mo"}, \code{"bactsys"}, \code{"family"}, \code{"genus"}, \code{"species"}, \code{"fullname"}, \code{"gramstain"} and \code{"aerobic"}
|
||||
#' @param property one of the column names of one of the \code{\link{microorganisms}} data set or \code{"shortname"}
|
||||
#' @inheritParams as.mo
|
||||
#' @param language language of the returned text, defaults to the systems language but can also be set with \code{\link{getOption}("AMR_locale")}. Either one of \code{"en"} (English), \code{"de"} (German), \code{"nl"} (Dutch), \code{"es"} (Spanish) or \code{"pt"} (Portuguese).
|
||||
#' @param language language of the returned text, defaults to English (\code{"en"}) and can also be set with \code{\link{getOption}("AMR_locale")}. Either one of \code{"en"} (English), \code{"de"} (German), \code{"nl"} (Dutch), \code{"es"} (Spanish) or \code{"pt"} (Portuguese).
|
||||
#' @inheritSection as.mo ITIS
|
||||
#' @inheritSection as.mo Source
|
||||
#' @rdname mo_property
|
||||
@ -113,8 +113,8 @@ mo_shortname <- function(x, Becker = FALSE, Lancefield = FALSE, language = NULL)
|
||||
res2_fullname <- gsub("Streptococcus (group|Gruppe|gruppe|groep|grupo|gruppo|groupe) (.)",
|
||||
"G\\2S",
|
||||
res2_fullname) # turn "Streptococcus group A" and "Streptococcus grupo A" to "GAS"
|
||||
res2_fullname_vector <- res2_fullname[res2_fullname == mo_fullname(x)]
|
||||
res2_fullname[res2_fullname == mo_fullname(x)] <- paste0(substr(mo_genus(res2_fullname_vector), 1, 1),
|
||||
res2_fullname_vector <- res2_fullname[res2_fullname == mo_fullname(res1)]
|
||||
res2_fullname[res2_fullname == mo_fullname(res1)] <- paste0(substr(mo_genus(res2_fullname_vector), 1, 1),
|
||||
". ",
|
||||
suppressWarnings(mo_species(res2_fullname_vector)))
|
||||
if (sum(res1 == res2, na.rm = TRUE) > 0) {
|
||||
@ -125,6 +125,7 @@ mo_shortname <- function(x, Becker = FALSE, Lancefield = FALSE, language = NULL)
|
||||
res1[res1 != res2] <- res2_fullname
|
||||
result <- as.character(res1)
|
||||
} else {
|
||||
x <- AMR::as.mo(x)
|
||||
# return G. species
|
||||
result <- paste0(substr(mo_genus(x), 1, 1), ". ", suppressWarnings(mo_species(x)))
|
||||
}
|
||||
@ -208,11 +209,9 @@ mo_property <- function(x, property = 'fullname', Becker = FALSE, Lancefield = F
|
||||
}
|
||||
if (Becker == TRUE | Lancefield == TRUE | !is.mo(x)) {
|
||||
# this will give a warning if x cannot be coerced
|
||||
result1 <- AMR::as.mo(x = x, Becker = Becker, Lancefield = Lancefield)
|
||||
} else {
|
||||
result1 <- x
|
||||
x <- AMR::as.mo(x = x, Becker = Becker, Lancefield = Lancefield)
|
||||
}
|
||||
A <- data.table(mo = result1, stringsAsFactors = FALSE)
|
||||
A <- data.table(mo = x, stringsAsFactors = FALSE)
|
||||
B <- as.data.table(AMR::microorganisms)
|
||||
setkey(B, mo)
|
||||
result2 <- B[A, on = 'mo', ..property][[1]]
|
||||
@ -246,7 +245,7 @@ mo_taxonomy <- function(x) {
|
||||
#' @importFrom dplyr %>% case_when
|
||||
mo_translate <- function(x, language) {
|
||||
if (is.null(language)) {
|
||||
language <- Sys.locale()
|
||||
language <- getOption("AMR_locale", default = "en")[1L]
|
||||
} else {
|
||||
language <- tolower(language[1])
|
||||
}
|
||||
|
Reference in New Issue
Block a user