1
0
mirror of https://github.com/msberends/AMR.git synced 2025-07-09 22:22:03 +02:00

as.mo improvement

This commit is contained in:
2019-02-26 12:33:26 +01:00
parent f25a4b3679
commit f9ded23023
67 changed files with 391 additions and 342 deletions

View File

@ -71,3 +71,24 @@
#' mo_fullname("C. elegans")
#' # [1] "Chroococcus limneticus elegans" # Because a microorganism was found
NULL
#' Version info of included Catalogue of Life
#'
#' This function returns a list with info about the included data from the Catalogue of Life. It also shows if the included version is their latest annual release. The Catalogue of Life releases their annual release in March each year.
#' @seealso \code{\link{microorganisms}}
#' @inheritSection catalogue_of_life Catalogue of Life
#' @inheritSection AMR Read more on our website!
#' @export
#' @examples
#' library(dplyr)
#' microorganisms %>% freq(kingdom)
#' microorganisms %>% group_by(kingdom) %>% freq(phylum, nmax = NULL)
catalogue_of_life_version <- function() {
# see the `catalogue_of_life` list in R/data.R
list(version = catalogue_of_life$version,
url = catalogue_of_life$url,
# annual release always somewhere in March
is_latest_annual_release = Sys.Date() < as.Date(paste0(catalogue_of_life$year + 1, "-04-01")),
n_species = nrow(AMR::microorganisms),
n_synonyms = nrow(AMR::microorganisms.old))
}

View File

@ -168,20 +168,6 @@ catalogue_of_life <- list(
url = "http://www.catalogueoflife.org/annual-checklist/2018"
)
#' Version info of included Catalogue of Life
#' @seealso \code{\link{microorganisms}}
#' @inheritSection catalogue_of_life Catalogue of Life
#' @inheritSection AMR Read more on our website!
#' @export
catalogue_of_life_version <- function() {
list(version = catalogue_of_life$version,
url = catalogue_of_life$url,
# annual release always somewhere in March
is_latest_annual_release = Sys.Date() < as.Date(paste0(catalogue_of_life$year + 1, "-04-01")),
no_of_species = nrow(AMR::microorganisms),
no_of_synonyms = nrow(AMR::microorganisms.old))
}
#' Data set with previously accepted taxonomic names
#'
#' A data set containing old (previously valid or accepted) taxonomic names according to the Catalogue of Life. This data set is used internally by \code{\link{as.mo}}.

19
R/mo.R
View File

@ -166,7 +166,12 @@
#' mutate(mo = as.mo(paste(genus, species)))
#' }
as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE, reference_df = get_mo_source()) {
if (all(x %in% AMR::microorganisms$fullname)
if (all(x %in% AMR::microorganisms$mo)
& isFALSE(Becker)
& isFALSE(Lancefield)
& is.null(reference_df)) {
y <- x
} else if (all(x %in% AMR::microorganisms$fullname)
& isFALSE(Becker)
& isFALSE(Lancefield)
& is.null(reference_df)) {
@ -179,12 +184,13 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE,
if (any(is.na(y))) {
y[is.na(y)] <- microorganismsDT[prevalence == 3][data.table(fullname = x[is.na(y)]), on = "fullname", "mo"][[1]]
}
return(y)
} else {
# will be checked for mo class in validation and uses exec_as.mo internally if necessary
y <- mo_validate(x = x, property = "mo",
Becker = Becker, Lancefield = Lancefield,
allow_uncertain = allow_uncertain, reference_df = reference_df)
}
# will be checked for mo class in validation
mo_validate(x = x, property = "mo",
Becker = Becker, Lancefield = Lancefield,
allow_uncertain = allow_uncertain, reference_df = reference_df)
structure(.Data = y, class = "mo")
}
#' @rdname as.mo
@ -891,6 +897,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
stringsAsFactors = FALSE)
df_input <- data.frame(input = as.character(x_input),
stringsAsFactors = FALSE)
x <- df_input %>%
left_join(df_found,
by = "input") %>%

View File

@ -132,7 +132,7 @@
#' mo_taxonomy("E. coli")
mo_fullname <- function(x, language = get_locale(), ...) {
x <- mo_validate(x = x, property = "fullname", ...)
mo_translate(x, language = language)
translate(x, language = language)
}
#' @rdname mo_property
@ -148,46 +148,64 @@ mo_shortname <- function(x, language = get_locale(), ...) {
if (is.null(Lancefield)) {
Lancefield <- FALSE
}
if (Becker %in% c(TRUE, "all") | Lancefield == TRUE) {
res1 <- AMR::as.mo(x, Becker = FALSE, Lancefield = FALSE, reference_df = dots$reference_df)
res2 <- suppressWarnings(AMR::as.mo(res1, ...))
res2_fullname <- mo_fullname(res2, language = language)
res2_fullname[res2_fullname %like% " \\(CoNS\\)"] <- "CoNS"
res2_fullname[res2_fullname %like% " \\(CoPS\\)"] <- "CoPS"
res2_fullname[res2_fullname %like% " \\(KNS\\)"] <- "KNS"
res2_fullname[res2_fullname %like% " \\(KPS\\)"] <- "KPS"
res2_fullname[res2_fullname %like% " \\(CNS\\)"] <- "CNS"
res2_fullname[res2_fullname %like% " \\(CPS\\)"] <- "CPS"
res2_fullname <- gsub("Streptococcus (group|Gruppe|gruppe|groep|grupo|gruppo|groupe) (.)",
"G\\2S",
res2_fullname) # turn "Streptococcus group A" and "Streptococcus grupo A" to "GAS"
res2_fullname_vector <- res2_fullname[res2_fullname == mo_fullname(res1)]
res2_fullname[res2_fullname == mo_fullname(res1)] <- paste0(substr(mo_genus(res2_fullname_vector), 1, 1),
". ",
suppressWarnings(mo_species(res2_fullname_vector)))
if (sum(res1 == res2, na.rm = TRUE) > 0) {
res1[res1 == res2] <- paste0(substr(mo_genus(res1[res1 == res2]), 1, 1),
". ",
suppressWarnings(mo_species(res1[res1 == res2])))
}
res1[res1 != res2] <- res2_fullname
result <- as.character(res1)
} else {
x <- AMR::as.mo(x, ...)
suppressWarnings(
result <- data.frame(mo = x) %>%
left_join(AMR::microorganisms, by = "mo") %>%
mutate(shortname = ifelse(!is.na(genus) & !is.na(species), paste0(substr(genus, 1, 1), ". ", species), NA_character_)) %>%
pull(shortname)
)
shorten <- function(x) {
# easiest: no transformations needed
x <- mo_fullname(x, language = "en")
# shorten for the ones that have a space: shorten first word and write out second word
shorten_these <- x %like% " " & !x %like% "Streptococcus group "
x[shorten_these] <- paste0(substr(x[shorten_these], 1, 1),
". ",
x[shorten_these] %>%
strsplit(" ", fixed = TRUE) %>%
unlist() %>%
.[2])
x
}
mo_translate(result, language = language)
if (isFALSE(Becker) & isFALSE(Lancefield)) {
result <- shorten(x)
} else {
# get result without transformations
res1 <- AMR::as.mo(x, Becker = FALSE, Lancefield = FALSE, reference_df = dots$reference_df)
# and result with transformations
res2 <- suppressWarnings(AMR::as.mo(res1, ...))
if (res1 == res2
& !res1 %like% "^B_STRPT_GR") {
result <- shorten(x)
} else {
res2_fullname <- mo_fullname(res2, language = language)
res2_fullname[res2_fullname %like% " \\(CoNS\\)"] <- "CoNS"
res2_fullname[res2_fullname %like% " \\(CoPS\\)"] <- "CoPS"
res2_fullname[res2_fullname %like% " \\(KNS\\)"] <- "KNS"
res2_fullname[res2_fullname %like% " \\(KPS\\)"] <- "KPS"
res2_fullname[res2_fullname %like% " \\(CNS\\)"] <- "CNS"
res2_fullname[res2_fullname %like% " \\(CPS\\)"] <- "CPS"
res2_fullname <- gsub("Streptococcus (group|Gruppe|gruppe|groep|grupo|gruppo|groupe) (.)",
"G\\2S",
res2_fullname) # turn "Streptococcus group A" and "Streptococcus grupo A" to "GAS"
res2_fullname_vector <- res2_fullname[res2_fullname == mo_fullname(res1)]
res2_fullname[res2_fullname == mo_fullname(res1)] <- paste0(substr(mo_genus(res2_fullname_vector), 1, 1),
". ",
suppressWarnings(mo_species(res2_fullname_vector)))
if (sum(res1 == res2, na.rm = TRUE) > 0) {
res1[res1 == res2] <- paste0(substr(mo_genus(res1[res1 == res2]), 1, 1),
". ",
suppressWarnings(mo_species(res1[res1 == res2])))
}
res1[res1 != res2] <- res2_fullname
result <- as.character(res1)
}
}
translate(result, language = language)
}
#' @rdname mo_property
#' @export
mo_subspecies <- function(x, language = get_locale(), ...) {
mo_translate(mo_validate(x = x, property = "subspecies", ...), language = language)
translate(validate(x = x, property = "subspecies", ...), language = language)
}
#' @rdname mo_property