mirror of
https://github.com/msberends/AMR.git
synced 2025-07-08 11:51:59 +02:00
(v0.7.1.9077) mo codes fix
This commit is contained in:
3
R/data.R
3
R/data.R
@ -98,12 +98,13 @@ catalogue_of_life <- list(
|
||||
#'
|
||||
#' A data set containing old (previously valid or accepted) taxonomic names according to the Catalogue of Life. This data set is used internally by \code{\link{as.mo}}.
|
||||
#' @inheritSection catalogue_of_life Catalogue of Life
|
||||
#' @format A \code{\link{data.frame}} with 24,246 observations and 4 variables:
|
||||
#' @format A \code{\link{data.frame}} with 24,246 observations and 5 variables:
|
||||
#' \describe{
|
||||
#' \item{\code{col_id}}{Catalogue of Life ID that was originally given}
|
||||
#' \item{\code{col_id_new}}{New Catalogue of Life ID that responds to an entry in the \code{\link{microorganisms}} data set}
|
||||
#' \item{\code{fullname}}{Old full taxonomic name of the microorganism}
|
||||
#' \item{\code{ref}}{Author(s) and year of concerning scientific publication}
|
||||
#' \item{\code{prevalence}}{Prevalence of the microorganism, see \code{?as.mo}}
|
||||
#' }
|
||||
#' @source Catalogue of Life: Annual Checklist (public online taxonomic database), \url{http://www.catalogueoflife.org} (check included annual version with \code{\link{catalogue_of_life_version}()}).
|
||||
#' @inheritSection AMR Read more on our website!
|
||||
|
58
R/mo.R
58
R/mo.R
@ -71,9 +71,10 @@
|
||||
#' \itemize{
|
||||
#' \item{Valid MO codes and full names: it first searches in already valid MO code and known genus/species combinations}
|
||||
#' \item{Human pathogenic prevalence: it first searches in more prevalent microorganisms, then less prevalent ones (see \emph{Microbial prevalence of pathogens in humans} below)}
|
||||
#' \item{Taxonomic kingdom: it first searches in Bacteria/Chromista, then Fungi, then Protozoa}
|
||||
#' \item{Taxonomic kingdom: it first searches in Bacteria, then Fungi, then Protozoa, then Archaea, then others}
|
||||
#' \item{Breakdown of input values: from here it starts to breakdown input values to find possible matches}
|
||||
#' }
|
||||
#'
|
||||
#'
|
||||
#' A couple of effects because of these rules:
|
||||
#' \itemize{
|
||||
@ -258,7 +259,6 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE,
|
||||
y <- mo_validate(x = x, property = "mo",
|
||||
Becker = Becker, Lancefield = Lancefield,
|
||||
allow_uncertain = uncertainty_level, reference_df = reference_df,
|
||||
#force_mo_history = isTRUE(list(...)$force_mo_history),
|
||||
...)
|
||||
}
|
||||
|
||||
@ -285,7 +285,7 @@ is.mo <- function(x) {
|
||||
# param force_mo_history logical - whether found result must be saved with set_mo_history (default FALSE on non-interactive sessions)
|
||||
# param disable_mo_history logical - whether set_mo_history and get_mo_history should be ignored
|
||||
# param debug logical - show different lookup texts while searching
|
||||
# param uncertain_check_prevalence integer - the prevalence to check for when running for uncertain results, follows microorganisms$prevalence
|
||||
# param reference_data_to_use data.frame - the data set to check for
|
||||
exec_as.mo <- function(x,
|
||||
Becker = FALSE,
|
||||
Lancefield = FALSE,
|
||||
@ -936,6 +936,7 @@ exec_as.mo <- function(x,
|
||||
|
||||
# NOW RUN THROUGH DIFFERENT PREVALENCE LEVELS
|
||||
check_per_prevalence <- function(data_to_check,
|
||||
data.old_to_check,
|
||||
a.x_backup,
|
||||
b.x_trimmed,
|
||||
c.x_trimmed_without_group,
|
||||
@ -1065,33 +1066,31 @@ exec_as.mo <- function(x,
|
||||
}
|
||||
|
||||
# MISCELLANEOUS ----
|
||||
|
||||
|
||||
# look for old taxonomic names ----
|
||||
# wait until prevalence == 2 to run the old taxonomic results on both prevalence == 1 and prevalence == 2
|
||||
if (nrow(data_to_check) == nrow(microorganismsDT[prevalence == 2])) {
|
||||
found <- microorganisms.oldDT[fullname_lower == tolower(a.x_backup)
|
||||
| fullname_lower %like_case% d.x_withspaces_start_end,]
|
||||
if (NROW(found) > 0) {
|
||||
col_id_new <- found[1, col_id_new]
|
||||
# when property is "ref" (which is the case in mo_ref, mo_authors and mo_year), return the old value, so:
|
||||
# mo_ref("Chlamydia psittaci") = "Page, 1968" (with warning)
|
||||
# mo_ref("Chlamydophila psittaci") = "Everett et al., 1999"
|
||||
if (property == "ref") {
|
||||
x[i] <- found[1, ref]
|
||||
} else {
|
||||
x[i] <- microorganismsDT[col_id == found[1, col_id_new], ..property][[1]]
|
||||
}
|
||||
options(mo_renamed_last_run = found[1, fullname])
|
||||
was_renamed(name_old = found[1, fullname],
|
||||
name_new = microorganismsDT[col_id == found[1, col_id_new], fullname],
|
||||
ref_old = found[1, ref],
|
||||
ref_new = microorganismsDT[col_id == found[1, col_id_new], ref],
|
||||
mo = microorganismsDT[col_id == found[1, col_id_new], mo])
|
||||
if (initial_search == TRUE) {
|
||||
set_mo_history(a.x_backup, get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history)
|
||||
}
|
||||
return(x[i])
|
||||
found <- data.old_to_check[fullname_lower == tolower(a.x_backup)
|
||||
| fullname_lower %like_case% d.x_withspaces_start_end,]
|
||||
if (NROW(found) > 0) {
|
||||
col_id_new <- found[1, col_id_new]
|
||||
# when property is "ref" (which is the case in mo_ref, mo_authors and mo_year), return the old value, so:
|
||||
# mo_ref("Chlamydia psittaci") = "Page, 1968" (with warning)
|
||||
# mo_ref("Chlamydophila psittaci") = "Everett et al., 1999"
|
||||
if (property == "ref") {
|
||||
x[i] <- found[1, ref]
|
||||
} else {
|
||||
x[i] <- microorganismsDT[col_id == found[1, col_id_new], ..property][[1]]
|
||||
}
|
||||
options(mo_renamed_last_run = found[1, fullname])
|
||||
was_renamed(name_old = found[1, fullname],
|
||||
name_new = microorganismsDT[col_id == found[1, col_id_new], fullname],
|
||||
ref_old = found[1, ref],
|
||||
ref_new = microorganismsDT[col_id == found[1, col_id_new], ref],
|
||||
mo = microorganismsDT[col_id == found[1, col_id_new], mo])
|
||||
if (initial_search == TRUE) {
|
||||
set_mo_history(a.x_backup, get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history)
|
||||
}
|
||||
return(x[i])
|
||||
}
|
||||
|
||||
# check for uncertain results ----
|
||||
@ -1119,7 +1118,7 @@ exec_as.mo <- function(x,
|
||||
if (isTRUE(debug)) {
|
||||
message("Running '", d.x_withspaces_start_end, "' and '", e.x_withspaces_start_only, "'")
|
||||
}
|
||||
found <- microorganisms.oldDT[fullname_lower %like_case% d.x_withspaces_start_end
|
||||
found <- data.old_to_check[fullname_lower %like_case% d.x_withspaces_start_end
|
||||
| fullname_lower %like_case% e.x_withspaces_start_only]
|
||||
if (NROW(found) > 0 & nchar(g.x_backup_without_spp) >= 6) {
|
||||
if (property == "ref") {
|
||||
@ -1521,6 +1520,7 @@ exec_as.mo <- function(x,
|
||||
|
||||
# FIRST TRY VERY PREVALENT IN HUMAN INFECTIONS ----
|
||||
x[i] <- check_per_prevalence(data_to_check = reference_data_to_use[prevalence == 1],
|
||||
data.old_to_check = microorganisms.oldDT[prevalence == 1],
|
||||
a.x_backup = x_backup[i],
|
||||
b.x_trimmed = x_trimmed[i],
|
||||
c.x_trimmed_without_group = x_trimmed_without_group[i],
|
||||
@ -1539,6 +1539,7 @@ exec_as.mo <- function(x,
|
||||
|
||||
# THEN TRY PREVALENT IN HUMAN INFECTIONS ----
|
||||
x[i] <- check_per_prevalence(data_to_check = reference_data_to_use[prevalence == 2],
|
||||
data.old_to_check = microorganisms.oldDT[prevalence == 2],
|
||||
a.x_backup = x_backup[i],
|
||||
b.x_trimmed = x_trimmed[i],
|
||||
c.x_trimmed_without_group = x_trimmed_without_group[i],
|
||||
@ -1557,6 +1558,7 @@ exec_as.mo <- function(x,
|
||||
|
||||
# THEN UNPREVALENT IN HUMAN INFECTIONS ----
|
||||
x[i] <- check_per_prevalence(data_to_check = reference_data_to_use[prevalence == 3],
|
||||
data.old_to_check = microorganisms.oldDT[prevalence == 3],
|
||||
a.x_backup = x_backup[i],
|
||||
b.x_trimmed = x_trimmed[i],
|
||||
c.x_trimmed_without_group = x_trimmed_without_group[i],
|
||||
|
@ -55,7 +55,7 @@ set_mo_history <- function(x, mo, uncertainty_level, force = FALSE, disable = FA
|
||||
# if (tryCatch(nrow(getOption("mo_remembered_results")), error = function(e) 1001) > 1000) {
|
||||
# return(base::invisible())
|
||||
# }
|
||||
if (is.null(mo_hist)) {
|
||||
if (is.null(mo_hist) & interactive()) {
|
||||
message(blue(paste0("NOTE: results are saved to ", mo_history_file(), ".")))
|
||||
}
|
||||
tryCatch(write.csv(rbind(mo_hist,
|
||||
|
BIN
R/sysdata.rda
BIN
R/sysdata.rda
Binary file not shown.
11
R/zzz.R
11
R/zzz.R
@ -28,7 +28,7 @@
|
||||
microorganisms.oldDT <- as.data.table(AMR::microorganisms.old)
|
||||
# for fullname_lower: keep only dots, letters, numbers, slashes, spaces and dashes
|
||||
microorganisms.oldDT$fullname_lower <- gsub("[^.a-z0-9/ \\-]+", "", tolower(microorganisms.oldDT$fullname))
|
||||
setkey(microorganisms.oldDT, col_id, fullname)
|
||||
setkey(microorganisms.oldDT, prevalence, fullname)
|
||||
|
||||
assign(x = "microorganismsDT",
|
||||
value = make_DT(),
|
||||
@ -81,12 +81,17 @@
|
||||
|
||||
#' @importFrom data.table as.data.table setkey
|
||||
make_DT <- function() {
|
||||
microorganismsDT <- as.data.table(AMR::microorganisms)
|
||||
microorganismsDT <- as.data.table(AMR::microorganisms %>%
|
||||
mutate(kingdom_index = case_when(kingdom == "Bacteria" ~ 1,
|
||||
kingdom == "Fungi" ~ 2,
|
||||
kingdom == "Protozoa" ~ 3,
|
||||
kingdom == "Archaea" ~ 4,
|
||||
TRUE ~ 6)))
|
||||
# for fullname_lower: keep only dots, letters, numbers, slashes, spaces and dashes
|
||||
microorganismsDT$fullname_lower <- gsub("[^.a-z0-9/ \\-]+", "", tolower(microorganismsDT$fullname))
|
||||
setkey(microorganismsDT,
|
||||
prevalence,
|
||||
kingdom,
|
||||
kingdom_index,
|
||||
fullname)
|
||||
microorganismsDT
|
||||
}
|
||||
|
Reference in New Issue
Block a user