1
0
mirror of https://github.com/msberends/AMR.git synced 2025-07-10 07:41:57 +02:00

as.mo improvement

This commit is contained in:
2019-02-23 16:02:31 +01:00
parent 41ad7a22c8
commit 1a6314769b
18 changed files with 292 additions and 294 deletions

27
R/mo.R
View File

@ -84,7 +84,6 @@
#' \itemize{
#' \item{\code{"Streptococcus group B (known as S. agalactiae)"}. The text between brackets will be removed and a warning will be thrown that the result \emph{Streptococcus group B} (\code{B_STRPT_GRB}) needs review.}
#' \item{\code{"S. aureus - please mind: MRSA"}. The last word will be stripped, after which the function will try to find a match. If it does not, the second last word will be stripped, etc. Again, a warning will be thrown that the result \emph{Staphylococcus aureus} (\code{B_STPHY_AUR}) needs review.}
#' \item{\code{"D. spartina"}. This is the abbreviation of an old taxonomic name: \emph{Didymosphaeria spartinae} (the last "e" was missing from the input). This fungus was renamed to \emph{Leptosphaeria obiones}, so a warning will be thrown that this result (\code{F_LPTSP_OBI}) needs review.}
#' \item{\code{"Fluoroquinolone-resistant Neisseria gonorrhoeae"}. The first word will be stripped, after which the function will try to find a match. A warning will be thrown that the result \emph{Neisseria gonorrhoeae} (\code{B_NESSR_GON}) needs review.}
#' }
#'
@ -156,6 +155,7 @@
#' mutate(mo = as.mo(paste(genus, species)))
#' }
as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE, reference_df = get_mo_source()) {
# will be checked for mo class in validation
mo <- mo_validate(x = x, property = "mo",
Becker = Becker, Lancefield = Lancefield,
allow_uncertain = allow_uncertain, reference_df = reference_df)
@ -170,7 +170,7 @@ is.mo <- function(x) {
#' @importFrom dplyr %>% pull left_join n_distinct progress_estimated filter
#' @importFrom data.table data.table as.data.table setkey
#' @importFrom crayon magenta red silver italic has_color
#' @importFrom crayon magenta red blue silver italic has_color
exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
allow_uncertain = TRUE, reference_df = get_mo_source(),
property = "mo", clear_options = TRUE) {
@ -210,12 +210,12 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
uncertainties <- character(0)
failures <- character(0)
x_input <- x
x <- trimws(x, which = "both")
# only check the uniques, which is way faster
x <- unique(x)
# remove empty values (to later fill them in again with NAs)
x <- x[!is.na(x) & !is.null(x) & !identical(x, "")]
# conversion of old MO codes from v0.5.0 (ITIS) to later versions (Catalogue of Life)
if (any(x %like% "^[BFP]_[A-Z]{3,7}")) {
leftpart <- gsub("^([BFP]_[A-Z]{3,7}).*", "\\1", x)
@ -271,7 +271,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
} else if (!all(x %in% microorganismsDT[[property]])) {
x_backup <- trimws(x, which = "both")
x_backup <- x # trimws(x, which = "both")
# remove spp and species
x <- trimws(gsub(" +(spp.?|ssp.?|sp.? |ss ?.?|subsp.?|subspecies|biovar |serovar |species)", " ", x_backup, ignore.case = TRUE), which = "both")
@ -323,6 +323,13 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
progress$tick()$print()
found <- microorganismsDT[mo == toupper(x_backup[i]), ..property][[1]]
# is a valid MO code
if (length(found) > 0) {
x[i] <- found[1L]
next
}
if (tolower(x_trimmed[i]) %in% c("", "xxx", "other", "none", "unknown")) {
# empty and nonsense values, ignore without warning ("xxx" is WHONET code for 'no growth')
x[i] <- NA_character_
@ -510,11 +517,11 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
return(found[1L])
}
found <- data_to_check[mo == toupper(a.x_backup), ..property][[1]]
# is a valid mo
if (length(found) > 0) {
return(found[1L])
}
# found <- data_to_check[mo == toupper(a.x_backup), ..property][[1]]
# # is a valid mo
# if (length(found) > 0) {
# return(found[1L])
# }
found <- data_to_check[tolower(fullname) == tolower(c.x_trimmed_without_group), ..property][[1]]
if (length(found) > 0) {
return(found[1L])
@ -872,7 +879,7 @@ TEMPORARY_TAXONOMY <- function(x) {
x
}
#' @importFrom crayon blue italic
#' @importFrom crayon italic
was_renamed <- function(name_old, name_new, ref_old = "", ref_new = "", mo = "") {
if (!is.na(ref_old)) {
ref_old <- paste0(" (", ref_old, ")")