1
0
mirror of https://github.com/msberends/AMR.git synced 2025-07-08 16:02:02 +02:00

as.mo improvements

This commit is contained in:
2019-02-23 18:08:28 +01:00
parent 1a6314769b
commit f16a152d06
11 changed files with 388 additions and 346 deletions

16
R/mo.R
View File

@ -210,6 +210,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
uncertainties <- character(0)
failures <- character(0)
x_input <- x
# already strip leading and trailing spaces
x <- trimws(x, which = "both")
# only check the uniques, which is way faster
x <- unique(x)
@ -218,6 +219,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
# conversion of old MO codes from v0.5.0 (ITIS) to later versions (Catalogue of Life)
if (any(x %like% "^[BFP]_[A-Z]{3,7}")) {
print("is any")
leftpart <- gsub("^([BFP]_[A-Z]{3,7}).*", "\\1", x)
if (any(leftpart %in% names(mo_codes_v0.5.0))) {
rightpart <- gsub("^[BFP]_[A-Z]{3,7}(.*)", "\\1", x)
@ -241,8 +243,8 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
)
}
# all empty
if (all(identical(trimws(x_input), "") | is.na(x_input))) {
# all empty
if (property == "mo") {
return(structure(rep(NA_character_, length(x_input)), class = "mo"))
} else {
@ -264,6 +266,11 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
# existing mo codes when not looking for property "mo", like mo_genus("B_ESCHR_COL")
x <- microorganismsDT[data.table(mo = x), on = "mo", ..property][[1]]
} else if (all(x %in% microorganismsDT[prevalence == 1, "fullname"][[1]])) {
# we need special treatment for prevalent full names, they are likely!
# e.g. as.mo("Staphylococcus aureus")
x <- microorganismsDT[prevalence == 1][data.table(fullname = x), on = "fullname", ..property][[1]]
} else if (all(toupper(x) %in% microorganisms.codes[, "code"])) {
# commonly used MO codes
y <- as.data.table(microorganisms.codes)[data.table(code = toupper(x)), on = "code", ]
@ -271,7 +278,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
} else if (!all(x %in% microorganismsDT[[property]])) {
x_backup <- x # trimws(x, which = "both")
x_backup <- x
# remove spp and species
x <- trimws(gsub(" +(spp.?|ssp.?|sp.? |ss ?.?|subsp.?|subspecies|biovar |serovar |species)", " ", x_backup, ignore.case = TRUE), which = "both")
@ -517,11 +524,6 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
return(found[1L])
}
# found <- data_to_check[mo == toupper(a.x_backup), ..property][[1]]
# # is a valid mo
# if (length(found) > 0) {
# return(found[1L])
# }
found <- data_to_check[tolower(fullname) == tolower(c.x_trimmed_without_group), ..property][[1]]
if (length(found) > 0) {
return(found[1L])