(v0.7.1.9038) mo algorithm inprovements

This commit is contained in:
dr. M.S. (Matthijs) Berends 2019-08-12 19:07:15 +02:00
parent c44c5e3913
commit 5f2733349e
10 changed files with 226 additions and 130 deletions

View File

@ -1,5 +1,5 @@
Package: AMR
Version: 0.7.1.9037
Version: 0.7.1.9038
Date: 2019-08-12
Title: Antimicrobial Resistance Analysis
Authors@R: c(

View File

@ -1,4 +1,4 @@
# AMR 0.7.1.9037
# AMR 0.7.1.9038
### Breaking
* Function `freq()` has moved to a new package, [`clean`](https://github.com/msberends/clean) ([CRAN link](https://cran.r-project.org/package=clean)). Creating frequency tables is actually not the scope of this package (never was) and this function has matured a lot over the last two years. Therefore, a new package was created for data cleaning and checking and it perfectly fits the `freq()` function. The [`clean`](https://github.com/msberends/clean) package is available on CRAN and will be installed automatically when updating the `AMR` package, that now imports it. In a later stage, the `skewness()` and `kurtosis()` functions will be moved to the `clean` package too.

328
R/mo.R
View File

@ -193,19 +193,19 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE,
require("AMR")
# check onLoad() in R/zzz.R: data tables are created there.
}
# WHONET: xxx = no growth
x[tolower(as.character(paste0(x, ""))) %in% c("", "xxx", "na", "nan")] <- NA_character_
uncertainty_level <- translate_allow_uncertain(allow_uncertain)
# mo_hist <- get_mo_history(x, uncertainty_level, force = isTRUE(list(...)$force_mo_history))
if (mo_source_isvalid(reference_df)
& isFALSE(Becker)
& isFALSE(Lancefield)
& !is.null(reference_df)
& all(x %in% reference_df[,1][[1]])) {
# has valid own reference_df
# (data.table not faster here)
reference_df <- reference_df %>% filter(!is.na(mo))
@ -225,18 +225,18 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE,
left_join(reference_df, by = "x") %>%
pull("mo")
)
} else if (all(x %in% AMR::microorganisms$mo)
& isFALSE(Becker)
& isFALSE(Lancefield)) {
y <- x
# } else if (!any(is.na(mo_hist))
# & isFALSE(Becker)
# & isFALSE(Lancefield)) {
# # check previously found results
# y <- mo_hist
} else if (all(tolower(x) %in% microorganismsDT$fullname_lower)
& isFALSE(Becker)
& isFALSE(Lancefield)) {
@ -257,7 +257,7 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE,
}
# save them to history
set_mo_history(x, y, 0, force = isTRUE(list(...)$force_mo_history))
} else {
# will be checked for mo class in validation and uses exec_as.mo internally if necessary
y <- mo_validate(x = x, property = "mo",
@ -266,8 +266,8 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE,
force_mo_history = isTRUE(list(...)$force_mo_history),
...)
}
to_class_mo(y)
}
@ -286,6 +286,7 @@ is.mo <- function(x) {
#' @importFrom crayon magenta red blue silver italic
# param property a column name of AMR::microorganisms
# param initial_search logical - is FALSE when coming from uncertain tries, which uses exec_as.mo internally too
# param dyslexia_mode logical - also check for characters that resemble others
# param force_mo_history logical - whether found result must be saved with set_mo_history (default FALSE on non-interactive sessions)
# param debug logical - show different lookup texts while searching
exec_as.mo <- function(x,
@ -295,23 +296,24 @@ exec_as.mo <- function(x,
reference_df = get_mo_source(),
property = "mo",
initial_search = TRUE,
dyslexia_mode = FALSE,
force_mo_history = FALSE,
debug = FALSE) {
if (!"AMR" %in% base::.packages()) {
require("AMR")
# check onLoad() in R/zzz.R: data tables are created there.
}
# WHONET: xxx = no growth
x[tolower(as.character(paste0(x, ""))) %in% c("", "xxx", "na", "nan")] <- NA_character_
if (initial_search == TRUE) {
options(mo_failures = NULL)
options(mo_uncertainties = NULL)
options(mo_renamed = NULL)
}
if (NCOL(x) == 2) {
# support tidyverse selection like: df %>% select(colA, colB)
# paste these columns together
@ -325,20 +327,20 @@ exec_as.mo <- function(x,
stop('`x` can be 2 columns at most', call. = FALSE)
}
x[is.null(x)] <- NA
# support tidyverse selection like: df %>% select(colA)
if (!is.vector(x) & !is.null(dim(x))) {
x <- pull(x, 1)
}
}
notes <- character(0)
uncertainties <- data.frame(input = character(0),
fullname = character(0),
mo = character(0))
failures <- character(0)
uncertainty_level <- translate_allow_uncertain(allow_uncertain)
x_input <- x
# already strip leading and trailing spaces
x <- trimws(x, which = "both")
@ -350,7 +352,7 @@ exec_as.mo <- function(x,
& !is.null(x)
& !identical(x, "")
& !identical(x, "xxx")]
# conversion of old MO codes from v0.5.0 (ITIS) to later versions (Catalogue of Life)
if (any(x %like% "^[BFP]_[A-Z]{3,7}") & !all(x %in% microorganisms$mo)) {
leftpart <- gsub("^([BFP]_[A-Z]{3,7}).*", "\\1", x)
@ -372,7 +374,7 @@ exec_as.mo <- function(x,
pull(new)
}
}
# defined df to check for
if (!is.null(reference_df)) {
if (!mo_source_isvalid(reference_df)) {
@ -391,7 +393,7 @@ exec_as.mo <- function(x,
reference_df[] <- lapply(reference_df, as.character)
)
}
# all empty
if (all(identical(trimws(x_input), "") | is.na(x_input) | length(x) == 0)) {
if (property == "mo") {
@ -399,7 +401,7 @@ exec_as.mo <- function(x,
} else {
return(rep(NA_character_, length(x_input)))
}
} else if (all(x %in% reference_df[, 1][[1]])) {
# all in reference df
colnames(reference_df)[1] <- "x"
@ -409,7 +411,7 @@ exec_as.mo <- function(x,
left_join(AMR::microorganisms, by = "mo") %>%
pull(property)
)
} else if (all(x %in% AMR::microorganisms$mo)) {
# existing mo codes when not looking for property "mo", like mo_genus("B_ESCHR_COL")
y <- microorganismsDT[prevalence == 1][data.table(mo = x), on = "mo", ..property][[1]]
@ -424,7 +426,7 @@ exec_as.mo <- function(x,
..property][[1]]
}
x <- y
} else if (all(x %in% read_mo_history(uncertainty_level,
force = force_mo_history)$x)) {
# previously found code
@ -432,7 +434,7 @@ exec_as.mo <- function(x,
uncertainty_level,
force = force_mo_history)),
on = "mo", ..property][[1]]
} else if (all(tolower(x) %in% microorganismsDT$fullname_lower)) {
# we need special treatment for very prevalent full names, they are likely!
# e.g. as.mo("Staphylococcus aureus")
@ -448,30 +450,30 @@ exec_as.mo <- function(x,
..property][[1]]
}
x <- y
} else if (all(toupper(x) %in% AMR::microorganisms.codes$code)) {
# commonly used MO codes
y <- as.data.table(AMR::microorganisms.codes)[data.table(code = toupper(x)), on = "code", ]
# save them to history
set_mo_history(x, y$mo, 0, force = force_mo_history)
x <- microorganismsDT[data.table(mo = y[["mo"]]), on = "mo", ..property][[1]]
} else if (!all(x %in% AMR::microorganisms[, property])) {
strip_whitespace <- function(x) {
# all whitespaces (tab, new lines, etc.) should be one space
# and spaces before and after should be omitted
trimws(gsub("[\\s]+", " ", x, perl = TRUE), which = "both")
}
x <- strip_whitespace(x)
x_backup <- x
# remove spp and species
x <- gsub(" +(spp.?|ssp.?|sp.? |ss ?.?|subsp.?|subspecies|biovar |serovar |species)", " ", x_backup, ignore.case = TRUE)
x <- strip_whitespace(x)
x_backup_without_spp <- x
x_species <- paste(x, "species")
# translate to English for supported languages of mo_property
@ -490,7 +492,7 @@ exec_as.mo <- function(x,
# remove genus as first word
x <- gsub("^Genus ", "", x)
# allow characters that resemble others ----
if (initial_search == FALSE) {
if (dyslexia_mode == TRUE) {
x <- tolower(x)
x <- gsub("[iy]+", "[iy]+", x)
x <- gsub("(c|k|q|qu|s|z|x|ks)+", "(c|k|q|qu|s|z|x|ks)+", x)
@ -512,7 +514,7 @@ exec_as.mo <- function(x,
x <- gsub("e\\+n(?![a-z[])", "(e+n|u+(c|k|q|qu|s|z|x|ks)+)", x, ignore.case = TRUE, perl = TRUE)
}
x <- strip_whitespace(x)
x_trimmed <- x
x_trimmed_species <- paste(x_trimmed, "species")
x_trimmed_without_group <- gsub(" gro.u.p$", "", x_trimmed, ignore.case = TRUE)
@ -526,7 +528,7 @@ exec_as.mo <- function(x,
x_withspaces_start_only <- paste0('^', x_withspaces)
x_withspaces_end_only <- paste0(x_withspaces, '$')
x_withspaces_start_end <- paste0('^', x_withspaces, '$')
if (isTRUE(debug)) {
cat(paste0('x "', x, '"\n'))
cat(paste0('x_species "', x_species, '"\n'))
@ -539,13 +541,13 @@ exec_as.mo <- function(x,
cat(paste0('x_trimmed_species "', x_trimmed_species, '"\n'))
cat(paste0('x_trimmed_without_group "', x_trimmed_without_group, '"\n'))
}
progress <- progress_estimated(n = length(x), min_time = 3)
for (i in 1:length(x)) {
progress$tick()$print()
if (initial_search == TRUE) {
found <- microorganismsDT[mo == get_mo_history(x_backup[i],
uncertainty_level,
@ -557,14 +559,14 @@ exec_as.mo <- function(x,
next
}
}
found <- microorganismsDT[mo == toupper(x_backup[i]), ..property][[1]]
# is a valid MO code
if (length(found) > 0) {
x[i] <- found[1L]
next
}
found <- microorganismsDT[fullname_lower %in% tolower(c(x_backup[i], x_backup_without_spp[i])), ..property][[1]]
# most probable: is exact match in fullname
if (length(found) > 0) {
@ -574,7 +576,7 @@ exec_as.mo <- function(x,
}
next
}
found <- microorganismsDT[col_id == x_backup[i], ..property][[1]]
# is a valid Catalogue of Life ID
if (NROW(found) > 0) {
@ -584,14 +586,14 @@ exec_as.mo <- function(x,
}
next
}
# WHONET: xxx = no growth
if (tolower(as.character(paste0(x_backup_without_spp[i], ""))) %in% c("", "xxx", "na", "nan")) {
x[i] <- NA_character_
next
}
if (tolower(x_backup_without_spp[i]) %in% c("other", "none", "unknown")) {
# empty and nonsense values, ignore without warning
x[i] <- microorganismsDT[mo == "UNKNOWN", ..property][[1]]
@ -600,7 +602,7 @@ exec_as.mo <- function(x,
}
next
}
# check for very small input, but ignore the O antigens of E. coli
if (nchar(gsub("[^a-zA-Z]", "", x_trimmed[i])) < 3
& !x_backup_without_spp[i] %like% "O?(26|103|104|104|111|121|145|157)") {
@ -629,7 +631,7 @@ exec_as.mo <- function(x,
}
next
}
if (x_backup_without_spp[i] %like% "virus") {
# there is no fullname like virus, so don't try to coerce it
x[i] <- microorganismsDT[mo == "UNKNOWN", ..property][[1]]
@ -639,7 +641,7 @@ exec_as.mo <- function(x,
}
next
}
# translate known trivial abbreviations to genus + species ----
if (!is.na(x_trimmed[i])) {
if (toupper(x_backup_without_spp[i]) %in% c('MRSA', 'MSSA', 'VISA', 'VRSA')) {
@ -830,7 +832,7 @@ exec_as.mo <- function(x,
next
}
}
# FIRST TRY FULLNAMES AND CODES ----
# if only genus is available, return only genus
if (all(!c(x[i], x_trimmed[i]) %like% " ")) {
@ -854,7 +856,7 @@ exec_as.mo <- function(x,
}
# rest of genus only is in allow_uncertain part.
}
# TRY OTHER SOURCES ----
# WHONET and other common LIS codes
if (toupper(x_backup[i]) %in% AMR::microorganisms.codes[, 1]) {
@ -879,7 +881,7 @@ exec_as.mo <- function(x,
}
}
}
# allow no codes less than 4 characters long, was already checked for WHONET above
if (nchar(x_backup_without_spp[i]) < 4) {
x[i] <- microorganismsDT[mo == "UNKNOWN", ..property][[1]]
@ -889,7 +891,7 @@ exec_as.mo <- function(x,
}
next
}
check_per_prevalence <- function(data_to_check,
a.x_backup,
b.x_trimmed,
@ -898,19 +900,19 @@ exec_as.mo <- function(x,
e.x_withspaces_start_only,
f.x_withspaces_end_only,
g.x_backup_without_spp) {
# try probable: trimmed version of fullname ----
found <- data_to_check[fullname_lower %in% tolower(g.x_backup_without_spp), ..property][[1]]
if (length(found) > 0) {
return(found[1L])
}
# try any match keeping spaces ----
found <- data_to_check[fullname %like% d.x_withspaces_start_end, ..property][[1]]
if (length(found) > 0 & nchar(g.x_backup_without_spp) >= 6) {
return(found[1L])
}
# try any match keeping spaces, not ending with $ ----
found <- data_to_check[fullname %like% paste0(trimws(e.x_withspaces_start_only), " "), ..property][[1]]
if (length(found) > 0) {
@ -920,21 +922,21 @@ exec_as.mo <- function(x,
if (length(found) > 0 & nchar(g.x_backup_without_spp) >= 6) {
return(found[1L])
}
# try any match keeping spaces, not start with ^ ----
found <- data_to_check[fullname %like% paste0(" ", trimws(f.x_withspaces_end_only)), ..property][[1]]
if (length(found) > 0) {
return(found[1L])
}
# try a trimmed version
found <- data_to_check[fullname_lower %like% b.x_trimmed
| fullname_lower %like% c.x_trimmed_without_group, ..property][[1]]
if (length(found) > 0 & nchar(g.x_backup_without_spp) >= 6) {
return(found[1L])
}
# try splitting of characters in the middle and then find ID ----
# only when text length is 6 or lower
# like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus, staaur = S. aureus
@ -949,18 +951,18 @@ exec_as.mo <- function(x,
return(found[1L])
}
}
# try fullname without start and without nchar limit of >= 6 ----
# like "K. pneu rhino" >> "Klebsiella pneumoniae (rhinoscleromatis)" = KLEPNERH
found <- data_to_check[fullname %like% e.x_withspaces_start_only, ..property][[1]]
if (length(found) > 0) {
return(found[1L])
}
# didn't found any
return(NA_character_)
}
# FIRST TRY VERY PREVALENT IN HUMAN INFECTIONS ----
x[i] <- check_per_prevalence(data_to_check = microorganismsDT[prevalence == 1],
a.x_backup = x_backup[i],
@ -1006,9 +1008,9 @@ exec_as.mo <- function(x,
}
next
}
# MISCELLANEOUS ----
# look for old taxonomic names ----
found <- microorganisms.oldDT[fullname_lower == tolower(x_backup[i])
| fullname %like% x_withspaces_start_end[i],]
@ -1032,7 +1034,7 @@ exec_as.mo <- function(x,
}
next
}
# check for uncertain results ----
uncertain_fn <- function(a.x_backup,
b.x_trimmed,
@ -1040,17 +1042,22 @@ exec_as.mo <- function(x,
d.x_withspaces_start_only,
f.x_withspaces_end_only,
g.x_backup_without_spp) {
if (uncertainty_level == 0) {
# do not allow uncertainties
return(NA_character_)
}
if (uncertainty_level >= 1) {
now_checks_for_uncertainty_level <- 1
# (1) look again for old taxonomic names, now for G. species ----
if (isTRUE(debug)) {
cat("\n[UNCERTAINLY LEVEL 1] (1) look again for old taxonomic names, now for G. species\n")
}
if (isTRUE(debug)) {
message("Running '", c.x_withspaces_start_end, "' and '", d.x_withspaces_start_only, "'")
}
found <- microorganisms.oldDT[fullname %like% c.x_withspaces_start_end
| fullname %like% d.x_withspaces_start_only]
if (NROW(found) > 0 & nchar(g.x_backup_without_spp) >= 6) {
@ -1068,7 +1075,7 @@ exec_as.mo <- function(x,
ref_new = microorganismsDT[col_id == found[1, col_id_new], ref],
mo = microorganismsDT[col_id == found[1, col_id_new], mo])
uncertainties <<- rbind(uncertainties,
data.frame(uncertainty = 1,
data.frame(uncertainty = now_checks_for_uncertainty_level,
input = a.x_backup,
fullname = found[1, fullname],
mo = paste("CoL", found[1, col_id])))
@ -1077,18 +1084,26 @@ exec_as.mo <- function(x,
}
return(x)
}
# (2) Try with misspelled input ----
# just rerun with initial_search = FALSE will used the extensive regex part above
# just rerun with dyslexia_mode = TRUE will used the extensive regex part above
if (isTRUE(debug)) {
cat("\n[UNCERTAINLY LEVEL 1] (2) Try with misspelled input\n")
}
found <- suppressMessages(suppressWarnings(exec_as.mo(a.x_backup, initial_search = FALSE, allow_uncertain = FALSE, debug = debug)))
if (isTRUE(debug)) {
message("Running '", a.x_backup, "'")
}
# first try without dyslexia mode
found <- suppressMessages(suppressWarnings(exec_as.mo(a.x_backup, initial_search = FALSE, dyslexia_mode = FALSE, allow_uncertain = FALSE, debug = debug)))
if (empty_result(found)) {
# then with dyslexia mode
found <- suppressMessages(suppressWarnings(exec_as.mo(a.x_backup, initial_search = FALSE, dyslexia_mode = TRUE, allow_uncertain = FALSE, debug = debug)))
}
if (!empty_result(found)) {
found_result <- found
found <- microorganismsDT[mo == found, ..property][[1]]
uncertainties <<- rbind(uncertainties,
data.frame(uncertainty = 1,
data.frame(uncertainty = now_checks_for_uncertainty_level,
input = a.x_backup,
fullname = microorganismsDT[mo == found_result[1L], fullname][[1]],
mo = found_result[1L]))
@ -1098,21 +1113,25 @@ exec_as.mo <- function(x,
return(found[1L])
}
}
if (uncertainty_level >= 2) {
now_checks_for_uncertainty_level <- 2
# (3) look for genus only, part of name ----
if (isTRUE(debug)) {
cat("\n[UNCERTAINLY LEVEL 2] (3) look for genus only, part of name\n")
}
if (nchar(g.x_backup_without_spp) > 4 & !b.x_trimmed %like% " ") {
if (!grepl("^[A-Z][a-z]+", b.x_trimmed, ignore.case = FALSE)) {
if (isTRUE(debug)) {
message("Running '", paste(b.x_trimmed, "species"), "'")
}
# not when input is like Genustext, because then Neospora would lead to Actinokineospora
found <- microorganismsDT[fullname_lower %like% paste(b.x_trimmed, "species"), ..property][[1]]
if (length(found) > 0) {
x[i] <- found[1L]
uncertainties <<- rbind(uncertainties,
data.frame(uncertainty = 2,
data.frame(uncertainty = now_checks_for_uncertainty_level,
input = a.x_backup,
fullname = microorganismsDT[mo == found[1L], fullname][[1]],
mo = found[1L]))
@ -1123,19 +1142,27 @@ exec_as.mo <- function(x,
}
}
}
# (4) strip values between brackets ----
if (isTRUE(debug)) {
cat("\n[UNCERTAINLY LEVEL 2] (4) strip values between brackets\n")
}
a.x_backup_stripped <- gsub("( *[(].*[)] *)", " ", a.x_backup)
a.x_backup_stripped <- trimws(gsub(" +", " ", a.x_backup_stripped))
found <- suppressMessages(suppressWarnings(exec_as.mo(a.x_backup_stripped, initial_search = FALSE, allow_uncertain = FALSE, debug = debug)))
if (isTRUE(debug)) {
message("Running '", a.x_backup_stripped, "'")
}
# first try without dyslexia mode
found <- suppressMessages(suppressWarnings(exec_as.mo(a.x_backup_stripped, initial_search = FALSE, dyslexia_mode = FALSE, allow_uncertain = FALSE, debug = debug)))
if (empty_result(found)) {
# then with dyslexia mode
found <- suppressMessages(suppressWarnings(exec_as.mo(a.x_backup_stripped, initial_search = FALSE, dyslexia_mode = TRUE, allow_uncertain = FALSE, debug = debug)))
}
if (!empty_result(found) & nchar(g.x_backup_without_spp) >= 6) {
found_result <- found
found <- microorganismsDT[mo == found, ..property][[1]]
uncertainties <<- rbind(uncertainties,
data.frame(uncertainty = 2,
data.frame(uncertainty = now_checks_for_uncertainty_level,
input = a.x_backup,
fullname = microorganismsDT[mo == found_result[1L], fullname][[1]],
mo = found_result[1L]))
@ -1144,7 +1171,7 @@ exec_as.mo <- function(x,
}
return(found[1L])
}
# (5a) try to strip off half an element from end and check the remains ----
if (isTRUE(debug)) {
cat("\n[UNCERTAINLY LEVEL 2] (5a) try to strip off half an element from end and check the remains\n")
@ -1156,13 +1183,21 @@ exec_as.mo <- function(x,
lastword_half <- substr(lastword, 1, as.integer(nchar(lastword) / 2))
# remove last half of the second term
x_strip_collapsed <- paste(c(x_strip[1:(length(x_strip) - i)], lastword_half), collapse = " ")
if (nchar(x_strip_collapsed) >= 4) {
found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, initial_search = FALSE, allow_uncertain = FALSE, debug = debug)))
if (nchar(x_strip_collapsed) >= 4 & nchar(lastword_half) > 2) {
if (isTRUE(debug)) {
message("Running '", x_strip_collapsed, "'")
}
# first try without dyslexia mode
found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, initial_search = FALSE, dyslexia_mode = FALSE, allow_uncertain = FALSE, debug = debug)))
if (empty_result(found)) {
# then with dyslexia mode
found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, initial_search = FALSE, dyslexia_mode = TRUE, allow_uncertain = FALSE, debug = debug)))
}
if (!empty_result(found)) {
found_result <- found
found <- microorganismsDT[mo == found, ..property][[1]]
uncertainties <<- rbind(uncertainties,
data.frame(uncertainty = 2,
data.frame(uncertainty = now_checks_for_uncertainty_level,
input = a.x_backup,
fullname = microorganismsDT[mo == found_result[1L], fullname][[1]],
mo = found_result[1L]))
@ -1181,13 +1216,21 @@ exec_as.mo <- function(x,
if (length(x_strip) > 1) {
for (i in 1:(length(x_strip) - 1)) {
x_strip_collapsed <- paste(x_strip[1:(length(x_strip) - i)], collapse = " ")
if (nchar(x_strip_collapsed) >= 4) {
found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, initial_search = FALSE, allow_uncertain = FALSE, debug = debug)))
if (nchar(x_strip_collapsed) >= 6) {
if (isTRUE(debug)) {
message("Running '", x_strip_collapsed, "'")
}
# first try without dyslexia mode
found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, initial_search = FALSE, dyslexia_mode = FALSE, allow_uncertain = FALSE, debug = debug)))
if (empty_result(found)) {
# then with dyslexia mode
found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, initial_search = FALSE, dyslexia_mode = TRUE, allow_uncertain = FALSE, debug = debug)))
}
if (!empty_result(found)) {
found_result <- found
found <- microorganismsDT[mo == found, ..property][[1]]
uncertainties <<- rbind(uncertainties,
data.frame(uncertainty = 2,
data.frame(uncertainty = now_checks_for_uncertainty_level,
input = a.x_backup,
fullname = microorganismsDT[mo == found_result[1L], fullname][[1]],
mo = found_result[1L]))
@ -1208,7 +1251,7 @@ exec_as.mo <- function(x,
found_result <- found
found <- microorganismsDT[mo == found, ..property][[1]]
uncertainties <<- rbind(uncertainties,
data.frame(uncertainty = 2,
data.frame(uncertainty = now_checks_for_uncertainty_level,
input = a.x_backup,
fullname = microorganismsDT[mo == found_result[1L], fullname][[1]],
mo = found_result[1L]))
@ -1222,7 +1265,7 @@ exec_as.mo <- function(x,
found_result <- found
found <- microorganismsDT[mo == found, ..property][[1]]
uncertainties <<- rbind(uncertainties,
data.frame(uncertainty = 2,
data.frame(uncertainty = now_checks_for_uncertainty_level,
input = a.x_backup,
fullname = microorganismsDT[mo == found_result[1L], fullname][[1]],
mo = found_result[1L]))
@ -1239,14 +1282,22 @@ exec_as.mo <- function(x,
if (length(x_strip) > 1 & nchar(g.x_backup_without_spp) >= 6) {
for (i in 2:(length(x_strip))) {
x_strip_collapsed <- paste(x_strip[i:length(x_strip)], collapse = " ")
found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, initial_search = FALSE, allow_uncertain = FALSE, debug = debug)))
if (isTRUE(debug)) {
message("Running '", x_strip_collapsed, "'")
}
# first try without dyslexia mode
found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, initial_search = FALSE, dyslexia_mode = FALSE, allow_uncertain = FALSE, debug = debug)))
if (empty_result(found)) {
# then with dyslexia mode
found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, initial_search = FALSE, dyslexia_mode = TRUE, allow_uncertain = FALSE, debug = debug)))
}
if (!empty_result(found)) {
found_result <- found
found <- microorganismsDT[mo == found_result[1L], ..property][[1]]
# uncertainty level 2 only if searched part contains a space (otherwise it will be found with lvl 3)
if (x_strip_collapsed %like% " ") {
uncertainties <<- rbind(uncertainties,
data.frame(uncertainty = 2,
data.frame(uncertainty = now_checks_for_uncertainty_level,
input = a.x_backup,
fullname = microorganismsDT[mo == found_result[1L], fullname][[1]],
mo = found_result[1L]))
@ -1259,22 +1310,32 @@ exec_as.mo <- function(x,
}
}
}
if (uncertainty_level >= 3) {
# (7) try to strip off one element from start and check the remains ----
now_checks_for_uncertainty_level <- 3
# (7a) try to strip off one element from start and check the remains (any text size) ----
if (isTRUE(debug)) {
cat("\n[UNCERTAINLY LEVEL 3] (7) try to strip off one element from start and check the remains\n")
cat("\n[UNCERTAINLY LEVEL 3] (7a) try to strip off one element from start and check the remains (any text size)\n")
}
x_strip <- a.x_backup %>% strsplit(" ") %>% unlist()
if (length(x_strip) > 1 & nchar(g.x_backup_without_spp) >= 6) {
for (i in 2:(length(x_strip))) {
x_strip_collapsed <- paste(x_strip[i:length(x_strip)], collapse = " ")
found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, initial_search = FALSE, allow_uncertain = FALSE, debug = debug)))
if (isTRUE(debug)) {
message("Running '", x_strip_collapsed, "'")
}
# first try without dyslexia mode
found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, initial_search = FALSE, dyslexia_mode = FALSE, allow_uncertain = FALSE, debug = debug)))
if (empty_result(found)) {
# then with dyslexia mode
found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, initial_search = FALSE, dyslexia_mode = TRUE, allow_uncertain = FALSE, debug = debug)))
}
if (!empty_result(found)) {
found_result <- found
found <- microorganismsDT[mo == found, ..property][[1]]
uncertainties <<- rbind(uncertainties,
data.frame(uncertainty = 3,
data.frame(uncertainty = now_checks_for_uncertainty_level,
input = a.x_backup,
fullname = microorganismsDT[mo == found_result[1L], fullname][[1]],
mo = found_result[1L]))
@ -1285,18 +1346,53 @@ exec_as.mo <- function(x,
}
}
}
# (7b) try to strip off one element from end and check the remains (any text size) ----
# (this is in fact 5b but without nchar limit of >=6)
if (isTRUE(debug)) {
cat("\n[UNCERTAINLY LEVEL 3] (7b) try to strip off one element from end and check the remains (any text size)\n")
}
if (length(x_strip) > 1) {
for (i in 1:(length(x_strip) - 1)) {
x_strip_collapsed <- paste(x_strip[1:(length(x_strip) - i)], collapse = " ")
if (isTRUE(debug)) {
message("Running '", x_strip_collapsed, "'")
}
# first try without dyslexia mode
found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, initial_search = FALSE, dyslexia_mode = FALSE, allow_uncertain = FALSE, debug = debug)))
if (empty_result(found)) {
# then with dyslexia mode
found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, initial_search = FALSE, dyslexia_mode = TRUE, allow_uncertain = FALSE, debug = debug)))
}
if (!empty_result(found)) {
found_result <- found
found <- microorganismsDT[mo == found, ..property][[1]]
uncertainties <<- rbind(uncertainties,
data.frame(uncertainty = now_checks_for_uncertainty_level,
input = a.x_backup,
fullname = microorganismsDT[mo == found_result[1L], fullname][[1]],
mo = found_result[1L]))
if (initial_search == TRUE) {
set_mo_history(a.x_backup, get_mo_code(found[1L], property), 2, force = force_mo_history)
}
return(found[1L])
}
}
}
# (8) part of a name (very unlikely match) ----
if (isTRUE(debug)) {
cat("\n[UNCERTAINLY LEVEL 3] (8) part of a name (very unlikely match)\n")
}
if (isTRUE(debug)) {
message("Running '", f.x_withspaces_end_only, "'")
}
found <- microorganismsDT[fullname %like% f.x_withspaces_end_only]
if (nrow(found) > 0) {
found_result <- found[["mo"]]
if (!empty_result(found_result) & nchar(g.x_backup_without_spp) >= 6) {
found <- microorganismsDT[mo == found_result[1L], ..property][[1]]
uncertainties <<- rbind(uncertainties,
data.frame(uncertainty = 3,
data.frame(uncertainty = now_checks_for_uncertainty_level,
input = a.x_backup,
fullname = microorganismsDT[mo == found_result[1L], fullname][[1]],
mo = found_result[1L]))
@ -1307,7 +1403,7 @@ exec_as.mo <- function(x,
}
}
}
# didn't found in uncertain results too
return(NA_character_)
}
@ -1321,7 +1417,7 @@ exec_as.mo <- function(x,
# no set_mo_history here - it is already set in uncertain_fn()
next
}
# no results found: make them UNKNOWN ----
x[i] <- microorganismsDT[mo == "UNKNOWN", ..property][[1]]
if (initial_search == TRUE) {
@ -1330,7 +1426,7 @@ exec_as.mo <- function(x,
}
}
}
# handling failures ----
failures <- failures[!failures %in% c(NA, NULL, NaN)]
if (length(failures) > 0 & initial_search == TRUE) {
@ -1355,7 +1451,7 @@ exec_as.mo <- function(x,
# handling uncertainties ----
if (NROW(uncertainties) > 0 & initial_search == TRUE) {
options(mo_uncertainties = as.list(distinct(uncertainties, input, .keep_all = TRUE)))
plural <- c("", "it")
if (NROW(uncertainties) > 1) {
plural <- c("s", "them")
@ -1366,7 +1462,7 @@ exec_as.mo <- function(x,
call. = FALSE,
immediate. = TRUE) # thus will always be shown, even if >= warnings
}
# Becker ----
if (Becker == TRUE | Becker == "all") {
# See Source. It's this figure:
@ -1391,11 +1487,11 @@ exec_as.mo <- function(x,
"pseudintermedius", "pseudointermedius",
"schweitzeri", "argenteus")
| (species == "schleiferi" & subspecies == "coagulans"), ..property][[1]]
# warn when species found that are not in Becker (2014, PMID 25278577) and Becker (2019, PMID 30872103)
post_Becker <- c("argensis", "caeli", "cornubiensis", "edaphicus")
if (any(x %in% MOs_staph[species %in% post_Becker, ..property][[1]])) {
warning("Becker ", italic("et al."), " (2014, 2019) does not contain these species named after their publication: ",
italic(paste("S.",
sort(mo_species(unique(x[x %in% MOs_staph[species %in% post_Becker, ..property][[1]]]))),
@ -1404,14 +1500,14 @@ exec_as.mo <- function(x,
call. = FALSE,
immediate. = TRUE)
}
x[x %in% CoNS] <- microorganismsDT[mo == 'B_STPHY_CNS', ..property][[1]][1L]
x[x %in% CoPS] <- microorganismsDT[mo == 'B_STPHY_CPS', ..property][[1]][1L]
if (Becker == "all") {
x[x %in% microorganismsDT[mo %like% '^B_STPHY_AUR', ..property][[1]]] <- microorganismsDT[mo == 'B_STPHY_CPS', ..property][[1]][1L]
}
}
# Lancefield ----
if (Lancefield == TRUE | Lancefield == "all") {
# group A - S. pyogenes
@ -1435,37 +1531,37 @@ exec_as.mo <- function(x,
# group K - S. salivarius
x[x == microorganismsDT[mo == 'B_STRPT_SAL', ..property][[1]][1L]] <- microorganismsDT[mo == 'B_STRPT_GRK', ..property][[1]][1L]
}
# Wrap up ----------------------------------------------------------------
# comply to x, which is also unique and without empty values
x_input_unique_nonempty <- unique(x_input[!is.na(x_input)
& !is.null(x_input)
& !identical(x_input, "")
& !identical(x_input, "xxx")])
# left join the found results to the original input values (x_input)
df_found <- data.frame(input = as.character(x_input_unique_nonempty),
found = as.character(x),
stringsAsFactors = FALSE)
df_input <- data.frame(input = as.character(x_input),
stringsAsFactors = FALSE)
suppressWarnings(
x <- df_input %>%
left_join(df_found,
by = "input") %>%
pull(found)
)
if (property == "mo") {
x <- to_class_mo(x)
}
if (length(mo_renamed()) > 0) {
print(mo_renamed())
}
x
}
@ -1494,7 +1590,7 @@ was_renamed <- function(name_old, name_new, ref_old = "", ref_new = "", mo = "")
old_values <- gsub("et al.", italic("et al."), old_values)
new_values <- paste0(italic(name_new), ref_new, mo)
new_values <- gsub("et al.", italic("et al."), new_values)
names(new_values) <- old_values
total <- c(getOption("mo_renamed"), new_values)
options(mo_renamed = total[order(names(total))])
@ -1604,7 +1700,7 @@ print.mo_uncertainties <- function(x, ...) {
"\n(1 = ", green("renamed/misspelled"),
", 2 = ", yellow("uncertain"),
", 3 = ", red("very uncertain"), ")\n"))
msg <- ""
for (i in 1:nrow(x)) {
if (x[i, "uncertainty"] == 1) {
@ -1633,11 +1729,11 @@ mo_renamed <- function() {
if (is.null(items)) {
return(NULL)
}
items <- strip_style(items)
names(items) <- strip_style(names(items))
structure(.Data = items,
class = c("mo_renamed", "character"))
class = c("mo_renamed", "character"))
}
#' @exportMethod print.mo_renamed
@ -1666,7 +1762,7 @@ unregex <- function(x) {
get_mo_code <- function(x, property) {
# don't use right now
return(NULL)
if (property == "mo") {
unique(x)
} else {

View File

@ -78,7 +78,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">0.7.1.9037</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">0.7.1.9038</span>
</span>
</div>

View File

@ -78,7 +78,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">0.7.1.9037</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">0.7.1.9038</span>
</span>
</div>

View File

@ -78,7 +78,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">0.7.1.9037</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">0.7.1.9038</span>
</span>
</div>

View File

@ -42,7 +42,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">0.7.1.9037</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">0.7.1.9038</span>
</span>
</div>

View File

@ -78,7 +78,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">0.7.1.9037</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">0.7.1.9038</span>
</span>
</div>
@ -225,9 +225,9 @@
</div>
<div id="amr-0-7-1-9037" class="section level1">
<div id="amr-0-7-1-9038" class="section level1">
<h1 class="page-header">
<a href="#amr-0-7-1-9037" class="anchor"></a>AMR 0.7.1.9037<small> Unreleased </small>
<a href="#amr-0-7-1-9038" class="anchor"></a>AMR 0.7.1.9038<small> Unreleased </small>
</h1>
<div id="breaking" class="section level3">
<h3 class="hasAnchor">
@ -1236,7 +1236,7 @@ Using <code><a href="../reference/as.mo.html">as.mo(..., allow_uncertain = 3)</a
<div id="tocnav">
<h2>Contents</h2>
<ul class="nav nav-pills nav-stacked">
<li><a href="#amr-0-7-1-9037">0.7.1.9037</a></li>
<li><a href="#amr-0-7-1-9038">0.7.1.9038</a></li>
<li><a href="#amr-0-7-1">0.7.1</a></li>
<li><a href="#amr-0-7-0">0.7.0</a></li>
<li><a href="#amr-0-6-1">0.6.1</a></li>

View File

@ -78,7 +78,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">0.7.1.9037</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">0.7.1.9038</span>
</span>
</div>

View File

@ -197,8 +197,8 @@ test_that("as.mo works", {
print(mo_renamed())
# check uncertain names
expect_equal(suppressWarnings(as.character(as.mo("esco extra_text", allow_uncertain = FALSE))), "UNKNOWN")
expect_equal(suppressWarnings(as.character(as.mo("esco extra_text", allow_uncertain = TRUE))), "B_ESCHR_COL")
expect_equal(suppressWarnings(as.character(as.mo("staaur extratest", allow_uncertain = TRUE))), "B_STPHY_AUR")
expect_equal(suppressWarnings(as.character(as.mo("staaur extratest", allow_uncertain = FALSE))), "UNKNOWN")
expect_warning(as.mo("esco extra_text", allow_uncertain = TRUE))
expect_equal(suppressWarnings(as.character(as.mo("unexisting aureus", allow_uncertain = 3))), "B_STPHY_AUR")
expect_equal(suppressWarnings(as.character(as.mo("unexisting staphy", allow_uncertain = 3))), "B_STPHY")
@ -270,7 +270,7 @@ test_that("as.mo works", {
expect_equal(as.character(as.mo("F_CANDD_GLB")), "F_CANDD_GLA")
# debug mode
expect_output(print(suppressWarnings(as.mo("kshgcjkhsdgkshjdfsfvsdfv", debug = TRUE, allow_uncertain = 3))))
expect_output(print(suppressMessages(suppressWarnings(as.mo("kshgcjkhsdgkshjdfsfvsdfv", debug = TRUE, allow_uncertain = 3)))))
# ..coccus
expect_equal(as.character(as.mo(c("meningococ", "gonococ", "pneumococ"))),