1
0
mirror of https://github.com/msberends/AMR.git synced 2025-07-08 23:21:56 +02:00

(v0.9.0.9005) as.mo for G. species

This commit is contained in:
2019-12-21 10:56:06 +01:00
parent cc8cd043e1
commit ba3ce77f02
21 changed files with 501 additions and 361 deletions

View File

@ -21,12 +21,12 @@
#' Class 'disk'
#'
#' This transforms a vector to a new class [`disk`], which is a growth zone size (around an antibiotic disk) in millimeters between 6 and 99.
#' This transforms a vector to a new class [`disk`], which is a growth zone size (around an antibiotic disk) in millimeters between 6 and 50.
#' @rdname as.disk
#' @param x vector
#' @param na.rm a logical indicating whether missing values should be removed
#' @details Interpret disk values as RSI values with [as.rsi()]. It supports guidelines from EUCAST and CLSI.
#' @return Ordered integer factor with new class [`disk`]
#' @return An [`integer`] with additional new class [`disk`]
#' @aliases disk
#' @export
#' @seealso [as.rsi()]
@ -56,8 +56,8 @@ as.disk <- function(x, na.rm = FALSE) {
# force it to be integer
x <- suppressWarnings(as.integer(x))
# disks can never be less than 9 mm (size of a disk) or more than 50 mm
x[x < 6 | x > 99] <- NA_integer_
# disks can never be less than 6 mm (size of smallest disk) or more than 50 mm
x[x < 6 | x > 50] <- NA_integer_
na_after <- length(x[is.na(x)])
if (na_before != na_after) {

274
R/mo.R
View File

@ -228,7 +228,7 @@ as.mo <- function(x,
& isFALSE(Lancefield)) {
# check previously found results
y <- mo_hist
} else {
# will be checked for mo class in validation and uses exec_as.mo internally if necessary
y <- mo_validate(x = x, property = "mo",
@ -273,7 +273,7 @@ exec_as.mo <- function(x,
disable_mo_history = getOption("AMR_disable_mo_history", FALSE),
debug = FALSE,
reference_data_to_use = microorganismsDT) {
load_AMR_package()
# WHONET: xxx = no growth
@ -391,7 +391,9 @@ exec_as.mo <- function(x,
} else if (all(x %in% reference_data_to_use$mo)) {
# existing mo codes when not looking for property "mo", like mo_genus("B_ESCHR_COL")
y <- reference_data_to_use[prevalence == 1][data.table(mo = x), on = "mo", ..property][[1]]
y <- reference_data_to_use[prevalence == 1][data.table(mo = x),
on = "mo",
..property][[1]]
if (any(is.na(y))) {
y[is.na(y)] <- reference_data_to_use[prevalence == 2][data.table(mo = x[is.na(y)]),
on = "mo",
@ -420,21 +422,29 @@ exec_as.mo <- function(x,
} else if (all(tolower(x) %in% reference_data_to_use$fullname_lower)) {
# we need special treatment for very prevalent full names, they are likely!
# e.g. as.mo("Staphylococcus aureus")
x <- reference_data_to_use[data.table(fullname_lower = tolower(x)), on = "fullname_lower", ..property][[1]]
x <- reference_data_to_use[data.table(fullname_lower = tolower(x)),
on = "fullname_lower",
..property][[1]]
} else if (all(toupper(x) %in% AMR::microorganisms.codes$code)) {
# commonly used MO codes
y <- as.data.table(AMR::microorganisms.codes)[data.table(code = toupper(x)), on = "code", ]
y <- as.data.table(AMR::microorganisms.codes)[data.table(code = toupper(x)),
on = "code", ]
# save them to history
set_mo_history(x, y$mo, 0, force = force_mo_history, disable = disable_mo_history)
x <- reference_data_to_use[data.table(mo = y[["mo"]]), on = "mo", ..property][[1]]
x <- reference_data_to_use[data.table(mo = y[["mo"]]),
on = "mo",
..property][[1]]
} else if (all(x %in% microorganisms.translation$mo_old)) {
# is an old mo code, used in previous versions of this package
old_mo_warning <- TRUE
y <- as.data.table(microorganisms.translation)[data.table(mo_old = x), on = "mo_old", "mo_new"][[1]]
y <- reference_data_to_use[data.table(mo = y), on = "mo", ..property][[1]]
y <- as.data.table(microorganisms.translation)[data.table(mo_old = x),
on = "mo_old", "mo_new"][[1]]
y <- reference_data_to_use[data.table(mo = y),
on = "mo",
..property][[1]]
# don't save to history, as all items are already in microorganisms.translation
x <- y
@ -557,7 +567,7 @@ exec_as.mo <- function(x,
}
progress <- progress_estimated(n = length(x), min_time = 3)
for (i in seq_len(length(x))) {
progress$tick()$print()
@ -580,7 +590,8 @@ exec_as.mo <- function(x,
next
}
found <- reference_data_to_use[mo == toupper(x_backup[i]), ..property][[1]]
found <- reference_data_to_use[mo == toupper(x_backup[i]),
..property][[1]]
# is a valid MO code
if (length(found) > 0) {
x[i] <- found[1L]
@ -590,17 +601,19 @@ exec_as.mo <- function(x,
if (x_backup[i] %in% microorganisms.translation$mo_old) {
# is an old mo code, used in previous versions of this package
old_mo_warning <- TRUE
found <- reference_data_to_use[mo == microorganisms.translation[which(microorganisms.translation$mo_old == x_backup[i]), "mo_new"], ..property][[1]]
found <- reference_data_to_use[mo == microorganisms.translation[which(microorganisms.translation$mo_old == x_backup[i]), "mo_new"],
..property][[1]]
if (length(found) > 0) {
x[i] <- found[1L]
# don't save to history, as all items are already in microorganisms.translation
next
}
}
if (toupper(x_backup_untouched[i]) %in% microorganisms.codes$code) {
# is a WHONET code, like "HA-"
found <- microorganismsDT[mo == microorganisms.codes[which(microorganisms.codes$code == toupper(x_backup_untouched[i])), "mo"][1L], ..property][[1]]
found <- microorganismsDT[mo == microorganisms.codes[which(microorganisms.codes$code == toupper(x_backup_untouched[i])), "mo"][1L],
..property][[1]]
if (length(found) > 0) {
x[i] <- found[1L]
# don't save to history, as all items are already in microorganisms.codes
@ -608,7 +621,8 @@ exec_as.mo <- function(x,
}
}
found <- reference_data_to_use[fullname_lower %in% tolower(c(x_backup[i], x_backup_without_spp[i])), ..property][[1]]
found <- reference_data_to_use[fullname_lower %in% tolower(c(x_backup[i], x_backup_without_spp[i])),
..property][[1]]
# most probable: is exact match in fullname
if (length(found) > 0) {
x[i] <- found[1L]
@ -618,7 +632,20 @@ exec_as.mo <- function(x,
next
}
found <- reference_data_to_use[col_id == x_backup[i], ..property][[1]]
found <- reference_data_to_use[g_species %in% gsub("[^a-z0-9/ \\-]+", "",
tolower(c(x_backup[i], x_backup_without_spp[i]))),
..property][[1]]
# very probable: is G. species
if (length(found) > 0) {
x[i] <- found[1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history)
}
next
}
found <- reference_data_to_use[col_id == x_backup[i],
..property][[1]]
# is a valid Catalogue of Life ID
if (NROW(found) > 0) {
x[i] <- found[1L]
@ -632,19 +659,22 @@ exec_as.mo <- function(x,
if (any(toupper(c(x_backup[i], x_backup_without_spp[i])) %in% AMR::microorganisms.codes$code)) {
mo_found <- AMR::microorganisms.codes[which(AMR::microorganisms.codes$code %in% toupper(c(x_backup[i], x_backup_without_spp[i]))), "mo"][1L]
if (length(mo_found) > 0) {
x[i] <- microorganismsDT[mo == mo_found, ..property][[1]][1L]
x[i] <- microorganismsDT[mo == mo_found,
..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history)
}
next
}
}
if (!is.null(reference_df)) {
# self-defined reference
if (x_backup[i] %in% reference_df[, 1]) {
ref_mo <- reference_df[reference_df[, 1] == x_backup[i], "mo"][[1L]]
if (ref_mo %in% microorganismsDT[, mo]) {
x[i] <- microorganismsDT[mo == ref_mo, ..property][[1]][1L]
x[i] <- microorganismsDT[mo == ref_mo,
..property][[1]][1L]
next
} else {
warning("Value '", x_backup[i], "' was found in reference_df, but '", ref_mo, "' is not a valid MO code.", call. = FALSE)
@ -660,7 +690,8 @@ exec_as.mo <- function(x,
if (tolower(x_backup_without_spp[i]) %in% c("other", "none", "unknown")) {
# empty and nonsense values, ignore without warning
x[i] <- microorganismsDT[mo == "UNKNOWN", ..property][[1]]
x[i] <- microorganismsDT[mo == "UNKNOWN",
..property][[1]]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history)
}
@ -671,7 +702,8 @@ exec_as.mo <- function(x,
if (nchar(gsub("[^a-zA-Z]", "", x_trimmed[i])) < 3
& !x_backup_without_spp[i] %like_case% "[Oo]?(26|103|104|104|111|121|145|157)") {
# fewer than 3 chars and not looked for species, add as failure
x[i] <- microorganismsDT[mo == "UNKNOWN", ..property][[1]]
x[i] <- microorganismsDT[mo == "UNKNOWN",
..property][[1]]
if (initial_search == TRUE) {
failures <- c(failures, x_backup[i])
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history)
@ -689,7 +721,8 @@ exec_as.mo <- function(x,
if (!is.na(x_trimmed[i])) {
if (toupper(x_backup_without_spp[i]) %in% c("MRSA", "MSSA", "VISA", "VRSA")
| x_backup_without_spp[i] %like_case% " (mrsa|mssa|visa|vrsa) ") {
x[i] <- microorganismsDT[mo == "B_STPHY_AURS", ..property][[1]][1L]
x[i] <- microorganismsDT[mo == "B_STPHY_AURS",
..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history)
}
@ -697,7 +730,8 @@ exec_as.mo <- function(x,
}
if (toupper(x_backup_without_spp[i]) %in% c("MRSE", "MSSE")
| x_backup_without_spp[i] %like_case% " (mrse|msse) ") {
x[i] <- microorganismsDT[mo == "B_STPHY_EPDR", ..property][[1]][1L]
x[i] <- microorganismsDT[mo == "B_STPHY_EPDR",
..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history)
}
@ -706,7 +740,8 @@ exec_as.mo <- function(x,
if (toupper(x_backup_without_spp[i]) == "VRE"
| x_backup_without_spp[i] %like_case% " vre "
| x_backup_without_spp[i] %like_case% "(enterococci|enterokok|enterococo)[a-z]*?$") {
x[i] <- microorganismsDT[mo == "B_ENTRC", ..property][[1]][1L]
x[i] <- microorganismsDT[mo == "B_ENTRC",
..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history)
}
@ -727,7 +762,8 @@ exec_as.mo <- function(x,
if (toupper(x_backup_without_spp[i]) %in% c("AIEC", "ATEC", "DAEC", "EAEC", "EHEC", "EIEC", "EPEC", "ETEC", "NMEC", "STEC", "UPEC")
# also support O-antigens of E. coli: O26, O103, O104, O111, O121, O145, O157
| x_backup_without_spp[i] %like_case% "o?(26|103|104|111|121|145|157)") {
x[i] <- microorganismsDT[mo == "B_ESCHR_COLI", ..property][[1]][1L]
x[i] <- microorganismsDT[mo == "B_ESCHR_COLI",
..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history)
}
@ -736,7 +772,8 @@ exec_as.mo <- function(x,
if (toupper(x_backup_without_spp[i]) == "MRPA"
| x_backup_without_spp[i] %like_case% " mrpa ") {
# multi resistant P. aeruginosa
x[i] <- microorganismsDT[mo == "B_PSDMN_ARGN", ..property][[1]][1L]
x[i] <- microorganismsDT[mo == "B_PSDMN_ARGN",
..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history)
}
@ -744,7 +781,8 @@ exec_as.mo <- function(x,
}
if (toupper(x_backup_without_spp[i]) == "CRSM") {
# co-trim resistant S. maltophilia
x[i] <- microorganismsDT[mo == "B_STNTR_MLTP", ..property][[1]][1L]
x[i] <- microorganismsDT[mo == "B_STNTR_MLTP",
..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history)
}
@ -753,7 +791,8 @@ exec_as.mo <- function(x,
if (toupper(x_backup_without_spp[i]) %in% c("PISP", "PRSP", "VISP", "VRSP")
| x_backup_without_spp[i] %like_case% " (pisp|prsp|visp|vrsp) ") {
# peni I, peni R, vanco I, vanco R: S. pneumoniae
x[i] <- microorganismsDT[mo == "B_STRPT_PNMN", ..property][[1]][1L]
x[i] <- microorganismsDT[mo == "B_STRPT_PNMN",
..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history)
}
@ -761,7 +800,8 @@ exec_as.mo <- function(x,
}
if (x_backup_without_spp[i] %like_case% "^g[abcdfghk]s$") {
# Streptococci, like GBS = Group B Streptococci (B_STRPT_GRPB)
x[i] <- microorganismsDT[mo == toupper(gsub("g([abcdfghk])s", "B_STRPT_GRP\\1", x_backup_without_spp[i])), ..property][[1]][1L]
x[i] <- microorganismsDT[mo == toupper(gsub("g([abcdfghk])s", "B_STRPT_GRP\\1", x_backup_without_spp[i])),
..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history)
}
@ -769,7 +809,8 @@ exec_as.mo <- function(x,
}
if (x_backup_without_spp[i] %like_case% "(streptococ|streptokok).* [abcdfghk]$") {
# Streptococci in different languages, like "estreptococos grupo B"
x[i] <- microorganismsDT[mo == toupper(gsub(".*(streptococ|streptokok|estreptococ).* ([abcdfghk])$", "B_STRPT_GRP\\2", x_backup_without_spp[i])), ..property][[1]][1L]
x[i] <- microorganismsDT[mo == toupper(gsub(".*(streptococ|streptokok|estreptococ).* ([abcdfghk])$", "B_STRPT_GRP\\2", x_backup_without_spp[i])),
..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history)
}
@ -777,7 +818,8 @@ exec_as.mo <- function(x,
}
if (x_backup_without_spp[i] %like_case% "group [abcdfghk] (streptococ|streptokok|estreptococ)") {
# Streptococci in different languages, like "Group A Streptococci"
x[i] <- microorganismsDT[mo == toupper(gsub(".*group ([abcdfghk]) (streptococ|streptokok|estreptococ).*", "B_STRPT_GRP\\1", x_backup_without_spp[i])), ..property][[1]][1L]
x[i] <- microorganismsDT[mo == toupper(gsub(".*group ([abcdfghk]) (streptococ|streptokok|estreptococ).*", "B_STRPT_GRP\\1", x_backup_without_spp[i])),
..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history)
}
@ -785,7 +827,8 @@ exec_as.mo <- function(x,
}
if (x_backup_without_spp[i] %like_case% "haemoly.*strept") {
# Haemolytic streptococci in different languages
x[i] <- microorganismsDT[mo == "B_STRPT_HAEM", ..property][[1]][1L]
x[i] <- microorganismsDT[mo == "B_STRPT_HAEM",
..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history)
}
@ -796,7 +839,8 @@ exec_as.mo <- function(x,
| x_trimmed[i] %like_case% "[ck]oagulas[ea] negatie?[vf]"
| x_backup_without_spp[i] %like_case% "[ck]o?ns[^a-z]?$") {
# coerce S. coagulase negative
x[i] <- microorganismsDT[mo == "B_STPHY_CONS", ..property][[1]][1L]
x[i] <- microorganismsDT[mo == "B_STPHY_CONS",
..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history)
}
@ -806,7 +850,8 @@ exec_as.mo <- function(x,
| x_trimmed[i] %like_case% "[ck]oagulas[ea] positie?[vf]"
| x_backup_without_spp[i] %like_case% "[ck]o?ps[^a-z]?$") {
# coerce S. coagulase positive
x[i] <- microorganismsDT[mo == "B_STPHY_COPS", ..property][[1]][1L]
x[i] <- microorganismsDT[mo == "B_STPHY_COPS",
..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history)
}
@ -817,7 +862,8 @@ exec_as.mo <- function(x,
| x_backup_without_spp[i] %like_case% "strepto.* mil+er+i"
| x_backup_without_spp[i] %like_case% "mgs[^a-z]?$") {
# Milleri Group Streptococcus (MGS)
x[i] <- microorganismsDT[mo == "B_STRPT_MILL", ..property][[1]][1L]
x[i] <- microorganismsDT[mo == "B_STRPT_MILL",
..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history)
}
@ -827,7 +873,8 @@ exec_as.mo <- function(x,
| x_backup_without_spp[i] %like_case% "strepto.* viridans"
| x_backup_without_spp[i] %like_case% "vgs[^a-z]?$") {
# Viridans Group Streptococcus (VGS)
x[i] <- microorganismsDT[mo == "B_STRPT_VIRI", ..property][[1]][1L]
x[i] <- microorganismsDT[mo == "B_STRPT_VIRI",
..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history)
}
@ -837,7 +884,8 @@ exec_as.mo <- function(x,
| x_backup_without_spp[i] %like_case% "negatie?[vf]"
| x_trimmed[i] %like_case% "gram[ -]?neg.*") {
# coerce Gram negatives
x[i] <- microorganismsDT[mo == "B_GRAMN", ..property][[1]][1L]
x[i] <- microorganismsDT[mo == "B_GRAMN",
..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history)
}
@ -847,7 +895,8 @@ exec_as.mo <- function(x,
| x_backup_without_spp[i] %like_case% "positie?[vf]"
| x_trimmed[i] %like_case% "gram[ -]?pos.*") {
# coerce Gram positives
x[i] <- microorganismsDT[mo == "B_GRAMP", ..property][[1]][1L]
x[i] <- microorganismsDT[mo == "B_GRAMP",
..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history)
}
@ -855,7 +904,8 @@ exec_as.mo <- function(x,
}
if (x_backup_without_spp[i] %like_case% "mycoba[ck]teri.[nm]?$") {
# coerce Gram positives
x[i] <- microorganismsDT[mo == "B_MYCBC", ..property][[1]][1L]
x[i] <- microorganismsDT[mo == "B_MYCBC",
..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history)
}
@ -865,14 +915,16 @@ exec_as.mo <- function(x,
if (x_backup_without_spp[i] %like_case% "salmonella [a-z]+ ?.*") {
if (x_backup_without_spp[i] %like_case% "salmonella group") {
# Salmonella Group A to Z, just return S. species for now
x[i] <- microorganismsDT[mo == "B_SLMNL", ..property][[1]][1L]
x[i] <- microorganismsDT[mo == "B_SLMNL",
..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history)
}
next
} else if (grepl("[sS]almonella [A-Z][a-z]+ ?.*", x_backup[i], ignore.case = FALSE)) {
# Salmonella with capital letter species like "Salmonella Goettingen" - they're all S. enterica
x[i] <- microorganismsDT[mo == "B_SLMNL_ENTR", ..property][[1]][1L]
x[i] <- microorganismsDT[mo == "B_SLMNL_ENTR",
..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history)
}
@ -887,7 +939,8 @@ exec_as.mo <- function(x,
# trivial names known to the field:
if ("meningococcus" %like_case% x_trimmed[i]) {
# coerce Neisseria meningitidis
x[i] <- microorganismsDT[mo == "B_NESSR_MNNG", ..property][[1]][1L]
x[i] <- microorganismsDT[mo == "B_NESSR_MNNG",
..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history)
}
@ -895,7 +948,8 @@ exec_as.mo <- function(x,
}
if ("gonococcus" %like_case% x_trimmed[i]) {
# coerce Neisseria gonorrhoeae
x[i] <- microorganismsDT[mo == "B_NESSR_GNRR", ..property][[1]][1L]
x[i] <- microorganismsDT[mo == "B_NESSR_GNRR",
..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history)
}
@ -903,7 +957,8 @@ exec_as.mo <- function(x,
}
if ("pneumococcus" %like_case% x_trimmed[i]) {
# coerce Streptococcus penumoniae
x[i] <- microorganismsDT[mo == "B_STRPT_PNMN", ..property][[1]][1L]
x[i] <- microorganismsDT[mo == "B_STRPT_PNMN",
..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history)
}
@ -928,7 +983,8 @@ exec_as.mo <- function(x,
# if only genus is available, return only genus
if (all(!c(x[i], b.x_trimmed) %like_case% " ")) {
found <- data_to_check[fullname_lower %in% c(h.x_species, i.x_trimmed_species), ..property][[1]]
found <- data_to_check[fullname_lower %in% c(h.x_species, i.x_trimmed_species),
..property][[1]]
if (length(found) > 0) {
x[i] <- found[1L]
if (initial_search == TRUE) {
@ -937,7 +993,8 @@ exec_as.mo <- function(x,
return(x[i])
}
if (nchar(g.x_backup_without_spp) >= 6) {
found <- data_to_check[fullname_lower %like_case% paste0("^", unregex(g.x_backup_without_spp), "[a-z]+"), ..property][[1]]
found <- data_to_check[fullname_lower %like_case% paste0("^", unregex(g.x_backup_without_spp), "[a-z]+"),
..property][[1]]
if (length(found) > 0) {
x[i] <- found[1L]
if (initial_search == TRUE) {
@ -951,7 +1008,8 @@ exec_as.mo <- function(x,
# allow no codes less than 4 characters long, was already checked for WHONET earlier
if (nchar(g.x_backup_without_spp) < 4) {
x[i] <- microorganismsDT[mo == "UNKNOWN", ..property][[1]]
x[i] <- microorganismsDT[mo == "UNKNOWN",
..property][[1]]
if (initial_search == TRUE) {
failures <- c(failures, a.x_backup)
set_mo_history(a.x_backup, get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history)
@ -960,36 +1018,42 @@ exec_as.mo <- function(x,
}
# try probable: trimmed version of fullname ----
found <- data_to_check[fullname_lower %in% tolower(g.x_backup_without_spp), ..property][[1]]
found <- data_to_check[fullname_lower %in% tolower(g.x_backup_without_spp),
..property][[1]]
if (length(found) > 0) {
return(found[1L])
}
# try any match keeping spaces ----
found <- data_to_check[fullname_lower %like_case% d.x_withspaces_start_end, ..property][[1]]
found <- data_to_check[fullname_lower %like_case% d.x_withspaces_start_end,
..property][[1]]
if (length(found) > 0 & nchar(g.x_backup_without_spp) >= 6) {
return(found[1L])
}
# try any match keeping spaces, not ending with $ ----
found <- data_to_check[fullname_lower %like_case% paste0(trimws(e.x_withspaces_start_only), " "), ..property][[1]]
found <- data_to_check[fullname_lower %like_case% paste0(trimws(e.x_withspaces_start_only), " "),
..property][[1]]
if (length(found) > 0) {
return(found[1L])
}
found <- data_to_check[fullname_lower %like_case% e.x_withspaces_start_only, ..property][[1]]
found <- data_to_check[fullname_lower %like_case% e.x_withspaces_start_only,
..property][[1]]
if (length(found) > 0 & nchar(g.x_backup_without_spp) >= 6) {
return(found[1L])
}
# try any match keeping spaces, not start with ^ ----
found <- data_to_check[fullname_lower %like_case% paste0(" ", trimws(f.x_withspaces_end_only)), ..property][[1]]
found <- data_to_check[fullname_lower %like_case% paste0(" ", trimws(f.x_withspaces_end_only)),
..property][[1]]
if (length(found) > 0) {
return(found[1L])
}
# try a trimmed version
found <- data_to_check[fullname_lower %like_case% b.x_trimmed
| fullname_lower %like_case% c.x_trimmed_without_group, ..property][[1]]
| fullname_lower %like_case% c.x_trimmed_without_group,
..property][[1]]
if (length(found) > 0 & nchar(g.x_backup_without_spp) >= 6) {
return(found[1L])
}
@ -1004,7 +1068,8 @@ exec_as.mo <- function(x,
g.x_backup_without_spp %>% substr(1, x_length / 2),
".* ",
g.x_backup_without_spp %>% substr((x_length / 2) + 1, x_length))
found <- data_to_check[fullname_lower %like_case% x_split, ..property][[1]]
found <- data_to_check[fullname_lower %like_case% x_split,
..property][[1]]
if (length(found) > 0) {
return(found[1L])
}
@ -1012,7 +1077,8 @@ exec_as.mo <- function(x,
# try fullname without start and without nchar limit of >= 6 ----
# like "K. pneu rhino" >> "Klebsiella pneumoniae (rhinoscleromatis)" = KLEPNERH
found <- data_to_check[fullname_lower %like_case% e.x_withspaces_start_only, ..property][[1]]
found <- data_to_check[fullname_lower %like_case% e.x_withspaces_start_only,
..property][[1]]
if (length(found) > 0) {
return(found[1L])
}
@ -1031,7 +1097,8 @@ exec_as.mo <- function(x,
if (property == "ref") {
x[i] <- found[1, ref]
} else {
x[i] <- microorganismsDT[col_id == found[1, col_id_new], ..property][[1]]
x[i] <- microorganismsDT[col_id == found[1, col_id_new],
..property][[1]]
}
options(mo_renamed_last_run = found[1, fullname])
was_renamed(name_old = found[1, fullname],
@ -1077,7 +1144,8 @@ exec_as.mo <- function(x,
# mo_ref("Chlamydophila psittaci) = "Everett et al., 1999"
x <- found[1, ref]
} else {
x <- microorganismsDT[col_id == found[1, col_id_new], ..property][[1]]
x <- microorganismsDT[col_id == found[1, col_id_new],
..property][[1]]
}
was_renamed(name_old = found[1, fullname],
name_new = microorganismsDT[col_id == found[1, col_id_new], fullname],
@ -1109,7 +1177,8 @@ exec_as.mo <- function(x,
}
if (!empty_result(found)) {
found_result <- found
found <- reference_data_to_use[mo == found, ..property][[1]]
found <- reference_data_to_use[mo == found,
..property][[1]]
uncertainties <<- rbind(uncertainties,
format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level,
input = a.x_backup,
@ -1135,7 +1204,8 @@ exec_as.mo <- function(x,
message("Running '", paste(b.x_trimmed, "species"), "'")
}
# not when input is like Genustext, because then Neospora would lead to Actinokineospora
found <- uncertain.reference_data_to_use[fullname_lower %like_case% paste(b.x_trimmed, "species"), ..property][[1]]
found <- uncertain.reference_data_to_use[fullname_lower %like_case% paste(b.x_trimmed, "species"),
..property][[1]]
if (length(found) > 0) {
x[i] <- found[1L]
uncertainties <<- rbind(uncertainties,
@ -1167,7 +1237,8 @@ exec_as.mo <- function(x,
}
if (!empty_result(found) & nchar(g.x_backup_without_spp) >= 6) {
found_result <- found
found <- reference_data_to_use[mo == found, ..property][[1]]
found <- reference_data_to_use[mo == found,
..property][[1]]
uncertainties <<- rbind(uncertainties,
format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level,
input = a.x_backup,
@ -1194,7 +1265,8 @@ exec_as.mo <- function(x,
}
if (!empty_result(found) & nchar(g.x_backup_without_spp) >= 6) {
found_result <- found
found <- reference_data_to_use[mo == found, ..property][[1]]
found <- reference_data_to_use[mo == found,
..property][[1]]
uncertainties <<- rbind(uncertainties,
format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level,
input = a.x_backup,
@ -1228,7 +1300,8 @@ exec_as.mo <- function(x,
}
if (!empty_result(found)) {
found_result <- found
found <- reference_data_to_use[mo == found, ..property][[1]]
found <- reference_data_to_use[mo == found,
..property][[1]]
uncertainties <<- rbind(uncertainties,
format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level,
input = a.x_backup,
@ -1260,7 +1333,8 @@ exec_as.mo <- function(x,
}
if (!empty_result(found)) {
found_result <- found
found <- reference_data_to_use[mo == found, ..property][[1]]
found <- reference_data_to_use[mo == found,
..property][[1]]
uncertainties <<- rbind(uncertainties,
format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level,
input = a.x_backup,
@ -1280,7 +1354,8 @@ exec_as.mo <- function(x,
if (b.x_trimmed %like_case% "yeast") {
found <- "F_YEAST"
found_result <- found
found <- microorganismsDT[mo == found, ..property][[1]]
found <- microorganismsDT[mo == found,
..property][[1]]
uncertainties <<- rbind(uncertainties,
format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level,
input = a.x_backup,
@ -1293,7 +1368,8 @@ exec_as.mo <- function(x,
if (b.x_trimmed %like_case% "(fungus|fungi)" & !b.x_trimmed %like_case% "fungiphrya") {
found <- "F_FUNGUS"
found_result <- found
found <- microorganismsDT[mo == found, ..property][[1]]
found <- microorganismsDT[mo == found,
..property][[1]]
uncertainties <<- rbind(uncertainties,
format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level,
input = a.x_backup,
@ -1322,7 +1398,8 @@ exec_as.mo <- function(x,
}
if (!empty_result(found)) {
found_result <- found
found <- reference_data_to_use[mo == found_result[1L], ..property][[1]]
found <- reference_data_to_use[mo == found_result[1L],
..property][[1]]
# uncertainty level 2 only if searched part contains a space (otherwise it will be found with lvl 3)
if (x_strip_collapsed %like_case% " ") {
uncertainties <<- rbind(uncertainties,
@ -1362,7 +1439,8 @@ exec_as.mo <- function(x,
}
if (!empty_result(found)) {
found_result <- found
found <- reference_data_to_use[mo == found, ..property][[1]]
found <- reference_data_to_use[mo == found,
..property][[1]]
uncertainties <<- rbind(uncertainties,
format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level,
input = a.x_backup,
@ -1393,7 +1471,8 @@ exec_as.mo <- function(x,
}
if (!empty_result(found)) {
found_result <- found
found <- reference_data_to_use[mo == found, ..property][[1]]
found <- reference_data_to_use[mo == found,
..property][[1]]
uncertainties <<- rbind(uncertainties,
format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level,
input = a.x_backup,
@ -1417,7 +1496,8 @@ exec_as.mo <- function(x,
if (nrow(found) > 0) {
found_result <- found[["mo"]]
if (!empty_result(found_result) & nchar(g.x_backup_without_spp) >= 6) {
found <- reference_data_to_use[mo == found_result[1L], ..property][[1]]
found <- reference_data_to_use[mo == found_result[1L],
..property][[1]]
uncertainties <<- rbind(uncertainties,
format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level,
input = a.x_backup,
@ -1525,7 +1605,8 @@ exec_as.mo <- function(x,
}
# no results found: make them UNKNOWN ----
x[i] <- microorganismsDT[mo == "UNKNOWN", ..property][[1]]
x[i] <- microorganismsDT[mo == "UNKNOWN",
..property][[1]]
if (initial_search == TRUE) {
failures <- c(failures, x_backup[i])
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history)
@ -1586,56 +1667,76 @@ exec_as.mo <- function(x,
"saccharolyticus", "saprophyticus", "sciuri",
"stepanovicii", "simulans", "succinus",
"vitulinus", "warneri", "xylosus")
| (species == "schleiferi" & subspecies %in% c("schleiferi", "")), ..property][[1]]
| (species == "schleiferi" & subspecies %in% c("schleiferi", "")),
..property][[1]]
CoPS <- MOs_staph[species %in% c("simiae", "agnetis",
"delphini", "lutrae",
"hyicus", "intermedius",
"pseudintermedius", "pseudointermedius",
"schweitzeri", "argenteus")
| (species == "schleiferi" & subspecies == "coagulans"), ..property][[1]]
| (species == "schleiferi" & subspecies == "coagulans"),
..property][[1]]
# warn when species found that are not in Becker (2014, PMID 25278577) and Becker (2019, PMID 30872103)
post_Becker <- c("argensis", "caeli", "cornubiensis", "edaphicus")
if (any(x %in% MOs_staph[species %in% post_Becker, ..property][[1]])) {
if (any(x %in% MOs_staph[species %in% post_Becker,
..property][[1]])) {
warning("Becker ", italic("et al."), " (2014, 2019) does not contain these species named after their publication: ",
italic(paste("S.",
sort(mo_species(unique(x[x %in% MOs_staph[species %in% post_Becker, ..property][[1]]]))),
sort(mo_species(unique(x[x %in% MOs_staph[species %in% post_Becker,
..property][[1]]]))),
collapse = ", ")),
".",
call. = FALSE,
immediate. = TRUE)
}
x[x %in% CoNS] <- microorganismsDT[mo == "B_STPHY_CONS", ..property][[1]][1L]
x[x %in% CoPS] <- microorganismsDT[mo == "B_STPHY_COPS", ..property][[1]][1L]
x[x %in% CoNS] <- microorganismsDT[mo == "B_STPHY_CONS",
..property][[1]][1L]
x[x %in% CoPS] <- microorganismsDT[mo == "B_STPHY_COPS",
..property][[1]][1L]
if (Becker == "all") {
x[x %in% microorganismsDT[mo %like_case% "^B_STPHY_AURS", ..property][[1]]] <- microorganismsDT[mo == "B_STPHY_COPS", ..property][[1]][1L]
x[x %in% microorganismsDT[mo %like_case% "^B_STPHY_AURS",
..property][[1]]] <- microorganismsDT[mo == "B_STPHY_COPS",
..property][[1]][1L]
}
}
# Lancefield ----
if (Lancefield == TRUE | Lancefield == "all") {
# group A - S. pyogenes
x[x == microorganismsDT[mo == "B_STRPT_PYGN", ..property][[1]][1L]] <- microorganismsDT[mo == "B_STRPT_GRPA", ..property][[1]][1L]
x[x == microorganismsDT[mo == "B_STRPT_PYGN",
..property][[1]][1L]] <- microorganismsDT[mo == "B_STRPT_GRPA",
..property][[1]][1L]
# group B - S. agalactiae
x[x == microorganismsDT[mo == "B_STRPT_AGLC", ..property][[1]][1L]] <- microorganismsDT[mo == "B_STRPT_GRPB", ..property][[1]][1L]
x[x == microorganismsDT[mo == "B_STRPT_AGLC",
..property][[1]][1L]] <- microorganismsDT[mo == "B_STRPT_GRPB",
..property][[1]][1L]
# group C
S_groupC <- microorganismsDT %>% filter(genus == "Streptococcus",
species %in% c("equisimilis", "equi",
"zooepidemicus", "dysgalactiae")) %>%
pull(property)
x[x %in% S_groupC] <- microorganismsDT[mo == "B_STRPT_GRPC", ..property][[1]][1L]
x[x %in% S_groupC] <- microorganismsDT[mo == "B_STRPT_GRPC",
..property][[1]][1L]
if (Lancefield == "all") {
# all Enterococci
x[x %like% "^(Enterococcus|B_ENTRC)"] <- microorganismsDT[mo == "B_STRPT_GRPD", ..property][[1]][1L]
x[x %like% "^(Enterococcus|B_ENTRC)"] <- microorganismsDT[mo == "B_STRPT_GRPD",
..property][[1]][1L]
}
# group F - S. anginosus
x[x == microorganismsDT[mo == "B_STRPT_ANGN", ..property][[1]][1L]] <- microorganismsDT[mo == "B_STRPT_GRPF", ..property][[1]][1L]
x[x == microorganismsDT[mo == "B_STRPT_ANGN",
..property][[1]][1L]] <- microorganismsDT[mo == "B_STRPT_GRPF",
..property][[1]][1L]
# group H - S. sanguinis
x[x == microorganismsDT[mo == "B_STRPT_SNGN", ..property][[1]][1L]] <- microorganismsDT[mo == "B_STRPT_GRPH", ..property][[1]][1L]
x[x == microorganismsDT[mo == "B_STRPT_SNGN",
..property][[1]][1L]] <- microorganismsDT[mo == "B_STRPT_GRPH",
..property][[1]][1L]
# group K - S. salivarius
x[x == microorganismsDT[mo == "B_STRPT_SLVR", ..property][[1]][1L]] <- microorganismsDT[mo == "B_STRPT_GRPK", ..property][[1]][1L]
x[x == microorganismsDT[mo == "B_STRPT_SLVR",
..property][[1]][1L]] <- microorganismsDT[mo == "B_STRPT_GRPK",
..property][[1]][1L]
}
# Wrap up ----------------------------------------------------------------
@ -1805,7 +1906,8 @@ as.data.frame.mo <- function(x, ...) {
"[<-.mo" <- function(i, j, ..., value) {
y <- NextMethod()
attributes(y) <- attributes(i)
class_integrity_check(y, "microorganism code", c(as.character(AMR::microorganisms$mo), as.character(microorganisms.translation$mo_old)))
class_integrity_check(y, "microorganism code", c(as.character(AMR::microorganisms$mo),
as.character(microorganisms.translation$mo_old)))
}
#' @exportMethod [[<-.mo
#' @export
@ -1813,7 +1915,8 @@ as.data.frame.mo <- function(x, ...) {
"[[<-.mo" <- function(i, j, ..., value) {
y <- NextMethod()
attributes(y) <- attributes(i)
class_integrity_check(y, "microorganism code", c(as.character(AMR::microorganisms$mo), as.character(microorganisms.translation$mo_old)))
class_integrity_check(y, "microorganism code", c(as.character(AMR::microorganisms$mo),
as.character(microorganisms.translation$mo_old)))
}
#' @exportMethod c.mo
#' @export
@ -1821,7 +1924,8 @@ as.data.frame.mo <- function(x, ...) {
c.mo <- function(x, ...) {
y <- NextMethod()
attributes(y) <- attributes(x)
class_integrity_check(y, "microorganism code", c(as.character(AMR::microorganisms$mo), as.character(microorganisms.translation$mo_old)))
class_integrity_check(y, "microorganism code", c(as.character(AMR::microorganisms$mo),
as.character(microorganisms.translation$mo_old)))
}
#' @rdname as.mo

34
R/zzz.R
View File

@ -21,21 +21,18 @@
#' @importFrom data.table as.data.table setkey
.onLoad <- function(libname, pkgname) {
# packageStartupMessage("Loading taxonomic reference data")
# get new functions not available in older versions of R
backports::import(pkgname)
# register data
microorganisms.oldDT <- as.data.table(AMR::microorganisms.old)
# for fullname_lower: keep only dots, letters, numbers, slashes, spaces and dashes
microorganisms.oldDT$fullname_lower <- gsub("[^.a-z0-9/ \\-]+", "", tolower(microorganisms.oldDT$fullname))
setkey(microorganisms.oldDT, prevalence, fullname)
assign(x = "microorganismsDT",
value = make_DT(),
envir = asNamespace("AMR"))
assign(x = "microorganisms.oldDT",
value = microorganisms.oldDT,
value = make_oldDT(),
envir = asNamespace("AMR"))
assign(x = "mo_codes_v0.5.0",
@ -62,7 +59,9 @@ make_DT <- function() {
# work with Viridans Group Streptococci, etc.
tolower(trimws(ifelse(genus == "",
fullname,
paste(genus, species, subspecies)))))) %>%
paste(genus, species, subspecies))))),
# add a column with only "e coli" like combinations
g_species = gsub("^([a-z])[a-z]+ ([a-z]+) ?.*", "\\1 \\2", fullname_lower)) %>%
as.data.table()
# so arrange data on prevalence first, then kingdom, then full name
@ -73,6 +72,25 @@ make_DT <- function() {
microorganismsDT
}
#' @importFrom data.table as.data.table setkey
#' @importFrom dplyr %>% mutate
make_oldDT <- function() {
microorganisms.oldDT <- AMR::microorganisms.old %>%
mutate(
# for fullname_lower: keep only dots, letters,
# numbers, slashes, spaces and dashes
fullname_lower = gsub("[^.a-z0-9/ \\-]+", "", tolower(fullname)),
# add a column with only "e coli" like combinations
g_species = gsub("^([a-z])[a-z]+ ([a-z]+) ?.*", "\\1 \\2", fullname_lower)) %>%
as.data.table()
# so arrange data on prevalence first, then full name
setkey(microorganisms.oldDT,
prevalence,
fullname)
microorganisms.oldDT
}
make_trans_tbl <- function() {
# conversion of old MO codes from v0.5.0 (ITIS) to later versions (Catalogue of Life)
c(B_ACHRMB = "B_ACHRM", B_ANNMA = "B_ACTNS", B_ACLLS = "B_ALCYC",