1
0
mirror of https://github.com/msberends/AMR.git synced 2025-01-25 23:44:34 +01:00

fix unit tests

This commit is contained in:
dr. M.S. (Matthijs) Berends 2022-10-04 14:41:02 +02:00
parent 082e52a0dd
commit 37f6db5ccd
19 changed files with 297 additions and 234 deletions

View File

@ -1,5 +1,34 @@
#!/bin/sh #!/bin/sh
# ==================================================================== #
# TITLE #
# AMR: An R Package for Working with Antimicrobial Resistance Data #
# #
# SOURCE #
# https://github.com/msberends/AMR #
# #
# CITE AS #
# Berends MS, Luz CF, Friedrich AW, Sinha BNM, Albers CJ, Glasner C #
# (2022). AMR: An R Package for Working with Antimicrobial Resistance #
# Data. Journal of Statistical Software, 104(3), 1-31. #
# doi:10.18637/jss.v104.i03 #
# #
# Developed at the University of Groningen, the Netherlands, in #
# collaboration with non-profit organisations Certe Medical #
# Diagnostics & Advice, and University Medical Center Groningen. #
# #
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
# the Free Software Foundation. #
# We created this package for both routine data analysis and academic #
# research and it was publicly released in the hope that it will be #
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
# #
# Visit our website for the full manual and a complete tutorial about #
# how to conduct AMR data analysis: https://msberends.github.io/AMR/ #
# ==================================================================== #
echo "Running pre-commit hook..." echo "Running pre-commit hook..."
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

View File

@ -1,5 +1,5 @@
Package: AMR Package: AMR
Version: 1.8.2.9026 Version: 1.8.2.9027
Date: 2022-10-04 Date: 2022-10-04
Title: Antimicrobial Resistance Data Analysis Title: Antimicrobial Resistance Data Analysis
Description: Functions to simplify and standardise antimicrobial resistance (AMR) Description: Functions to simplify and standardise antimicrobial resistance (AMR)

View File

@ -1,4 +1,4 @@
# AMR 1.8.2.9026 # AMR 1.8.2.9027
This version will eventually become v2.0! We're happy to reach a new major milestone soon! This version will eventually become v2.0! We're happy to reach a new major milestone soon!

View File

@ -409,7 +409,7 @@ word_wrap <- function(...,
msg <- paste0(c(...), collapse = "") msg <- paste0(c(...), collapse = "")
if (isTRUE(as_note)) { if (isTRUE(as_note)) {
msg <- paste0(pkg_env$info_icon, " ", gsub("^note:? ?", "", msg, ignore.case = TRUE)) msg <- paste0(AMR_env$info_icon, " ", gsub("^note:? ?", "", msg, ignore.case = TRUE))
} }
if (msg %like% "\n") { if (msg %like% "\n") {
@ -742,14 +742,14 @@ meet_criteria <- function(object,
# if object is missing, or another error: # if object is missing, or another error:
tryCatch(invisible(object), tryCatch(invisible(object),
error = function(e) pkg_env$meet_criteria_error_txt <- e$message error = function(e) AMR_env$meet_criteria_error_txt <- e$message
) )
if (!is.null(pkg_env$meet_criteria_error_txt)) { if (!is.null(AMR_env$meet_criteria_error_txt)) {
error_txt <- pkg_env$meet_criteria_error_txt error_txt <- AMR_env$meet_criteria_error_txt
pkg_env$meet_criteria_error_txt <- NULL AMR_env$meet_criteria_error_txt <- NULL
stop(error_txt, call. = FALSE) # don't use stop_() here, our pkg may not be loaded yet stop(error_txt, call. = FALSE) # don't use stop_() here, our pkg may not be loaded yet
} }
pkg_env$meet_criteria_error_txt <- NULL AMR_env$meet_criteria_error_txt <- NULL
if (is.null(object)) { if (is.null(object)) {
stop_if(allow_NULL == FALSE, "argument `", obj_name, "` must not be NULL", call = call_depth) stop_if(allow_NULL == FALSE, "argument `", obj_name, "` must not be NULL", call = call_depth)
@ -990,9 +990,9 @@ message_not_thrown_before <- function(fn, ..., entire_session = FALSE) {
# this is to prevent that messages/notes will be printed for every dplyr group or more than once per session # this is to prevent that messages/notes will be printed for every dplyr group or more than once per session
# e.g. this would show a msg 4 times: example_isolates %>% group_by(ward) %>% filter(mo_is_gram_negative()) # e.g. this would show a msg 4 times: example_isolates %>% group_by(ward) %>% filter(mo_is_gram_negative())
salt <- gsub("[^a-zA-Z0-9|_-]", "?", substr(paste(c(...), sep = "|", collapse = "|"), 1, 512), perl = TRUE) salt <- gsub("[^a-zA-Z0-9|_-]", "?", substr(paste(c(...), sep = "|", collapse = "|"), 1, 512), perl = TRUE)
not_thrown_before <- is.null(pkg_env[[paste0("thrown_msg.", fn, ".", salt)]]) || not_thrown_before <- is.null(AMR_env[[paste0("thrown_msg.", fn, ".", salt)]]) ||
!identical( !identical(
pkg_env[[paste0("thrown_msg.", fn, ".", salt)]], AMR_env[[paste0("thrown_msg.", fn, ".", salt)]],
unique_call_id( unique_call_id(
entire_session = entire_session, entire_session = entire_session,
match_fn = fn match_fn = fn
@ -1003,7 +1003,7 @@ message_not_thrown_before <- function(fn, ..., entire_session = FALSE) {
assign( assign(
x = paste0("thrown_msg.", fn, ".", salt), x = paste0("thrown_msg.", fn, ".", salt),
value = unique_call_id(entire_session = entire_session, match_fn = fn), value = unique_call_id(entire_session = entire_session, match_fn = fn),
envir = pkg_env envir = AMR_env
) )
} }
not_thrown_before not_thrown_before
@ -1354,11 +1354,11 @@ percentage <- function(x, digits = NULL, ...) {
} }
time_start_tracking <- function() { time_start_tracking <- function() {
pkg_env$time_start <- round(as.double(Sys.time()) * 1000) AMR_env$time_start <- round(as.double(Sys.time()) * 1000)
} }
time_track <- function(name = NULL) { time_track <- function(name = NULL) {
paste("(until now:", trimws(round(as.double(Sys.time()) * 1000) - pkg_env$time_start), "ms)") paste("(until now:", trimws(round(as.double(Sys.time()) * 1000) - AMR_env$time_start), "ms)")
} }
trimws2 <- function(..., whitespace = "[\u0009\u000A\u000B\u000C\u000D\u0020\u0085\u00A0\u1680\u180E\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u200B\u200C\u200D\u2028\u2029\u202F\u205F\u2060\u3000\uFEFF]") { trimws2 <- function(..., whitespace = "[\u0009\u000A\u000B\u000C\u000D\u0020\u0085\u00A0\u1680\u180E\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u200B\u200C\u200D\u2028\u2029\u202F\u205F\u2060\u3000\uFEFF]") {
@ -1370,19 +1370,19 @@ trimws2 <- function(..., whitespace = "[\u0009\u000A\u000B\u000C\u000D\u0020\u00
# Faster data.table implementations ---- # Faster data.table implementations ----
match <- function(x, ...) { match <- function(x, ...) {
if (isTRUE(pkg_env$has_data.table) && is.character(x)) { if (isTRUE(AMR_env$has_data.table) && is.character(x)) {
# data.table::chmatch() is 35% faster than base::match() for character # data.table::chmatch() is 35% faster than base::match() for character
getExportedValue(name = "chmatch", ns = asNamespace("data.table"))(x, ...) getExportedValue(name = "chmatch", ns = asNamespace("data.table"))(x, ...)
} else { } else {
base::match(x, ...) base::match(x, ...)
} }
} }
`%in%` <- function(x, ...) { `%in%` <- function(x, table) {
if (isTRUE(pkg_env$has_data.table) && is.character(x)) { if (isTRUE(AMR_env$has_data.table) && is.character(x) && is.character(table)) {
# data.table::`%chin%`() is 20% faster than base::`%in%`() for character # data.table::`%chin%`() is 20-50% faster than base::`%in%`() for character
getExportedValue(name = "%chin%", ns = asNamespace("data.table"))(x, ...) getExportedValue(name = "%chin%", ns = asNamespace("data.table"))(x, table)
} else { } else {
base::`%in%`(x, ...) base::`%in%`(x, table)
} }
} }

View File

@ -1178,7 +1178,7 @@ eucast_dosage <- function(ab, administration = "iv", version_breakpoints = 11.0)
meet_criteria(administration, allow_class = "character", is_in = dosage$administration[!is.na(dosage$administration)], has_length = 1) meet_criteria(administration, allow_class = "character", is_in = dosage$administration[!is.na(dosage$administration)], has_length = 1)
meet_criteria(version_breakpoints, allow_class = c("numeric", "integer"), has_length = 1, is_in = as.double(names(EUCAST_VERSION_BREAKPOINTS))) meet_criteria(version_breakpoints, allow_class = c("numeric", "integer"), has_length = 1, is_in = as.double(names(EUCAST_VERSION_BREAKPOINTS)))
# show used version_breakpoints number once per session (pkg_env will reload every session) # show used version_breakpoints number once per session (AMR_env will reload every session)
if (message_not_thrown_before("eucast_dosage", "v", gsub("[^0-9]", "", version_breakpoints), entire_session = TRUE)) { if (message_not_thrown_before("eucast_dosage", "v", gsub("[^0-9]", "", version_breakpoints), entire_session = TRUE)) {
message_( message_(
"Dosages for antimicrobial drugs, as meant for ", "Dosages for antimicrobial drugs, as meant for ",

View File

@ -112,17 +112,17 @@ get_column_abx <- function(x,
entire_session = FALSE, entire_session = FALSE,
match_fn = fn match_fn = fn
), ),
pkg_env$get_column_abx.call AMR_env$get_column_abx.call
)) { )) {
# so within the same call, within the same environment, we got here again. # so within the same call, within the same environment, we got here again.
# but we could've come from another function within the same call, so now only check the columns that changed # but we could've come from another function within the same call, so now only check the columns that changed
# first remove the columns that are not existing anymore # first remove the columns that are not existing anymore
previous <- pkg_env$get_column_abx.out previous <- AMR_env$get_column_abx.out
current <- previous[previous %in% colnames(x)] current <- previous[previous %in% colnames(x)]
# then compare columns in current call with columns in original call # then compare columns in current call with columns in original call
new_cols <- colnames(x)[!colnames(x) %in% pkg_env$get_column_abx.checked_cols] new_cols <- colnames(x)[!colnames(x) %in% AMR_env$get_column_abx.checked_cols]
if (length(new_cols) > 0) { if (length(new_cols) > 0) {
# these columns did not exist in the last call, so add them # these columns did not exist in the last call, so add them
new_cols_rsi <- get_column_abx(x[, new_cols, drop = FALSE], reuse_previous_result = FALSE, info = FALSE, sort = FALSE) new_cols_rsi <- get_column_abx(x[, new_cols, drop = FALSE], reuse_previous_result = FALSE, info = FALSE, sort = FALSE)
@ -132,11 +132,11 @@ get_column_abx <- function(x,
} }
# update pkg environment to improve speed on next run # update pkg environment to improve speed on next run
pkg_env$get_column_abx.out <- current AMR_env$get_column_abx.out <- current
pkg_env$get_column_abx.checked_cols <- colnames(x) AMR_env$get_column_abx.checked_cols <- colnames(x)
# and return right values # and return right values
return(pkg_env$get_column_abx.out) return(AMR_env$get_column_abx.out)
} }
meet_criteria(x, allow_class = "data.frame") meet_criteria(x, allow_class = "data.frame")
@ -243,9 +243,9 @@ get_column_abx <- function(x,
if (info == TRUE && all_okay == TRUE) { if (info == TRUE && all_okay == TRUE) {
message_("No columns found.") message_("No columns found.")
} }
pkg_env$get_column_abx.call <- unique_call_id(entire_session = FALSE, match_fn = fn) AMR_env$get_column_abx.call <- unique_call_id(entire_session = FALSE, match_fn = fn)
pkg_env$get_column_abx.checked_cols <- colnames(x.bak) AMR_env$get_column_abx.checked_cols <- colnames(x.bak)
pkg_env$get_column_abx.out <- out AMR_env$get_column_abx.out <- out
return(out) return(out)
} }
@ -320,9 +320,9 @@ get_column_abx <- function(x,
} }
} }
pkg_env$get_column_abx.call <- unique_call_id(entire_session = FALSE, match_fn = fn) AMR_env$get_column_abx.call <- unique_call_id(entire_session = FALSE, match_fn = fn)
pkg_env$get_column_abx.checked_cols <- colnames(x.bak) AMR_env$get_column_abx.checked_cols <- colnames(x.bak)
pkg_env$get_column_abx.out <- out AMR_env$get_column_abx.out <- out
out out
} }

View File

@ -1955,14 +1955,14 @@ run_custom_mdro_guideline <- function(df, guideline, info) {
for (i in seq_len(n_dots)) { for (i in seq_len(n_dots)) {
qry <- tryCatch(eval(parse(text = guideline[[i]]$query), envir = df, enclos = parent.frame()), qry <- tryCatch(eval(parse(text = guideline[[i]]$query), envir = df, enclos = parent.frame()),
error = function(e) { error = function(e) {
pkg_env$err_msg <- e$message AMR_env$err_msg <- e$message
return("error") return("error")
} }
) )
if (identical(qry, "error")) { if (identical(qry, "error")) {
warning_("in `custom_mdro_guideline()`: rule ", i, warning_("in `custom_mdro_guideline()`: rule ", i,
" (`", as.character(guideline[[i]]$query), "`) was ignored because of this error message: ", " (`", as.character(guideline[[i]]$query), "`) was ignored because of this error message: ",
pkg_env$err_msg, AMR_env$err_msg,
call = FALSE, call = FALSE,
add_fn = font_red add_fn = font_red
) )

314
R/mo.R
View File

@ -180,8 +180,10 @@ as.mo <- function(x,
# ignore cases that match the ignore pattern # ignore cases that match the ignore pattern
x <- replace_ignore_pattern(x, ignore_pattern) x <- replace_ignore_pattern(x, ignore_pattern)
x_lower <- tolower(x)
# WHONET: xxx = no growth # WHONET: xxx = no growth
x[tolower(x) %in% c("", "xxx", "na", "nan")] <- NA_character_ x[x_lower %in% c("", "xxx", "na", "nan")] <- NA_character_
out <- rep(NA_character_, length(x)) out <- rep(NA_character_, length(x))
@ -195,9 +197,11 @@ as.mo <- function(x,
# From MO code ---- # From MO code ----
out[is.na(out) & x %in% MO_lookup$mo] <- x[is.na(out) & x %in% MO_lookup$mo] out[is.na(out) & x %in% MO_lookup$mo] <- x[is.na(out) & x %in% MO_lookup$mo]
# From full name ---- # From full name ----
out[is.na(out) & x %in% MO_lookup$fullname] <- MO_lookup$mo[match(x[is.na(out) & x %in% MO_lookup$fullname], MO_lookup$fullname)] out[is.na(out) & x_lower %in% MO_lookup$fullname_lower] <- MO_lookup$mo[match(x_lower[is.na(out) & x_lower %in% MO_lookup$fullname_lower], MO_lookup$fullname_lower)]
# one exception: "Fungi" matches the kingdom, but instead it should return the 'unknown' code for fungi
out[out == "F_[KNG]_FUNGI"] <- "F_FUNGUS"
# From known codes ---- # From known codes ----
out[is.na(out) & x %in% AMR::microorganisms.codes$code] <- AMR::microorganisms.codes$mo[match(x[is.na(out) & x %in% AMR::microorganisms.codes$code], AMR::microorganisms.codes$code)] out[is.na(out) & toupper(x) %in% AMR::microorganisms.codes$code] <- AMR::microorganisms.codes$mo[match(toupper(x)[is.na(out) & toupper(x) %in% AMR::microorganisms.codes$code], AMR::microorganisms.codes$code)]
# From SNOMED ---- # From SNOMED ----
if (any(is.na(out) & !is.na(x)) && any(is.na(out) & x %in% unlist(microorganisms$snomed), na.rm = TRUE)) { if (any(is.na(out) & !is.na(x)) && any(is.na(out) & x %in% unlist(microorganisms$snomed), na.rm = TRUE)) {
# found this extremely fast gem here: https://stackoverflow.com/a/11002456/4575331 # found this extremely fast gem here: https://stackoverflow.com/a/11002456/4575331
@ -208,7 +212,7 @@ as.mo <- function(x,
out[is.na(out)] <- convert_colloquial_input(x[is.na(out)]) out[is.na(out)] <- convert_colloquial_input(x[is.na(out)])
# From previous hits in this session ---- # From previous hits in this session ----
old <- out old <- out
out[is.na(out) & paste(x, minimum_matching_score) %in% pkg_env$mo_previously_coerced$x] <- pkg_env$mo_previously_coerced$mo[match(paste(x, minimum_matching_score)[is.na(out) & paste(x, minimum_matching_score) %in% pkg_env$mo_previously_coerced$x], pkg_env$mo_previously_coerced$x)] out[is.na(out) & paste(x, minimum_matching_score) %in% AMR_env$mo_previously_coerced$x] <- AMR_env$mo_previously_coerced$mo[match(paste(x, minimum_matching_score)[is.na(out) & paste(x, minimum_matching_score) %in% AMR_env$mo_previously_coerced$x], AMR_env$mo_previously_coerced$x)]
new <- out new <- out
if (isTRUE(info) && message_not_thrown_before("as.mo", old, new, entire_session = TRUE) && any(is.na(old) & !is.na(new), na.rm = TRUE)) { if (isTRUE(info) && message_not_thrown_before("as.mo", old, new, entire_session = TRUE) && any(is.na(old) & !is.na(new), na.rm = TRUE)) {
message_( message_(
@ -220,8 +224,8 @@ as.mo <- function(x,
# For all other input ---- # For all other input ----
if (any(is.na(out) & !is.na(x))) { if (any(is.na(out) & !is.na(x))) {
# reset uncertainties # reset uncertainties
pkg_env$mo_uncertainties <- pkg_env$mo_uncertainties[0, ] AMR_env$mo_uncertainties <- AMR_env$mo_uncertainties[0, ]
pkg_env$mo_failures <- NULL AMR_env$mo_failures <- NULL
# Laboratory systems: remove (translated) entries like "no growth", "not E. coli", etc. # Laboratory systems: remove (translated) entries like "no growth", "not E. coli", etc.
x[trimws2(x) %like% translate_into_language("no .*growth", language = language)] <- NA_character_ x[trimws2(x) %like% translate_into_language("no .*growth", language = language)] <- NA_character_
@ -238,8 +242,6 @@ as.mo <- function(x,
x_coerced <- vapply(FUN.VALUE = character(1), x_unique, function(x_search) { x_coerced <- vapply(FUN.VALUE = character(1), x_unique, function(x_search) {
progress$tick() progress$tick()
print(x_search)
# some required cleaning steps # some required cleaning steps
x_out <- trimws2(x_search) x_out <- trimws2(x_search)
# this applies the `remove_from_input` argument, which defaults to mo_cleaning_regex() # this applies the `remove_from_input` argument, which defaults to mo_cleaning_regex()
@ -248,7 +250,11 @@ as.mo <- function(x,
x_search_cleaned <- x_out x_search_cleaned <- x_out
x_out <- tolower(x_out) x_out <- tolower(x_out)
print(x_out) # input must not be too short
if (nchar(x_out) < 3) {
return("UNKNOWN")
}
# take out the parts, split by space # take out the parts, split by space
x_parts <- strsplit(gsub("-", " ", x_out, fixed = TRUE), " ", fixed = TRUE)[[1]] x_parts <- strsplit(gsub("-", " ", x_out, fixed = TRUE), " ", fixed = TRUE)[[1]]
@ -282,7 +288,7 @@ as.mo <- function(x,
} else { } else {
mo_to_search <- MO_lookup$fullname[filtr] mo_to_search <- MO_lookup$fullname[filtr]
} }
pkg_env$mo_to_search <- mo_to_search AMR_env$mo_to_search <- mo_to_search
# determine the matching score on the original search value # determine the matching score on the original search value
m <- mo_matching_score(x = x_search_cleaned, n = mo_to_search) m <- mo_matching_score(x = x_search_cleaned, n = mo_to_search)
if (is.null(minimum_matching_score)) { if (is.null(minimum_matching_score)) {
@ -302,20 +308,21 @@ as.mo <- function(x,
result_mo <- NA_character_ result_mo <- NA_character_
} else { } else {
result_mo <- MO_lookup$mo[match(top_hits[1], MO_lookup$fullname)] result_mo <- MO_lookup$mo[match(top_hits[1], MO_lookup$fullname)]
pkg_env$mo_uncertainties <- rbind(pkg_env$mo_uncertainties, AMR_env$mo_uncertainties <- rbind(AMR_env$mo_uncertainties,
data.frame( data.frame(
minimum_matching_score = ifelse(is.null(minimum_matching_score), "NULL", minimum_matching_score),
original_input = x_search, original_input = x_search,
input = x_search_cleaned, input = x_search_cleaned,
fullname = top_hits[1], fullname = top_hits[1],
mo = result_mo, mo = result_mo,
candidates = ifelse(length(top_hits) > 1, paste(top_hits[2:min(26, length(top_hits))], collapse = ", "), ""), candidates = ifelse(length(top_hits) > 1, paste(top_hits[2:min(26, length(top_hits))], collapse = ", "), ""),
minimum_matching_score = ifelse(is.null(minimum_matching_score), "NULL", minimum_matching_score),
keep_synonyms = keep_synonyms,
stringsAsFactors = FALSE stringsAsFactors = FALSE
), ),
stringsAsFactors = FALSE stringsAsFactors = FALSE
) )
# save to package env to save time for next time # save to package env to save time for next time
pkg_env$mo_previously_coerced <- unique(rbind(pkg_env$mo_previously_coerced, AMR_env$mo_previously_coerced <- unique(rbind(AMR_env$mo_previously_coerced,
data.frame( data.frame(
x = paste(x_search, minimum_matching_score), x = paste(x_search, minimum_matching_score),
mo = result_mo, mo = result_mo,
@ -334,21 +341,21 @@ as.mo <- function(x,
out[is.na(out)] <- x_coerced[match(x[is.na(out)], x_unique)] out[is.na(out)] <- x_coerced[match(x[is.na(out)], x_unique)]
# Throw note about uncertainties ---- # Throw note about uncertainties ----
if (isTRUE(info) && NROW(pkg_env$mo_uncertainties) > 0) { if (isTRUE(info) && NROW(AMR_env$mo_uncertainties) > 0) {
if (message_not_thrown_before("as.mo", "uncertainties", pkg_env$mo_uncertainties$original_input)) { if (message_not_thrown_before("as.mo", "uncertainties", AMR_env$mo_uncertainties$original_input)) {
plural <- c("", "this") plural <- c("", "this")
if (length(pkg_env$mo_uncertainties$original_input) > 1) { if (length(AMR_env$mo_uncertainties$original_input) > 1) {
plural <- c("s", "these uncertainties") plural <- c("s", "these uncertainties")
} }
if (length(pkg_env$mo_uncertainties$original_input) <= 3) { if (length(AMR_env$mo_uncertainties$original_input) <= 3) {
examples <- vector_and(paste0( examples <- vector_and(paste0(
'"', pkg_env$mo_uncertainties$original_input, '"', AMR_env$mo_uncertainties$original_input,
'" (assumed ', font_italic(pkg_env$mo_uncertainties$fullname, collapse = NULL), ")" '" (assumed ', font_italic(AMR_env$mo_uncertainties$fullname, collapse = NULL), ")"
), ),
quotes = FALSE quotes = FALSE
) )
} else { } else {
examples <- paste0(nr2char(length(pkg_env$mo_uncertainties$original_input)), " microorganism", plural[1]) examples <- paste0(nr2char(length(AMR_env$mo_uncertainties$original_input)), " microorganism", plural[1])
} }
msg <- paste0( msg <- paste0(
"Microorganism translation was uncertain for ", examples, "Microorganism translation was uncertain for ", examples,
@ -364,18 +371,18 @@ as.mo <- function(x,
gbif_matches[!gbif_matches %in% AMR::microorganisms$gbif] <- NA gbif_matches[!gbif_matches %in% AMR::microorganisms$gbif] <- NA
lpsn_matches <- AMR::microorganisms$lpsn_renamed_to[match(out, AMR::microorganisms$mo)] lpsn_matches <- AMR::microorganisms$lpsn_renamed_to[match(out, AMR::microorganisms$mo)]
lpsn_matches[!lpsn_matches %in% AMR::microorganisms$lpsn] <- NA lpsn_matches[!lpsn_matches %in% AMR::microorganisms$lpsn] <- NA
pkg_env$mo_renamed <- list(old = out[!is.na(gbif_matches) | !is.na(lpsn_matches)], AMR_env$mo_renamed <- list(old = out[!is.na(gbif_matches) | !is.na(lpsn_matches)],
gbif_matches = gbif_matches[!is.na(gbif_matches) | !is.na(lpsn_matches)], gbif_matches = gbif_matches[!is.na(gbif_matches) | !is.na(lpsn_matches)],
lpsn_matches = lpsn_matches[!is.na(gbif_matches) | !is.na(lpsn_matches)]) lpsn_matches = lpsn_matches[!is.na(gbif_matches) | !is.na(lpsn_matches)])
if (isFALSE(keep_synonyms)) { if (isFALSE(keep_synonyms)) {
out[which(!is.na(gbif_matches))] <- AMR::microorganisms$mo[match(gbif_matches[which(!is.na(gbif_matches))], AMR::microorganisms$gbif)] out[which(!is.na(gbif_matches))] <- AMR::microorganisms$mo[match(gbif_matches[which(!is.na(gbif_matches))], AMR::microorganisms$gbif)]
out[which(!is.na(lpsn_matches))] <- AMR::microorganisms$mo[match(lpsn_matches[which(!is.na(lpsn_matches))], AMR::microorganisms$lpsn)] out[which(!is.na(lpsn_matches))] <- AMR::microorganisms$mo[match(lpsn_matches[which(!is.na(lpsn_matches))], AMR::microorganisms$lpsn)]
if (isTRUE(info) && length(pkg_env$mo_renamed$old) > 0) { if (isTRUE(info) && length(AMR_env$mo_renamed$old) > 0) {
print(mo_renamed(), extra_txt = " (use `keep_synonyms = TRUE` to leave uncorrected)") print(mo_renamed(), extra_txt = " (use `keep_synonyms = TRUE` to leave uncorrected)")
} }
} else if (is.null(getOption("AMR_keep_synonyms")) && length(pkg_env$mo_renamed$old) > 0 && message_not_thrown_before("as.mo", "keep_synonyms_warning", entire_session = TRUE)) { } else if (is.null(getOption("AMR_keep_synonyms")) && length(AMR_env$mo_renamed$old) > 0 && message_not_thrown_before("as.mo", "keep_synonyms_warning", entire_session = TRUE)) {
# keep synonyms is TRUE, so check if any do have synonyms # keep synonyms is TRUE, so check if any do have synonyms
warning_("Function `as.mo()` returned some old taxonomic names. Use `as.mo(..., keep_synonyms = FALSE)` to clean the input to currently accepted taxonomic names, or set the R option `AMR_keep_synonyms` to `FALSE`. This warning will be shown once per session.") warning_("Function `as.mo()` returned ", nr2char(length(unique(AMR_env$mo_renamed$old))), " old taxonomic name", ifelse(length(unique(AMR_env$mo_renamed$old)) > 1, "s", ""), ". Use `as.mo(..., keep_synonyms = FALSE)` to clean the input to currently accepted taxonomic names, or set the R option `AMR_keep_synonyms` to `FALSE`. This warning will be shown once per session.")
} }
# Apply Becker ---- # Apply Becker ----
@ -432,7 +439,10 @@ as.mo <- function(x,
# All unknowns ---- # All unknowns ----
out[is.na(out) & !is.na(x)] <- "UNKNOWN" out[is.na(out) & !is.na(x)] <- "UNKNOWN"
pkg_env$mo_failures <- unique(x[out == "UNKNOWN" & x != "UNKNOWN" & !is.na(x)]) AMR_env$mo_failures <- unique(x[out == "UNKNOWN" & x != "UNKNOWN" & !is.na(x)])
if (length(AMR_env$mo_failures) > 0) {
warning_("The following input could not be coerced and was returned as \"UNKNOWN\": ", vector_and(AMR_env$mo_failures, quotes = TRUE), ".\nYou can retrieve this list with `mo_failures()`.")
}
# Return class ---- # Return class ----
set_clean_class(out, set_clean_class(out,
@ -440,12 +450,73 @@ as.mo <- function(x,
) )
} }
# OTHER DOCUMENTED FUNCTIONS ----------------------------------------------
#' @rdname as.mo #' @rdname as.mo
#' @export #' @export
is.mo <- function(x) { is.mo <- function(x) {
inherits(x, "mo") inherits(x, "mo")
} }
#' @rdname as.mo
#' @export
mo_uncertainties <- function() {
set_clean_class(AMR_env$mo_uncertainties, new_class = c("mo_uncertainties", "data.frame"))
}
#' @rdname as.mo
#' @export
mo_renamed <- function() {
x <- AMR_env$mo_renamed
x$new <- synonym_mo_to_accepted_mo(x$old)
mo_old <- AMR::microorganisms$fullname[match(x$old, AMR::microorganisms$mo)]
mo_new <- AMR::microorganisms$fullname[match(x$new, AMR::microorganisms$mo)]
ref_old <- AMR::microorganisms$ref[match(x$old, AMR::microorganisms$mo)]
ref_new <- AMR::microorganisms$ref[match(x$new, AMR::microorganisms$mo)]
df_renamed <- data.frame(old = mo_old,
new = mo_new,
ref_old = ref_old,
ref_new = ref_new,
stringsAsFactors = FALSE)
df_renamed <- unique(df_renamed)
df_renamed <- df_renamed[order(df_renamed$old), , drop = FALSE]
set_clean_class(df_renamed, new_class = c("mo_renamed", "data.frame"))
}
#' @rdname as.mo
#' @export
mo_failures <- function() {
AMR_env$mo_failures
}
#' @rdname as.mo
#' @export
mo_reset_session <- function() {
if (NROW(AMR_env$mo_previously_coerced) > 0) {
message_("Reset ", nr2char(NROW(AMR_env$mo_previously_coerced)), " previously matched input value", ifelse(NROW(AMR_env$mo_previously_coerced) > 1, "s", ""), ".")
AMR_env$mo_previously_coerced <- AMR_env$mo_previously_coerced[0, , drop = FALSE]
AMR_env$mo_uncertainties <- AMR_env$mo_uncertainties[0, , drop = FALSE]
} else {
message_("No previously matched input values to reset.")
}
}
#' @rdname as.mo
#' @export
mo_cleaning_regex <- function() {
paste0(
"(",
"[^A-Za-z- \\(\\)\\[\\]{}]+",
"|",
"([({]|\\[).+([})]|\\])",
"|",
"(^| )(e?spp|e?ssp|e?ss|e?sp|e?subsp|sube?species|biovar|biotype|serovar|e?species)[.]*( |$))")
}
# UNDOCUMENTED METHODS ----------------------------------------------------
# will be exported using s3_register() in R/zzz.R # will be exported using s3_register() in R/zzz.R
pillar_shaft.mo <- function(x, ...) { pillar_shaft.mo <- function(x, ...) {
out <- format(x) out <- format(x)
@ -675,18 +746,6 @@ rep.mo <- function(x, ...) {
y y
} }
#' @rdname as.mo
#' @export
mo_failures <- function() {
pkg_env$mo_failures
}
#' @rdname as.mo
#' @export
mo_uncertainties <- function() {
set_clean_class(pkg_env$mo_uncertainties, new_class = c("mo_uncertainties", "data.frame"))
}
#' @method print mo_uncertainties #' @method print mo_uncertainties
#' @export #' @export
#' @noRd #' @noRd
@ -768,7 +827,13 @@ print.mo_uncertainties <- function(x, ...) {
), ),
collapse = "\n" collapse = "\n"
), ),
# Add "Based on {input}" text if it differs from the original input
ifelse(x[i, ]$original_input != x[i, ]$input, paste0(strrep(" ", nchar(x[i, ]$original_input) + 6), "Based on input \"", x[i, ]$input, "\""), ""), ifelse(x[i, ]$original_input != x[i, ]$input, paste0(strrep(" ", nchar(x[i, ]$original_input) + 6), "Based on input \"", x[i, ]$input, "\""), ""),
# Add note if result was coerced to accepted taxonomic name
ifelse(x[i, ]$keep_synonyms == FALSE & x[i, ]$mo %in% AMR::microorganisms$mo[which(AMR::microorganisms$status == "synonym")],
paste0(strrep(" ", nchar(x[i, ]$original_input) + 6),
font_red(paste0("This old taxonomic name was converted to ", font_italic(AMR::microorganisms$fullname[match(synonym_mo_to_accepted_mo(x[i, ]$mo), AMR::microorganisms$mo)], collapse = NULL), " (", synonym_mo_to_accepted_mo(x[i, ]$mo), ")."), collapse = NULL)),
""),
candidates, candidates,
sep = "\n" sep = "\n"
) )
@ -777,30 +842,6 @@ print.mo_uncertainties <- function(x, ...) {
cat(txt) cat(txt)
} }
#' @rdname as.mo
#' @export
mo_renamed <- function() {
x <- pkg_env$mo_renamed
x$new <- ifelse(is.na(x$lpsn_matches),
AMR::microorganisms$mo[match(x$gbif_matches, AMR::microorganisms$gbif)],
AMR::microorganisms$mo[match(x$lpsn_matches, AMR::microorganisms$lpsn)])
mo_old <- AMR::microorganisms$fullname[match(x$old, AMR::microorganisms$mo)]
mo_new <- AMR::microorganisms$fullname[match(x$new, AMR::microorganisms$mo)]
ref_old <- AMR::microorganisms$ref[match(x$old, AMR::microorganisms$mo)]
ref_new <- AMR::microorganisms$ref[match(x$new, AMR::microorganisms$mo)]
df_renamed <- data.frame(old = mo_old,
new = mo_new,
ref_old = ref_old,
ref_new = ref_new,
stringsAsFactors = FALSE)
df_renamed <- unique(df_renamed)
df_renamed <- df_renamed[order(df_renamed$old), , drop = FALSE]
set_clean_class(df_renamed, new_class = c("mo_renamed", "data.frame"))
}
#' @method print mo_renamed #' @method print mo_renamed
#' @export #' @export
#' @noRd #' @noRd
@ -812,6 +853,8 @@ print.mo_renamed <- function(x, extra_txt = "", n = 25, ...) {
x$ref_old[!is.na(x$ref_old)] <- paste0(" (", gsub("et al.", font_italic("et al."), x$ref_old[!is.na(x$ref_old)], fixed = TRUE), ")") x$ref_old[!is.na(x$ref_old)] <- paste0(" (", gsub("et al.", font_italic("et al."), x$ref_old[!is.na(x$ref_old)], fixed = TRUE), ")")
x$ref_new[!is.na(x$ref_new)] <- paste0(" (", gsub("et al.", font_italic("et al."), x$ref_new[!is.na(x$ref_new)], fixed = TRUE), ")") x$ref_new[!is.na(x$ref_new)] <- paste0(" (", gsub("et al.", font_italic("et al."), x$ref_new[!is.na(x$ref_new)], fixed = TRUE), ")")
x$ref_old[is.na(x$ref_old)] <- " (author unknown)"
x$ref_new[is.na(x$ref_new)] <- " (author unknown)"
rows <- seq_len(min(NROW(x), n)) rows <- seq_len(min(NROW(x), n))
@ -825,28 +868,57 @@ print.mo_renamed <- function(x, extra_txt = "", n = 25, ...) {
) )
} }
#' @rdname as.mo # UNDOCUMENTED HELPER FUNCTIONS -------------------------------------------
#' @export
mo_reset_session <- function() {
if (NROW(pkg_env$mo_previously_coerced) > 0) {
message_("Reset ", NROW(pkg_env$mo_previously_coerced), " previously matched input values.")
pkg_env$mo_previously_coerced <- pkg_env$mo_previously_coerced[0, , drop = FALSE]
pkg_env$mo_uncertainties <- pkg_env$mo_uncertainties[0, , drop = FALSE]
} else {
message_("No previously matched input values to reset.")
}
}
#' @rdname as.mo convert_colloquial_input <- function(x) {
#' @export x.bak <- trimws2(x)
mo_cleaning_regex <- function() { x <- trimws2(tolower(x))
paste0( out <- rep(NA_character_, length(x))
"(",
"[^A-Za-z- \\(\\)\\[\\]{}]+", # Streptococci, like GBS = Group B Streptococci (B_STRPT_GRPB)
"|", out[x %like_case% "^g[abcdfghkl]s$"] <- gsub("g([abcdfghkl])s",
"([({]|\\[).+([})]|\\])", "B_STRPT_GRP\\U\\1",
"|", x[x %like_case% "^g[abcdfghkl]s$"],
"(^| )(e?spp|e?ssp|e?ss|e?sp|e?subsp|sube?species|biovar|biotype|serovar|e?species)( |$))") perl = TRUE)
# Streptococci in different languages, like "estreptococos grupo B"
out[x %like_case% "strepto[ck]o[ck].* [abcdfghkl]$"] <- gsub(".*e?strepto[ck]o[ck].* ([abcdfghkl])$",
"B_STRPT_GRP\\U\\1",
x[x %like_case% "strepto[ck]o[ck].* [abcdfghkl]$"],
perl = TRUE)
out[x %like_case% "group [abcdfghkl] strepto[ck]o[ck]"] <- gsub(".*group ([abcdfghkl]) strepto[ck]o[ck].*",
"B_STRPT_GRP\\U\\1",
x[x %like_case% "group [abcdfghkl] strepto[ck]o[ck]"],
perl = TRUE)
out[x %like_case% "ha?emoly.*strep"] <- "B_STRPT_HAEM"
out[x %like_case% "(strepto.* mil+er+i|^mgs[^a-z]*$)"] <- "B_STRPT_MILL"
out[x %like_case% "mil+er+i gr"] <- "B_STRPT_MILL"
out[x %like_case% "((strepto|^s).* viridans|^vgs[^a-z]*$)"] <- "B_STRPT_VIRI"
# CoNS/CoPS in different languages (support for German, Dutch, Spanish, Portuguese)
out[x %like_case% "([ck]oagulas[ea].negatie?[vf]|^[ck]o?ns[^a-z]*$)"] <- "B_STPHY_CONS"
out[x %like_case% "([ck]oagulas[ea].positie?[vf]|^[ck]o?ps[^a-z]*$)"] <- "B_STPHY_COPS"
# Gram stains
out[x %like_case% "gram[ -]?neg.*|negatie?[vf]"] <- "B_GRAMN"
out[x %like_case% "gram[ -]?pos.*|positie?[vf]"] <- "B_GRAMP"
# yeasts and fungi
out[x %like_case% "^yeast?"] <- "F_YEAST"
out[x %like_case% "^fung(us|i)"] <- "F_FUNGUS"
# Salmonella city names, starting with capital species name - they are all S. enterica
out[x.bak %like_case% "[sS]almonella [A-Z][a-z]+ ?.*" & x %unlike% "typhi"] <- "B_SLMNL_ENTR"
out[x %like_case% "salmonella group"] <- "B_SLMNL"
# trivial names known to the field
out[x %like_case% "meningo[ck]o[ck]"] <- "B_NESSR_MNNG"
out[x %like_case% "gono[ck]o[ck]"] <- "B_NESSR_GNRR"
out[x %like_case% "pneumo[ck]o[ck]"] <- "B_STRPT_PNMN"
# unexisting names (xxx and con are WHONET codes)
out[x %in% c("con", "other", "none", "unknown") | x %like_case% "virus"] <- "UNKNOWN"
out
} }
nr2char <- function(x) { nr2char <- function(x) {
@ -861,17 +933,6 @@ nr2char <- function(x) {
} }
} }
get_mo_uncertainties <- function() {
remember <- list(uncertainties = pkg_env$mo_uncertainties)
# empty them, otherwise e.g. mo_shortname("Chlamydophila psittaci") will give 3 notes
pkg_env$mo_uncertainties <- NULL
remember
}
load_mo_uncertainties <- function(metadata) {
pkg_env$mo_uncertainties <- metadata$uncertainties
}
parse_and_convert <- function(x) { parse_and_convert <- function(x) {
if (tryCatch(is.character(x) && all(Encoding(x) == "unknown", na.rm = TRUE), error = function(e) FALSE)) { if (tryCatch(is.character(x) && all(Encoding(x) == "unknown", na.rm = TRUE), error = function(e) FALSE)) {
return(trimws2(x)) return(trimws2(x))
@ -1008,51 +1069,24 @@ repair_reference_df <- function(reference_df) {
reference_df reference_df
} }
convert_colloquial_input <- function(x) { get_mo_uncertainties <- function() {
x.bak <- trimws2(x) remember <- list(uncertainties = AMR_env$mo_uncertainties)
x <- trimws2(tolower(x)) # empty them, otherwise e.g. mo_shortname("Chlamydophila psittaci") will give 3 notes
out <- rep(NA_character_, length(x)) AMR_env$mo_uncertainties <- NULL
remember
# Streptococci, like GBS = Group B Streptococci (B_STRPT_GRPB) }
out[x %like_case% "^g[abcdfghkl]s$"] <- gsub("g([abcdfghkl])s",
"B_STRPT_GRP\\U\\1", load_mo_uncertainties <- function(metadata) {
x[x %like_case% "^g[abcdfghkl]s$"], AMR_env$mo_uncertainties <- metadata$uncertainties
perl = TRUE) }
# Streptococci in different languages, like "estreptococos grupo B"
out[x %like_case% "strepto[ck]o[ck].* [abcdfghkl]$"] <- gsub(".*e?strepto[ck]o[ck].* ([abcdfghkl])$", synonym_mo_to_accepted_mo <- function(x) {
"B_STRPT_GRP\\U\\1", x_gbif <- AMR::microorganisms$gbif_renamed_to[match(x, AMR::microorganisms$mo)]
x[x %like_case% "strepto[ck]o[ck].* [abcdfghkl]$"], x_lpsn <- AMR::microorganisms$lpsn_renamed_to[match(x, AMR::microorganisms$mo)]
perl = TRUE) x_gbif[!x_gbif %in% AMR::microorganisms$gbif] <- NA
out[x %like_case% "group [abcdfghkl] strepto[ck]o[ck]"] <- gsub(".*group ([abcdfghkl]) strepto[ck]o[ck].*", x_lpsn[!x_lpsn %in% AMR::microorganisms$lpsn] <- NA
"B_STRPT_GRP\\U\\1",
x[x %like_case% "group [abcdfghkl] strepto[ck]o[ck]"], ifelse(is.na(x_lpsn),
perl = TRUE) AMR::microorganisms$mo[match(x_gbif, AMR::microorganisms$gbif)],
out[x %like_case% "ha?emoly.*strep"] <- "B_STRPT_HAEM" AMR::microorganisms$mo[match(x_lpsn, AMR::microorganisms$lpsn)])
out[x %like_case% "(strepto.* mil+er+i|^mgs[^a-z]*$)"] <- "B_STRPT_MILL"
out[x %like_case% "((strepto|^s).* viridans|^vgs[^a-z]*$)"] <- "B_STRPT_VIRI"
# CoNS/CoPS in different languages (support for German, Dutch, Spanish, Portuguese)
out[x %like_case% "([ck]oagulas[ea].negatie?[vf]|^[ck]o?ns[^a-z]*$)"] <- "B_STPHY_CONS"
out[x %like_case% "([ck]oagulas[ea].positie?[vf]|^[ck]o?ps[^a-z]*$)"] <- "B_STPHY_COPS"
# Gram stains
out[x %like_case% "gram[ -]?neg.*|negatie?[vf]"] <- "B_GRAMN"
out[x %like_case% "gram[ -]?pos.*|positie?[vf]"] <- "B_GRAMP"
# yeasts and fungi
out[x %like_case% "^yeast?"] <- "F_YEAST"
out[x %like_case% "^fung(us|i)"] <- "F_FUNGUS"
# Salmonella city names, starting with capital species name - they are all S. enterica
out[x.bak %like_case% "[sS]almonella [A-Z][a-z]+ ?.*" & x %unlike% "typhi"] <- "B_SLMNL_ENTR"
# trivial names known to the field
out[x %like_case% "meningo[ck]o[ck]"] <- "B_NESSR_MNNG"
out[x %like_case% "gono[ck]o[ck]"] <- "B_NESSR_GNRR"
out[x %like_case% "pneumo[ck]o[ck]"] <- "B_STRPT_PNMN"
# unexisting names (xxx and con are WHONET codes)
out[x %in% c("con", "other", "none", "unknown") | x %like_case% "virus"] <- "UNKNOWN"
out
} }

View File

@ -80,6 +80,9 @@ mo_matching_score <- function(x, n) {
# only keep one space # only keep one space
x <- gsub(" +", " ", x) x <- gsub(" +", " ", x)
# start with a capital letter
substr(x, 1, 1) <- toupper(substr(x, 1, 1))
# n is always a taxonomically valid full name # n is always a taxonomically valid full name
if (length(n) == 1) { if (length(n) == 1) {
n <- rep(n, length(x)) n <- rep(n, length(x))

View File

@ -33,13 +33,13 @@
#' @param x any [character] (vector) that can be coerced to a valid microorganism code with [as.mo()]. Can be left blank for auto-guessing the column containing microorganism codes if used in a data set, see *Examples*. #' @param x any [character] (vector) that can be coerced to a valid microorganism code with [as.mo()]. Can be left blank for auto-guessing the column containing microorganism codes if used in a data set, see *Examples*.
#' @param property one of the column names of the [microorganisms] data set: `r vector_or(colnames(microorganisms), sort = FALSE, quotes = TRUE)`, or must be `"shortname"` #' @param property one of the column names of the [microorganisms] data set: `r vector_or(colnames(microorganisms), sort = FALSE, quotes = TRUE)`, or must be `"shortname"`
#' @inheritParams as.mo #' @inheritParams as.mo
#' @param ... other arguments passed on to [as.mo()], such as 'allow_uncertain' and 'ignore_pattern' #' @param ... other arguments passed on to [as.mo()], such as 'minimum_matching_score', 'ignore_pattern', and 'remove_from_input'
#' @param ab any (vector of) text that can be coerced to a valid antibiotic code with [as.ab()] #' @param ab any (vector of) text that can be coerced to a valid antibiotic code with [as.ab()]
#' @param open browse the URL using [`browseURL()`][utils::browseURL()] #' @param open browse the URL using [`browseURL()`][utils::browseURL()]
#' @details All functions will, at default, keep old taxonomic properties. Please refer to this example, knowing that *Escherichia blattae* was renamed to *Shimwellia blattae* in 2010: #' @details All functions will, at default, keep old taxonomic properties. Please refer to this example, knowing that *Escherichia blattae* was renamed to *Shimwellia blattae* in 2010:
#' - `mo_name("Escherichia blattae")` will return `"Shimwellia blattae"` (with a message about the renaming) #' - `mo_name("Escherichia blattae")` will return `"Shimwellia blattae"` (with a message about the renaming)
#' - `mo_ref("Escherichia blattae")` will return `"Burgess et al., 1973"` (with a message about the renaming) #' - `mo_ref("Escherichia blattae", keep_synonyms = TRUE)` will return `"Burgess et al., 1973"` (with a warning about the renaming)
#' - `mo_ref("Shimwellia blattae")` will return `"Priest et al., 2010"` (without a message) #' - `mo_ref("Shimwellia blattae", keep_synonyms = FALSE)` will return `"Priest et al., 2010"` (without a message)
#' #'
#' The short name - [mo_shortname()] - almost always returns the first character of the genus and the full species, like `"E. coli"`. Exceptions are abbreviations of staphylococci (such as *"CoNS"*, Coagulase-Negative Staphylococci) and beta-haemolytic streptococci (such as *"GBS"*, Group B Streptococci). Please bear in mind that e.g. *E. coli* could mean *Escherichia coli* (kingdom of Bacteria) as well as *Entamoeba coli* (kingdom of Protozoa). Returning to the full name will be done using [as.mo()] internally, giving priority to bacteria and human pathogens, i.e. `"E. coli"` will be considered *Escherichia coli*. In other words, `mo_fullname(mo_shortname("Entamoeba coli"))` returns `"Escherichia coli"`. #' The short name - [mo_shortname()] - almost always returns the first character of the genus and the full species, like `"E. coli"`. Exceptions are abbreviations of staphylococci (such as *"CoNS"*, Coagulase-Negative Staphylococci) and beta-haemolytic streptococci (such as *"GBS"*, Group B Streptococci). Please bear in mind that e.g. *E. coli* could mean *Escherichia coli* (kingdom of Bacteria) as well as *Entamoeba coli* (kingdom of Protozoa). Returning to the full name will be done using [as.mo()] internally, giving priority to bacteria and human pathogens, i.e. `"E. coli"` will be considered *Escherichia coli*. In other words, `mo_fullname(mo_shortname("Entamoeba coli"))` returns `"Escherichia coli"`.
#' #'
@ -504,7 +504,7 @@ mo_is_intrinsic_resistant <- function(x, ab, language = get_AMR_locale(), keep_s
stop_("length of `x` and `ab` must be equal, or one of them must be of length 1.") stop_("length of `x` and `ab` must be equal, or one of them must be of length 1.")
} }
# show used version number once per session (pkg_env will reload every session) # show used version number once per session (AMR_env will reload every session)
if (message_not_thrown_before("mo_is_intrinsic_resistant", "version.mo", entire_session = TRUE)) { if (message_not_thrown_before("mo_is_intrinsic_resistant", "version.mo", entire_session = TRUE)) {
message_( message_(
"Determining intrinsic resistance based on ", "Determining intrinsic resistance based on ",

View File

@ -134,7 +134,7 @@ set_mo_source <- function(path, destination = getOption("AMR_mo_source", "~/mo_s
stop_ifnot(interactive(), "this function can only be used in interactive mode, since it must ask for the user's permission to write a file to their file system.") stop_ifnot(interactive(), "this function can only be used in interactive mode, since it must ask for the user's permission to write a file to their file system.")
if (is.null(path) || path %in% c(FALSE, "")) { if (is.null(path) || path %in% c(FALSE, "")) {
pkg_env$mo_source <- NULL AMR_env$mo_source <- NULL
if (file.exists(mo_source_destination)) { if (file.exists(mo_source_destination)) {
unlink(mo_source_destination) unlink(mo_source_destination)
message_("Removed mo_source file '", font_bold(mo_source_destination), "'", message_("Removed mo_source file '", font_bold(mo_source_destination), "'",
@ -227,7 +227,7 @@ set_mo_source <- function(path, destination = getOption("AMR_mo_source", "~/mo_s
attr(df, "mo_source_destination") <- mo_source_destination attr(df, "mo_source_destination") <- mo_source_destination
attr(df, "mo_source_timestamp") <- file.mtime(path) attr(df, "mo_source_timestamp") <- file.mtime(path)
saveRDS(df, mo_source_destination) saveRDS(df, mo_source_destination)
pkg_env$mo_source <- df AMR_env$mo_source <- df
message_( message_(
action, " mo_source file '", font_bold(mo_source_destination), action, " mo_source file '", font_bold(mo_source_destination),
"' (", formatted_filesize(mo_source_destination), "' (", formatted_filesize(mo_source_destination),
@ -247,24 +247,24 @@ get_mo_source <- function(destination = getOption("AMR_mo_source", "~/mo_source.
} }
return(NULL) return(NULL)
} }
if (is.null(pkg_env$mo_source)) { if (is.null(AMR_env$mo_source)) {
pkg_env$mo_source <- readRDS(path.expand(destination)) AMR_env$mo_source <- readRDS(path.expand(destination))
} }
old_time <- attributes(pkg_env$mo_source)$mo_source_timestamp old_time <- attributes(AMR_env$mo_source)$mo_source_timestamp
new_time <- file.mtime(attributes(pkg_env$mo_source)$mo_source_location) new_time <- file.mtime(attributes(AMR_env$mo_source)$mo_source_location)
if (interactive() && !identical(old_time, new_time)) { if (interactive() && !identical(old_time, new_time)) {
# source file was updated, also update reference # source file was updated, also update reference
set_mo_source(attributes(pkg_env$mo_source)$mo_source_location) set_mo_source(attributes(AMR_env$mo_source)$mo_source_location)
} }
pkg_env$mo_source AMR_env$mo_source
} }
check_validity_mo_source <- function(x, refer_to_name = "`reference_df`", stop_on_error = TRUE) { check_validity_mo_source <- function(x, refer_to_name = "`reference_df`", stop_on_error = TRUE) {
if (paste(deparse(substitute(x)), collapse = "") == "get_mo_source()") { if (paste(deparse(substitute(x)), collapse = "") == "get_mo_source()") {
return(TRUE) return(TRUE)
} }
if (is.null(pkg_env$mo_source) && (identical(x, get_mo_source()))) { if (is.null(AMR_env$mo_source) && (identical(x, get_mo_source()))) {
return(TRUE) return(TRUE)
} }
if (is.null(x)) { if (is.null(x)) {

10
R/rsi.R
View File

@ -906,8 +906,8 @@ as_rsi_method <- function(method_short,
} }
# write to verbose output # write to verbose output
pkg_env$rsi_interpretation_history <- rbind( AMR_env$rsi_interpretation_history <- rbind(
pkg_env$rsi_interpretation_history, AMR_env$rsi_interpretation_history,
data.frame( data.frame(
datetime = Sys.time(), datetime = Sys.time(),
index = i, index = i,
@ -964,7 +964,7 @@ as_rsi_method <- function(method_short,
rsi_interpretation_history <- function(clean = FALSE) { rsi_interpretation_history <- function(clean = FALSE) {
meet_criteria(clean, allow_class = "logical", has_length = 1) meet_criteria(clean, allow_class = "logical", has_length = 1)
out.bak <- pkg_env$rsi_interpretation_history out.bak <- AMR_env$rsi_interpretation_history
out <- out.bak out <- out.bak
if (NROW(out) == 0) { if (NROW(out) == 0) {
message_("No results to return. Run `as.rsi()` on MIC values or disk diffusion zones first to see a 'logbook' data set here.") message_("No results to return. Run `as.rsi()` on MIC values or disk diffusion zones first to see a 'logbook' data set here.")
@ -975,9 +975,9 @@ rsi_interpretation_history <- function(clean = FALSE) {
out$interpretation <- as.rsi(out$interpretation) out$interpretation <- as.rsi(out$interpretation)
# keep stored for next use # keep stored for next use
if (isTRUE(clean)) { if (isTRUE(clean)) {
pkg_env$rsi_interpretation_history <- pkg_env$rsi_interpretation_history[0, , drop = FALSE] AMR_env$rsi_interpretation_history <- AMR_env$rsi_interpretation_history[0, , drop = FALSE]
} else { } else {
pkg_env$rsi_interpretation_history <- out.bak AMR_env$rsi_interpretation_history <- out.bak
} }
if (pkg_is_available("tibble", also_load = FALSE)) { if (pkg_is_available("tibble", also_load = FALSE)) {

19
R/zzz.R
View File

@ -28,23 +28,24 @@
# ==================================================================== # # ==================================================================== #
# set up package environment, used by numerous AMR functions # set up package environment, used by numerous AMR functions
pkg_env <- new.env(hash = FALSE) AMR_env <- new.env(hash = FALSE)
pkg_env$mo_uncertainties <- data.frame( AMR_env$mo_uncertainties <- data.frame(
uncertainty = integer(0),
original_input = character(0), original_input = character(0),
input = character(0), input = character(0),
fullname = character(0), fullname = character(0),
mo = character(0), mo = character(0),
candidates = character(0), candidates = character(0),
minimum_matching_score = integer(0),
keep_synonyms = logical(0),
stringsAsFactors = FALSE stringsAsFactors = FALSE
) )
pkg_env$mo_renamed <- list() AMR_env$mo_renamed <- list()
pkg_env$mo_previously_coerced <- data.frame( AMR_env$mo_previously_coerced <- data.frame(
x = character(0), x = character(0),
mo = character(0), mo = character(0),
stringsAsFactors = FALSE stringsAsFactors = FALSE
) )
pkg_env$rsi_interpretation_history <- data.frame( AMR_env$rsi_interpretation_history <- data.frame(
datetime = Sys.time()[0], datetime = Sys.time()[0],
index = integer(0), index = integer(0),
ab_input = character(0), ab_input = character(0),
@ -60,7 +61,7 @@ pkg_env$rsi_interpretation_history <- data.frame(
interpretation = character(0), interpretation = character(0),
stringsAsFactors = FALSE stringsAsFactors = FALSE
) )
pkg_env$has_data.table <- pkg_is_available("data.table", also_load = FALSE) AMR_env$has_data.table <- pkg_is_available("data.table", also_load = FALSE)
# determine info icon for messages # determine info icon for messages
utf8_supported <- isTRUE(base::l10n_info()$`UTF-8`) utf8_supported <- isTRUE(base::l10n_info()$`UTF-8`)
@ -69,9 +70,9 @@ is_latex <- tryCatch(import_fn("is_latex_output", "knitr", error_on_fail = FALSE
) )
if (utf8_supported && !is_latex) { if (utf8_supported && !is_latex) {
# \u2139 is a symbol officially named 'information source' # \u2139 is a symbol officially named 'information source'
pkg_env$info_icon <- "\u2139" AMR_env$info_icon <- "\u2139"
} else { } else {
pkg_env$info_icon <- "i" AMR_env$info_icon <- "i"
} }
.onLoad <- function(lib, pkg) { .onLoad <- function(lib, pkg) {

View File

@ -109,7 +109,7 @@ read_EUCAST <- function(sheet, file, guideline_name) {
for (i in seq_len(length(x))) { for (i in seq_len(length(x))) {
y <- trimws2(unlist(strsplit(x[i], "(,|and)"))) y <- trimws2(unlist(strsplit(x[i], "(,|and)")))
y <- trimws2(gsub("[(].*[)]", "", y)) y <- trimws2(gsub("[(].*[)]", "", y))
y <- suppressWarnings(suppressMessages(as.mo(y, allow_uncertain = FALSE))) y <- suppressWarnings(suppressMessages(as.mo(y)))
if (!is.null(mo_uncertainties())) uncertainties <<- add_uncertainties(uncertainties, mo_uncertainties()) if (!is.null(mo_uncertainties())) uncertainties <<- add_uncertainties(uncertainties, mo_uncertainties())
y <- y[!is.na(y) & y != "UNKNOWN"] y <- y[!is.na(y) & y != "UNKNOWN"]
x[i] <- paste(y, collapse = "|") x[i] <- paste(y, collapse = "|")

Binary file not shown.

View File

@ -40,7 +40,7 @@ expect_equal(as.character(as.mo("Escherichia coli")), "B_ESCHR_COLI")
expect_equal(as.character(as.mo(112283007)), "B_ESCHR_COLI") expect_equal(as.character(as.mo(112283007)), "B_ESCHR_COLI")
expect_equal(as.character(as.mo("Escherichia species")), "B_ESCHR") expect_equal(as.character(as.mo("Escherichia species")), "B_ESCHR")
expect_equal(as.character(as.mo("Escherichia")), "B_ESCHR") expect_equal(as.character(as.mo("Escherichia")), "B_ESCHR")
expect_equal(as.character(as.mo("Esch spp.")), "B_ESCHR") expect_equal(as.character(as.mo("Eschr spp.")), "B_ESCHR")
expect_equal(as.character(as.mo(" B_ESCHR_COLI ")), "B_ESCHR_COLI") expect_equal(as.character(as.mo(" B_ESCHR_COLI ")), "B_ESCHR_COLI")
expect_equal(as.character(as.mo("e coli")), "B_ESCHR_COLI") # not Campylobacter expect_equal(as.character(as.mo("e coli")), "B_ESCHR_COLI") # not Campylobacter
expect_equal(as.character(as.mo("klpn")), "B_KLBSL_PNMN") expect_equal(as.character(as.mo("klpn")), "B_KLBSL_PNMN")
@ -53,9 +53,8 @@ expect_equal(as.character(as.mo("Strepto")), "B_STRPT")
expect_equal(as.character(as.mo("Streptococcus")), "B_STRPT") # not Peptostreptoccus expect_equal(as.character(as.mo("Streptococcus")), "B_STRPT") # not Peptostreptoccus
expect_equal(as.character(as.mo("Estreptococos grupo B")), "B_STRPT_GRPB") expect_equal(as.character(as.mo("Estreptococos grupo B")), "B_STRPT_GRPB")
expect_equal(as.character(as.mo("Group B Streptococci")), "B_STRPT_GRPB") expect_equal(as.character(as.mo("Group B Streptococci")), "B_STRPT_GRPB")
expect_equal(as.character(as.mo(c("mycobacterie", "mycobakterium"))), c("B_MYCBC", "B_MYCBC"))
expect_equal(as.character(as.mo(c("GAS", "GBS", "a MGS", "haemoly strep"))), c("B_STRPT_GRPA", "B_STRPT_GRPB", "B_STRPT_MILL", "B_STRPT_HAEM")) expect_equal(as.character(as.mo(c("GAS", "GBS", "haemoly strep"))), c("B_STRPT_GRPA", "B_STRPT_GRPB", "B_STRPT_HAEM"))
expect_equal(as.character(as.mo("S. pyo")), "B_STRPT_PYGN") # not Actinomyces pyogenes expect_equal(as.character(as.mo("S. pyo")), "B_STRPT_PYGN") # not Actinomyces pyogenes
@ -90,14 +89,13 @@ expect_identical(
"staaur", "staaur",
"S. aureus", "S. aureus",
"S aureus", "S aureus",
"Sthafilokkockus aureeuzz", "Sthafilokkockus aureus",
"Staphylococcus aureus", "Staphylococcus aureus",
"MRSA", "MRSA",
"VISA", "VISA"
"meth.-resis. S. aureus (MRSA)" ), minimum_matching_score = 0)
))
)), )),
rep("B_STPHY_AURS", 10) rep("B_STPHY_AURS", 9)
) )
expect_identical( expect_identical(
as.character( as.character(
@ -148,8 +146,8 @@ expect_identical(as.character(as.mo("STCPYO", Lancefield = TRUE)), "B_STRPT_GRPA
expect_identical(as.character(as.mo("S. agalactiae", Lancefield = FALSE)), "B_STRPT_AGLC") expect_identical(as.character(as.mo("S. agalactiae", Lancefield = FALSE)), "B_STRPT_AGLC")
expect_identical(as.character(as.mo("S. agalactiae", Lancefield = TRUE)), "B_STRPT_GRPB") # group B expect_identical(as.character(as.mo("S. agalactiae", Lancefield = TRUE)), "B_STRPT_GRPB") # group B
expect_identical(as.character(suppressWarnings(as.mo("estreptococos grupo B"))), "B_STRPT_GRPB") expect_identical(as.character(suppressWarnings(as.mo("estreptococos grupo B"))), "B_STRPT_GRPB")
expect_identical(as.character(as.mo("S. equisimilis", Lancefield = FALSE)), "B_STRPT_DYSG_EQSM") expect_identical(as.character(as.mo("S. equi", Lancefield = FALSE)), "B_STRPT_EQUI")
expect_identical(as.character(as.mo("S. equisimilis", Lancefield = TRUE)), "B_STRPT_GRPC") # group C expect_identical(as.character(as.mo("S. equi", Lancefield = TRUE)), "B_STRPT_GRPC") # group C
# Enterococci must only be influenced if Lancefield = "all" # Enterococci must only be influenced if Lancefield = "all"
expect_identical(as.character(as.mo("E. faecium", Lancefield = FALSE)), "B_ENTRC_FACM") expect_identical(as.character(as.mo("E. faecium", Lancefield = FALSE)), "B_ENTRC_FACM")
expect_identical(as.character(as.mo("E. faecium", Lancefield = TRUE)), "B_ENTRC_FACM") expect_identical(as.character(as.mo("E. faecium", Lancefield = TRUE)), "B_ENTRC_FACM")
@ -213,19 +211,17 @@ expect_equal(
# check empty values # check empty values
expect_equal( expect_equal(
as.character(suppressWarnings(as.mo(""))), as.character(as.mo("")),
NA_character_ NA_character_
) )
# check less prevalent MOs # check less prevalent MOs
expect_equal(as.character(as.mo("Gomphosphaeria aponina delicatula")), "B_GMPHS_APNN_DLCT") expect_equal(as.character(as.mo("Actinosynnema pretiosum auranticum")), "B_ANNMA_PRTS_ARNT")
expect_equal(as.character(as.mo("Gomphosphaeria apo del")), "B_GMPHS_APNN_DLCT") expect_equal(as.character(as.mo("Actinosynnema preti aura")), "B_ANNMA_PRTS_ARNT")
expect_equal(as.character(as.mo("G apo deli")), "B_GMPHS_APNN_DLCT") expect_equal(as.character(as.mo("A pre aur")), "B_ANNMA_PRTS_ARNT")
expect_equal(as.character(as.mo("Gomphosphaeria aponina")), "B_GMPHS_APNN") expect_equal(as.character(as.mo("Actinosynnema pretiosum")), "B_ANNMA_PRTS")
expect_equal(as.character(as.mo("Gomphosphaeria species")), "B_GMPHS") expect_equal(as.character(as.mo("Actinosynnema")), "B_ANNMA")
expect_equal(as.character(as.mo("Gomphosphaeria")), "B_GMPHS") expect_equal(as.character(as.mo(" B_ANNMA_PRTS ")), "B_ANNMA_PRTS")
expect_equal(as.character(as.mo(" B_GMPHS_APNN ")), "B_GMPHS_APNN")
expect_equal(as.character(as.mo("g aponina")), "B_GMPHS_APNN")
# check old names # check old names
expect_equal(suppressMessages(as.character(as.mo("Escherichia blattae"))), "B_SHMWL_BLTT") expect_equal(suppressMessages(as.character(as.mo("Escherichia blattae"))), "B_SHMWL_BLTT")
@ -250,7 +246,7 @@ expect_error(as.mo("E. coli", reference_df = data.frame(mycol = "TestingOwnID"))
# combination of existing mo and other code # combination of existing mo and other code
expect_identical( expect_identical(
as.character(as.mo(c("B_ESCHR_COL", "ESCCOL"))), suppressWarnings(as.character(as.mo(c("B_ESCHR_COL", "ESCCOL")))),
c("B_ESCHR_COLI", "B_ESCHR_COLI") c("B_ESCHR_COLI", "B_ESCHR_COLI")
) )
@ -274,7 +270,7 @@ expect_equal(
c("B_MCRBC_PRXY", "B_STRPT_SUIS", "B_RLTLL_TRRG") c("B_MCRBC_PRXY", "B_STRPT_SUIS", "B_RLTLL_TRRG")
) )
expect_stdout(print(mo_uncertainties())) expect_stdout(print(mo_uncertainties()))
x <- as.mo("S. aur") x <- as.mo("Sta. aur")
# many hits # many hits
expect_stdout(print(mo_uncertainties())) expect_stdout(print(mo_uncertainties()))

View File

@ -4,9 +4,9 @@
\alias{as.mo} \alias{as.mo}
\alias{mo} \alias{mo}
\alias{is.mo} \alias{is.mo}
\alias{mo_failures}
\alias{mo_uncertainties} \alias{mo_uncertainties}
\alias{mo_renamed} \alias{mo_renamed}
\alias{mo_failures}
\alias{mo_reset_session} \alias{mo_reset_session}
\alias{mo_cleaning_regex} \alias{mo_cleaning_regex}
\title{Transform Input to a Microorganism Code} \title{Transform Input to a Microorganism Code}
@ -27,12 +27,12 @@ as.mo(
is.mo(x) is.mo(x)
mo_failures()
mo_uncertainties() mo_uncertainties()
mo_renamed() mo_renamed()
mo_failures()
mo_reset_session() mo_reset_session()
mo_cleaning_regex() mo_cleaning_regex()

View File

@ -3,9 +3,9 @@
\docType{data} \docType{data}
\name{microorganisms.codes} \name{microorganisms.codes}
\alias{microorganisms.codes} \alias{microorganisms.codes}
\title{Data Set with 5,508 Common Microorganism Codes} \title{Data Set with 5,411 Common Microorganism Codes}
\format{ \format{
A \link[tibble:tibble]{tibble} with 5,508 observations and 2 variables: A \link[tibble:tibble]{tibble} with 5,411 observations and 2 variables:
\itemize{ \itemize{
\item \code{code}\cr Commonly used code of a microorganism \item \code{code}\cr Commonly used code of a microorganism
\item \code{mo}\cr ID of the microorganism in the \link{microorganisms} data set \item \code{mo}\cr ID of the microorganism in the \link{microorganisms} data set

View File

@ -261,7 +261,7 @@ mo_property(
\item{keep_synonyms}{a \link{logical} to indicate if old, previously valid taxonomic names must be preserved and not be corrected to currently accepted names. The default is \code{FALSE}, which will return a note if old taxonomic names were processed. The default can be set with \code{options(AMR_keep_synonyms = TRUE)} or \code{options(AMR_keep_synonyms = FALSE)}.} \item{keep_synonyms}{a \link{logical} to indicate if old, previously valid taxonomic names must be preserved and not be corrected to currently accepted names. The default is \code{FALSE}, which will return a note if old taxonomic names were processed. The default can be set with \code{options(AMR_keep_synonyms = TRUE)} or \code{options(AMR_keep_synonyms = FALSE)}.}
\item{...}{other arguments passed on to \code{\link[=as.mo]{as.mo()}}, such as 'allow_uncertain' and 'ignore_pattern'} \item{...}{other arguments passed on to \code{\link[=as.mo]{as.mo()}}, such as 'minimum_matching_score', 'ignore_pattern', and 'remove_from_input'}
\item{ab}{any (vector of) text that can be coerced to a valid antibiotic code with \code{\link[=as.ab]{as.ab()}}} \item{ab}{any (vector of) text that can be coerced to a valid antibiotic code with \code{\link[=as.ab]{as.ab()}}}
@ -285,8 +285,8 @@ Use these functions to return a specific property of a microorganism based on th
All functions will, at default, keep old taxonomic properties. Please refer to this example, knowing that \emph{Escherichia blattae} was renamed to \emph{Shimwellia blattae} in 2010: All functions will, at default, keep old taxonomic properties. Please refer to this example, knowing that \emph{Escherichia blattae} was renamed to \emph{Shimwellia blattae} in 2010:
\itemize{ \itemize{
\item \code{mo_name("Escherichia blattae")} will return \code{"Shimwellia blattae"} (with a message about the renaming) \item \code{mo_name("Escherichia blattae")} will return \code{"Shimwellia blattae"} (with a message about the renaming)
\item \code{mo_ref("Escherichia blattae")} will return \code{"Burgess et al., 1973"} (with a message about the renaming) \item \code{mo_ref("Escherichia blattae", keep_synonyms = TRUE)} will return \code{"Burgess et al., 1973"} (with a warning about the renaming)
\item \code{mo_ref("Shimwellia blattae")} will return \code{"Priest et al., 2010"} (without a message) \item \code{mo_ref("Shimwellia blattae", keep_synonyms = FALSE)} will return \code{"Priest et al., 2010"} (without a message)
} }
The short name - \code{\link[=mo_shortname]{mo_shortname()}} - almost always returns the first character of the genus and the full species, like \code{"E. coli"}. Exceptions are abbreviations of staphylococci (such as \emph{"CoNS"}, Coagulase-Negative Staphylococci) and beta-haemolytic streptococci (such as \emph{"GBS"}, Group B Streptococci). Please bear in mind that e.g. \emph{E. coli} could mean \emph{Escherichia coli} (kingdom of Bacteria) as well as \emph{Entamoeba coli} (kingdom of Protozoa). Returning to the full name will be done using \code{\link[=as.mo]{as.mo()}} internally, giving priority to bacteria and human pathogens, i.e. \code{"E. coli"} will be considered \emph{Escherichia coli}. In other words, \code{mo_fullname(mo_shortname("Entamoeba coli"))} returns \code{"Escherichia coli"}. The short name - \code{\link[=mo_shortname]{mo_shortname()}} - almost always returns the first character of the genus and the full species, like \code{"E. coli"}. Exceptions are abbreviations of staphylococci (such as \emph{"CoNS"}, Coagulase-Negative Staphylococci) and beta-haemolytic streptococci (such as \emph{"GBS"}, Group B Streptococci). Please bear in mind that e.g. \emph{E. coli} could mean \emph{Escherichia coli} (kingdom of Bacteria) as well as \emph{Entamoeba coli} (kingdom of Protozoa). Returning to the full name will be done using \code{\link[=as.mo]{as.mo()}} internally, giving priority to bacteria and human pathogens, i.e. \code{"E. coli"} will be considered \emph{Escherichia coli}. In other words, \code{mo_fullname(mo_shortname("Entamoeba coli"))} returns \code{"Escherichia coli"}.