mirror of
https://github.com/msberends/AMR.git
synced 2025-01-15 20:41:38 +01:00
fix unit tests
This commit is contained in:
parent
082e52a0dd
commit
37f6db5ccd
29
.github/prehooks/pre-commit
vendored
29
.github/prehooks/pre-commit
vendored
@ -1,5 +1,34 @@
|
||||
#!/bin/sh
|
||||
|
||||
# ==================================================================== #
|
||||
# TITLE #
|
||||
# AMR: An R Package for Working with Antimicrobial Resistance Data #
|
||||
# #
|
||||
# SOURCE #
|
||||
# https://github.com/msberends/AMR #
|
||||
# #
|
||||
# CITE AS #
|
||||
# Berends MS, Luz CF, Friedrich AW, Sinha BNM, Albers CJ, Glasner C #
|
||||
# (2022). AMR: An R Package for Working with Antimicrobial Resistance #
|
||||
# Data. Journal of Statistical Software, 104(3), 1-31. #
|
||||
# doi:10.18637/jss.v104.i03 #
|
||||
# #
|
||||
# Developed at the University of Groningen, the Netherlands, in #
|
||||
# collaboration with non-profit organisations Certe Medical #
|
||||
# Diagnostics & Advice, and University Medical Center Groningen. #
|
||||
# #
|
||||
# This R package is free software; you can freely use and distribute #
|
||||
# it for both personal and commercial purposes under the terms of the #
|
||||
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
|
||||
# the Free Software Foundation. #
|
||||
# We created this package for both routine data analysis and academic #
|
||||
# research and it was publicly released in the hope that it will be #
|
||||
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
|
||||
# #
|
||||
# Visit our website for the full manual and a complete tutorial about #
|
||||
# how to conduct AMR data analysis: https://msberends.github.io/AMR/ #
|
||||
# ==================================================================== #
|
||||
|
||||
echo "Running pre-commit hook..."
|
||||
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
@ -1,5 +1,5 @@
|
||||
Package: AMR
|
||||
Version: 1.8.2.9026
|
||||
Version: 1.8.2.9027
|
||||
Date: 2022-10-04
|
||||
Title: Antimicrobial Resistance Data Analysis
|
||||
Description: Functions to simplify and standardise antimicrobial resistance (AMR)
|
||||
|
2
NEWS.md
2
NEWS.md
@ -1,4 +1,4 @@
|
||||
# AMR 1.8.2.9026
|
||||
# AMR 1.8.2.9027
|
||||
|
||||
This version will eventually become v2.0! We're happy to reach a new major milestone soon!
|
||||
|
||||
|
@ -409,7 +409,7 @@ word_wrap <- function(...,
|
||||
msg <- paste0(c(...), collapse = "")
|
||||
|
||||
if (isTRUE(as_note)) {
|
||||
msg <- paste0(pkg_env$info_icon, " ", gsub("^note:? ?", "", msg, ignore.case = TRUE))
|
||||
msg <- paste0(AMR_env$info_icon, " ", gsub("^note:? ?", "", msg, ignore.case = TRUE))
|
||||
}
|
||||
|
||||
if (msg %like% "\n") {
|
||||
@ -742,14 +742,14 @@ meet_criteria <- function(object,
|
||||
|
||||
# if object is missing, or another error:
|
||||
tryCatch(invisible(object),
|
||||
error = function(e) pkg_env$meet_criteria_error_txt <- e$message
|
||||
error = function(e) AMR_env$meet_criteria_error_txt <- e$message
|
||||
)
|
||||
if (!is.null(pkg_env$meet_criteria_error_txt)) {
|
||||
error_txt <- pkg_env$meet_criteria_error_txt
|
||||
pkg_env$meet_criteria_error_txt <- NULL
|
||||
if (!is.null(AMR_env$meet_criteria_error_txt)) {
|
||||
error_txt <- AMR_env$meet_criteria_error_txt
|
||||
AMR_env$meet_criteria_error_txt <- NULL
|
||||
stop(error_txt, call. = FALSE) # don't use stop_() here, our pkg may not be loaded yet
|
||||
}
|
||||
pkg_env$meet_criteria_error_txt <- NULL
|
||||
AMR_env$meet_criteria_error_txt <- NULL
|
||||
|
||||
if (is.null(object)) {
|
||||
stop_if(allow_NULL == FALSE, "argument `", obj_name, "` must not be NULL", call = call_depth)
|
||||
@ -990,9 +990,9 @@ message_not_thrown_before <- function(fn, ..., entire_session = FALSE) {
|
||||
# this is to prevent that messages/notes will be printed for every dplyr group or more than once per session
|
||||
# e.g. this would show a msg 4 times: example_isolates %>% group_by(ward) %>% filter(mo_is_gram_negative())
|
||||
salt <- gsub("[^a-zA-Z0-9|_-]", "?", substr(paste(c(...), sep = "|", collapse = "|"), 1, 512), perl = TRUE)
|
||||
not_thrown_before <- is.null(pkg_env[[paste0("thrown_msg.", fn, ".", salt)]]) ||
|
||||
not_thrown_before <- is.null(AMR_env[[paste0("thrown_msg.", fn, ".", salt)]]) ||
|
||||
!identical(
|
||||
pkg_env[[paste0("thrown_msg.", fn, ".", salt)]],
|
||||
AMR_env[[paste0("thrown_msg.", fn, ".", salt)]],
|
||||
unique_call_id(
|
||||
entire_session = entire_session,
|
||||
match_fn = fn
|
||||
@ -1003,7 +1003,7 @@ message_not_thrown_before <- function(fn, ..., entire_session = FALSE) {
|
||||
assign(
|
||||
x = paste0("thrown_msg.", fn, ".", salt),
|
||||
value = unique_call_id(entire_session = entire_session, match_fn = fn),
|
||||
envir = pkg_env
|
||||
envir = AMR_env
|
||||
)
|
||||
}
|
||||
not_thrown_before
|
||||
@ -1354,11 +1354,11 @@ percentage <- function(x, digits = NULL, ...) {
|
||||
}
|
||||
|
||||
time_start_tracking <- function() {
|
||||
pkg_env$time_start <- round(as.double(Sys.time()) * 1000)
|
||||
AMR_env$time_start <- round(as.double(Sys.time()) * 1000)
|
||||
}
|
||||
|
||||
time_track <- function(name = NULL) {
|
||||
paste("(until now:", trimws(round(as.double(Sys.time()) * 1000) - pkg_env$time_start), "ms)")
|
||||
paste("(until now:", trimws(round(as.double(Sys.time()) * 1000) - AMR_env$time_start), "ms)")
|
||||
}
|
||||
|
||||
trimws2 <- function(..., whitespace = "[\u0009\u000A\u000B\u000C\u000D\u0020\u0085\u00A0\u1680\u180E\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u200B\u200C\u200D\u2028\u2029\u202F\u205F\u2060\u3000\uFEFF]") {
|
||||
@ -1370,19 +1370,19 @@ trimws2 <- function(..., whitespace = "[\u0009\u000A\u000B\u000C\u000D\u0020\u00
|
||||
# Faster data.table implementations ----
|
||||
|
||||
match <- function(x, ...) {
|
||||
if (isTRUE(pkg_env$has_data.table) && is.character(x)) {
|
||||
if (isTRUE(AMR_env$has_data.table) && is.character(x)) {
|
||||
# data.table::chmatch() is 35% faster than base::match() for character
|
||||
getExportedValue(name = "chmatch", ns = asNamespace("data.table"))(x, ...)
|
||||
} else {
|
||||
base::match(x, ...)
|
||||
}
|
||||
}
|
||||
`%in%` <- function(x, ...) {
|
||||
if (isTRUE(pkg_env$has_data.table) && is.character(x)) {
|
||||
# data.table::`%chin%`() is 20% faster than base::`%in%`() for character
|
||||
getExportedValue(name = "%chin%", ns = asNamespace("data.table"))(x, ...)
|
||||
`%in%` <- function(x, table) {
|
||||
if (isTRUE(AMR_env$has_data.table) && is.character(x) && is.character(table)) {
|
||||
# data.table::`%chin%`() is 20-50% faster than base::`%in%`() for character
|
||||
getExportedValue(name = "%chin%", ns = asNamespace("data.table"))(x, table)
|
||||
} else {
|
||||
base::`%in%`(x, ...)
|
||||
base::`%in%`(x, table)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1178,7 +1178,7 @@ eucast_dosage <- function(ab, administration = "iv", version_breakpoints = 11.0)
|
||||
meet_criteria(administration, allow_class = "character", is_in = dosage$administration[!is.na(dosage$administration)], has_length = 1)
|
||||
meet_criteria(version_breakpoints, allow_class = c("numeric", "integer"), has_length = 1, is_in = as.double(names(EUCAST_VERSION_BREAKPOINTS)))
|
||||
|
||||
# show used version_breakpoints number once per session (pkg_env will reload every session)
|
||||
# show used version_breakpoints number once per session (AMR_env will reload every session)
|
||||
if (message_not_thrown_before("eucast_dosage", "v", gsub("[^0-9]", "", version_breakpoints), entire_session = TRUE)) {
|
||||
message_(
|
||||
"Dosages for antimicrobial drugs, as meant for ",
|
||||
|
@ -112,17 +112,17 @@ get_column_abx <- function(x,
|
||||
entire_session = FALSE,
|
||||
match_fn = fn
|
||||
),
|
||||
pkg_env$get_column_abx.call
|
||||
AMR_env$get_column_abx.call
|
||||
)) {
|
||||
# so within the same call, within the same environment, we got here again.
|
||||
# but we could've come from another function within the same call, so now only check the columns that changed
|
||||
|
||||
# first remove the columns that are not existing anymore
|
||||
previous <- pkg_env$get_column_abx.out
|
||||
previous <- AMR_env$get_column_abx.out
|
||||
current <- previous[previous %in% colnames(x)]
|
||||
|
||||
# then compare columns in current call with columns in original call
|
||||
new_cols <- colnames(x)[!colnames(x) %in% pkg_env$get_column_abx.checked_cols]
|
||||
new_cols <- colnames(x)[!colnames(x) %in% AMR_env$get_column_abx.checked_cols]
|
||||
if (length(new_cols) > 0) {
|
||||
# these columns did not exist in the last call, so add them
|
||||
new_cols_rsi <- get_column_abx(x[, new_cols, drop = FALSE], reuse_previous_result = FALSE, info = FALSE, sort = FALSE)
|
||||
@ -132,11 +132,11 @@ get_column_abx <- function(x,
|
||||
}
|
||||
|
||||
# update pkg environment to improve speed on next run
|
||||
pkg_env$get_column_abx.out <- current
|
||||
pkg_env$get_column_abx.checked_cols <- colnames(x)
|
||||
AMR_env$get_column_abx.out <- current
|
||||
AMR_env$get_column_abx.checked_cols <- colnames(x)
|
||||
|
||||
# and return right values
|
||||
return(pkg_env$get_column_abx.out)
|
||||
return(AMR_env$get_column_abx.out)
|
||||
}
|
||||
|
||||
meet_criteria(x, allow_class = "data.frame")
|
||||
@ -243,9 +243,9 @@ get_column_abx <- function(x,
|
||||
if (info == TRUE && all_okay == TRUE) {
|
||||
message_("No columns found.")
|
||||
}
|
||||
pkg_env$get_column_abx.call <- unique_call_id(entire_session = FALSE, match_fn = fn)
|
||||
pkg_env$get_column_abx.checked_cols <- colnames(x.bak)
|
||||
pkg_env$get_column_abx.out <- out
|
||||
AMR_env$get_column_abx.call <- unique_call_id(entire_session = FALSE, match_fn = fn)
|
||||
AMR_env$get_column_abx.checked_cols <- colnames(x.bak)
|
||||
AMR_env$get_column_abx.out <- out
|
||||
return(out)
|
||||
}
|
||||
|
||||
@ -320,9 +320,9 @@ get_column_abx <- function(x,
|
||||
}
|
||||
}
|
||||
|
||||
pkg_env$get_column_abx.call <- unique_call_id(entire_session = FALSE, match_fn = fn)
|
||||
pkg_env$get_column_abx.checked_cols <- colnames(x.bak)
|
||||
pkg_env$get_column_abx.out <- out
|
||||
AMR_env$get_column_abx.call <- unique_call_id(entire_session = FALSE, match_fn = fn)
|
||||
AMR_env$get_column_abx.checked_cols <- colnames(x.bak)
|
||||
AMR_env$get_column_abx.out <- out
|
||||
out
|
||||
}
|
||||
|
||||
|
4
R/mdro.R
4
R/mdro.R
@ -1955,14 +1955,14 @@ run_custom_mdro_guideline <- function(df, guideline, info) {
|
||||
for (i in seq_len(n_dots)) {
|
||||
qry <- tryCatch(eval(parse(text = guideline[[i]]$query), envir = df, enclos = parent.frame()),
|
||||
error = function(e) {
|
||||
pkg_env$err_msg <- e$message
|
||||
AMR_env$err_msg <- e$message
|
||||
return("error")
|
||||
}
|
||||
)
|
||||
if (identical(qry, "error")) {
|
||||
warning_("in `custom_mdro_guideline()`: rule ", i,
|
||||
" (`", as.character(guideline[[i]]$query), "`) was ignored because of this error message: ",
|
||||
pkg_env$err_msg,
|
||||
AMR_env$err_msg,
|
||||
call = FALSE,
|
||||
add_fn = font_red
|
||||
)
|
||||
|
318
R/mo.R
318
R/mo.R
@ -179,12 +179,14 @@ as.mo <- function(x,
|
||||
x <- replace_old_mo_codes(x, property = "mo")
|
||||
# ignore cases that match the ignore pattern
|
||||
x <- replace_ignore_pattern(x, ignore_pattern)
|
||||
|
||||
|
||||
x_lower <- tolower(x)
|
||||
|
||||
# WHONET: xxx = no growth
|
||||
x[tolower(x) %in% c("", "xxx", "na", "nan")] <- NA_character_
|
||||
x[x_lower %in% c("", "xxx", "na", "nan")] <- NA_character_
|
||||
|
||||
out <- rep(NA_character_, length(x))
|
||||
|
||||
|
||||
# below we use base R's match(), known for powering '%in%', and incredibly fast!
|
||||
|
||||
# From reference_df ----
|
||||
@ -195,9 +197,11 @@ as.mo <- function(x,
|
||||
# From MO code ----
|
||||
out[is.na(out) & x %in% MO_lookup$mo] <- x[is.na(out) & x %in% MO_lookup$mo]
|
||||
# From full name ----
|
||||
out[is.na(out) & x %in% MO_lookup$fullname] <- MO_lookup$mo[match(x[is.na(out) & x %in% MO_lookup$fullname], MO_lookup$fullname)]
|
||||
out[is.na(out) & x_lower %in% MO_lookup$fullname_lower] <- MO_lookup$mo[match(x_lower[is.na(out) & x_lower %in% MO_lookup$fullname_lower], MO_lookup$fullname_lower)]
|
||||
# one exception: "Fungi" matches the kingdom, but instead it should return the 'unknown' code for fungi
|
||||
out[out == "F_[KNG]_FUNGI"] <- "F_FUNGUS"
|
||||
# From known codes ----
|
||||
out[is.na(out) & x %in% AMR::microorganisms.codes$code] <- AMR::microorganisms.codes$mo[match(x[is.na(out) & x %in% AMR::microorganisms.codes$code], AMR::microorganisms.codes$code)]
|
||||
out[is.na(out) & toupper(x) %in% AMR::microorganisms.codes$code] <- AMR::microorganisms.codes$mo[match(toupper(x)[is.na(out) & toupper(x) %in% AMR::microorganisms.codes$code], AMR::microorganisms.codes$code)]
|
||||
# From SNOMED ----
|
||||
if (any(is.na(out) & !is.na(x)) && any(is.na(out) & x %in% unlist(microorganisms$snomed), na.rm = TRUE)) {
|
||||
# found this extremely fast gem here: https://stackoverflow.com/a/11002456/4575331
|
||||
@ -208,7 +212,7 @@ as.mo <- function(x,
|
||||
out[is.na(out)] <- convert_colloquial_input(x[is.na(out)])
|
||||
# From previous hits in this session ----
|
||||
old <- out
|
||||
out[is.na(out) & paste(x, minimum_matching_score) %in% pkg_env$mo_previously_coerced$x] <- pkg_env$mo_previously_coerced$mo[match(paste(x, minimum_matching_score)[is.na(out) & paste(x, minimum_matching_score) %in% pkg_env$mo_previously_coerced$x], pkg_env$mo_previously_coerced$x)]
|
||||
out[is.na(out) & paste(x, minimum_matching_score) %in% AMR_env$mo_previously_coerced$x] <- AMR_env$mo_previously_coerced$mo[match(paste(x, minimum_matching_score)[is.na(out) & paste(x, minimum_matching_score) %in% AMR_env$mo_previously_coerced$x], AMR_env$mo_previously_coerced$x)]
|
||||
new <- out
|
||||
if (isTRUE(info) && message_not_thrown_before("as.mo", old, new, entire_session = TRUE) && any(is.na(old) & !is.na(new), na.rm = TRUE)) {
|
||||
message_(
|
||||
@ -220,8 +224,8 @@ as.mo <- function(x,
|
||||
# For all other input ----
|
||||
if (any(is.na(out) & !is.na(x))) {
|
||||
# reset uncertainties
|
||||
pkg_env$mo_uncertainties <- pkg_env$mo_uncertainties[0, ]
|
||||
pkg_env$mo_failures <- NULL
|
||||
AMR_env$mo_uncertainties <- AMR_env$mo_uncertainties[0, ]
|
||||
AMR_env$mo_failures <- NULL
|
||||
|
||||
# Laboratory systems: remove (translated) entries like "no growth", "not E. coli", etc.
|
||||
x[trimws2(x) %like% translate_into_language("no .*growth", language = language)] <- NA_character_
|
||||
@ -238,8 +242,6 @@ as.mo <- function(x,
|
||||
x_coerced <- vapply(FUN.VALUE = character(1), x_unique, function(x_search) {
|
||||
progress$tick()
|
||||
|
||||
print(x_search)
|
||||
|
||||
# some required cleaning steps
|
||||
x_out <- trimws2(x_search)
|
||||
# this applies the `remove_from_input` argument, which defaults to mo_cleaning_regex()
|
||||
@ -248,7 +250,11 @@ as.mo <- function(x,
|
||||
x_search_cleaned <- x_out
|
||||
x_out <- tolower(x_out)
|
||||
|
||||
print(x_out)
|
||||
# input must not be too short
|
||||
if (nchar(x_out) < 3) {
|
||||
return("UNKNOWN")
|
||||
}
|
||||
|
||||
|
||||
# take out the parts, split by space
|
||||
x_parts <- strsplit(gsub("-", " ", x_out, fixed = TRUE), " ", fixed = TRUE)[[1]]
|
||||
@ -282,7 +288,7 @@ as.mo <- function(x,
|
||||
} else {
|
||||
mo_to_search <- MO_lookup$fullname[filtr]
|
||||
}
|
||||
pkg_env$mo_to_search <- mo_to_search
|
||||
AMR_env$mo_to_search <- mo_to_search
|
||||
# determine the matching score on the original search value
|
||||
m <- mo_matching_score(x = x_search_cleaned, n = mo_to_search)
|
||||
if (is.null(minimum_matching_score)) {
|
||||
@ -302,20 +308,21 @@ as.mo <- function(x,
|
||||
result_mo <- NA_character_
|
||||
} else {
|
||||
result_mo <- MO_lookup$mo[match(top_hits[1], MO_lookup$fullname)]
|
||||
pkg_env$mo_uncertainties <- rbind(pkg_env$mo_uncertainties,
|
||||
AMR_env$mo_uncertainties <- rbind(AMR_env$mo_uncertainties,
|
||||
data.frame(
|
||||
minimum_matching_score = ifelse(is.null(minimum_matching_score), "NULL", minimum_matching_score),
|
||||
original_input = x_search,
|
||||
input = x_search_cleaned,
|
||||
fullname = top_hits[1],
|
||||
mo = result_mo,
|
||||
candidates = ifelse(length(top_hits) > 1, paste(top_hits[2:min(26, length(top_hits))], collapse = ", "), ""),
|
||||
minimum_matching_score = ifelse(is.null(minimum_matching_score), "NULL", minimum_matching_score),
|
||||
keep_synonyms = keep_synonyms,
|
||||
stringsAsFactors = FALSE
|
||||
),
|
||||
stringsAsFactors = FALSE
|
||||
)
|
||||
# save to package env to save time for next time
|
||||
pkg_env$mo_previously_coerced <- unique(rbind(pkg_env$mo_previously_coerced,
|
||||
AMR_env$mo_previously_coerced <- unique(rbind(AMR_env$mo_previously_coerced,
|
||||
data.frame(
|
||||
x = paste(x_search, minimum_matching_score),
|
||||
mo = result_mo,
|
||||
@ -334,21 +341,21 @@ as.mo <- function(x,
|
||||
out[is.na(out)] <- x_coerced[match(x[is.na(out)], x_unique)]
|
||||
|
||||
# Throw note about uncertainties ----
|
||||
if (isTRUE(info) && NROW(pkg_env$mo_uncertainties) > 0) {
|
||||
if (message_not_thrown_before("as.mo", "uncertainties", pkg_env$mo_uncertainties$original_input)) {
|
||||
if (isTRUE(info) && NROW(AMR_env$mo_uncertainties) > 0) {
|
||||
if (message_not_thrown_before("as.mo", "uncertainties", AMR_env$mo_uncertainties$original_input)) {
|
||||
plural <- c("", "this")
|
||||
if (length(pkg_env$mo_uncertainties$original_input) > 1) {
|
||||
if (length(AMR_env$mo_uncertainties$original_input) > 1) {
|
||||
plural <- c("s", "these uncertainties")
|
||||
}
|
||||
if (length(pkg_env$mo_uncertainties$original_input) <= 3) {
|
||||
if (length(AMR_env$mo_uncertainties$original_input) <= 3) {
|
||||
examples <- vector_and(paste0(
|
||||
'"', pkg_env$mo_uncertainties$original_input,
|
||||
'" (assumed ', font_italic(pkg_env$mo_uncertainties$fullname, collapse = NULL), ")"
|
||||
'"', AMR_env$mo_uncertainties$original_input,
|
||||
'" (assumed ', font_italic(AMR_env$mo_uncertainties$fullname, collapse = NULL), ")"
|
||||
),
|
||||
quotes = FALSE
|
||||
)
|
||||
} else {
|
||||
examples <- paste0(nr2char(length(pkg_env$mo_uncertainties$original_input)), " microorganism", plural[1])
|
||||
examples <- paste0(nr2char(length(AMR_env$mo_uncertainties$original_input)), " microorganism", plural[1])
|
||||
}
|
||||
msg <- paste0(
|
||||
"Microorganism translation was uncertain for ", examples,
|
||||
@ -364,18 +371,18 @@ as.mo <- function(x,
|
||||
gbif_matches[!gbif_matches %in% AMR::microorganisms$gbif] <- NA
|
||||
lpsn_matches <- AMR::microorganisms$lpsn_renamed_to[match(out, AMR::microorganisms$mo)]
|
||||
lpsn_matches[!lpsn_matches %in% AMR::microorganisms$lpsn] <- NA
|
||||
pkg_env$mo_renamed <- list(old = out[!is.na(gbif_matches) | !is.na(lpsn_matches)],
|
||||
AMR_env$mo_renamed <- list(old = out[!is.na(gbif_matches) | !is.na(lpsn_matches)],
|
||||
gbif_matches = gbif_matches[!is.na(gbif_matches) | !is.na(lpsn_matches)],
|
||||
lpsn_matches = lpsn_matches[!is.na(gbif_matches) | !is.na(lpsn_matches)])
|
||||
if (isFALSE(keep_synonyms)) {
|
||||
out[which(!is.na(gbif_matches))] <- AMR::microorganisms$mo[match(gbif_matches[which(!is.na(gbif_matches))], AMR::microorganisms$gbif)]
|
||||
out[which(!is.na(lpsn_matches))] <- AMR::microorganisms$mo[match(lpsn_matches[which(!is.na(lpsn_matches))], AMR::microorganisms$lpsn)]
|
||||
if (isTRUE(info) && length(pkg_env$mo_renamed$old) > 0) {
|
||||
if (isTRUE(info) && length(AMR_env$mo_renamed$old) > 0) {
|
||||
print(mo_renamed(), extra_txt = " (use `keep_synonyms = TRUE` to leave uncorrected)")
|
||||
}
|
||||
} else if (is.null(getOption("AMR_keep_synonyms")) && length(pkg_env$mo_renamed$old) > 0 && message_not_thrown_before("as.mo", "keep_synonyms_warning", entire_session = TRUE)) {
|
||||
} else if (is.null(getOption("AMR_keep_synonyms")) && length(AMR_env$mo_renamed$old) > 0 && message_not_thrown_before("as.mo", "keep_synonyms_warning", entire_session = TRUE)) {
|
||||
# keep synonyms is TRUE, so check if any do have synonyms
|
||||
warning_("Function `as.mo()` returned some old taxonomic names. Use `as.mo(..., keep_synonyms = FALSE)` to clean the input to currently accepted taxonomic names, or set the R option `AMR_keep_synonyms` to `FALSE`. This warning will be shown once per session.")
|
||||
warning_("Function `as.mo()` returned ", nr2char(length(unique(AMR_env$mo_renamed$old))), " old taxonomic name", ifelse(length(unique(AMR_env$mo_renamed$old)) > 1, "s", ""), ". Use `as.mo(..., keep_synonyms = FALSE)` to clean the input to currently accepted taxonomic names, or set the R option `AMR_keep_synonyms` to `FALSE`. This warning will be shown once per session.")
|
||||
}
|
||||
|
||||
# Apply Becker ----
|
||||
@ -432,7 +439,10 @@ as.mo <- function(x,
|
||||
|
||||
# All unknowns ----
|
||||
out[is.na(out) & !is.na(x)] <- "UNKNOWN"
|
||||
pkg_env$mo_failures <- unique(x[out == "UNKNOWN" & x != "UNKNOWN" & !is.na(x)])
|
||||
AMR_env$mo_failures <- unique(x[out == "UNKNOWN" & x != "UNKNOWN" & !is.na(x)])
|
||||
if (length(AMR_env$mo_failures) > 0) {
|
||||
warning_("The following input could not be coerced and was returned as \"UNKNOWN\": ", vector_and(AMR_env$mo_failures, quotes = TRUE), ".\nYou can retrieve this list with `mo_failures()`.")
|
||||
}
|
||||
|
||||
# Return class ----
|
||||
set_clean_class(out,
|
||||
@ -440,12 +450,73 @@ as.mo <- function(x,
|
||||
)
|
||||
}
|
||||
|
||||
# OTHER DOCUMENTED FUNCTIONS ----------------------------------------------
|
||||
|
||||
#' @rdname as.mo
|
||||
#' @export
|
||||
is.mo <- function(x) {
|
||||
inherits(x, "mo")
|
||||
}
|
||||
|
||||
#' @rdname as.mo
|
||||
#' @export
|
||||
mo_uncertainties <- function() {
|
||||
set_clean_class(AMR_env$mo_uncertainties, new_class = c("mo_uncertainties", "data.frame"))
|
||||
}
|
||||
|
||||
#' @rdname as.mo
|
||||
#' @export
|
||||
mo_renamed <- function() {
|
||||
x <- AMR_env$mo_renamed
|
||||
|
||||
x$new <- synonym_mo_to_accepted_mo(x$old)
|
||||
mo_old <- AMR::microorganisms$fullname[match(x$old, AMR::microorganisms$mo)]
|
||||
mo_new <- AMR::microorganisms$fullname[match(x$new, AMR::microorganisms$mo)]
|
||||
ref_old <- AMR::microorganisms$ref[match(x$old, AMR::microorganisms$mo)]
|
||||
ref_new <- AMR::microorganisms$ref[match(x$new, AMR::microorganisms$mo)]
|
||||
|
||||
df_renamed <- data.frame(old = mo_old,
|
||||
new = mo_new,
|
||||
ref_old = ref_old,
|
||||
ref_new = ref_new,
|
||||
stringsAsFactors = FALSE)
|
||||
df_renamed <- unique(df_renamed)
|
||||
df_renamed <- df_renamed[order(df_renamed$old), , drop = FALSE]
|
||||
set_clean_class(df_renamed, new_class = c("mo_renamed", "data.frame"))
|
||||
}
|
||||
|
||||
#' @rdname as.mo
|
||||
#' @export
|
||||
mo_failures <- function() {
|
||||
AMR_env$mo_failures
|
||||
}
|
||||
|
||||
#' @rdname as.mo
|
||||
#' @export
|
||||
mo_reset_session <- function() {
|
||||
if (NROW(AMR_env$mo_previously_coerced) > 0) {
|
||||
message_("Reset ", nr2char(NROW(AMR_env$mo_previously_coerced)), " previously matched input value", ifelse(NROW(AMR_env$mo_previously_coerced) > 1, "s", ""), ".")
|
||||
AMR_env$mo_previously_coerced <- AMR_env$mo_previously_coerced[0, , drop = FALSE]
|
||||
AMR_env$mo_uncertainties <- AMR_env$mo_uncertainties[0, , drop = FALSE]
|
||||
} else {
|
||||
message_("No previously matched input values to reset.")
|
||||
}
|
||||
}
|
||||
|
||||
#' @rdname as.mo
|
||||
#' @export
|
||||
mo_cleaning_regex <- function() {
|
||||
paste0(
|
||||
"(",
|
||||
"[^A-Za-z- \\(\\)\\[\\]{}]+",
|
||||
"|",
|
||||
"([({]|\\[).+([})]|\\])",
|
||||
"|",
|
||||
"(^| )(e?spp|e?ssp|e?ss|e?sp|e?subsp|sube?species|biovar|biotype|serovar|e?species)[.]*( |$))")
|
||||
}
|
||||
|
||||
# UNDOCUMENTED METHODS ----------------------------------------------------
|
||||
|
||||
# will be exported using s3_register() in R/zzz.R
|
||||
pillar_shaft.mo <- function(x, ...) {
|
||||
out <- format(x)
|
||||
@ -675,18 +746,6 @@ rep.mo <- function(x, ...) {
|
||||
y
|
||||
}
|
||||
|
||||
#' @rdname as.mo
|
||||
#' @export
|
||||
mo_failures <- function() {
|
||||
pkg_env$mo_failures
|
||||
}
|
||||
|
||||
#' @rdname as.mo
|
||||
#' @export
|
||||
mo_uncertainties <- function() {
|
||||
set_clean_class(pkg_env$mo_uncertainties, new_class = c("mo_uncertainties", "data.frame"))
|
||||
}
|
||||
|
||||
#' @method print mo_uncertainties
|
||||
#' @export
|
||||
#' @noRd
|
||||
@ -768,7 +827,13 @@ print.mo_uncertainties <- function(x, ...) {
|
||||
),
|
||||
collapse = "\n"
|
||||
),
|
||||
# Add "Based on {input}" text if it differs from the original input
|
||||
ifelse(x[i, ]$original_input != x[i, ]$input, paste0(strrep(" ", nchar(x[i, ]$original_input) + 6), "Based on input \"", x[i, ]$input, "\""), ""),
|
||||
# Add note if result was coerced to accepted taxonomic name
|
||||
ifelse(x[i, ]$keep_synonyms == FALSE & x[i, ]$mo %in% AMR::microorganisms$mo[which(AMR::microorganisms$status == "synonym")],
|
||||
paste0(strrep(" ", nchar(x[i, ]$original_input) + 6),
|
||||
font_red(paste0("This old taxonomic name was converted to ", font_italic(AMR::microorganisms$fullname[match(synonym_mo_to_accepted_mo(x[i, ]$mo), AMR::microorganisms$mo)], collapse = NULL), " (", synonym_mo_to_accepted_mo(x[i, ]$mo), ")."), collapse = NULL)),
|
||||
""),
|
||||
candidates,
|
||||
sep = "\n"
|
||||
)
|
||||
@ -777,30 +842,6 @@ print.mo_uncertainties <- function(x, ...) {
|
||||
cat(txt)
|
||||
}
|
||||
|
||||
|
||||
#' @rdname as.mo
|
||||
#' @export
|
||||
mo_renamed <- function() {
|
||||
x <- pkg_env$mo_renamed
|
||||
|
||||
x$new <- ifelse(is.na(x$lpsn_matches),
|
||||
AMR::microorganisms$mo[match(x$gbif_matches, AMR::microorganisms$gbif)],
|
||||
AMR::microorganisms$mo[match(x$lpsn_matches, AMR::microorganisms$lpsn)])
|
||||
mo_old <- AMR::microorganisms$fullname[match(x$old, AMR::microorganisms$mo)]
|
||||
mo_new <- AMR::microorganisms$fullname[match(x$new, AMR::microorganisms$mo)]
|
||||
ref_old <- AMR::microorganisms$ref[match(x$old, AMR::microorganisms$mo)]
|
||||
ref_new <- AMR::microorganisms$ref[match(x$new, AMR::microorganisms$mo)]
|
||||
|
||||
df_renamed <- data.frame(old = mo_old,
|
||||
new = mo_new,
|
||||
ref_old = ref_old,
|
||||
ref_new = ref_new,
|
||||
stringsAsFactors = FALSE)
|
||||
df_renamed <- unique(df_renamed)
|
||||
df_renamed <- df_renamed[order(df_renamed$old), , drop = FALSE]
|
||||
set_clean_class(df_renamed, new_class = c("mo_renamed", "data.frame"))
|
||||
}
|
||||
|
||||
#' @method print mo_renamed
|
||||
#' @export
|
||||
#' @noRd
|
||||
@ -812,6 +853,8 @@ print.mo_renamed <- function(x, extra_txt = "", n = 25, ...) {
|
||||
|
||||
x$ref_old[!is.na(x$ref_old)] <- paste0(" (", gsub("et al.", font_italic("et al."), x$ref_old[!is.na(x$ref_old)], fixed = TRUE), ")")
|
||||
x$ref_new[!is.na(x$ref_new)] <- paste0(" (", gsub("et al.", font_italic("et al."), x$ref_new[!is.na(x$ref_new)], fixed = TRUE), ")")
|
||||
x$ref_old[is.na(x$ref_old)] <- " (author unknown)"
|
||||
x$ref_new[is.na(x$ref_new)] <- " (author unknown)"
|
||||
|
||||
rows <- seq_len(min(NROW(x), n))
|
||||
|
||||
@ -825,28 +868,57 @@ print.mo_renamed <- function(x, extra_txt = "", n = 25, ...) {
|
||||
)
|
||||
}
|
||||
|
||||
#' @rdname as.mo
|
||||
#' @export
|
||||
mo_reset_session <- function() {
|
||||
if (NROW(pkg_env$mo_previously_coerced) > 0) {
|
||||
message_("Reset ", NROW(pkg_env$mo_previously_coerced), " previously matched input values.")
|
||||
pkg_env$mo_previously_coerced <- pkg_env$mo_previously_coerced[0, , drop = FALSE]
|
||||
pkg_env$mo_uncertainties <- pkg_env$mo_uncertainties[0, , drop = FALSE]
|
||||
} else {
|
||||
message_("No previously matched input values to reset.")
|
||||
}
|
||||
}
|
||||
# UNDOCUMENTED HELPER FUNCTIONS -------------------------------------------
|
||||
|
||||
#' @rdname as.mo
|
||||
#' @export
|
||||
mo_cleaning_regex <- function() {
|
||||
paste0(
|
||||
"(",
|
||||
"[^A-Za-z- \\(\\)\\[\\]{}]+",
|
||||
"|",
|
||||
"([({]|\\[).+([})]|\\])",
|
||||
"|",
|
||||
"(^| )(e?spp|e?ssp|e?ss|e?sp|e?subsp|sube?species|biovar|biotype|serovar|e?species)( |$))")
|
||||
convert_colloquial_input <- function(x) {
|
||||
x.bak <- trimws2(x)
|
||||
x <- trimws2(tolower(x))
|
||||
out <- rep(NA_character_, length(x))
|
||||
|
||||
# Streptococci, like GBS = Group B Streptococci (B_STRPT_GRPB)
|
||||
out[x %like_case% "^g[abcdfghkl]s$"] <- gsub("g([abcdfghkl])s",
|
||||
"B_STRPT_GRP\\U\\1",
|
||||
x[x %like_case% "^g[abcdfghkl]s$"],
|
||||
perl = TRUE)
|
||||
# Streptococci in different languages, like "estreptococos grupo B"
|
||||
out[x %like_case% "strepto[ck]o[ck].* [abcdfghkl]$"] <- gsub(".*e?strepto[ck]o[ck].* ([abcdfghkl])$",
|
||||
"B_STRPT_GRP\\U\\1",
|
||||
x[x %like_case% "strepto[ck]o[ck].* [abcdfghkl]$"],
|
||||
perl = TRUE)
|
||||
out[x %like_case% "group [abcdfghkl] strepto[ck]o[ck]"] <- gsub(".*group ([abcdfghkl]) strepto[ck]o[ck].*",
|
||||
"B_STRPT_GRP\\U\\1",
|
||||
x[x %like_case% "group [abcdfghkl] strepto[ck]o[ck]"],
|
||||
perl = TRUE)
|
||||
out[x %like_case% "ha?emoly.*strep"] <- "B_STRPT_HAEM"
|
||||
out[x %like_case% "(strepto.* mil+er+i|^mgs[^a-z]*$)"] <- "B_STRPT_MILL"
|
||||
out[x %like_case% "mil+er+i gr"] <- "B_STRPT_MILL"
|
||||
out[x %like_case% "((strepto|^s).* viridans|^vgs[^a-z]*$)"] <- "B_STRPT_VIRI"
|
||||
|
||||
# CoNS/CoPS in different languages (support for German, Dutch, Spanish, Portuguese)
|
||||
out[x %like_case% "([ck]oagulas[ea].negatie?[vf]|^[ck]o?ns[^a-z]*$)"] <- "B_STPHY_CONS"
|
||||
out[x %like_case% "([ck]oagulas[ea].positie?[vf]|^[ck]o?ps[^a-z]*$)"] <- "B_STPHY_COPS"
|
||||
|
||||
# Gram stains
|
||||
out[x %like_case% "gram[ -]?neg.*|negatie?[vf]"] <- "B_GRAMN"
|
||||
out[x %like_case% "gram[ -]?pos.*|positie?[vf]"] <- "B_GRAMP"
|
||||
|
||||
# yeasts and fungi
|
||||
out[x %like_case% "^yeast?"] <- "F_YEAST"
|
||||
out[x %like_case% "^fung(us|i)"] <- "F_FUNGUS"
|
||||
|
||||
# Salmonella city names, starting with capital species name - they are all S. enterica
|
||||
out[x.bak %like_case% "[sS]almonella [A-Z][a-z]+ ?.*" & x %unlike% "typhi"] <- "B_SLMNL_ENTR"
|
||||
out[x %like_case% "salmonella group"] <- "B_SLMNL"
|
||||
|
||||
# trivial names known to the field
|
||||
out[x %like_case% "meningo[ck]o[ck]"] <- "B_NESSR_MNNG"
|
||||
out[x %like_case% "gono[ck]o[ck]"] <- "B_NESSR_GNRR"
|
||||
out[x %like_case% "pneumo[ck]o[ck]"] <- "B_STRPT_PNMN"
|
||||
|
||||
# unexisting names (xxx and con are WHONET codes)
|
||||
out[x %in% c("con", "other", "none", "unknown") | x %like_case% "virus"] <- "UNKNOWN"
|
||||
|
||||
out
|
||||
}
|
||||
|
||||
nr2char <- function(x) {
|
||||
@ -861,17 +933,6 @@ nr2char <- function(x) {
|
||||
}
|
||||
}
|
||||
|
||||
get_mo_uncertainties <- function() {
|
||||
remember <- list(uncertainties = pkg_env$mo_uncertainties)
|
||||
# empty them, otherwise e.g. mo_shortname("Chlamydophila psittaci") will give 3 notes
|
||||
pkg_env$mo_uncertainties <- NULL
|
||||
remember
|
||||
}
|
||||
|
||||
load_mo_uncertainties <- function(metadata) {
|
||||
pkg_env$mo_uncertainties <- metadata$uncertainties
|
||||
}
|
||||
|
||||
parse_and_convert <- function(x) {
|
||||
if (tryCatch(is.character(x) && all(Encoding(x) == "unknown", na.rm = TRUE), error = function(e) FALSE)) {
|
||||
return(trimws2(x))
|
||||
@ -1008,51 +1069,24 @@ repair_reference_df <- function(reference_df) {
|
||||
reference_df
|
||||
}
|
||||
|
||||
convert_colloquial_input <- function(x) {
|
||||
x.bak <- trimws2(x)
|
||||
x <- trimws2(tolower(x))
|
||||
out <- rep(NA_character_, length(x))
|
||||
|
||||
# Streptococci, like GBS = Group B Streptococci (B_STRPT_GRPB)
|
||||
out[x %like_case% "^g[abcdfghkl]s$"] <- gsub("g([abcdfghkl])s",
|
||||
"B_STRPT_GRP\\U\\1",
|
||||
x[x %like_case% "^g[abcdfghkl]s$"],
|
||||
perl = TRUE)
|
||||
# Streptococci in different languages, like "estreptococos grupo B"
|
||||
out[x %like_case% "strepto[ck]o[ck].* [abcdfghkl]$"] <- gsub(".*e?strepto[ck]o[ck].* ([abcdfghkl])$",
|
||||
"B_STRPT_GRP\\U\\1",
|
||||
x[x %like_case% "strepto[ck]o[ck].* [abcdfghkl]$"],
|
||||
perl = TRUE)
|
||||
out[x %like_case% "group [abcdfghkl] strepto[ck]o[ck]"] <- gsub(".*group ([abcdfghkl]) strepto[ck]o[ck].*",
|
||||
"B_STRPT_GRP\\U\\1",
|
||||
x[x %like_case% "group [abcdfghkl] strepto[ck]o[ck]"],
|
||||
perl = TRUE)
|
||||
out[x %like_case% "ha?emoly.*strep"] <- "B_STRPT_HAEM"
|
||||
out[x %like_case% "(strepto.* mil+er+i|^mgs[^a-z]*$)"] <- "B_STRPT_MILL"
|
||||
out[x %like_case% "((strepto|^s).* viridans|^vgs[^a-z]*$)"] <- "B_STRPT_VIRI"
|
||||
|
||||
# CoNS/CoPS in different languages (support for German, Dutch, Spanish, Portuguese)
|
||||
out[x %like_case% "([ck]oagulas[ea].negatie?[vf]|^[ck]o?ns[^a-z]*$)"] <- "B_STPHY_CONS"
|
||||
out[x %like_case% "([ck]oagulas[ea].positie?[vf]|^[ck]o?ps[^a-z]*$)"] <- "B_STPHY_COPS"
|
||||
|
||||
# Gram stains
|
||||
out[x %like_case% "gram[ -]?neg.*|negatie?[vf]"] <- "B_GRAMN"
|
||||
out[x %like_case% "gram[ -]?pos.*|positie?[vf]"] <- "B_GRAMP"
|
||||
|
||||
# yeasts and fungi
|
||||
out[x %like_case% "^yeast?"] <- "F_YEAST"
|
||||
out[x %like_case% "^fung(us|i)"] <- "F_FUNGUS"
|
||||
|
||||
# Salmonella city names, starting with capital species name - they are all S. enterica
|
||||
out[x.bak %like_case% "[sS]almonella [A-Z][a-z]+ ?.*" & x %unlike% "typhi"] <- "B_SLMNL_ENTR"
|
||||
|
||||
# trivial names known to the field
|
||||
out[x %like_case% "meningo[ck]o[ck]"] <- "B_NESSR_MNNG"
|
||||
out[x %like_case% "gono[ck]o[ck]"] <- "B_NESSR_GNRR"
|
||||
out[x %like_case% "pneumo[ck]o[ck]"] <- "B_STRPT_PNMN"
|
||||
|
||||
# unexisting names (xxx and con are WHONET codes)
|
||||
out[x %in% c("con", "other", "none", "unknown") | x %like_case% "virus"] <- "UNKNOWN"
|
||||
|
||||
out
|
||||
get_mo_uncertainties <- function() {
|
||||
remember <- list(uncertainties = AMR_env$mo_uncertainties)
|
||||
# empty them, otherwise e.g. mo_shortname("Chlamydophila psittaci") will give 3 notes
|
||||
AMR_env$mo_uncertainties <- NULL
|
||||
remember
|
||||
}
|
||||
|
||||
load_mo_uncertainties <- function(metadata) {
|
||||
AMR_env$mo_uncertainties <- metadata$uncertainties
|
||||
}
|
||||
|
||||
synonym_mo_to_accepted_mo <- function(x) {
|
||||
x_gbif <- AMR::microorganisms$gbif_renamed_to[match(x, AMR::microorganisms$mo)]
|
||||
x_lpsn <- AMR::microorganisms$lpsn_renamed_to[match(x, AMR::microorganisms$mo)]
|
||||
x_gbif[!x_gbif %in% AMR::microorganisms$gbif] <- NA
|
||||
x_lpsn[!x_lpsn %in% AMR::microorganisms$lpsn] <- NA
|
||||
|
||||
ifelse(is.na(x_lpsn),
|
||||
AMR::microorganisms$mo[match(x_gbif, AMR::microorganisms$gbif)],
|
||||
AMR::microorganisms$mo[match(x_lpsn, AMR::microorganisms$lpsn)])
|
||||
}
|
||||
|
@ -79,7 +79,10 @@ mo_matching_score <- function(x, n) {
|
||||
|
||||
# only keep one space
|
||||
x <- gsub(" +", " ", x)
|
||||
|
||||
|
||||
# start with a capital letter
|
||||
substr(x, 1, 1) <- toupper(substr(x, 1, 1))
|
||||
|
||||
# n is always a taxonomically valid full name
|
||||
if (length(n) == 1) {
|
||||
n <- rep(n, length(x))
|
||||
|
@ -33,13 +33,13 @@
|
||||
#' @param x any [character] (vector) that can be coerced to a valid microorganism code with [as.mo()]. Can be left blank for auto-guessing the column containing microorganism codes if used in a data set, see *Examples*.
|
||||
#' @param property one of the column names of the [microorganisms] data set: `r vector_or(colnames(microorganisms), sort = FALSE, quotes = TRUE)`, or must be `"shortname"`
|
||||
#' @inheritParams as.mo
|
||||
#' @param ... other arguments passed on to [as.mo()], such as 'allow_uncertain' and 'ignore_pattern'
|
||||
#' @param ... other arguments passed on to [as.mo()], such as 'minimum_matching_score', 'ignore_pattern', and 'remove_from_input'
|
||||
#' @param ab any (vector of) text that can be coerced to a valid antibiotic code with [as.ab()]
|
||||
#' @param open browse the URL using [`browseURL()`][utils::browseURL()]
|
||||
#' @details All functions will, at default, keep old taxonomic properties. Please refer to this example, knowing that *Escherichia blattae* was renamed to *Shimwellia blattae* in 2010:
|
||||
#' - `mo_name("Escherichia blattae")` will return `"Shimwellia blattae"` (with a message about the renaming)
|
||||
#' - `mo_ref("Escherichia blattae")` will return `"Burgess et al., 1973"` (with a message about the renaming)
|
||||
#' - `mo_ref("Shimwellia blattae")` will return `"Priest et al., 2010"` (without a message)
|
||||
#' - `mo_ref("Escherichia blattae", keep_synonyms = TRUE)` will return `"Burgess et al., 1973"` (with a warning about the renaming)
|
||||
#' - `mo_ref("Shimwellia blattae", keep_synonyms = FALSE)` will return `"Priest et al., 2010"` (without a message)
|
||||
#'
|
||||
#' The short name - [mo_shortname()] - almost always returns the first character of the genus and the full species, like `"E. coli"`. Exceptions are abbreviations of staphylococci (such as *"CoNS"*, Coagulase-Negative Staphylococci) and beta-haemolytic streptococci (such as *"GBS"*, Group B Streptococci). Please bear in mind that e.g. *E. coli* could mean *Escherichia coli* (kingdom of Bacteria) as well as *Entamoeba coli* (kingdom of Protozoa). Returning to the full name will be done using [as.mo()] internally, giving priority to bacteria and human pathogens, i.e. `"E. coli"` will be considered *Escherichia coli*. In other words, `mo_fullname(mo_shortname("Entamoeba coli"))` returns `"Escherichia coli"`.
|
||||
#'
|
||||
@ -504,7 +504,7 @@ mo_is_intrinsic_resistant <- function(x, ab, language = get_AMR_locale(), keep_s
|
||||
stop_("length of `x` and `ab` must be equal, or one of them must be of length 1.")
|
||||
}
|
||||
|
||||
# show used version number once per session (pkg_env will reload every session)
|
||||
# show used version number once per session (AMR_env will reload every session)
|
||||
if (message_not_thrown_before("mo_is_intrinsic_resistant", "version.mo", entire_session = TRUE)) {
|
||||
message_(
|
||||
"Determining intrinsic resistance based on ",
|
||||
|
@ -134,7 +134,7 @@ set_mo_source <- function(path, destination = getOption("AMR_mo_source", "~/mo_s
|
||||
stop_ifnot(interactive(), "this function can only be used in interactive mode, since it must ask for the user's permission to write a file to their file system.")
|
||||
|
||||
if (is.null(path) || path %in% c(FALSE, "")) {
|
||||
pkg_env$mo_source <- NULL
|
||||
AMR_env$mo_source <- NULL
|
||||
if (file.exists(mo_source_destination)) {
|
||||
unlink(mo_source_destination)
|
||||
message_("Removed mo_source file '", font_bold(mo_source_destination), "'",
|
||||
@ -227,7 +227,7 @@ set_mo_source <- function(path, destination = getOption("AMR_mo_source", "~/mo_s
|
||||
attr(df, "mo_source_destination") <- mo_source_destination
|
||||
attr(df, "mo_source_timestamp") <- file.mtime(path)
|
||||
saveRDS(df, mo_source_destination)
|
||||
pkg_env$mo_source <- df
|
||||
AMR_env$mo_source <- df
|
||||
message_(
|
||||
action, " mo_source file '", font_bold(mo_source_destination),
|
||||
"' (", formatted_filesize(mo_source_destination),
|
||||
@ -247,24 +247,24 @@ get_mo_source <- function(destination = getOption("AMR_mo_source", "~/mo_source.
|
||||
}
|
||||
return(NULL)
|
||||
}
|
||||
if (is.null(pkg_env$mo_source)) {
|
||||
pkg_env$mo_source <- readRDS(path.expand(destination))
|
||||
if (is.null(AMR_env$mo_source)) {
|
||||
AMR_env$mo_source <- readRDS(path.expand(destination))
|
||||
}
|
||||
|
||||
old_time <- attributes(pkg_env$mo_source)$mo_source_timestamp
|
||||
new_time <- file.mtime(attributes(pkg_env$mo_source)$mo_source_location)
|
||||
old_time <- attributes(AMR_env$mo_source)$mo_source_timestamp
|
||||
new_time <- file.mtime(attributes(AMR_env$mo_source)$mo_source_location)
|
||||
if (interactive() && !identical(old_time, new_time)) {
|
||||
# source file was updated, also update reference
|
||||
set_mo_source(attributes(pkg_env$mo_source)$mo_source_location)
|
||||
set_mo_source(attributes(AMR_env$mo_source)$mo_source_location)
|
||||
}
|
||||
pkg_env$mo_source
|
||||
AMR_env$mo_source
|
||||
}
|
||||
|
||||
check_validity_mo_source <- function(x, refer_to_name = "`reference_df`", stop_on_error = TRUE) {
|
||||
if (paste(deparse(substitute(x)), collapse = "") == "get_mo_source()") {
|
||||
return(TRUE)
|
||||
}
|
||||
if (is.null(pkg_env$mo_source) && (identical(x, get_mo_source()))) {
|
||||
if (is.null(AMR_env$mo_source) && (identical(x, get_mo_source()))) {
|
||||
return(TRUE)
|
||||
}
|
||||
if (is.null(x)) {
|
||||
|
10
R/rsi.R
10
R/rsi.R
@ -906,8 +906,8 @@ as_rsi_method <- function(method_short,
|
||||
}
|
||||
|
||||
# write to verbose output
|
||||
pkg_env$rsi_interpretation_history <- rbind(
|
||||
pkg_env$rsi_interpretation_history,
|
||||
AMR_env$rsi_interpretation_history <- rbind(
|
||||
AMR_env$rsi_interpretation_history,
|
||||
data.frame(
|
||||
datetime = Sys.time(),
|
||||
index = i,
|
||||
@ -964,7 +964,7 @@ as_rsi_method <- function(method_short,
|
||||
rsi_interpretation_history <- function(clean = FALSE) {
|
||||
meet_criteria(clean, allow_class = "logical", has_length = 1)
|
||||
|
||||
out.bak <- pkg_env$rsi_interpretation_history
|
||||
out.bak <- AMR_env$rsi_interpretation_history
|
||||
out <- out.bak
|
||||
if (NROW(out) == 0) {
|
||||
message_("No results to return. Run `as.rsi()` on MIC values or disk diffusion zones first to see a 'logbook' data set here.")
|
||||
@ -975,9 +975,9 @@ rsi_interpretation_history <- function(clean = FALSE) {
|
||||
out$interpretation <- as.rsi(out$interpretation)
|
||||
# keep stored for next use
|
||||
if (isTRUE(clean)) {
|
||||
pkg_env$rsi_interpretation_history <- pkg_env$rsi_interpretation_history[0, , drop = FALSE]
|
||||
AMR_env$rsi_interpretation_history <- AMR_env$rsi_interpretation_history[0, , drop = FALSE]
|
||||
} else {
|
||||
pkg_env$rsi_interpretation_history <- out.bak
|
||||
AMR_env$rsi_interpretation_history <- out.bak
|
||||
}
|
||||
|
||||
if (pkg_is_available("tibble", also_load = FALSE)) {
|
||||
|
19
R/zzz.R
19
R/zzz.R
@ -28,23 +28,24 @@
|
||||
# ==================================================================== #
|
||||
|
||||
# set up package environment, used by numerous AMR functions
|
||||
pkg_env <- new.env(hash = FALSE)
|
||||
pkg_env$mo_uncertainties <- data.frame(
|
||||
uncertainty = integer(0),
|
||||
AMR_env <- new.env(hash = FALSE)
|
||||
AMR_env$mo_uncertainties <- data.frame(
|
||||
original_input = character(0),
|
||||
input = character(0),
|
||||
fullname = character(0),
|
||||
mo = character(0),
|
||||
candidates = character(0),
|
||||
minimum_matching_score = integer(0),
|
||||
keep_synonyms = logical(0),
|
||||
stringsAsFactors = FALSE
|
||||
)
|
||||
pkg_env$mo_renamed <- list()
|
||||
pkg_env$mo_previously_coerced <- data.frame(
|
||||
AMR_env$mo_renamed <- list()
|
||||
AMR_env$mo_previously_coerced <- data.frame(
|
||||
x = character(0),
|
||||
mo = character(0),
|
||||
stringsAsFactors = FALSE
|
||||
)
|
||||
pkg_env$rsi_interpretation_history <- data.frame(
|
||||
AMR_env$rsi_interpretation_history <- data.frame(
|
||||
datetime = Sys.time()[0],
|
||||
index = integer(0),
|
||||
ab_input = character(0),
|
||||
@ -60,7 +61,7 @@ pkg_env$rsi_interpretation_history <- data.frame(
|
||||
interpretation = character(0),
|
||||
stringsAsFactors = FALSE
|
||||
)
|
||||
pkg_env$has_data.table <- pkg_is_available("data.table", also_load = FALSE)
|
||||
AMR_env$has_data.table <- pkg_is_available("data.table", also_load = FALSE)
|
||||
|
||||
# determine info icon for messages
|
||||
utf8_supported <- isTRUE(base::l10n_info()$`UTF-8`)
|
||||
@ -69,9 +70,9 @@ is_latex <- tryCatch(import_fn("is_latex_output", "knitr", error_on_fail = FALSE
|
||||
)
|
||||
if (utf8_supported && !is_latex) {
|
||||
# \u2139 is a symbol officially named 'information source'
|
||||
pkg_env$info_icon <- "\u2139"
|
||||
AMR_env$info_icon <- "\u2139"
|
||||
} else {
|
||||
pkg_env$info_icon <- "i"
|
||||
AMR_env$info_icon <- "i"
|
||||
}
|
||||
|
||||
.onLoad <- function(lib, pkg) {
|
||||
|
@ -109,7 +109,7 @@ read_EUCAST <- function(sheet, file, guideline_name) {
|
||||
for (i in seq_len(length(x))) {
|
||||
y <- trimws2(unlist(strsplit(x[i], "(,|and)")))
|
||||
y <- trimws2(gsub("[(].*[)]", "", y))
|
||||
y <- suppressWarnings(suppressMessages(as.mo(y, allow_uncertain = FALSE)))
|
||||
y <- suppressWarnings(suppressMessages(as.mo(y)))
|
||||
if (!is.null(mo_uncertainties())) uncertainties <<- add_uncertainties(uncertainties, mo_uncertainties())
|
||||
y <- y[!is.na(y) & y != "UNKNOWN"]
|
||||
x[i] <- paste(y, collapse = "|")
|
||||
|
Binary file not shown.
@ -40,7 +40,7 @@ expect_equal(as.character(as.mo("Escherichia coli")), "B_ESCHR_COLI")
|
||||
expect_equal(as.character(as.mo(112283007)), "B_ESCHR_COLI")
|
||||
expect_equal(as.character(as.mo("Escherichia species")), "B_ESCHR")
|
||||
expect_equal(as.character(as.mo("Escherichia")), "B_ESCHR")
|
||||
expect_equal(as.character(as.mo("Esch spp.")), "B_ESCHR")
|
||||
expect_equal(as.character(as.mo("Eschr spp.")), "B_ESCHR")
|
||||
expect_equal(as.character(as.mo(" B_ESCHR_COLI ")), "B_ESCHR_COLI")
|
||||
expect_equal(as.character(as.mo("e coli")), "B_ESCHR_COLI") # not Campylobacter
|
||||
expect_equal(as.character(as.mo("klpn")), "B_KLBSL_PNMN")
|
||||
@ -53,9 +53,8 @@ expect_equal(as.character(as.mo("Strepto")), "B_STRPT")
|
||||
expect_equal(as.character(as.mo("Streptococcus")), "B_STRPT") # not Peptostreptoccus
|
||||
expect_equal(as.character(as.mo("Estreptococos grupo B")), "B_STRPT_GRPB")
|
||||
expect_equal(as.character(as.mo("Group B Streptococci")), "B_STRPT_GRPB")
|
||||
expect_equal(as.character(as.mo(c("mycobacterie", "mycobakterium"))), c("B_MYCBC", "B_MYCBC"))
|
||||
|
||||
expect_equal(as.character(as.mo(c("GAS", "GBS", "a MGS", "haemoly strep"))), c("B_STRPT_GRPA", "B_STRPT_GRPB", "B_STRPT_MILL", "B_STRPT_HAEM"))
|
||||
expect_equal(as.character(as.mo(c("GAS", "GBS", "haemoly strep"))), c("B_STRPT_GRPA", "B_STRPT_GRPB", "B_STRPT_HAEM"))
|
||||
|
||||
|
||||
expect_equal(as.character(as.mo("S. pyo")), "B_STRPT_PYGN") # not Actinomyces pyogenes
|
||||
@ -90,14 +89,13 @@ expect_identical(
|
||||
"staaur",
|
||||
"S. aureus",
|
||||
"S aureus",
|
||||
"Sthafilokkockus aureeuzz",
|
||||
"Sthafilokkockus aureus",
|
||||
"Staphylococcus aureus",
|
||||
"MRSA",
|
||||
"VISA",
|
||||
"meth.-resis. S. aureus (MRSA)"
|
||||
))
|
||||
"VISA"
|
||||
), minimum_matching_score = 0)
|
||||
)),
|
||||
rep("B_STPHY_AURS", 10)
|
||||
rep("B_STPHY_AURS", 9)
|
||||
)
|
||||
expect_identical(
|
||||
as.character(
|
||||
@ -148,8 +146,8 @@ expect_identical(as.character(as.mo("STCPYO", Lancefield = TRUE)), "B_STRPT_GRPA
|
||||
expect_identical(as.character(as.mo("S. agalactiae", Lancefield = FALSE)), "B_STRPT_AGLC")
|
||||
expect_identical(as.character(as.mo("S. agalactiae", Lancefield = TRUE)), "B_STRPT_GRPB") # group B
|
||||
expect_identical(as.character(suppressWarnings(as.mo("estreptococos grupo B"))), "B_STRPT_GRPB")
|
||||
expect_identical(as.character(as.mo("S. equisimilis", Lancefield = FALSE)), "B_STRPT_DYSG_EQSM")
|
||||
expect_identical(as.character(as.mo("S. equisimilis", Lancefield = TRUE)), "B_STRPT_GRPC") # group C
|
||||
expect_identical(as.character(as.mo("S. equi", Lancefield = FALSE)), "B_STRPT_EQUI")
|
||||
expect_identical(as.character(as.mo("S. equi", Lancefield = TRUE)), "B_STRPT_GRPC") # group C
|
||||
# Enterococci must only be influenced if Lancefield = "all"
|
||||
expect_identical(as.character(as.mo("E. faecium", Lancefield = FALSE)), "B_ENTRC_FACM")
|
||||
expect_identical(as.character(as.mo("E. faecium", Lancefield = TRUE)), "B_ENTRC_FACM")
|
||||
@ -213,19 +211,17 @@ expect_equal(
|
||||
|
||||
# check empty values
|
||||
expect_equal(
|
||||
as.character(suppressWarnings(as.mo(""))),
|
||||
as.character(as.mo("")),
|
||||
NA_character_
|
||||
)
|
||||
|
||||
# check less prevalent MOs
|
||||
expect_equal(as.character(as.mo("Gomphosphaeria aponina delicatula")), "B_GMPHS_APNN_DLCT")
|
||||
expect_equal(as.character(as.mo("Gomphosphaeria apo del")), "B_GMPHS_APNN_DLCT")
|
||||
expect_equal(as.character(as.mo("G apo deli")), "B_GMPHS_APNN_DLCT")
|
||||
expect_equal(as.character(as.mo("Gomphosphaeria aponina")), "B_GMPHS_APNN")
|
||||
expect_equal(as.character(as.mo("Gomphosphaeria species")), "B_GMPHS")
|
||||
expect_equal(as.character(as.mo("Gomphosphaeria")), "B_GMPHS")
|
||||
expect_equal(as.character(as.mo(" B_GMPHS_APNN ")), "B_GMPHS_APNN")
|
||||
expect_equal(as.character(as.mo("g aponina")), "B_GMPHS_APNN")
|
||||
expect_equal(as.character(as.mo("Actinosynnema pretiosum auranticum")), "B_ANNMA_PRTS_ARNT")
|
||||
expect_equal(as.character(as.mo("Actinosynnema preti aura")), "B_ANNMA_PRTS_ARNT")
|
||||
expect_equal(as.character(as.mo("A pre aur")), "B_ANNMA_PRTS_ARNT")
|
||||
expect_equal(as.character(as.mo("Actinosynnema pretiosum")), "B_ANNMA_PRTS")
|
||||
expect_equal(as.character(as.mo("Actinosynnema")), "B_ANNMA")
|
||||
expect_equal(as.character(as.mo(" B_ANNMA_PRTS ")), "B_ANNMA_PRTS")
|
||||
|
||||
# check old names
|
||||
expect_equal(suppressMessages(as.character(as.mo("Escherichia blattae"))), "B_SHMWL_BLTT")
|
||||
@ -250,7 +246,7 @@ expect_error(as.mo("E. coli", reference_df = data.frame(mycol = "TestingOwnID"))
|
||||
|
||||
# combination of existing mo and other code
|
||||
expect_identical(
|
||||
as.character(as.mo(c("B_ESCHR_COL", "ESCCOL"))),
|
||||
suppressWarnings(as.character(as.mo(c("B_ESCHR_COL", "ESCCOL")))),
|
||||
c("B_ESCHR_COLI", "B_ESCHR_COLI")
|
||||
)
|
||||
|
||||
@ -274,7 +270,7 @@ expect_equal(
|
||||
c("B_MCRBC_PRXY", "B_STRPT_SUIS", "B_RLTLL_TRRG")
|
||||
)
|
||||
expect_stdout(print(mo_uncertainties()))
|
||||
x <- as.mo("S. aur")
|
||||
x <- as.mo("Sta. aur")
|
||||
# many hits
|
||||
expect_stdout(print(mo_uncertainties()))
|
||||
|
||||
|
@ -4,9 +4,9 @@
|
||||
\alias{as.mo}
|
||||
\alias{mo}
|
||||
\alias{is.mo}
|
||||
\alias{mo_failures}
|
||||
\alias{mo_uncertainties}
|
||||
\alias{mo_renamed}
|
||||
\alias{mo_failures}
|
||||
\alias{mo_reset_session}
|
||||
\alias{mo_cleaning_regex}
|
||||
\title{Transform Input to a Microorganism Code}
|
||||
@ -27,12 +27,12 @@ as.mo(
|
||||
|
||||
is.mo(x)
|
||||
|
||||
mo_failures()
|
||||
|
||||
mo_uncertainties()
|
||||
|
||||
mo_renamed()
|
||||
|
||||
mo_failures()
|
||||
|
||||
mo_reset_session()
|
||||
|
||||
mo_cleaning_regex()
|
||||
|
@ -3,9 +3,9 @@
|
||||
\docType{data}
|
||||
\name{microorganisms.codes}
|
||||
\alias{microorganisms.codes}
|
||||
\title{Data Set with 5,508 Common Microorganism Codes}
|
||||
\title{Data Set with 5,411 Common Microorganism Codes}
|
||||
\format{
|
||||
A \link[tibble:tibble]{tibble} with 5,508 observations and 2 variables:
|
||||
A \link[tibble:tibble]{tibble} with 5,411 observations and 2 variables:
|
||||
\itemize{
|
||||
\item \code{code}\cr Commonly used code of a microorganism
|
||||
\item \code{mo}\cr ID of the microorganism in the \link{microorganisms} data set
|
||||
|
@ -261,7 +261,7 @@ mo_property(
|
||||
|
||||
\item{keep_synonyms}{a \link{logical} to indicate if old, previously valid taxonomic names must be preserved and not be corrected to currently accepted names. The default is \code{FALSE}, which will return a note if old taxonomic names were processed. The default can be set with \code{options(AMR_keep_synonyms = TRUE)} or \code{options(AMR_keep_synonyms = FALSE)}.}
|
||||
|
||||
\item{...}{other arguments passed on to \code{\link[=as.mo]{as.mo()}}, such as 'allow_uncertain' and 'ignore_pattern'}
|
||||
\item{...}{other arguments passed on to \code{\link[=as.mo]{as.mo()}}, such as 'minimum_matching_score', 'ignore_pattern', and 'remove_from_input'}
|
||||
|
||||
\item{ab}{any (vector of) text that can be coerced to a valid antibiotic code with \code{\link[=as.ab]{as.ab()}}}
|
||||
|
||||
@ -285,8 +285,8 @@ Use these functions to return a specific property of a microorganism based on th
|
||||
All functions will, at default, keep old taxonomic properties. Please refer to this example, knowing that \emph{Escherichia blattae} was renamed to \emph{Shimwellia blattae} in 2010:
|
||||
\itemize{
|
||||
\item \code{mo_name("Escherichia blattae")} will return \code{"Shimwellia blattae"} (with a message about the renaming)
|
||||
\item \code{mo_ref("Escherichia blattae")} will return \code{"Burgess et al., 1973"} (with a message about the renaming)
|
||||
\item \code{mo_ref("Shimwellia blattae")} will return \code{"Priest et al., 2010"} (without a message)
|
||||
\item \code{mo_ref("Escherichia blattae", keep_synonyms = TRUE)} will return \code{"Burgess et al., 1973"} (with a warning about the renaming)
|
||||
\item \code{mo_ref("Shimwellia blattae", keep_synonyms = FALSE)} will return \code{"Priest et al., 2010"} (without a message)
|
||||
}
|
||||
|
||||
The short name - \code{\link[=mo_shortname]{mo_shortname()}} - almost always returns the first character of the genus and the full species, like \code{"E. coli"}. Exceptions are abbreviations of staphylococci (such as \emph{"CoNS"}, Coagulase-Negative Staphylococci) and beta-haemolytic streptococci (such as \emph{"GBS"}, Group B Streptococci). Please bear in mind that e.g. \emph{E. coli} could mean \emph{Escherichia coli} (kingdom of Bacteria) as well as \emph{Entamoeba coli} (kingdom of Protozoa). Returning to the full name will be done using \code{\link[=as.mo]{as.mo()}} internally, giving priority to bacteria and human pathogens, i.e. \code{"E. coli"} will be considered \emph{Escherichia coli}. In other words, \code{mo_fullname(mo_shortname("Entamoeba coli"))} returns \code{"Escherichia coli"}.
|
||||
|
Loading…
Reference in New Issue
Block a user