1
0
mirror of https://github.com/msberends/AMR.git synced 2025-01-15 23:21:37 +01:00

last unit tests fix?

This commit is contained in:
dr. M.S. (Matthijs) Berends 2022-10-04 21:33:04 +02:00
parent 37f6db5ccd
commit aa06aad4ea
22 changed files with 275 additions and 256 deletions

View File

@ -1,5 +1,5 @@
Package: AMR Package: AMR
Version: 1.8.2.9027 Version: 1.8.2.9028
Date: 2022-10-04 Date: 2022-10-04
Title: Antimicrobial Resistance Data Analysis Title: Antimicrobial Resistance Data Analysis
Description: Functions to simplify and standardise antimicrobial resistance (AMR) Description: Functions to simplify and standardise antimicrobial resistance (AMR)

View File

@ -1,4 +1,4 @@
# AMR 1.8.2.9027 # AMR 1.8.2.9028
This version will eventually become v2.0! We're happy to reach a new major milestone soon! This version will eventually become v2.0! We're happy to reach a new major milestone soon!
@ -9,11 +9,12 @@ This version will eventually become v2.0! We're happy to reach a new major miles
* Chromista are almost never clinically relevant, thus lacking the secondary scope of this package * Chromista are almost never clinically relevant, thus lacking the secondary scope of this package
* The `microorganisms` no longer relies on the Catalogue of Life, but now primarily on the List of Prokaryotic names with Standing in Nomenclature (LPSN) and is supplemented with the Global Biodiversity Information Facility (GBIF). The structure of this data set has changed to include separate LPSN and GBIF identifiers. Almost all previous MO codes were retained. It contains over 1,000 taxonomic names from 2022 already. * The `microorganisms` no longer relies on the Catalogue of Life, but now primarily on the List of Prokaryotic names with Standing in Nomenclature (LPSN) and is supplemented with the Global Biodiversity Information Facility (GBIF). The structure of this data set has changed to include separate LPSN and GBIF identifiers. Almost all previous MO codes were retained. It contains over 1,000 taxonomic names from 2022 already.
* The `microorganisms.old` data set was removed, and all previously accepted names are now included in the `microorganisms` data set. A new column `status` contains `"accepted"` for currently accepted names and `"synonym"` for taxonomic synonyms; currently invalid names. All previously accepted names now have a microorganisms ID and - if available - an LPSN, GBIF and SNOMED CT identifier. * The `microorganisms.old` data set was removed, and all previously accepted names are now included in the `microorganisms` data set. A new column `status` contains `"accepted"` for currently accepted names and `"synonym"` for taxonomic synonyms; currently invalid names. All previously accepted names now have a microorganisms ID and - if available - an LPSN, GBIF and SNOMED CT identifier.
* The `mo_matching_score()` now count deletions and substitutions as 2 instead of 1, which impacts the outcome of `as.mo()` and any `mo_*()` function
### New ### New
* EUCAST 2022 and CLSI 2022 guidelines have been added for `as.rsi()`. EUCAST 2022 is now the new default guideline for all MIC and disks diffusion interpretations. * EUCAST 2022 and CLSI 2022 guidelines have been added for `as.rsi()`. EUCAST 2022 is now the new default guideline for all MIC and disks diffusion interpretations.
* All new algorithm for `as.mo()` (and thus internally all `mo_*()` functions) while still following our original set-up as described in our paper (DOI 10.18637/jss.v104.i03). * All new algorithm for `as.mo()` (and thus internally all `mo_*()` functions) while still following our original set-up as described in our paper (DOI 10.18637/jss.v104.i03).
* A new argument `keep_synonyms` allows to *not* correct for updated taxonomy * A new argument `keep_synonyms` allows to *not* correct for updated taxonomy, in favour of the now deleted argument `allow_uncertain`
* It has increased tremendously in speed and returns generally more consequent results * It has increased tremendously in speed and returns generally more consequent results
* Sequential coercion is now extremely fast as results are stored to the package environment, although coercion of unknown values must be run once per session. Previous results can be reset/removed with the new `mo_reset_session()` function. * Sequential coercion is now extremely fast as results are stored to the package environment, although coercion of unknown values must be run once per session. Previous results can be reset/removed with the new `mo_reset_session()` function.
* Function `mean_amr_distance()` to calculate the mean AMR distance. The mean AMR distance is a normalised numeric value to compare AMR test results and can help to identify similar isolates, without comparing antibiograms by hand. * Function `mean_amr_distance()` to calculate the mean AMR distance. The mean AMR distance is a normalised numeric value to compare AMR test results and can help to identify similar isolates, without comparing antibiograms by hand.

View File

@ -41,12 +41,12 @@ pm_left_join <- function(x, y, by = NULL, suffix = c(".x", ".y")) {
if (length(by) == 1) { if (length(by) == 1) {
by <- rep(by, 2) by <- rep(by, 2)
} }
int_x <- colnames(x) %in% colnames(y) & colnames(x) != by[1] int_x <- colnames(x) %in% colnames(y) & colnames(x) != by[1]
int_y <- colnames(y) %in% colnames(x) & colnames(y) != by[2] int_y <- colnames(y) %in% colnames(x) & colnames(y) != by[2]
colnames(x)[int_x] <- paste0(colnames(x)[int_x], suffix[1L]) colnames(x)[int_x] <- paste0(colnames(x)[int_x], suffix[1L])
colnames(y)[int_y] <- paste0(colnames(y)[int_y], suffix[2L]) colnames(y)[int_y] <- paste0(colnames(y)[int_y], suffix[2L])
merged <- cbind( merged <- cbind(
x, x,
y[match( y[match(
@ -57,7 +57,7 @@ pm_left_join <- function(x, y, by = NULL, suffix = c(".x", ".y")) {
drop = FALSE drop = FALSE
] ]
) )
rownames(merged) <- NULL rownames(merged) <- NULL
merged merged
} }
@ -93,7 +93,7 @@ quick_case_when <- function(...) {
if (n == 0L) { if (n == 0L) {
stop("No cases provided.") stop("No cases provided.")
} }
validate_case_when_length <- function(query, value, fs) { validate_case_when_length <- function(query, value, fs) {
lhs_lengths <- lengths(query) lhs_lengths <- lengths(query)
rhs_lengths <- lengths(value) rhs_lengths <- lengths(value)
@ -112,13 +112,13 @@ quick_case_when <- function(...) {
problems <- lhs_problems | rhs_problems problems <- lhs_problems | rhs_problems
if (any(problems)) { if (any(problems)) {
stop("The following formulas must be length ", len, " or 1, not ", stop("The following formulas must be length ", len, " or 1, not ",
paste(inconsistent_lengths, collapse = ", "), ".\n ", paste(inconsistent_lengths, collapse = ", "), ".\n ",
paste(fs[problems], collapse = "\n "), paste(fs[problems], collapse = "\n "),
call. = FALSE call. = FALSE
) )
} }
} }
replace_with <- function(x, i, val, arg_name) { replace_with <- function(x, i, val, arg_name) {
if (is.null(val)) { if (is.null(val)) {
return(x) return(x)
@ -131,7 +131,7 @@ quick_case_when <- function(...) {
} }
x x
} }
query <- vector("list", n) query <- vector("list", n)
value <- vector("list", n) value <- vector("list", n)
default_env <- parent.frame() default_env <- parent.frame()
@ -163,13 +163,13 @@ addin_insert_in <- function() {
# No export, no Rd # No export, no Rd
addin_insert_like <- function() { addin_insert_like <- function() {
# we want Shift + Ctrl/Cmd + L to iterate over %like%, %unlike%, %like_case%, and %unlike_case% # we want Shift + Ctrl/Cmd + L to iterate over %like%, %unlike%, %like_case%, and %unlike_case%
getActiveDocumentContext <- import_fn("getActiveDocumentContext", "rstudioapi") getActiveDocumentContext <- import_fn("getActiveDocumentContext", "rstudioapi")
insertText <- import_fn("insertText", "rstudioapi") insertText <- import_fn("insertText", "rstudioapi")
modifyRange <- import_fn("modifyRange", "rstudioapi") modifyRange <- import_fn("modifyRange", "rstudioapi")
document_range <- import_fn("document_range", "rstudioapi") document_range <- import_fn("document_range", "rstudioapi")
document_position <- import_fn("document_position", "rstudioapi") document_position <- import_fn("document_position", "rstudioapi")
context <- getActiveDocumentContext() context <- getActiveDocumentContext()
current_row <- context$selection[[1]]$range$end[1] current_row <- context$selection[[1]]$range$end[1]
current_col <- context$selection[[1]]$range$end[2] current_col <- context$selection[[1]]$range$end[2]
@ -178,15 +178,15 @@ addin_insert_like <- function() {
insertText(" %like% ") insertText(" %like% ")
return(invisible()) return(invisible())
} }
pos_preceded_by <- function(txt) { pos_preceded_by <- function(txt) {
if (tryCatch(substr(current_row_txt, current_col - nchar(trimws(txt, which = "right")), current_col) == trimws(txt, which = "right"), if (tryCatch(substr(current_row_txt, current_col - nchar(trimws(txt, which = "right")), current_col) == trimws(txt, which = "right"),
error = function(e) FALSE error = function(e) FALSE
)) { )) {
return(TRUE) return(TRUE)
} }
tryCatch(substr(current_row_txt, current_col - nchar(txt), current_col) %like% paste0("^", txt), tryCatch(substr(current_row_txt, current_col - nchar(txt), current_col) %like% paste0("^", txt),
error = function(e) FALSE error = function(e) FALSE
) )
} }
replace_pos <- function(old, with) { replace_pos <- function(old, with) {
@ -198,7 +198,7 @@ addin_insert_like <- function() {
id = context$id id = context$id
) )
} }
if (pos_preceded_by(" %like% ")) { if (pos_preceded_by(" %like% ")) {
replace_pos(" %like% ", with = " %unlike% ") replace_pos(" %like% ", with = " %unlike% ")
} else if (pos_preceded_by(" %unlike% ")) { } else if (pos_preceded_by(" %unlike% ")) {
@ -215,21 +215,21 @@ addin_insert_like <- function() {
search_type_in_df <- function(x, type, info = TRUE) { search_type_in_df <- function(x, type, info = TRUE) {
meet_criteria(x, allow_class = "data.frame") meet_criteria(x, allow_class = "data.frame")
meet_criteria(type, allow_class = "character", has_length = 1) meet_criteria(type, allow_class = "character", has_length = 1)
# try to find columns based on type # try to find columns based on type
found <- NULL found <- NULL
# remove attributes from other packages # remove attributes from other packages
x <- as.data.frame(x, stringsAsFactors = FALSE) x <- as.data.frame(x, stringsAsFactors = FALSE)
colnames_formatted <- tolower(generalise_antibiotic_name(colnames(x))) colnames_formatted <- tolower(generalise_antibiotic_name(colnames(x)))
# -- mo # -- mo
if (type == "mo") { if (type == "mo") {
if (any(vapply(FUN.VALUE = logical(1), x, is.mo))) { if (any(vapply(FUN.VALUE = logical(1), x, is.mo))) {
# take first <mo> column # take first <mo> column
found <- colnames(x)[vapply(FUN.VALUE = logical(1), x, is.mo)] found <- colnames(x)[vapply(FUN.VALUE = logical(1), x, is.mo)]
} else if ("mo" %in% colnames_formatted && } else if ("mo" %in% colnames_formatted &&
suppressWarnings(all(x$mo %in% c(NA, AMR::microorganisms$mo)))) { suppressWarnings(all(x$mo %in% c(NA, AMR::microorganisms$mo)))) {
found <- "mo" found <- "mo"
} else if (any(colnames_formatted %like_case% "^(mo|microorganism|organism|bacteria|ba[ck]terie)s?$")) { } else if (any(colnames_formatted %like_case% "^(mo|microorganism|organism|bacteria|ba[ck]terie)s?$")) {
found <- sort(colnames(x)[colnames_formatted %like_case% "^(mo|microorganism|organism|bacteria|ba[ck]terie)s?$"]) found <- sort(colnames(x)[colnames_formatted %like_case% "^(mo|microorganism|organism|bacteria|ba[ck]terie)s?$"])
@ -294,16 +294,16 @@ search_type_in_df <- function(x, type, info = TRUE) {
# this column should contain logicals # this column should contain logicals
if (!is.logical(x[, found, drop = TRUE])) { if (!is.logical(x[, found, drop = TRUE])) {
message_("Column '", font_bold(found), "' found as input for `col_", type, message_("Column '", font_bold(found), "' found as input for `col_", type,
"`, but this column does not contain 'logical' values (TRUE/FALSE) and was ignored.", "`, but this column does not contain 'logical' values (TRUE/FALSE) and was ignored.",
add_fn = font_red add_fn = font_red
) )
found <- NULL found <- NULL
} }
} }
} }
found <- found[1] found <- found[1]
if (!is.null(found) && info == TRUE) { if (!is.null(found) && info == TRUE) {
if (message_not_thrown_before("search_in_type", type)) { if (message_not_thrown_before("search_in_type", type)) {
msg <- paste0("Using column '", font_bold(found), "' as input for `col_", type, "`.") msg <- paste0("Using column '", font_bold(found), "' as input for `col_", type, "`.")
@ -349,16 +349,16 @@ stop_ifnot_installed <- function(package) {
# https://developer.r-project.org/Blog/public/2019/02/14/staged-install/index.html # https://developer.r-project.org/Blog/public/2019/02/14/staged-install/index.html
vapply(FUN.VALUE = character(1), package, function(pkg) { vapply(FUN.VALUE = character(1), package, function(pkg) {
tryCatch(get(".packageName", envir = asNamespace(pkg)), tryCatch(get(".packageName", envir = asNamespace(pkg)),
error = function(e) { error = function(e) {
if (pkg == "rstudioapi") { if (pkg == "rstudioapi") {
stop("This function only works in RStudio when using R >= 3.2.", call. = FALSE) stop("This function only works in RStudio when using R >= 3.2.", call. = FALSE)
} else if (pkg != "base") { } else if (pkg != "base") {
stop("This requires the '", pkg, "' package.", stop("This requires the '", pkg, "' package.",
"\nTry to install it with: install.packages(\"", pkg, "\")", "\nTry to install it with: install.packages(\"", pkg, "\")",
call. = FALSE call. = FALSE
) )
} }
} }
) )
}) })
return(invisible()) return(invisible())
@ -386,8 +386,8 @@ import_fn <- function(name, pkg, error_on_fail = TRUE) {
error = function(e) { error = function(e) {
if (isTRUE(error_on_fail)) { if (isTRUE(error_on_fail)) {
stop_("function ", name, "() is not an exported object from package '", pkg, stop_("function ", name, "() is not an exported object from package '", pkg,
"'. Please create an issue at https://github.com/msberends/AMR/issues. Many thanks!", "'. Please create an issue at https://github.com/msberends/AMR/issues. Many thanks!",
call = FALSE call = FALSE
) )
} else { } else {
return(NULL) return(NULL)
@ -407,11 +407,11 @@ word_wrap <- function(...,
width = 0.95 * getOption("width"), width = 0.95 * getOption("width"),
extra_indent = 0) { extra_indent = 0) {
msg <- paste0(c(...), collapse = "") msg <- paste0(c(...), collapse = "")
if (isTRUE(as_note)) { if (isTRUE(as_note)) {
msg <- paste0(AMR_env$info_icon, " ", gsub("^note:? ?", "", msg, ignore.case = TRUE)) msg <- paste0(AMR_env$info_icon, " ", gsub("^note:? ?", "", msg, ignore.case = TRUE))
} }
if (msg %like% "\n") { if (msg %like% "\n") {
# run word_wraps() over every line here, bind them and return again # run word_wraps() over every line here, bind them and return again
return(paste0(vapply( return(paste0(vapply(
@ -426,7 +426,7 @@ word_wrap <- function(...,
collapse = "\n" collapse = "\n"
)) ))
} }
# correct for operators (will add the space later on) # correct for operators (will add the space later on)
ops <- "([,./><\\]\\[])" ops <- "([,./><\\]\\[])"
msg <- gsub(paste0(ops, " ", ops), "\\1\\2", msg, perl = TRUE) msg <- gsub(paste0(ops, " ", ops), "\\1\\2", msg, perl = TRUE)
@ -434,13 +434,13 @@ word_wrap <- function(...,
msg_stripped <- font_stripstyle(msg) msg_stripped <- font_stripstyle(msg)
# where are the spaces now? # where are the spaces now?
msg_stripped_wrapped <- paste0(strwrap(msg_stripped, msg_stripped_wrapped <- paste0(strwrap(msg_stripped,
simplify = TRUE, simplify = TRUE,
width = width width = width
), ),
collapse = "\n" collapse = "\n"
) )
msg_stripped_wrapped <- paste0(unlist(strsplit(msg_stripped_wrapped, "(\n|\\*\\|\\*)")), msg_stripped_wrapped <- paste0(unlist(strsplit(msg_stripped_wrapped, "(\n|\\*\\|\\*)")),
collapse = "\n" collapse = "\n"
) )
msg_stripped_spaces <- which(unlist(strsplit(msg_stripped, "", fixed = TRUE)) == " ") msg_stripped_spaces <- which(unlist(strsplit(msg_stripped, "", fixed = TRUE)) == " ")
msg_stripped_wrapped_spaces <- which(unlist(strsplit(msg_stripped_wrapped, "", fixed = TRUE)) != "\n") msg_stripped_wrapped_spaces <- which(unlist(strsplit(msg_stripped_wrapped, "", fixed = TRUE)) != "\n")
@ -453,7 +453,7 @@ word_wrap <- function(...,
msg <- gsub(paste0(ops, ops), "\\1 \\2", msg, perl = TRUE) msg <- gsub(paste0(ops, ops), "\\1 \\2", msg, perl = TRUE)
msg <- paste0(msg, collapse = " ") msg <- paste0(msg, collapse = " ")
msg <- gsub("\n ", "\n", msg, fixed = TRUE) msg <- gsub("\n ", "\n", msg, fixed = TRUE)
if (msg_stripped %like% "\u2139 ") { if (msg_stripped %like% "\u2139 ") {
indentation <- 2 + extra_indent indentation <- 2 + extra_indent
} else if (msg_stripped %like% "^=> ") { } else if (msg_stripped %like% "^=> ") {
@ -464,7 +464,7 @@ word_wrap <- function(...,
msg <- gsub("\n", paste0("\n", strrep(" ", indentation)), msg, fixed = TRUE) msg <- gsub("\n", paste0("\n", strrep(" ", indentation)), msg, fixed = TRUE)
# remove trailing empty characters # remove trailing empty characters
msg <- gsub("(\n| )+$", "", msg) msg <- gsub("(\n| )+$", "", msg)
if (length(add_fn) > 0) { if (length(add_fn) > 0) {
if (!is.list(add_fn)) { if (!is.list(add_fn)) {
add_fn <- list(add_fn) add_fn <- list(add_fn)
@ -473,15 +473,15 @@ word_wrap <- function(...,
msg <- add_fn[[i]](msg) msg <- add_fn[[i]](msg)
} }
} }
# format backticks # format backticks
msg <- gsub("(`.+?`)", font_grey_bg("\\1"), msg) msg <- gsub("(`.+?`)", font_grey_bg("\\1"), msg)
# clean introduced whitespace between fullstops # clean introduced whitespace between fullstops
msg <- gsub("[.] +[.]", "..", msg) msg <- gsub("[.] +[.]", "..", msg)
# remove extra space that was introduced (case: "Smith et al., 2022") # remove extra space that was introduced (case: "Smith et al., 2022")
msg <- gsub(". ,", ".,", msg, fixed = TRUE) msg <- gsub(". ,", ".,", msg, fixed = TRUE)
msg msg
} }
@ -490,8 +490,8 @@ message_ <- function(...,
add_fn = list(font_blue), add_fn = list(font_blue),
as_note = TRUE) { as_note = TRUE) {
message(word_wrap(..., message(word_wrap(...,
add_fn = add_fn, add_fn = add_fn,
as_note = as_note as_note = as_note
), ),
appendLF = appendLF appendLF = appendLF
) )
@ -502,8 +502,8 @@ warning_ <- function(...,
immediate = FALSE, immediate = FALSE,
call = FALSE) { call = FALSE) {
warning(word_wrap(..., warning(word_wrap(...,
add_fn = add_fn, add_fn = add_fn,
as_note = FALSE as_note = FALSE
), ),
immediate. = immediate, immediate. = immediate,
call. = call call. = call
@ -736,13 +736,13 @@ meet_criteria <- function(object,
allow_NA = FALSE, allow_NA = FALSE,
ignore.case = FALSE, ignore.case = FALSE,
.call_depth = 0) { # depth in calling .call_depth = 0) { # depth in calling
obj_name <- deparse(substitute(object)) obj_name <- deparse(substitute(object))
call_depth <- -2 - abs(.call_depth) call_depth <- -2 - abs(.call_depth)
# if object is missing, or another error: # if object is missing, or another error:
tryCatch(invisible(object), tryCatch(invisible(object),
error = function(e) AMR_env$meet_criteria_error_txt <- e$message error = function(e) AMR_env$meet_criteria_error_txt <- e$message
) )
if (!is.null(AMR_env$meet_criteria_error_txt)) { if (!is.null(AMR_env$meet_criteria_error_txt)) {
error_txt <- AMR_env$meet_criteria_error_txt error_txt <- AMR_env$meet_criteria_error_txt
@ -750,7 +750,7 @@ meet_criteria <- function(object,
stop(error_txt, call. = FALSE) # don't use stop_() here, our pkg may not be loaded yet stop(error_txt, call. = FALSE) # don't use stop_() here, our pkg may not be loaded yet
} }
AMR_env$meet_criteria_error_txt <- NULL AMR_env$meet_criteria_error_txt <- NULL
if (is.null(object)) { if (is.null(object)) {
stop_if(allow_NULL == FALSE, "argument `", obj_name, "` must not be NULL", call = call_depth) stop_if(allow_NULL == FALSE, "argument `", obj_name, "` must not be NULL", call = call_depth)
return(invisible()) return(invisible())
@ -759,36 +759,36 @@ meet_criteria <- function(object,
stop_if(allow_NA == FALSE, "argument `", obj_name, "` must not be NA", call = call_depth) stop_if(allow_NA == FALSE, "argument `", obj_name, "` must not be NA", call = call_depth)
return(invisible()) return(invisible())
} }
if (!is.null(allow_class)) { if (!is.null(allow_class)) {
stop_ifnot(inherits(object, allow_class), "argument `", obj_name, stop_ifnot(inherits(object, allow_class), "argument `", obj_name,
"` must be ", format_class(allow_class, plural = isTRUE(has_length > 1)), "` must be ", format_class(allow_class, plural = isTRUE(has_length > 1)),
", i.e. not be ", format_class(class(object), plural = isTRUE(has_length > 1)), ", i.e. not be ", format_class(class(object), plural = isTRUE(has_length > 1)),
call = call_depth call = call_depth
) )
# check data.frames for data # check data.frames for data
if (inherits(object, "data.frame")) { if (inherits(object, "data.frame")) {
stop_if(any(dim(object) == 0), stop_if(any(dim(object) == 0),
"the data provided in argument `", obj_name, "the data provided in argument `", obj_name,
"` must contain rows and columns (current dimensions: ", "` must contain rows and columns (current dimensions: ",
paste(dim(object), collapse = "x"), ")", paste(dim(object), collapse = "x"), ")",
call = call_depth call = call_depth
) )
} }
} }
if (!is.null(has_length)) { if (!is.null(has_length)) {
stop_ifnot(length(object) %in% has_length, "argument `", obj_name, stop_ifnot(length(object) %in% has_length, "argument `", obj_name,
"` must ", # ifelse(allow_NULL, "be NULL or must ", ""), "` must ", # ifelse(allow_NULL, "be NULL or must ", ""),
"be of length ", vector_or(has_length, quotes = FALSE), "be of length ", vector_or(has_length, quotes = FALSE),
", not ", length(object), ", not ", length(object),
call = call_depth call = call_depth
) )
} }
if (!is.null(looks_like)) { if (!is.null(looks_like)) {
stop_ifnot(object %like% looks_like, "argument `", obj_name, stop_ifnot(object %like% looks_like, "argument `", obj_name,
"` must ", # ifelse(allow_NULL, "be NULL or must ", ""), "` must ", # ifelse(allow_NULL, "be NULL or must ", ""),
"resemble the regular expression \"", looks_like, "\"", "resemble the regular expression \"", looks_like, "\"",
call = call_depth call = call_depth
) )
} }
if (!is.null(is_in)) { if (!is.null(is_in)) {
@ -797,44 +797,44 @@ meet_criteria <- function(object,
is_in <- tolower(is_in) is_in <- tolower(is_in)
} }
stop_ifnot(all(object %in% is_in, na.rm = TRUE), "argument `", obj_name, "` ", stop_ifnot(all(object %in% is_in, na.rm = TRUE), "argument `", obj_name, "` ",
ifelse(!is.null(has_length) && length(has_length) == 1 && has_length == 1, ifelse(!is.null(has_length) && length(has_length) == 1 && has_length == 1,
"must be either ", "must be either ",
"must only contain values " "must only contain values "
), ),
vector_or(is_in, quotes = !isTRUE(any(c("double", "numeric", "integer") %in% allow_class))), vector_or(is_in, quotes = !isTRUE(any(c("double", "numeric", "integer") %in% allow_class))),
ifelse(allow_NA == TRUE, ", or NA", ""), ifelse(allow_NA == TRUE, ", or NA", ""),
call = call_depth call = call_depth
) )
} }
if (isTRUE(is_positive)) { if (isTRUE(is_positive)) {
stop_if(is.numeric(object) && !all(object > 0, na.rm = TRUE), "argument `", obj_name, stop_if(is.numeric(object) && !all(object > 0, na.rm = TRUE), "argument `", obj_name,
"` must ", "` must ",
ifelse(!is.null(has_length) && length(has_length) == 1 && has_length == 1, ifelse(!is.null(has_length) && length(has_length) == 1 && has_length == 1,
"be a number higher than zero", "be a number higher than zero",
"all be numbers higher than zero" "all be numbers higher than zero"
), ),
call = call_depth call = call_depth
) )
} }
if (isTRUE(is_positive_or_zero)) { if (isTRUE(is_positive_or_zero)) {
stop_if(is.numeric(object) && !all(object >= 0, na.rm = TRUE), "argument `", obj_name, stop_if(is.numeric(object) && !all(object >= 0, na.rm = TRUE), "argument `", obj_name,
"` must ", "` must ",
ifelse(!is.null(has_length) && length(has_length) == 1 && has_length == 1, ifelse(!is.null(has_length) && length(has_length) == 1 && has_length == 1,
"be zero or a positive number", "be zero or a positive number",
"all be zero or numbers higher than zero" "all be zero or numbers higher than zero"
), ),
call = call_depth call = call_depth
) )
} }
if (isTRUE(is_finite)) { if (isTRUE(is_finite)) {
stop_if(is.numeric(object) && !all(is.finite(object[!is.na(object)]), na.rm = TRUE), "argument `", obj_name, stop_if(is.numeric(object) && !all(is.finite(object[!is.na(object)]), na.rm = TRUE), "argument `", obj_name,
"` must ", "` must ",
ifelse(!is.null(has_length) && length(has_length) == 1 && has_length == 1, ifelse(!is.null(has_length) && length(has_length) == 1 && has_length == 1,
"be a finite number", "be a finite number",
"all be finite numbers" "all be finite numbers"
), ),
" (i.e. not be infinite)", " (i.e. not be infinite)",
call = call_depth call = call_depth
) )
} }
if (!is.null(contains_column_class)) { if (!is.null(contains_column_class)) {
@ -868,12 +868,12 @@ get_current_data <- function(arg_name, call) {
return(out) return(out)
} }
} }
# try a manual (base R) method, by going over all underlying environments with sys.frames() # try a manual (base R) method, by going over all underlying environments with sys.frames()
for (env in sys.frames()) { for (env in sys.frames()) {
if (!is.null(env$`.Generic`)) { if (!is.null(env$`.Generic`)) {
# don't check `".Generic" %in% names(env)`, because in R < 3.2, `names(env)` is always NULL # don't check `".Generic" %in% names(env)`, because in R < 3.2, `names(env)` is always NULL
if (valid_df(env$`.data`)) { if (valid_df(env$`.data`)) {
# an element `.data` will be in the environment when using `dplyr::select()` # an element `.data` will be in the environment when using `dplyr::select()`
# (but not when using `dplyr::filter()`, `dplyr::mutate()` or `dplyr::summarise()`) # (but not when using `dplyr::filter()`, `dplyr::mutate()` or `dplyr::summarise()`)
@ -887,7 +887,7 @@ get_current_data <- function(arg_name, call) {
} }
} }
} }
# no data.frame found, so an error must be returned: # no data.frame found, so an error must be returned:
if (is.na(arg_name)) { if (is.na(arg_name)) {
if (isTRUE(is.numeric(call))) { if (isTRUE(is.numeric(call))) {
@ -903,8 +903,8 @@ get_current_data <- function(arg_name, call) {
examples <- "" examples <- ""
} }
stop_("this function must be used inside a `dplyr` verb or `data.frame` call", stop_("this function must be used inside a `dplyr` verb or `data.frame` call",
examples, examples,
call = call call = call
) )
} else { } else {
# mimic a base R error that the argument is missing # mimic a base R error that the argument is missing
@ -921,7 +921,7 @@ get_current_column <- function() {
return(out) return(out)
} }
} }
# cur_column() doesn't always work (only allowed for certain conditions set by dplyr), but it's probably still possible: # cur_column() doesn't always work (only allowed for certain conditions set by dplyr), but it's probably still possible:
frms <- lapply(sys.frames(), function(env) { frms <- lapply(sys.frames(), function(env) {
if (!is.null(env$i)) { if (!is.null(env$i)) {
@ -941,7 +941,7 @@ get_current_column <- function() {
NULL NULL
} }
}) })
vars <- unlist(frms) vars <- unlist(frms)
if (length(vars) > 0) { if (length(vars) > 0) {
vars[length(vars)] vars[length(vars)]
@ -960,7 +960,7 @@ unique_call_id <- function(entire_session = FALSE, match_fn = NULL) {
if (entire_session == TRUE) { if (entire_session == TRUE) {
return(c(envir = "session", call = "session")) return(c(envir = "session", call = "session"))
} }
# combination of environment ID (such as "0x7fed4ee8c848") # combination of environment ID (such as "0x7fed4ee8c848")
# and relevant system call (where 'match_fn' is being called in) # and relevant system call (where 'match_fn' is being called in)
calls <- sys.calls() calls <- sys.calls()
@ -1011,7 +1011,7 @@ message_not_thrown_before <- function(fn, ..., entire_session = FALSE) {
has_colour <- function() { has_colour <- function() {
# this is a base R version of crayon::has_color, but disables colours on emacs # this is a base R version of crayon::has_color, but disables colours on emacs
if (Sys.getenv("EMACS") != "" || Sys.getenv("INSIDE_EMACS") != "") { if (Sys.getenv("EMACS") != "" || Sys.getenv("INSIDE_EMACS") != "") {
# disable on emacs, which only supports 8 colours # disable on emacs, which only supports 8 colours
return(FALSE) return(FALSE)
@ -1293,7 +1293,7 @@ round2 <- function(x, digits = 1, force_zero = TRUE) {
# percentage from our other package: 'cleaner' # percentage from our other package: 'cleaner'
percentage <- function(x, digits = NULL, ...) { percentage <- function(x, digits = NULL, ...) {
# getdecimalplaces() function # getdecimalplaces() function
getdecimalplaces <- function(x, minimum = 0, maximum = 3) { getdecimalplaces <- function(x, minimum = 0, maximum = 3) {
if (maximum < minimum) { if (maximum < minimum) {
@ -1310,14 +1310,14 @@ percentage <- function(x, digits = NULL, ...) {
function(y) ifelse(length(y) == 2, nchar(y[2]), 0) function(y) ifelse(length(y) == 2, nchar(y[2]), 0)
)), na.rm = TRUE) )), na.rm = TRUE)
max(min(max_places, max(min(max_places,
maximum, maximum,
na.rm = TRUE na.rm = TRUE
), ),
minimum, minimum,
na.rm = TRUE na.rm = TRUE
) )
} }
# format_percentage() function # format_percentage() function
format_percentage <- function(x, digits = NULL, ...) { format_percentage <- function(x, digits = NULL, ...) {
if (is.null(digits)) { if (is.null(digits)) {
@ -1326,19 +1326,19 @@ percentage <- function(x, digits = NULL, ...) {
if (is.null(digits) || is.na(digits) || !is.numeric(digits)) { if (is.null(digits) || is.na(digits) || !is.numeric(digits)) {
digits <- 2 digits <- 2
} }
# round right: percentage(0.4455) and format(as.percentage(0.4455), 1) should return "44.6%", not "44.5%" # round right: percentage(0.4455) and format(as.percentage(0.4455), 1) should return "44.6%", not "44.5%"
x_formatted <- format(round2(as.double(x), digits = digits + 2) * 100, x_formatted <- format(round2(as.double(x), digits = digits + 2) * 100,
scientific = FALSE, scientific = FALSE,
digits = max(1, digits), digits = max(1, digits),
nsmall = digits, nsmall = digits,
... ...
) )
x_formatted <- paste0(x_formatted, "%") x_formatted <- paste0(x_formatted, "%")
x_formatted[!grepl(pattern = "^[0-9.,e-]+$", x = x)] <- NA_character_ x_formatted[!grepl(pattern = "^[0-9.,e-]+$", x = x)] <- NA_character_
x_formatted x_formatted
} }
# the actual working part # the actual working part
x <- as.double(x) x <- as.double(x)
if (is.null(digits)) { if (is.null(digits)) {
@ -1495,9 +1495,9 @@ if (getRversion() < "3.5.0") {
which <- match.arg(which) which <- match.arg(which)
mysub <- function(re, x) sub(re, "", x, perl = TRUE) mysub <- function(re, x) sub(re, "", x, perl = TRUE)
switch(which, switch(which,
left = mysub(paste0("^", whitespace, "+"), x), left = mysub(paste0("^", whitespace, "+"), x),
right = mysub(paste0(whitespace, "+$"), x), right = mysub(paste0(whitespace, "+$"), x),
both = mysub(paste0(whitespace, "+$"), mysub(paste0("^", whitespace, "+"), x)) both = mysub(paste0(whitespace, "+$"), mysub(paste0("^", whitespace, "+"), x))
) )
} }
} }

View File

@ -566,7 +566,7 @@ ab_select_exec <- function(function_name,
message_("No antimicrobial agents found in the data.") message_("No antimicrobial agents found in the data.")
return(NULL) return(NULL)
} }
if (is.null(ab_class_args) || isTRUE(function_name %in% c("antifungals", "antimycobacterials"))) { if (is.null(ab_class_args) || isTRUE(function_name %in% c("antifungals", "antimycobacterials"))) {
ab_group <- NULL ab_group <- NULL
if (isTRUE(function_name == "antifungals")) { if (isTRUE(function_name == "antifungals")) {
@ -580,8 +580,8 @@ ab_select_exec <- function(function_name,
ab_group <- function_name ab_group <- function_name
} }
examples <- paste0(" (such as ", vector_or(ab_name(sample(abx, size = min(2, length(abx)), replace = FALSE), examples <- paste0(" (such as ", vector_or(ab_name(sample(abx, size = min(2, length(abx)), replace = FALSE),
tolower = TRUE, tolower = TRUE,
language = NULL language = NULL
), ),
quotes = FALSE quotes = FALSE
), ")") ), ")")

10
R/amr.R
View File

@ -31,9 +31,9 @@
#' #'
#' @description #' @description
#' Welcome to the `AMR` package. #' Welcome to the `AMR` package.
#' #'
#' `AMR` is a free, open-source and independent \R package to simplify the analysis and prediction of Antimicrobial Resistance (AMR) and to work with microbial and antimicrobial data and properties, by using evidence-based methods. Our aim is to provide a standard for clean and reproducible antimicrobial resistance data analysis, that can therefore empower epidemiological analyses to continuously enable surveillance and treatment evaluation in any setting. #' `AMR` is a free, open-source and independent \R package to simplify the analysis and prediction of Antimicrobial Resistance (AMR) and to work with microbial and antimicrobial data and properties, by using evidence-based methods. Our aim is to provide a standard for clean and reproducible antimicrobial resistance data analysis, that can therefore empower epidemiological analyses to continuously enable surveillance and treatment evaluation in any setting.
#' #'
#' This work was published in the Journal of Statistical Software (Volume 104(3); \doi{10.18637/jss.v104.i03}) and formed the basis of two PhD theses (\doi{10.33612/diss.177417131} and \doi{10.33612/diss.192486375}). #' This work was published in the Journal of Statistical Software (Volume 104(3); \doi{10.18637/jss.v104.i03}) and formed the basis of two PhD theses (\doi{10.33612/diss.177417131} and \doi{10.33612/diss.192486375}).
#' #'
#' After installing this package, \R knows `r format_included_data_number(microorganisms)` distinct microbial species and all `r format_included_data_number(rbind(antibiotics[, "atc", drop = FALSE], antivirals[, "atc", drop = FALSE]))` antibiotic, antimycotic and antiviral drugs by name and code (including ATC, EARS-NET, LOINC and SNOMED CT), and knows all about valid R/SI and MIC values. It supports any data format, including WHONET/EARS-Net data. #' After installing this package, \R knows `r format_included_data_number(microorganisms)` distinct microbial species and all `r format_included_data_number(rbind(antibiotics[, "atc", drop = FALSE], antivirals[, "atc", drop = FALSE]))` antibiotic, antimycotic and antiviral drugs by name and code (including ATC, EARS-NET, LOINC and SNOMED CT), and knows all about valid R/SI and MIC values. It supports any data format, including WHONET/EARS-Net data.
@ -62,11 +62,11 @@
#' All data sets in this `AMR` package (about microorganisms, antibiotics, R/SI interpretation, EUCAST rules, etc.) are publicly and freely available for download in the following formats: R, MS Excel, Apache Feather, Apache Parquet, SPSS, SAS, and Stata. We also provide tab-separated plain text files that are machine-readable and suitable for input in any software program, such as laboratory information systems. Please visit [our website for the download links](https://msberends.github.io/AMR/articles/datasets.html). The actual files are of course available on [our GitHub repository](https://github.com/msberends/AMR/tree/main/data-raw). #' All data sets in this `AMR` package (about microorganisms, antibiotics, R/SI interpretation, EUCAST rules, etc.) are publicly and freely available for download in the following formats: R, MS Excel, Apache Feather, Apache Parquet, SPSS, SAS, and Stata. We also provide tab-separated plain text files that are machine-readable and suitable for input in any software program, such as laboratory information systems. Please visit [our website for the download links](https://msberends.github.io/AMR/articles/datasets.html). The actual files are of course available on [our GitHub repository](https://github.com/msberends/AMR/tree/main/data-raw).
#' @source #' @source
#' To cite AMR in publications use: #' To cite AMR in publications use:
#' #'
#' Berends MS, Luz CF, Friedrich AW, Sinha BNM, Albers CJ, Glasner C (2022). "AMR: An R Package for Working with Antimicrobial Resistance Data." _Journal of Statistical Software_, *104*(3), 1-31. \doi{10.18637/jss.v104.i03}. #' Berends MS, Luz CF, Friedrich AW, Sinha BNM, Albers CJ, Glasner C (2022). "AMR: An R Package for Working with Antimicrobial Resistance Data." _Journal of Statistical Software_, *104*(3), 1-31. \doi{10.18637/jss.v104.i03}.
#' #'
#' A BibTeX entry for LaTeX users is: #' A BibTeX entry for LaTeX users is:
#' #'
#' \preformatted{ #' \preformatted{
#' `r format(citation("AMR"), style = "bib")` #' `r format(citation("AMR"), style = "bib")`
#' } #' }

View File

@ -49,10 +49,10 @@
#' ```r #' ```r
#' x #' x
#' #> A set of custom EUCAST rules: #' #> A set of custom EUCAST rules:
#' #> #' #>
#' #> 1. If TZP is "S" then set to S : #' #> 1. If TZP is "S" then set to S :
#' #> amoxicillin (AMX), ampicillin (AMP) #' #> amoxicillin (AMX), ampicillin (AMP)
#' #> #' #>
#' #> 2. If TZP is "R" then set to R : #' #> 2. If TZP is "R" then set to R :
#' #> amoxicillin (AMX), ampicillin (AMP) #' #> amoxicillin (AMX), ampicillin (AMP)
#' ``` #' ```
@ -68,7 +68,7 @@
#' #> mo TZP ampi cipro #' #> mo TZP ampi cipro
#' #> 1 Escherichia coli R S S #' #> 1 Escherichia coli R S S
#' #> 2 Klebsiella pneumoniae R S S #' #> 2 Klebsiella pneumoniae R S S
#' #'
#' eucast_rules(df, rules = "custom", custom_rules = x, info = FALSE) #' eucast_rules(df, rules = "custom", custom_rules = x, info = FALSE)
#' #> mo TZP ampi cipro #' #> mo TZP ampi cipro
#' #> 1 Escherichia coli R R S #' #> 1 Escherichia coli R R S

View File

@ -167,7 +167,7 @@ valid_mic_levels <- c(
as.mic <- function(x, na.rm = FALSE) { as.mic <- function(x, na.rm = FALSE) {
meet_criteria(x, allow_class = c("mic", "character", "numeric", "integer", "factor"), allow_NA = TRUE) meet_criteria(x, allow_class = c("mic", "character", "numeric", "integer", "factor"), allow_NA = TRUE)
meet_criteria(na.rm, allow_class = "logical", has_length = 1) meet_criteria(na.rm, allow_class = "logical", has_length = 1)
if (is.mic(x)) { if (is.mic(x)) {
x x
} else { } else {

163
R/mo.R
View File

@ -67,13 +67,13 @@
#' Values that cannot be coerced will be considered 'unknown' and will be returned as the MO code `UNKNOWN` with a warning. #' Values that cannot be coerced will be considered 'unknown' and will be returned as the MO code `UNKNOWN` with a warning.
#' #'
#' Use the [`mo_*`][mo_property()] functions to get properties based on the returned code, see *Examples*. #' Use the [`mo_*`][mo_property()] functions to get properties based on the returned code, see *Examples*.
#' #'
#' The [as.mo()] function uses a novel [matching score algorithm][mo_matching_score()] (see *Matching Score for Microorganisms* below) to match input against the [available microbial taxonomy][microoganisms] in this package. This will lead to the effect that e.g. `"E. coli"` (a microorganism highly prevalent in humans) will return the microbial ID of *Escherichia coli* and not *Entamoeba coli* (a microorganism less prevalent in humans), although the latter would alphabetically come first. The algorithm uses data from the List of Prokaryotic names with Standing in Nomenclature (LPSN) and the Global Biodiversity Information Facility (GBIF) (see [microorganisms]). #' The [as.mo()] function uses a novel [matching score algorithm][mo_matching_score()] (see *Matching Score for Microorganisms* below) to match input against the [available microbial taxonomy][microoganisms] in this package. This will lead to the effect that e.g. `"E. coli"` (a microorganism highly prevalent in humans) will return the microbial ID of *Escherichia coli* and not *Entamoeba coli* (a microorganism less prevalent in humans), although the latter would alphabetically come first. The algorithm uses data from the List of Prokaryotic names with Standing in Nomenclature (LPSN) and the Global Biodiversity Information Facility (GBIF) (see [microorganisms]).
#' #'
#' ### Coping with Uncertain Results #' ### Coping with Uncertain Results
#' #'
#' Results of non-exact taxonomic input are based on their [matching score][mo_matching_score()]. The lowest allowed score can be set with the `minimum_matching_score` argument. At default this will be determined based on the character length of the input, and the [taxonomic kingdom][microorganisms] and [human pathogenicity][mo_matching_score()] of the taxonomic outcome. If values are matched with uncertainty, a message will be shown to suggest the user to evaluate the results with [mo_uncertainties()], which returns a [data.frame] with all specifications. #' Results of non-exact taxonomic input are based on their [matching score][mo_matching_score()]. The lowest allowed score can be set with the `minimum_matching_score` argument. At default this will be determined based on the character length of the input, and the [taxonomic kingdom][microorganisms] and [human pathogenicity][mo_matching_score()] of the taxonomic outcome. If values are matched with uncertainty, a message will be shown to suggest the user to evaluate the results with [mo_uncertainties()], which returns a [data.frame] with all specifications.
#' #'
#' To increase the quality of matching, the `remove_from_input` argument can be used to clean the input (i.e., `x`). This must be a [regular expression][base::regex] that matches parts of the input that should be removed before the input is matched against the [available microbial taxonomy][microoganisms]. It will be matched Perl-compatible and case-insensitive. The default value of `remove_from_input` is the outcome of the helper function [mo_cleaning_regex()]. #' To increase the quality of matching, the `remove_from_input` argument can be used to clean the input (i.e., `x`). This must be a [regular expression][base::regex] that matches parts of the input that should be removed before the input is matched against the [available microbial taxonomy][microoganisms]. It will be matched Perl-compatible and case-insensitive. The default value of `remove_from_input` is the outcome of the helper function [mo_cleaning_regex()].
#' #'
#' There are three helper functions that can be run after using the [as.mo()] function: #' There are three helper functions that can be run after using the [as.mo()] function:
@ -85,7 +85,7 @@
#' #'
#' The coercion rules consider the prevalence of microorganisms in humans grouped into three groups, which is available as the `prevalence` columns in the [microorganisms] data set. The grouping into human pathogenic prevalence is explained in the section *Matching Score for Microorganisms* below. #' The coercion rules consider the prevalence of microorganisms in humans grouped into three groups, which is available as the `prevalence` columns in the [microorganisms] data set. The grouping into human pathogenic prevalence is explained in the section *Matching Score for Microorganisms* below.
#' @inheritSection mo_matching_score Matching Score for Microorganisms #' @inheritSection mo_matching_score Matching Score for Microorganisms
#' #'
# (source as a section here, so it can be inherited by other man pages) # (source as a section here, so it can be inherited by other man pages)
#' @section Source: #' @section Source:
#' 1. Berends MS *et al.* (2022). **AMR: An R Package for Working with Antimicrobial Resistance Data**. *Journal of Statistical Software*, 104(3), 1-31; \doi{10.18637/jss.v104.i03} #' 1. Berends MS *et al.* (2022). **AMR: An R Package for Working with Antimicrobial Resistance Data**. *Journal of Statistical Software*, 104(3), 1-31; \doi{10.18637/jss.v104.i03}
@ -120,7 +120,7 @@
#' "VISA", # Vancomycin Intermediate S. aureus #' "VISA", # Vancomycin Intermediate S. aureus
#' "VRSA", # Vancomycin Resistant S. aureus #' "VRSA", # Vancomycin Resistant S. aureus
#' 115329001 # SNOMED CT code #' 115329001 # SNOMED CT code
#' )) #' ))
#' #'
#' # Dyslexia is no problem - these all work: #' # Dyslexia is no problem - these all work:
#' as.mo(c( #' as.mo(c(
@ -163,7 +163,7 @@ as.mo <- function(x,
meet_criteria(ignore_pattern, allow_class = "character", has_length = 1, allow_NULL = TRUE) meet_criteria(ignore_pattern, allow_class = "character", has_length = 1, allow_NULL = TRUE)
language <- validate_language(language) language <- validate_language(language)
meet_criteria(info, allow_class = "logical", has_length = 1) meet_criteria(info, allow_class = "logical", has_length = 1)
if (tryCatch(all(x %in% c(MO_lookup$mo, NA)) && if (tryCatch(all(x %in% c(MO_lookup$mo, NA)) &&
isFALSE(Becker) && isFALSE(Becker) &&
isFALSE(Lancefield), error = function(e) FALSE)) { isFALSE(Lancefield), error = function(e) FALSE)) {
@ -171,7 +171,7 @@ as.mo <- function(x,
# is.mo() won't work - MO codes might change between package versions # is.mo() won't work - MO codes might change between package versions
return(set_clean_class(x, new_class = c("mo", "character"))) return(set_clean_class(x, new_class = c("mo", "character")))
} }
# start off with replaced language-specific non-ASCII characters with ASCII characters # start off with replaced language-specific non-ASCII characters with ASCII characters
x <- parse_and_convert(x) x <- parse_and_convert(x)
@ -179,14 +179,14 @@ as.mo <- function(x,
x <- replace_old_mo_codes(x, property = "mo") x <- replace_old_mo_codes(x, property = "mo")
# ignore cases that match the ignore pattern # ignore cases that match the ignore pattern
x <- replace_ignore_pattern(x, ignore_pattern) x <- replace_ignore_pattern(x, ignore_pattern)
x_lower <- tolower(x) x_lower <- tolower(x)
# WHONET: xxx = no growth # WHONET: xxx = no growth
x[x_lower %in% c("", "xxx", "na", "nan")] <- NA_character_ x[x_lower %in% c("", "xxx", "na", "nan")] <- NA_character_
out <- rep(NA_character_, length(x)) out <- rep(NA_character_, length(x))
# below we use base R's match(), known for powering '%in%', and incredibly fast! # below we use base R's match(), known for powering '%in%', and incredibly fast!
# From reference_df ---- # From reference_df ----
@ -220,7 +220,7 @@ as.mo <- function(x,
" for ", vector_and(x[is.na(old) & !is.na(new)]), ". Run `mo_reset_session()` to reset this." " for ", vector_and(x[is.na(old) & !is.na(new)]), ". Run `mo_reset_session()` to reset this."
) )
} }
# For all other input ---- # For all other input ----
if (any(is.na(out) & !is.na(x))) { if (any(is.na(out) & !is.na(x))) {
# reset uncertainties # reset uncertainties
@ -249,16 +249,16 @@ as.mo <- function(x,
x_out <- trimws2(gsub(" +", " ", x_out, perl = TRUE)) x_out <- trimws2(gsub(" +", " ", x_out, perl = TRUE))
x_search_cleaned <- x_out x_search_cleaned <- x_out
x_out <- tolower(x_out) x_out <- tolower(x_out)
# input must not be too short # input must not be too short
if (nchar(x_out) < 3) { if (nchar(x_out) < 3) {
return("UNKNOWN") return("UNKNOWN")
} }
# take out the parts, split by space # take out the parts, split by space
x_parts <- strsplit(gsub("-", " ", x_out, fixed = TRUE), " ", fixed = TRUE)[[1]] x_parts <- strsplit(gsub("-", " ", x_out, fixed = TRUE), " ", fixed = TRUE)[[1]]
# do a pre-match on first character (and if it contains a space, first chars of first two terms) # do a pre-match on first character (and if it contains a space, first chars of first two terms)
if (length(x_parts) %in% c(2, 3)) { if (length(x_parts) %in% c(2, 3)) {
# for genus + species + subspecies # for genus + species + subspecies
@ -301,7 +301,7 @@ as.mo <- function(x,
minimum_matching_score_current <- minimum_matching_score minimum_matching_score_current <- minimum_matching_score
} }
m[m < minimum_matching_score_current] <- NA_real_ m[m < minimum_matching_score_current] <- NA_real_
top_hits <- mo_to_search[order(m, decreasing = TRUE, na.last = NA)] # na.last = NA will remove the NAs top_hits <- mo_to_search[order(m, decreasing = TRUE, na.last = NA)] # na.last = NA will remove the NAs
if (length(top_hits) == 0) { if (length(top_hits) == 0) {
warning_("No hits found for \"", x_search, "\" with minimum_matching_score = ", ifelse(is.null(minimum_matching_score), paste0("NULL (=", round(min(minimum_matching_score_current, na.rm = TRUE), 3), ")"), minimum_matching_score), ". Try setting this value lower or even to 0.") warning_("No hits found for \"", x_search, "\" with minimum_matching_score = ", ifelse(is.null(minimum_matching_score), paste0("NULL (=", round(min(minimum_matching_score_current, na.rm = TRUE), 3), ")"), minimum_matching_score), ". Try setting this value lower or even to 0.")
@ -371,9 +371,11 @@ as.mo <- function(x,
gbif_matches[!gbif_matches %in% AMR::microorganisms$gbif] <- NA gbif_matches[!gbif_matches %in% AMR::microorganisms$gbif] <- NA
lpsn_matches <- AMR::microorganisms$lpsn_renamed_to[match(out, AMR::microorganisms$mo)] lpsn_matches <- AMR::microorganisms$lpsn_renamed_to[match(out, AMR::microorganisms$mo)]
lpsn_matches[!lpsn_matches %in% AMR::microorganisms$lpsn] <- NA lpsn_matches[!lpsn_matches %in% AMR::microorganisms$lpsn] <- NA
AMR_env$mo_renamed <- list(old = out[!is.na(gbif_matches) | !is.na(lpsn_matches)], AMR_env$mo_renamed <- list(
gbif_matches = gbif_matches[!is.na(gbif_matches) | !is.na(lpsn_matches)], old = out[!is.na(gbif_matches) | !is.na(lpsn_matches)],
lpsn_matches = lpsn_matches[!is.na(gbif_matches) | !is.na(lpsn_matches)]) gbif_matches = gbif_matches[!is.na(gbif_matches) | !is.na(lpsn_matches)],
lpsn_matches = lpsn_matches[!is.na(gbif_matches) | !is.na(lpsn_matches)]
)
if (isFALSE(keep_synonyms)) { if (isFALSE(keep_synonyms)) {
out[which(!is.na(gbif_matches))] <- AMR::microorganisms$mo[match(gbif_matches[which(!is.na(gbif_matches))], AMR::microorganisms$gbif)] out[which(!is.na(gbif_matches))] <- AMR::microorganisms$mo[match(gbif_matches[which(!is.na(gbif_matches))], AMR::microorganisms$gbif)]
out[which(!is.na(lpsn_matches))] <- AMR::microorganisms$mo[match(lpsn_matches[which(!is.na(lpsn_matches))], AMR::microorganisms$lpsn)] out[which(!is.na(lpsn_matches))] <- AMR::microorganisms$mo[match(lpsn_matches[which(!is.na(lpsn_matches))], AMR::microorganisms$lpsn)]
@ -384,7 +386,7 @@ as.mo <- function(x,
# keep synonyms is TRUE, so check if any do have synonyms # keep synonyms is TRUE, so check if any do have synonyms
warning_("Function `as.mo()` returned ", nr2char(length(unique(AMR_env$mo_renamed$old))), " old taxonomic name", ifelse(length(unique(AMR_env$mo_renamed$old)) > 1, "s", ""), ". Use `as.mo(..., keep_synonyms = FALSE)` to clean the input to currently accepted taxonomic names, or set the R option `AMR_keep_synonyms` to `FALSE`. This warning will be shown once per session.") warning_("Function `as.mo()` returned ", nr2char(length(unique(AMR_env$mo_renamed$old))), " old taxonomic name", ifelse(length(unique(AMR_env$mo_renamed$old)) > 1, "s", ""), ". Use `as.mo(..., keep_synonyms = FALSE)` to clean the input to currently accepted taxonomic names, or set the R option `AMR_keep_synonyms` to `FALSE`. This warning will be shown once per session.")
} }
# Apply Becker ---- # Apply Becker ----
if (isTRUE(Becker) || Becker == "all") { if (isTRUE(Becker) || Becker == "all") {
# warn when species found that are not in: # warn when species found that are not in:
@ -436,7 +438,7 @@ as.mo <- function(x,
out[out %like_case% "^B_STRPT_SLVR(_|$)"] <- "B_STRPT_GRPK" out[out %like_case% "^B_STRPT_SLVR(_|$)"] <- "B_STRPT_GRPK"
# group L - only S. dysgalactiae which is also group C, so ignore it here # group L - only S. dysgalactiae which is also group C, so ignore it here
} }
# All unknowns ---- # All unknowns ----
out[is.na(out) & !is.na(x)] <- "UNKNOWN" out[is.na(out) & !is.na(x)] <- "UNKNOWN"
AMR_env$mo_failures <- unique(x[out == "UNKNOWN" & x != "UNKNOWN" & !is.na(x)]) AMR_env$mo_failures <- unique(x[out == "UNKNOWN" & x != "UNKNOWN" & !is.na(x)])
@ -468,18 +470,20 @@ mo_uncertainties <- function() {
#' @export #' @export
mo_renamed <- function() { mo_renamed <- function() {
x <- AMR_env$mo_renamed x <- AMR_env$mo_renamed
x$new <- synonym_mo_to_accepted_mo(x$old) x$new <- synonym_mo_to_accepted_mo(x$old)
mo_old <- AMR::microorganisms$fullname[match(x$old, AMR::microorganisms$mo)] mo_old <- AMR::microorganisms$fullname[match(x$old, AMR::microorganisms$mo)]
mo_new <- AMR::microorganisms$fullname[match(x$new, AMR::microorganisms$mo)] mo_new <- AMR::microorganisms$fullname[match(x$new, AMR::microorganisms$mo)]
ref_old <- AMR::microorganisms$ref[match(x$old, AMR::microorganisms$mo)] ref_old <- AMR::microorganisms$ref[match(x$old, AMR::microorganisms$mo)]
ref_new <- AMR::microorganisms$ref[match(x$new, AMR::microorganisms$mo)] ref_new <- AMR::microorganisms$ref[match(x$new, AMR::microorganisms$mo)]
df_renamed <- data.frame(old = mo_old, df_renamed <- data.frame(
new = mo_new, old = mo_old,
ref_old = ref_old, new = mo_new,
ref_new = ref_new, ref_old = ref_old,
stringsAsFactors = FALSE) ref_new = ref_new,
stringsAsFactors = FALSE
)
df_renamed <- unique(df_renamed) df_renamed <- unique(df_renamed)
df_renamed <- df_renamed[order(df_renamed$old), , drop = FALSE] df_renamed <- df_renamed[order(df_renamed$old), , drop = FALSE]
set_clean_class(df_renamed, new_class = c("mo_renamed", "data.frame")) set_clean_class(df_renamed, new_class = c("mo_renamed", "data.frame"))
@ -512,7 +516,8 @@ mo_cleaning_regex <- function() {
"|", "|",
"([({]|\\[).+([})]|\\])", "([({]|\\[).+([})]|\\])",
"|", "|",
"(^| )(e?spp|e?ssp|e?ss|e?sp|e?subsp|sube?species|biovar|biotype|serovar|e?species)[.]*( |$))") "(^| )(e?spp|e?ssp|e?ss|e?sp|e?subsp|sube?species|biovar|biotype|serovar|e?species)[.]*( |$))"
)
} }
# UNDOCUMENTED METHODS ---------------------------------------------------- # UNDOCUMENTED METHODS ----------------------------------------------------
@ -754,7 +759,7 @@ print.mo_uncertainties <- function(x, ...) {
cat(word_wrap("No uncertainties to show. Only uncertainties of the last call of `as.mo()` or any `mo_*()` function are stored.\n", add_fn = font_blue)) cat(word_wrap("No uncertainties to show. Only uncertainties of the last call of `as.mo()` or any `mo_*()` function are stored.\n", add_fn = font_blue))
return(invisible(NULL)) return(invisible(NULL))
} }
cat(word_wrap("Matching scores are based on the resemblance between the input and the full taxonomic name, and the pathogenicity in humans. See `?mo_matching_score`.\n\n", add_fn = font_blue)) cat(word_wrap("Matching scores are based on the resemblance between the input and the full taxonomic name, and the pathogenicity in humans. See `?mo_matching_score`.\n\n", add_fn = font_blue))
if (has_colour()) { if (has_colour()) {
cat(word_wrap("Colour keys: ", cat(word_wrap("Colour keys: ",
@ -815,27 +820,30 @@ print.mo_uncertainties <- function(x, ...) {
) )
score_formatted <- trimws(formatC(round(score, 3), format = "f", digits = 3)) score_formatted <- trimws(formatC(round(score, 3), format = "f", digits = 3))
txt <- paste(txt, txt <- paste(txt,
paste0( paste0(
paste0( paste0(
'"', x[i, ]$original_input, '"', '"', x[i, ]$original_input, '"',
" -> ", " -> ",
paste0( paste0(
font_bold(font_italic(x[i, ]$fullname)), font_bold(font_italic(x[i, ]$fullname)),
ifelse(!is.na(x[i, ]$renamed_to), paste(", renamed to", font_italic(x[i, ]$renamed_to)), ""), ifelse(!is.na(x[i, ]$renamed_to), paste(", renamed to", font_italic(x[i, ]$renamed_to)), ""),
" (", x[i, ]$mo, ", ", score_set_colour(score_formatted, score), ")" " (", x[i, ]$mo, ", ", score_set_colour(score_formatted, score), ")"
) )
), ),
collapse = "\n" collapse = "\n"
), ),
# Add "Based on {input}" text if it differs from the original input # Add "Based on {input}" text if it differs from the original input
ifelse(x[i, ]$original_input != x[i, ]$input, paste0(strrep(" ", nchar(x[i, ]$original_input) + 6), "Based on input \"", x[i, ]$input, "\""), ""), ifelse(x[i, ]$original_input != x[i, ]$input, paste0(strrep(" ", nchar(x[i, ]$original_input) + 6), "Based on input \"", x[i, ]$input, "\""), ""),
# Add note if result was coerced to accepted taxonomic name # Add note if result was coerced to accepted taxonomic name
ifelse(x[i, ]$keep_synonyms == FALSE & x[i, ]$mo %in% AMR::microorganisms$mo[which(AMR::microorganisms$status == "synonym")], ifelse(x[i, ]$keep_synonyms == FALSE & x[i, ]$mo %in% AMR::microorganisms$mo[which(AMR::microorganisms$status == "synonym")],
paste0(strrep(" ", nchar(x[i, ]$original_input) + 6), paste0(
font_red(paste0("This old taxonomic name was converted to ", font_italic(AMR::microorganisms$fullname[match(synonym_mo_to_accepted_mo(x[i, ]$mo), AMR::microorganisms$mo)], collapse = NULL), " (", synonym_mo_to_accepted_mo(x[i, ]$mo), ")."), collapse = NULL)), strrep(" ", nchar(x[i, ]$original_input) + 6),
""), font_red(paste0("This old taxonomic name was converted to ", font_italic(AMR::microorganisms$fullname[match(synonym_mo_to_accepted_mo(x[i, ]$mo), AMR::microorganisms$mo)], collapse = NULL), " (", synonym_mo_to_accepted_mo(x[i, ]$mo), ")."), collapse = NULL)
candidates, ),
sep = "\n" ""
),
candidates,
sep = "\n"
) )
txt <- paste0(gsub("\n\n", "\n", txt), "\n\n") txt <- paste0(gsub("\n\n", "\n", txt), "\n\n")
} }
@ -850,19 +858,19 @@ print.mo_renamed <- function(x, extra_txt = "", n = 25, ...) {
cat(word_wrap("No renamed taxonomy to show. Only renamed taxonomy of the last call of `as.mo()` or any `mo_*()` function are stored.\n", add_fn = font_blue)) cat(word_wrap("No renamed taxonomy to show. Only renamed taxonomy of the last call of `as.mo()` or any `mo_*()` function are stored.\n", add_fn = font_blue))
return(invisible(NULL)) return(invisible(NULL))
} }
x$ref_old[!is.na(x$ref_old)] <- paste0(" (", gsub("et al.", font_italic("et al."), x$ref_old[!is.na(x$ref_old)], fixed = TRUE), ")") x$ref_old[!is.na(x$ref_old)] <- paste0(" (", gsub("et al.", font_italic("et al."), x$ref_old[!is.na(x$ref_old)], fixed = TRUE), ")")
x$ref_new[!is.na(x$ref_new)] <- paste0(" (", gsub("et al.", font_italic("et al."), x$ref_new[!is.na(x$ref_new)], fixed = TRUE), ")") x$ref_new[!is.na(x$ref_new)] <- paste0(" (", gsub("et al.", font_italic("et al."), x$ref_new[!is.na(x$ref_new)], fixed = TRUE), ")")
x$ref_old[is.na(x$ref_old)] <- " (author unknown)" x$ref_old[is.na(x$ref_old)] <- " (author unknown)"
x$ref_new[is.na(x$ref_new)] <- " (author unknown)" x$ref_new[is.na(x$ref_new)] <- " (author unknown)"
rows <- seq_len(min(NROW(x), n)) rows <- seq_len(min(NROW(x), n))
message_( message_(
"The following microorganism", ifelse(NROW(x) > 1, "s were", " was"), " taxonomically renamed", extra_txt, ":\n", "The following microorganism", ifelse(NROW(x) > 1, "s were", " was"), " taxonomically renamed", extra_txt, ":\n",
paste0(" \u2022 ", font_italic(x$old[rows], collapse = NULL), x$ref_old[rows], paste0(" \u2022 ", font_italic(x$old[rows], collapse = NULL), x$ref_old[rows],
" -> ", font_italic(x$new[rows], collapse = NULL), x$ref_new[rows], " -> ", font_italic(x$new[rows], collapse = NULL), x$ref_new[rows],
collapse = "\n" collapse = "\n"
), ),
ifelse(NROW(x) > n, paste0("\n\nOnly the first ", n, " (out of ", NROW(x), ") are shown. Run `print(mo_renamed(), n = ...)` to view more entries (might be slow), or save `mo_renamed()` to an object."), "") ifelse(NROW(x) > n, paste0("\n\nOnly the first ", n, " (out of ", NROW(x), ") are shown. Run `print(mo_renamed(), n = ...)` to view more entries (might be slow), or save `mo_renamed()` to an object."), "")
) )
@ -874,50 +882,54 @@ convert_colloquial_input <- function(x) {
x.bak <- trimws2(x) x.bak <- trimws2(x)
x <- trimws2(tolower(x)) x <- trimws2(tolower(x))
out <- rep(NA_character_, length(x)) out <- rep(NA_character_, length(x))
# Streptococci, like GBS = Group B Streptococci (B_STRPT_GRPB) # Streptococci, like GBS = Group B Streptococci (B_STRPT_GRPB)
out[x %like_case% "^g[abcdfghkl]s$"] <- gsub("g([abcdfghkl])s", out[x %like_case% "^g[abcdfghkl]s$"] <- gsub("g([abcdfghkl])s",
"B_STRPT_GRP\\U\\1", "B_STRPT_GRP\\U\\1",
x[x %like_case% "^g[abcdfghkl]s$"], x[x %like_case% "^g[abcdfghkl]s$"],
perl = TRUE) perl = TRUE
)
# Streptococci in different languages, like "estreptococos grupo B" # Streptococci in different languages, like "estreptococos grupo B"
out[x %like_case% "strepto[ck]o[ck].* [abcdfghkl]$"] <- gsub(".*e?strepto[ck]o[ck].* ([abcdfghkl])$", out[x %like_case% "strepto[ck]o[ck].* [abcdfghkl]$"] <- gsub(".*e?strepto[ck]o[ck].* ([abcdfghkl])$",
"B_STRPT_GRP\\U\\1", "B_STRPT_GRP\\U\\1",
x[x %like_case% "strepto[ck]o[ck].* [abcdfghkl]$"], x[x %like_case% "strepto[ck]o[ck].* [abcdfghkl]$"],
perl = TRUE) perl = TRUE
)
out[x %like_case% "group [abcdfghkl] strepto[ck]o[ck]"] <- gsub(".*group ([abcdfghkl]) strepto[ck]o[ck].*", out[x %like_case% "group [abcdfghkl] strepto[ck]o[ck]"] <- gsub(".*group ([abcdfghkl]) strepto[ck]o[ck].*",
"B_STRPT_GRP\\U\\1", "B_STRPT_GRP\\U\\1",
x[x %like_case% "group [abcdfghkl] strepto[ck]o[ck]"], x[x %like_case% "group [abcdfghkl] strepto[ck]o[ck]"],
perl = TRUE) perl = TRUE
)
out[x %like_case% "ha?emoly.*strep"] <- "B_STRPT_HAEM" out[x %like_case% "ha?emoly.*strep"] <- "B_STRPT_HAEM"
out[x %like_case% "(strepto.* mil+er+i|^mgs[^a-z]*$)"] <- "B_STRPT_MILL" out[x %like_case% "(strepto.* mil+er+i|^mgs[^a-z]*$)"] <- "B_STRPT_MILL"
out[x %like_case% "mil+er+i gr"] <- "B_STRPT_MILL" out[x %like_case% "mil+er+i gr"] <- "B_STRPT_MILL"
out[x %like_case% "((strepto|^s).* viridans|^vgs[^a-z]*$)"] <- "B_STRPT_VIRI" out[x %like_case% "((strepto|^s).* viridans|^vgs[^a-z]*$)"] <- "B_STRPT_VIRI"
out[x %like_case% "(viridans.* (strepto|^s).*|^vgs[^a-z]*$)"] <- "B_STRPT_VIRI"
# CoNS/CoPS in different languages (support for German, Dutch, Spanish, Portuguese) # CoNS/CoPS in different languages (support for German, Dutch, Spanish, Portuguese)
out[x %like_case% "([ck]oagulas[ea].negatie?[vf]|^[ck]o?ns[^a-z]*$)"] <- "B_STPHY_CONS" out[x %like_case% "([ck]oagulas[ea].negatie?[vf]|^[ck]o?ns[^a-z]*$)"] <- "B_STPHY_CONS"
out[x %like_case% "([ck]oagulas[ea].positie?[vf]|^[ck]o?ps[^a-z]*$)"] <- "B_STPHY_COPS" out[x %like_case% "([ck]oagulas[ea].positie?[vf]|^[ck]o?ps[^a-z]*$)"] <- "B_STPHY_COPS"
# Gram stains # Gram stains
out[x %like_case% "gram[ -]?neg.*|negatie?[vf]"] <- "B_GRAMN" out[x %like_case% "gram[ -]?neg.*|negatie?[vf]"] <- "B_GRAMN"
out[x %like_case% "gram[ -]?pos.*|positie?[vf]"] <- "B_GRAMP" out[x %like_case% "gram[ -]?pos.*|positie?[vf]"] <- "B_GRAMP"
# yeasts and fungi # yeasts and fungi
out[x %like_case% "^yeast?"] <- "F_YEAST" out[x %like_case% "^yeast?"] <- "F_YEAST"
out[x %like_case% "^fung(us|i)"] <- "F_FUNGUS" out[x %like_case% "^fung(us|i)"] <- "F_FUNGUS"
# Salmonella city names, starting with capital species name - they are all S. enterica # Salmonella city names, starting with capital species name - they are all S. enterica
out[x.bak %like_case% "[sS]almonella [A-Z][a-z]+ ?.*" & x %unlike% "typhi"] <- "B_SLMNL_ENTR" out[x.bak %like_case% "[sS]almonella [A-Z][a-z]+ ?.*" & x %unlike% "typhi"] <- "B_SLMNL_ENTR"
out[x %like_case% "salmonella group"] <- "B_SLMNL" out[x %like_case% "salmonella group"] <- "B_SLMNL"
# trivial names known to the field # trivial names known to the field
out[x %like_case% "meningo[ck]o[ck]"] <- "B_NESSR_MNNG" out[x %like_case% "meningo[ck]o[ck]"] <- "B_NESSR_MNNG"
out[x %like_case% "gono[ck]o[ck]"] <- "B_NESSR_GNRR" out[x %like_case% "gono[ck]o[ck]"] <- "B_NESSR_GNRR"
out[x %like_case% "pneumo[ck]o[ck]"] <- "B_STRPT_PNMN" out[x %like_case% "pneumo[ck]o[ck]"] <- "B_STRPT_PNMN"
# unexisting names (xxx and con are WHONET codes) # unexisting names (xxx and con are WHONET codes)
out[x %in% c("con", "other", "none", "unknown") | x %like_case% "virus"] <- "UNKNOWN" out[x %in% c("con", "other", "none", "unknown") | x %like_case% "virus"] <- "UNKNOWN"
out out
} }
@ -1085,8 +1097,9 @@ synonym_mo_to_accepted_mo <- function(x) {
x_lpsn <- AMR::microorganisms$lpsn_renamed_to[match(x, AMR::microorganisms$mo)] x_lpsn <- AMR::microorganisms$lpsn_renamed_to[match(x, AMR::microorganisms$mo)]
x_gbif[!x_gbif %in% AMR::microorganisms$gbif] <- NA x_gbif[!x_gbif %in% AMR::microorganisms$gbif] <- NA
x_lpsn[!x_lpsn %in% AMR::microorganisms$lpsn] <- NA x_lpsn[!x_lpsn %in% AMR::microorganisms$lpsn] <- NA
ifelse(is.na(x_lpsn), ifelse(is.na(x_lpsn),
AMR::microorganisms$mo[match(x_gbif, AMR::microorganisms$gbif)], AMR::microorganisms$mo[match(x_gbif, AMR::microorganisms$gbif)],
AMR::microorganisms$mo[match(x_lpsn, AMR::microorganisms$lpsn)]) AMR::microorganisms$mo[match(x_lpsn, AMR::microorganisms$lpsn)]
)
} }

View File

@ -79,10 +79,10 @@ mo_matching_score <- function(x, n) {
# only keep one space # only keep one space
x <- gsub(" +", " ", x) x <- gsub(" +", " ", x)
# start with a capital letter # force a capital letter, so this conversion will not count as a substitution
substr(x, 1, 1) <- toupper(substr(x, 1, 1)) substr(x, 1, 1) <- toupper(substr(x, 1, 1))
# n is always a taxonomically valid full name # n is always a taxonomically valid full name
if (length(n) == 1) { if (length(n) == 1) {
n <- rep(n, length(x)) n <- rep(n, length(x))
@ -90,19 +90,20 @@ mo_matching_score <- function(x, n) {
if (length(x) == 1) { if (length(x) == 1) {
x <- rep(x, length(n)) x <- rep(x, length(n))
} }
# length of fullname # length of fullname
l_n <- nchar(n) l_n <- nchar(n)
lev <- double(length = length(x)) lev <- double(length = length(x))
l_n.lev <- double(length = length(x)) l_n.lev <- double(length = length(x))
lev <- unlist(Map(f = function(a, b) { lev <- unlist(Map(f = function(a, b) {
as.double(utils::adist(a, b, as.double(utils::adist(a, b,
ignore.case = FALSE, ignore.case = FALSE,
fixed = TRUE, fixed = TRUE,
costs = c(insertions = 1, deletions = 2, substitutions = 2), costs = c(insertions = 1, deletions = 2, substitutions = 2),
counts = FALSE)) counts = FALSE
))
}, x, n, USE.NAMES = FALSE)) }, x, n, USE.NAMES = FALSE))
l_n.lev[l_n < lev] <- l_n[l_n < lev] l_n.lev[l_n < lev] <- l_n[l_n < lev]
l_n.lev[lev < l_n] <- lev[lev < l_n] l_n.lev[lev < l_n] <- lev[lev < l_n]
l_n.lev[lev == l_n] <- lev[lev == l_n] l_n.lev[lev == l_n] <- lev[lev == l_n]

View File

@ -376,7 +376,7 @@ mo_status <- function(x, language = get_AMR_locale(), keep_synonyms = getOption(
meet_criteria(x, allow_NA = TRUE) meet_criteria(x, allow_NA = TRUE)
language <- validate_language(language) language <- validate_language(language)
meet_criteria(keep_synonyms, allow_class = "logical", has_length = 1) meet_criteria(keep_synonyms, allow_class = "logical", has_length = 1)
translate_into_language(mo_validate(x = x, property = "status", language = language, keep_synonyms = keep_synonyms, ...), language = language, only_unknown = TRUE) translate_into_language(mo_validate(x = x, property = "status", language = language, keep_synonyms = keep_synonyms, ...), language = language, only_unknown = TRUE)
} }
@ -603,7 +603,7 @@ mo_gbif <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("A
meet_criteria(x, allow_NA = TRUE) meet_criteria(x, allow_NA = TRUE)
language <- validate_language(language) language <- validate_language(language)
meet_criteria(keep_synonyms, allow_class = "logical", has_length = 1) meet_criteria(keep_synonyms, allow_class = "logical", has_length = 1)
mo_validate(x = x, property = "gbif", language = language, keep_synonyms = keep_synonyms, ...) mo_validate(x = x, property = "gbif", language = language, keep_synonyms = keep_synonyms, ...)
} }
@ -796,7 +796,7 @@ mo_validate <- function(x, property, language, keep_synonyms = keep_synonyms, ..
Lancefield <- FALSE Lancefield <- FALSE
} }
has_Becker_or_Lancefield <- Becker %in% c(TRUE, "all") || Lancefield %in% c(TRUE, "all") has_Becker_or_Lancefield <- Becker %in% c(TRUE, "all") || Lancefield %in% c(TRUE, "all")
# get microorganisms data set, but remove synonyms if keep_synonyms is FALSE # get microorganisms data set, but remove synonyms if keep_synonyms is FALSE
mo_data_check <- AMR::microorganisms[which(AMR::microorganisms$status %in% if (isTRUE(keep_synonyms)) c("synonym", "accepted") else "accepted"), , drop = FALSE] mo_data_check <- AMR::microorganisms[which(AMR::microorganisms$status %in% if (isTRUE(keep_synonyms)) c("synonym", "accepted") else "accepted"), , drop = FALSE]

16
R/rsi.R
View File

@ -344,7 +344,7 @@ as.rsi.default <- function(x, ...) {
x <- gsub("^R+$", "R", x) x <- gsub("^R+$", "R", x)
x[!x %in% c("S", "I", "R")] <- NA_character_ x[!x %in% c("S", "I", "R")] <- NA_character_
na_after <- length(x[is.na(x) | x == ""]) na_after <- length(x[is.na(x) | x == ""])
if (!isFALSE(list(...)$warn)) { # so as.rsi(..., warn = FALSE) will never throw a warning if (!isFALSE(list(...)$warn)) { # so as.rsi(..., warn = FALSE) will never throw a warning
if (na_before != na_after) { if (na_before != na_after) {
list_missing <- x.bak[is.na(x) & !is.na(x.bak) & x.bak != ""] %pm>% list_missing <- x.bak[is.na(x) & !is.na(x.bak) & x.bak != ""] %pm>%
@ -353,13 +353,13 @@ as.rsi.default <- function(x, ...) {
vector_and(quotes = TRUE) vector_and(quotes = TRUE)
cur_col <- get_current_column() cur_col <- get_current_column()
warning_("in `as.rsi()`: ", na_after - na_before, " result", warning_("in `as.rsi()`: ", na_after - na_before, " result",
ifelse(na_after - na_before > 1, "s", ""), ifelse(na_after - na_before > 1, "s", ""),
ifelse(is.null(cur_col), "", paste0(" in column '", cur_col, "'")), ifelse(is.null(cur_col), "", paste0(" in column '", cur_col, "'")),
" truncated (", " truncated (",
round(((na_after - na_before) / length(x)) * 100), round(((na_after - na_before) / length(x)) * 100),
"%) that were invalid antimicrobial interpretations: ", "%) that were invalid antimicrobial interpretations: ",
list_missing, list_missing,
call = FALSE call = FALSE
) )
} }
if (any(toupper(x.bak[!is.na(x.bak)]) == "U") && message_not_thrown_before("as.rsi", "U")) { if (any(toupper(x.bak[!is.na(x.bak)]) == "U") && message_not_thrown_before("as.rsi", "U")) {

Binary file not shown.

View File

@ -124,11 +124,14 @@ set_AMR_locale <- function(language) {
options(AMR_locale = language) options(AMR_locale = language)
if (interactive() || identical(Sys.getenv("IN_PKGDOWN"), "true")) { if (interactive() || identical(Sys.getenv("IN_PKGDOWN"), "true")) {
# show which language to use now # show which language to use now
message_("Using ", LANGUAGES_SUPPORTED_NAMES[[language]]$exonym, message_(
ifelse(language != "en", "Using ", LANGUAGES_SUPPORTED_NAMES[[language]]$exonym,
paste0(" (", LANGUAGES_SUPPORTED_NAMES[[language]]$endonym, ")"), ifelse(language != "en",
""), paste0(" (", LANGUAGES_SUPPORTED_NAMES[[language]]$endonym, ")"),
" for the AMR package for this session.") ""
),
" for the AMR package for this session."
)
} }
} }

View File

@ -128,12 +128,12 @@ if (utf8_supported && !is_latex) {
s3_register("vctrs::vec_cast", "character.mic") s3_register("vctrs::vec_cast", "character.mic")
s3_register("vctrs::vec_cast", "double.mic") s3_register("vctrs::vec_cast", "double.mic")
s3_register("vctrs::vec_math", "mic") s3_register("vctrs::vec_math", "mic")
# if mo source exists, fire it up (see mo_source()) # if mo source exists, fire it up (see mo_source())
if (tryCatch(file.exists(getOption("AMR_mo_source", "~/mo_source.rds")), error = function(e) FALSE)) { if (tryCatch(file.exists(getOption("AMR_mo_source", "~/mo_source.rds")), error = function(e) FALSE)) {
invisible(get_mo_source()) invisible(get_mo_source())
} }
# be sure to print tibbles as tibbles # be sure to print tibbles as tibbles
if (pkg_is_available("tibble", also_load = FALSE)) { if (pkg_is_available("tibble", also_load = FALSE)) {
loadNamespace("tibble") loadNamespace("tibble")

View File

@ -104,7 +104,7 @@ create_species_cons_cops <- function(type = c("CoNS", "CoPS")) {
MO_staph[which(MO_staph$species %in% c( MO_staph[which(MO_staph$species %in% c(
"coagulase-negative", "argensis", "arlettae", "coagulase-negative", "argensis", "arlettae",
"auricularis", "borealis", "caeli", "capitis", "caprae", "auricularis", "borealis", "caeli", "capitis", "caprae",
"carnosus", "casei", "chromogenes", "cohnii", "condimenti", "carnosus", "casei", "caseolyticus", "chromogenes", "cohnii", "condimenti",
"croceilyticus", "croceilyticus",
"debuckii", "devriesei", "edaphicus", "epidermidis", "debuckii", "devriesei", "edaphicus", "epidermidis",
"equorum", "felis", "fleurettii", "gallinarum", "equorum", "felis", "fleurettii", "gallinarum",
@ -118,7 +118,7 @@ create_species_cons_cops <- function(type = c("CoNS", "CoPS")) {
"vitulinus", "vitulus", "warneri", "xylosus", "vitulinus", "vitulus", "warneri", "xylosus",
"caledonicus", "canis", "caledonicus", "canis",
"durrellii", "lloydii", "durrellii", "lloydii",
"ratti", "taiwanensis" "ratti", "taiwanensis", "veratri", "urealyticus"
) | ) |
# old, now renamed to S. schleiferi (but still as synonym in our data of course): # old, now renamed to S. schleiferi (but still as synonym in our data of course):
(MO_staph$species == "schleiferi" & MO_staph$subspecies %in% c("schleiferi", ""))), (MO_staph$species == "schleiferi" & MO_staph$subspecies %in% c("schleiferi", ""))),

View File

@ -1005,7 +1005,7 @@ taxonomy <- taxonomy %>%
# Remove unwanted taxonomic entries from Protoza/Fungi -------------------- # Remove unwanted taxonomic entries from Protoza/Fungi --------------------
# this must be done after the microbial ID generation, since it will otherwise generate a lot of different IDs # this must be done after the microbial ID generation, since it will otherwise generate a lot of different IDs
taxonomy <- taxonomy %>% taxonomy <- taxonomy %>%
filter( filter(
# Protozoa: # Protozoa:
!(phylum %in% c("Choanozoa", "Mycetozoa") & prevalence == 3), !(phylum %in% c("Choanozoa", "Mycetozoa") & prevalence == 3),
@ -1016,7 +1016,8 @@ taxonomy <- taxonomy %>%
# Animalia: # Animalia:
!genus %in% c("Lucilia", "Lumbricus"), !genus %in% c("Lucilia", "Lumbricus"),
!(genus %in% c("Aedes", "Anopheles") & rank %in% c("species", "subspecies")), # only genus of the many hundreds of mosquitoes species !(genus %in% c("Aedes", "Anopheles") & rank %in% c("species", "subspecies")), # only genus of the many hundreds of mosquitoes species
kingdom != "Plantae") # this kingdom only contained Curvularia and Hymenolepis, which have coincidental twin names with Fungi kingdom != "Plantae"
) # this kingdom only contained Curvularia and Hymenolepis, which have coincidental twin names with Fungi
message("\nCongratulations! The new taxonomic table will contain ", format(nrow(taxonomy), big.mark = ","), " rows.\n") message("\nCongratulations! The new taxonomic table will contain ", format(nrow(taxonomy), big.mark = ","), " rows.\n")
@ -1071,7 +1072,7 @@ taxonomy$lpsn_renamed_to[which(taxonomy$fullname == "Moraxella catarrhalis")]
taxonomy$status[which(taxonomy$fullname == "Moraxella catarrhalis")] <- "accepted" taxonomy$status[which(taxonomy$fullname == "Moraxella catarrhalis")] <- "accepted"
taxonomy$lpsn_renamed_to[which(taxonomy$fullname == "Moraxella catarrhalis")] <- NA_character_ taxonomy$lpsn_renamed_to[which(taxonomy$fullname == "Moraxella catarrhalis")] <- NA_character_
taxonomy <- taxonomy %>% taxonomy <- taxonomy %>%
AMR:::dataset_UTF8_to_ASCII() AMR:::dataset_UTF8_to_ASCII()

View File

@ -68,10 +68,10 @@ df <- AMR:::MO_lookup
expect_true(nrow(df[which(df$prevalence == 1), , drop = FALSE]) < nrow(df[which(df$prevalence == 2), , drop = FALSE])) expect_true(nrow(df[which(df$prevalence == 1), , drop = FALSE]) < nrow(df[which(df$prevalence == 2), , drop = FALSE]))
expect_true(nrow(df[which(df$prevalence == 2), , drop = FALSE]) < nrow(df[which(df$prevalence == 3), , drop = FALSE])) expect_true(nrow(df[which(df$prevalence == 2), , drop = FALSE]) < nrow(df[which(df$prevalence == 3), , drop = FALSE]))
expect_true(all(c( expect_true(all(c(
"mo", "fullname", "mo", "fullname", "status", "kingdom", "phylum", "class", "order",
"kingdom", "phylum", "class", "order", "family", "genus", "species", "subspecies", "family", "genus", "species", "subspecies", "rank", "ref", "source",
"rank", "ref", "lpsn", "gbif", "status", "source", "prevalence", "snomed", "lpsn", "lpsn_parent", "lpsn_renamed_to", "gbif", "gbif_parent", "gbif_renamed_to", "prevalence",
"kingdom_index", "fullname_lower", "g_species" "snomed", "kingdom_index", "fullname_lower", "full_first", "species_first"
) %in% colnames(df))) ) %in% colnames(df)))
expect_inherits(AMR:::MO_CONS, "mo") expect_inherits(AMR:::MO_CONS, "mo")
@ -87,7 +87,8 @@ expect_true(NROW(uncategorised) == 0,
"All staphylococcal species categorised as CoNS/CoPS.", "All staphylococcal species categorised as CoNS/CoPS.",
paste0( paste0(
"Staphylococcal species not categorised as CoNS/CoPS: S. ", "Staphylococcal species not categorised as CoNS/CoPS: S. ",
uncategorised$species, " (", uncategorised$mo, ")" uncategorised$species, " (", uncategorised$mo, ")",
collapse = "\n"
) )
) )
) )

View File

@ -41,7 +41,7 @@ expect_equal(
) )
MOs_mentioned <- unique(AMR:::EUCAST_RULES_DF$this_value) MOs_mentioned <- unique(AMR:::EUCAST_RULES_DF$this_value)
MOs_mentioned <- sort(trimws(unlist(strsplit(MOs_mentioned[!AMR:::is_valid_regex(MOs_mentioned)], ",", fixed = TRUE)))) MOs_mentioned <- sort(trimws(unlist(strsplit(MOs_mentioned[!AMR:::is_valid_regex(MOs_mentioned)], ",", fixed = TRUE))))
MOs_test <- suppressWarnings(suppressMessages(mo_name(MOs_mentioned))) MOs_test <- suppressWarnings(suppressMessages(mo_name(MOs_mentioned, keep_synonyms = TRUE, language = NULL)))
expect_true(length(MOs_mentioned[MOs_test != MOs_mentioned]) == 0) expect_true(length(MOs_mentioned[MOs_test != MOs_mentioned]) == 0)
expect_error(suppressWarnings(eucast_rules(example_isolates, col_mo = "Non-existing"))) expect_error(suppressWarnings(eucast_rules(example_isolates, col_mo = "Non-existing")))

View File

@ -59,7 +59,7 @@ expect_inherits(mo_synonyms(c("Candida albicans", "Escherichia coli")), "list")
expect_equal(names(mo_info("Escherichia coli")), c( expect_equal(names(mo_info("Escherichia coli")), c(
"kingdom", "phylum", "class", "order", "kingdom", "phylum", "class", "order",
"family", "genus", "species", "subspecies", "family", "genus", "species", "subspecies",
"synonyms", "gramstain", "url", "ref", "status", "synonyms", "gramstain", "url", "ref",
"snomed" "snomed"
)) ))
expect_inherits(mo_info(c("Escherichia coli", "Staphylococcus aureus")), "list") expect_inherits(mo_info(c("Escherichia coli", "Staphylococcus aureus")), "list")
@ -73,7 +73,7 @@ expect_true(mo_url("Escherichia coli") %like% "lpsn.dsmz.de")
# test integrity # test integrity
MOs <- microorganisms MOs <- microorganisms
expect_identical(MOs$fullname, mo_fullname(MOs$fullname, language = "en")) expect_identical(MOs$fullname, mo_fullname(MOs$fullname, language = "en", keep_synonyms = TRUE))
# check languages # check languages
expect_equal(mo_type("Escherichia coli", language = "de"), "Bakterien") expect_equal(mo_type("Escherichia coli", language = "de"), "Bakterien")
@ -81,13 +81,13 @@ expect_equal(mo_gramstain("Escherichia coli", language = "nl"), "Gram-negatief")
gr <- mo_gramstain("Escherichia coli", language = NULL) gr <- mo_gramstain("Escherichia coli", language = NULL)
for (l in AMR:::LANGUAGES_SUPPORTED[-1]) { for (l in AMR:::LANGUAGES_SUPPORTED[-1]) {
expect_false(mo_gramstain("Escherichia coli", language = l) == gr, info = paste("Gram-stain in langauge", l)) expect_false(mo_gramstain("Escherichia coli", language = l) == gr, info = paste("Gram-stain in language", l))
} }
expect_error(mo_gramstain("Escherichia coli", language = "UNKNOWN")) expect_error(mo_gramstain("Escherichia coli", language = "UNKNOWN"))
dutch <- mo_name(microorganisms$fullname[which(microorganisms$fullname %unlike% "unknown|coagulase")], language = "nl") # should be transformable to English again dutch <- mo_name(microorganisms$fullname[which(microorganisms$fullname %unlike% "unknown|coagulase|Fungi")], language = "nl", keep_synonyms = TRUE) # should be transformable to English again
expect_identical(mo_name(dutch, language = NULL), expect_identical(mo_name(dutch, language = NULL, keep_synonyms = TRUE),
microorganisms$fullname[which(microorganisms$fullname %unlike% "unknown|coagulase")]) # gigantic test - will run ALL names microorganisms$fullname[which(microorganisms$fullname %unlike% "unknown|coagulase|Fungi")]) # gigantic test - will run ALL names
# manual property function # manual property function
expect_error(mo_property("Escherichia coli", property = c("genus", "fullname"))) expect_error(mo_property("Escherichia coli", property = c("genus", "fullname")))

View File

@ -177,7 +177,7 @@ as.mo(c(
"VISA", # Vancomycin Intermediate S. aureus "VISA", # Vancomycin Intermediate S. aureus
"VRSA", # Vancomycin Resistant S. aureus "VRSA", # Vancomycin Resistant S. aureus
115329001 # SNOMED CT code 115329001 # SNOMED CT code
)) ))
# Dyslexia is no problem - these all work: # Dyslexia is no problem - these all work:
as.mo(c( as.mo(c(

View File

@ -32,10 +32,10 @@ These are two custom EUCAST rules: if TZP (piperacillin/tazobactam) is "S", all
\if{html}{\out{<div class="sourceCode r">}}\preformatted{x \if{html}{\out{<div class="sourceCode r">}}\preformatted{x
#> A set of custom EUCAST rules: #> A set of custom EUCAST rules:
#> #>
#> 1. If TZP is "S" then set to S : #> 1. If TZP is "S" then set to S :
#> amoxicillin (AMX), ampicillin (AMP) #> amoxicillin (AMX), ampicillin (AMP)
#> #>
#> 2. If TZP is "R" then set to R : #> 2. If TZP is "R" then set to R :
#> amoxicillin (AMX), ampicillin (AMP) #> amoxicillin (AMX), ampicillin (AMP)
}\if{html}{\out{</div>}} }\if{html}{\out{</div>}}

View File

@ -76,4 +76,3 @@ if (identical(Sys.getenv("R_RUN_TINYTEST"), "true")) {
print(summary(out)) print(summary(out))
} }
} }