mirror of
https://github.com/msberends/AMR.git
synced 2025-01-16 00:41:53 +01:00
fix unit tests
This commit is contained in:
parent
082e52a0dd
commit
37f6db5ccd
29
.github/prehooks/pre-commit
vendored
29
.github/prehooks/pre-commit
vendored
@ -1,5 +1,34 @@
|
|||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
|
|
||||||
|
# ==================================================================== #
|
||||||
|
# TITLE #
|
||||||
|
# AMR: An R Package for Working with Antimicrobial Resistance Data #
|
||||||
|
# #
|
||||||
|
# SOURCE #
|
||||||
|
# https://github.com/msberends/AMR #
|
||||||
|
# #
|
||||||
|
# CITE AS #
|
||||||
|
# Berends MS, Luz CF, Friedrich AW, Sinha BNM, Albers CJ, Glasner C #
|
||||||
|
# (2022). AMR: An R Package for Working with Antimicrobial Resistance #
|
||||||
|
# Data. Journal of Statistical Software, 104(3), 1-31. #
|
||||||
|
# doi:10.18637/jss.v104.i03 #
|
||||||
|
# #
|
||||||
|
# Developed at the University of Groningen, the Netherlands, in #
|
||||||
|
# collaboration with non-profit organisations Certe Medical #
|
||||||
|
# Diagnostics & Advice, and University Medical Center Groningen. #
|
||||||
|
# #
|
||||||
|
# This R package is free software; you can freely use and distribute #
|
||||||
|
# it for both personal and commercial purposes under the terms of the #
|
||||||
|
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
|
||||||
|
# the Free Software Foundation. #
|
||||||
|
# We created this package for both routine data analysis and academic #
|
||||||
|
# research and it was publicly released in the hope that it will be #
|
||||||
|
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
|
||||||
|
# #
|
||||||
|
# Visit our website for the full manual and a complete tutorial about #
|
||||||
|
# how to conduct AMR data analysis: https://msberends.github.io/AMR/ #
|
||||||
|
# ==================================================================== #
|
||||||
|
|
||||||
echo "Running pre-commit hook..."
|
echo "Running pre-commit hook..."
|
||||||
|
|
||||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
Package: AMR
|
Package: AMR
|
||||||
Version: 1.8.2.9026
|
Version: 1.8.2.9027
|
||||||
Date: 2022-10-04
|
Date: 2022-10-04
|
||||||
Title: Antimicrobial Resistance Data Analysis
|
Title: Antimicrobial Resistance Data Analysis
|
||||||
Description: Functions to simplify and standardise antimicrobial resistance (AMR)
|
Description: Functions to simplify and standardise antimicrobial resistance (AMR)
|
||||||
|
2
NEWS.md
2
NEWS.md
@ -1,4 +1,4 @@
|
|||||||
# AMR 1.8.2.9026
|
# AMR 1.8.2.9027
|
||||||
|
|
||||||
This version will eventually become v2.0! We're happy to reach a new major milestone soon!
|
This version will eventually become v2.0! We're happy to reach a new major milestone soon!
|
||||||
|
|
||||||
|
@ -409,7 +409,7 @@ word_wrap <- function(...,
|
|||||||
msg <- paste0(c(...), collapse = "")
|
msg <- paste0(c(...), collapse = "")
|
||||||
|
|
||||||
if (isTRUE(as_note)) {
|
if (isTRUE(as_note)) {
|
||||||
msg <- paste0(pkg_env$info_icon, " ", gsub("^note:? ?", "", msg, ignore.case = TRUE))
|
msg <- paste0(AMR_env$info_icon, " ", gsub("^note:? ?", "", msg, ignore.case = TRUE))
|
||||||
}
|
}
|
||||||
|
|
||||||
if (msg %like% "\n") {
|
if (msg %like% "\n") {
|
||||||
@ -742,14 +742,14 @@ meet_criteria <- function(object,
|
|||||||
|
|
||||||
# if object is missing, or another error:
|
# if object is missing, or another error:
|
||||||
tryCatch(invisible(object),
|
tryCatch(invisible(object),
|
||||||
error = function(e) pkg_env$meet_criteria_error_txt <- e$message
|
error = function(e) AMR_env$meet_criteria_error_txt <- e$message
|
||||||
)
|
)
|
||||||
if (!is.null(pkg_env$meet_criteria_error_txt)) {
|
if (!is.null(AMR_env$meet_criteria_error_txt)) {
|
||||||
error_txt <- pkg_env$meet_criteria_error_txt
|
error_txt <- AMR_env$meet_criteria_error_txt
|
||||||
pkg_env$meet_criteria_error_txt <- NULL
|
AMR_env$meet_criteria_error_txt <- NULL
|
||||||
stop(error_txt, call. = FALSE) # don't use stop_() here, our pkg may not be loaded yet
|
stop(error_txt, call. = FALSE) # don't use stop_() here, our pkg may not be loaded yet
|
||||||
}
|
}
|
||||||
pkg_env$meet_criteria_error_txt <- NULL
|
AMR_env$meet_criteria_error_txt <- NULL
|
||||||
|
|
||||||
if (is.null(object)) {
|
if (is.null(object)) {
|
||||||
stop_if(allow_NULL == FALSE, "argument `", obj_name, "` must not be NULL", call = call_depth)
|
stop_if(allow_NULL == FALSE, "argument `", obj_name, "` must not be NULL", call = call_depth)
|
||||||
@ -990,9 +990,9 @@ message_not_thrown_before <- function(fn, ..., entire_session = FALSE) {
|
|||||||
# this is to prevent that messages/notes will be printed for every dplyr group or more than once per session
|
# this is to prevent that messages/notes will be printed for every dplyr group or more than once per session
|
||||||
# e.g. this would show a msg 4 times: example_isolates %>% group_by(ward) %>% filter(mo_is_gram_negative())
|
# e.g. this would show a msg 4 times: example_isolates %>% group_by(ward) %>% filter(mo_is_gram_negative())
|
||||||
salt <- gsub("[^a-zA-Z0-9|_-]", "?", substr(paste(c(...), sep = "|", collapse = "|"), 1, 512), perl = TRUE)
|
salt <- gsub("[^a-zA-Z0-9|_-]", "?", substr(paste(c(...), sep = "|", collapse = "|"), 1, 512), perl = TRUE)
|
||||||
not_thrown_before <- is.null(pkg_env[[paste0("thrown_msg.", fn, ".", salt)]]) ||
|
not_thrown_before <- is.null(AMR_env[[paste0("thrown_msg.", fn, ".", salt)]]) ||
|
||||||
!identical(
|
!identical(
|
||||||
pkg_env[[paste0("thrown_msg.", fn, ".", salt)]],
|
AMR_env[[paste0("thrown_msg.", fn, ".", salt)]],
|
||||||
unique_call_id(
|
unique_call_id(
|
||||||
entire_session = entire_session,
|
entire_session = entire_session,
|
||||||
match_fn = fn
|
match_fn = fn
|
||||||
@ -1003,7 +1003,7 @@ message_not_thrown_before <- function(fn, ..., entire_session = FALSE) {
|
|||||||
assign(
|
assign(
|
||||||
x = paste0("thrown_msg.", fn, ".", salt),
|
x = paste0("thrown_msg.", fn, ".", salt),
|
||||||
value = unique_call_id(entire_session = entire_session, match_fn = fn),
|
value = unique_call_id(entire_session = entire_session, match_fn = fn),
|
||||||
envir = pkg_env
|
envir = AMR_env
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
not_thrown_before
|
not_thrown_before
|
||||||
@ -1354,11 +1354,11 @@ percentage <- function(x, digits = NULL, ...) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
time_start_tracking <- function() {
|
time_start_tracking <- function() {
|
||||||
pkg_env$time_start <- round(as.double(Sys.time()) * 1000)
|
AMR_env$time_start <- round(as.double(Sys.time()) * 1000)
|
||||||
}
|
}
|
||||||
|
|
||||||
time_track <- function(name = NULL) {
|
time_track <- function(name = NULL) {
|
||||||
paste("(until now:", trimws(round(as.double(Sys.time()) * 1000) - pkg_env$time_start), "ms)")
|
paste("(until now:", trimws(round(as.double(Sys.time()) * 1000) - AMR_env$time_start), "ms)")
|
||||||
}
|
}
|
||||||
|
|
||||||
trimws2 <- function(..., whitespace = "[\u0009\u000A\u000B\u000C\u000D\u0020\u0085\u00A0\u1680\u180E\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u200B\u200C\u200D\u2028\u2029\u202F\u205F\u2060\u3000\uFEFF]") {
|
trimws2 <- function(..., whitespace = "[\u0009\u000A\u000B\u000C\u000D\u0020\u0085\u00A0\u1680\u180E\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u200B\u200C\u200D\u2028\u2029\u202F\u205F\u2060\u3000\uFEFF]") {
|
||||||
@ -1370,19 +1370,19 @@ trimws2 <- function(..., whitespace = "[\u0009\u000A\u000B\u000C\u000D\u0020\u00
|
|||||||
# Faster data.table implementations ----
|
# Faster data.table implementations ----
|
||||||
|
|
||||||
match <- function(x, ...) {
|
match <- function(x, ...) {
|
||||||
if (isTRUE(pkg_env$has_data.table) && is.character(x)) {
|
if (isTRUE(AMR_env$has_data.table) && is.character(x)) {
|
||||||
# data.table::chmatch() is 35% faster than base::match() for character
|
# data.table::chmatch() is 35% faster than base::match() for character
|
||||||
getExportedValue(name = "chmatch", ns = asNamespace("data.table"))(x, ...)
|
getExportedValue(name = "chmatch", ns = asNamespace("data.table"))(x, ...)
|
||||||
} else {
|
} else {
|
||||||
base::match(x, ...)
|
base::match(x, ...)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
`%in%` <- function(x, ...) {
|
`%in%` <- function(x, table) {
|
||||||
if (isTRUE(pkg_env$has_data.table) && is.character(x)) {
|
if (isTRUE(AMR_env$has_data.table) && is.character(x) && is.character(table)) {
|
||||||
# data.table::`%chin%`() is 20% faster than base::`%in%`() for character
|
# data.table::`%chin%`() is 20-50% faster than base::`%in%`() for character
|
||||||
getExportedValue(name = "%chin%", ns = asNamespace("data.table"))(x, ...)
|
getExportedValue(name = "%chin%", ns = asNamespace("data.table"))(x, table)
|
||||||
} else {
|
} else {
|
||||||
base::`%in%`(x, ...)
|
base::`%in%`(x, table)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1178,7 +1178,7 @@ eucast_dosage <- function(ab, administration = "iv", version_breakpoints = 11.0)
|
|||||||
meet_criteria(administration, allow_class = "character", is_in = dosage$administration[!is.na(dosage$administration)], has_length = 1)
|
meet_criteria(administration, allow_class = "character", is_in = dosage$administration[!is.na(dosage$administration)], has_length = 1)
|
||||||
meet_criteria(version_breakpoints, allow_class = c("numeric", "integer"), has_length = 1, is_in = as.double(names(EUCAST_VERSION_BREAKPOINTS)))
|
meet_criteria(version_breakpoints, allow_class = c("numeric", "integer"), has_length = 1, is_in = as.double(names(EUCAST_VERSION_BREAKPOINTS)))
|
||||||
|
|
||||||
# show used version_breakpoints number once per session (pkg_env will reload every session)
|
# show used version_breakpoints number once per session (AMR_env will reload every session)
|
||||||
if (message_not_thrown_before("eucast_dosage", "v", gsub("[^0-9]", "", version_breakpoints), entire_session = TRUE)) {
|
if (message_not_thrown_before("eucast_dosage", "v", gsub("[^0-9]", "", version_breakpoints), entire_session = TRUE)) {
|
||||||
message_(
|
message_(
|
||||||
"Dosages for antimicrobial drugs, as meant for ",
|
"Dosages for antimicrobial drugs, as meant for ",
|
||||||
|
@ -112,17 +112,17 @@ get_column_abx <- function(x,
|
|||||||
entire_session = FALSE,
|
entire_session = FALSE,
|
||||||
match_fn = fn
|
match_fn = fn
|
||||||
),
|
),
|
||||||
pkg_env$get_column_abx.call
|
AMR_env$get_column_abx.call
|
||||||
)) {
|
)) {
|
||||||
# so within the same call, within the same environment, we got here again.
|
# so within the same call, within the same environment, we got here again.
|
||||||
# but we could've come from another function within the same call, so now only check the columns that changed
|
# but we could've come from another function within the same call, so now only check the columns that changed
|
||||||
|
|
||||||
# first remove the columns that are not existing anymore
|
# first remove the columns that are not existing anymore
|
||||||
previous <- pkg_env$get_column_abx.out
|
previous <- AMR_env$get_column_abx.out
|
||||||
current <- previous[previous %in% colnames(x)]
|
current <- previous[previous %in% colnames(x)]
|
||||||
|
|
||||||
# then compare columns in current call with columns in original call
|
# then compare columns in current call with columns in original call
|
||||||
new_cols <- colnames(x)[!colnames(x) %in% pkg_env$get_column_abx.checked_cols]
|
new_cols <- colnames(x)[!colnames(x) %in% AMR_env$get_column_abx.checked_cols]
|
||||||
if (length(new_cols) > 0) {
|
if (length(new_cols) > 0) {
|
||||||
# these columns did not exist in the last call, so add them
|
# these columns did not exist in the last call, so add them
|
||||||
new_cols_rsi <- get_column_abx(x[, new_cols, drop = FALSE], reuse_previous_result = FALSE, info = FALSE, sort = FALSE)
|
new_cols_rsi <- get_column_abx(x[, new_cols, drop = FALSE], reuse_previous_result = FALSE, info = FALSE, sort = FALSE)
|
||||||
@ -132,11 +132,11 @@ get_column_abx <- function(x,
|
|||||||
}
|
}
|
||||||
|
|
||||||
# update pkg environment to improve speed on next run
|
# update pkg environment to improve speed on next run
|
||||||
pkg_env$get_column_abx.out <- current
|
AMR_env$get_column_abx.out <- current
|
||||||
pkg_env$get_column_abx.checked_cols <- colnames(x)
|
AMR_env$get_column_abx.checked_cols <- colnames(x)
|
||||||
|
|
||||||
# and return right values
|
# and return right values
|
||||||
return(pkg_env$get_column_abx.out)
|
return(AMR_env$get_column_abx.out)
|
||||||
}
|
}
|
||||||
|
|
||||||
meet_criteria(x, allow_class = "data.frame")
|
meet_criteria(x, allow_class = "data.frame")
|
||||||
@ -243,9 +243,9 @@ get_column_abx <- function(x,
|
|||||||
if (info == TRUE && all_okay == TRUE) {
|
if (info == TRUE && all_okay == TRUE) {
|
||||||
message_("No columns found.")
|
message_("No columns found.")
|
||||||
}
|
}
|
||||||
pkg_env$get_column_abx.call <- unique_call_id(entire_session = FALSE, match_fn = fn)
|
AMR_env$get_column_abx.call <- unique_call_id(entire_session = FALSE, match_fn = fn)
|
||||||
pkg_env$get_column_abx.checked_cols <- colnames(x.bak)
|
AMR_env$get_column_abx.checked_cols <- colnames(x.bak)
|
||||||
pkg_env$get_column_abx.out <- out
|
AMR_env$get_column_abx.out <- out
|
||||||
return(out)
|
return(out)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -320,9 +320,9 @@ get_column_abx <- function(x,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pkg_env$get_column_abx.call <- unique_call_id(entire_session = FALSE, match_fn = fn)
|
AMR_env$get_column_abx.call <- unique_call_id(entire_session = FALSE, match_fn = fn)
|
||||||
pkg_env$get_column_abx.checked_cols <- colnames(x.bak)
|
AMR_env$get_column_abx.checked_cols <- colnames(x.bak)
|
||||||
pkg_env$get_column_abx.out <- out
|
AMR_env$get_column_abx.out <- out
|
||||||
out
|
out
|
||||||
}
|
}
|
||||||
|
|
||||||
|
4
R/mdro.R
4
R/mdro.R
@ -1955,14 +1955,14 @@ run_custom_mdro_guideline <- function(df, guideline, info) {
|
|||||||
for (i in seq_len(n_dots)) {
|
for (i in seq_len(n_dots)) {
|
||||||
qry <- tryCatch(eval(parse(text = guideline[[i]]$query), envir = df, enclos = parent.frame()),
|
qry <- tryCatch(eval(parse(text = guideline[[i]]$query), envir = df, enclos = parent.frame()),
|
||||||
error = function(e) {
|
error = function(e) {
|
||||||
pkg_env$err_msg <- e$message
|
AMR_env$err_msg <- e$message
|
||||||
return("error")
|
return("error")
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
if (identical(qry, "error")) {
|
if (identical(qry, "error")) {
|
||||||
warning_("in `custom_mdro_guideline()`: rule ", i,
|
warning_("in `custom_mdro_guideline()`: rule ", i,
|
||||||
" (`", as.character(guideline[[i]]$query), "`) was ignored because of this error message: ",
|
" (`", as.character(guideline[[i]]$query), "`) was ignored because of this error message: ",
|
||||||
pkg_env$err_msg,
|
AMR_env$err_msg,
|
||||||
call = FALSE,
|
call = FALSE,
|
||||||
add_fn = font_red
|
add_fn = font_red
|
||||||
)
|
)
|
||||||
|
314
R/mo.R
314
R/mo.R
@ -180,8 +180,10 @@ as.mo <- function(x,
|
|||||||
# ignore cases that match the ignore pattern
|
# ignore cases that match the ignore pattern
|
||||||
x <- replace_ignore_pattern(x, ignore_pattern)
|
x <- replace_ignore_pattern(x, ignore_pattern)
|
||||||
|
|
||||||
|
x_lower <- tolower(x)
|
||||||
|
|
||||||
# WHONET: xxx = no growth
|
# WHONET: xxx = no growth
|
||||||
x[tolower(x) %in% c("", "xxx", "na", "nan")] <- NA_character_
|
x[x_lower %in% c("", "xxx", "na", "nan")] <- NA_character_
|
||||||
|
|
||||||
out <- rep(NA_character_, length(x))
|
out <- rep(NA_character_, length(x))
|
||||||
|
|
||||||
@ -195,9 +197,11 @@ as.mo <- function(x,
|
|||||||
# From MO code ----
|
# From MO code ----
|
||||||
out[is.na(out) & x %in% MO_lookup$mo] <- x[is.na(out) & x %in% MO_lookup$mo]
|
out[is.na(out) & x %in% MO_lookup$mo] <- x[is.na(out) & x %in% MO_lookup$mo]
|
||||||
# From full name ----
|
# From full name ----
|
||||||
out[is.na(out) & x %in% MO_lookup$fullname] <- MO_lookup$mo[match(x[is.na(out) & x %in% MO_lookup$fullname], MO_lookup$fullname)]
|
out[is.na(out) & x_lower %in% MO_lookup$fullname_lower] <- MO_lookup$mo[match(x_lower[is.na(out) & x_lower %in% MO_lookup$fullname_lower], MO_lookup$fullname_lower)]
|
||||||
|
# one exception: "Fungi" matches the kingdom, but instead it should return the 'unknown' code for fungi
|
||||||
|
out[out == "F_[KNG]_FUNGI"] <- "F_FUNGUS"
|
||||||
# From known codes ----
|
# From known codes ----
|
||||||
out[is.na(out) & x %in% AMR::microorganisms.codes$code] <- AMR::microorganisms.codes$mo[match(x[is.na(out) & x %in% AMR::microorganisms.codes$code], AMR::microorganisms.codes$code)]
|
out[is.na(out) & toupper(x) %in% AMR::microorganisms.codes$code] <- AMR::microorganisms.codes$mo[match(toupper(x)[is.na(out) & toupper(x) %in% AMR::microorganisms.codes$code], AMR::microorganisms.codes$code)]
|
||||||
# From SNOMED ----
|
# From SNOMED ----
|
||||||
if (any(is.na(out) & !is.na(x)) && any(is.na(out) & x %in% unlist(microorganisms$snomed), na.rm = TRUE)) {
|
if (any(is.na(out) & !is.na(x)) && any(is.na(out) & x %in% unlist(microorganisms$snomed), na.rm = TRUE)) {
|
||||||
# found this extremely fast gem here: https://stackoverflow.com/a/11002456/4575331
|
# found this extremely fast gem here: https://stackoverflow.com/a/11002456/4575331
|
||||||
@ -208,7 +212,7 @@ as.mo <- function(x,
|
|||||||
out[is.na(out)] <- convert_colloquial_input(x[is.na(out)])
|
out[is.na(out)] <- convert_colloquial_input(x[is.na(out)])
|
||||||
# From previous hits in this session ----
|
# From previous hits in this session ----
|
||||||
old <- out
|
old <- out
|
||||||
out[is.na(out) & paste(x, minimum_matching_score) %in% pkg_env$mo_previously_coerced$x] <- pkg_env$mo_previously_coerced$mo[match(paste(x, minimum_matching_score)[is.na(out) & paste(x, minimum_matching_score) %in% pkg_env$mo_previously_coerced$x], pkg_env$mo_previously_coerced$x)]
|
out[is.na(out) & paste(x, minimum_matching_score) %in% AMR_env$mo_previously_coerced$x] <- AMR_env$mo_previously_coerced$mo[match(paste(x, minimum_matching_score)[is.na(out) & paste(x, minimum_matching_score) %in% AMR_env$mo_previously_coerced$x], AMR_env$mo_previously_coerced$x)]
|
||||||
new <- out
|
new <- out
|
||||||
if (isTRUE(info) && message_not_thrown_before("as.mo", old, new, entire_session = TRUE) && any(is.na(old) & !is.na(new), na.rm = TRUE)) {
|
if (isTRUE(info) && message_not_thrown_before("as.mo", old, new, entire_session = TRUE) && any(is.na(old) & !is.na(new), na.rm = TRUE)) {
|
||||||
message_(
|
message_(
|
||||||
@ -220,8 +224,8 @@ as.mo <- function(x,
|
|||||||
# For all other input ----
|
# For all other input ----
|
||||||
if (any(is.na(out) & !is.na(x))) {
|
if (any(is.na(out) & !is.na(x))) {
|
||||||
# reset uncertainties
|
# reset uncertainties
|
||||||
pkg_env$mo_uncertainties <- pkg_env$mo_uncertainties[0, ]
|
AMR_env$mo_uncertainties <- AMR_env$mo_uncertainties[0, ]
|
||||||
pkg_env$mo_failures <- NULL
|
AMR_env$mo_failures <- NULL
|
||||||
|
|
||||||
# Laboratory systems: remove (translated) entries like "no growth", "not E. coli", etc.
|
# Laboratory systems: remove (translated) entries like "no growth", "not E. coli", etc.
|
||||||
x[trimws2(x) %like% translate_into_language("no .*growth", language = language)] <- NA_character_
|
x[trimws2(x) %like% translate_into_language("no .*growth", language = language)] <- NA_character_
|
||||||
@ -238,8 +242,6 @@ as.mo <- function(x,
|
|||||||
x_coerced <- vapply(FUN.VALUE = character(1), x_unique, function(x_search) {
|
x_coerced <- vapply(FUN.VALUE = character(1), x_unique, function(x_search) {
|
||||||
progress$tick()
|
progress$tick()
|
||||||
|
|
||||||
print(x_search)
|
|
||||||
|
|
||||||
# some required cleaning steps
|
# some required cleaning steps
|
||||||
x_out <- trimws2(x_search)
|
x_out <- trimws2(x_search)
|
||||||
# this applies the `remove_from_input` argument, which defaults to mo_cleaning_regex()
|
# this applies the `remove_from_input` argument, which defaults to mo_cleaning_regex()
|
||||||
@ -248,7 +250,11 @@ as.mo <- function(x,
|
|||||||
x_search_cleaned <- x_out
|
x_search_cleaned <- x_out
|
||||||
x_out <- tolower(x_out)
|
x_out <- tolower(x_out)
|
||||||
|
|
||||||
print(x_out)
|
# input must not be too short
|
||||||
|
if (nchar(x_out) < 3) {
|
||||||
|
return("UNKNOWN")
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
# take out the parts, split by space
|
# take out the parts, split by space
|
||||||
x_parts <- strsplit(gsub("-", " ", x_out, fixed = TRUE), " ", fixed = TRUE)[[1]]
|
x_parts <- strsplit(gsub("-", " ", x_out, fixed = TRUE), " ", fixed = TRUE)[[1]]
|
||||||
@ -282,7 +288,7 @@ as.mo <- function(x,
|
|||||||
} else {
|
} else {
|
||||||
mo_to_search <- MO_lookup$fullname[filtr]
|
mo_to_search <- MO_lookup$fullname[filtr]
|
||||||
}
|
}
|
||||||
pkg_env$mo_to_search <- mo_to_search
|
AMR_env$mo_to_search <- mo_to_search
|
||||||
# determine the matching score on the original search value
|
# determine the matching score on the original search value
|
||||||
m <- mo_matching_score(x = x_search_cleaned, n = mo_to_search)
|
m <- mo_matching_score(x = x_search_cleaned, n = mo_to_search)
|
||||||
if (is.null(minimum_matching_score)) {
|
if (is.null(minimum_matching_score)) {
|
||||||
@ -302,20 +308,21 @@ as.mo <- function(x,
|
|||||||
result_mo <- NA_character_
|
result_mo <- NA_character_
|
||||||
} else {
|
} else {
|
||||||
result_mo <- MO_lookup$mo[match(top_hits[1], MO_lookup$fullname)]
|
result_mo <- MO_lookup$mo[match(top_hits[1], MO_lookup$fullname)]
|
||||||
pkg_env$mo_uncertainties <- rbind(pkg_env$mo_uncertainties,
|
AMR_env$mo_uncertainties <- rbind(AMR_env$mo_uncertainties,
|
||||||
data.frame(
|
data.frame(
|
||||||
minimum_matching_score = ifelse(is.null(minimum_matching_score), "NULL", minimum_matching_score),
|
|
||||||
original_input = x_search,
|
original_input = x_search,
|
||||||
input = x_search_cleaned,
|
input = x_search_cleaned,
|
||||||
fullname = top_hits[1],
|
fullname = top_hits[1],
|
||||||
mo = result_mo,
|
mo = result_mo,
|
||||||
candidates = ifelse(length(top_hits) > 1, paste(top_hits[2:min(26, length(top_hits))], collapse = ", "), ""),
|
candidates = ifelse(length(top_hits) > 1, paste(top_hits[2:min(26, length(top_hits))], collapse = ", "), ""),
|
||||||
|
minimum_matching_score = ifelse(is.null(minimum_matching_score), "NULL", minimum_matching_score),
|
||||||
|
keep_synonyms = keep_synonyms,
|
||||||
stringsAsFactors = FALSE
|
stringsAsFactors = FALSE
|
||||||
),
|
),
|
||||||
stringsAsFactors = FALSE
|
stringsAsFactors = FALSE
|
||||||
)
|
)
|
||||||
# save to package env to save time for next time
|
# save to package env to save time for next time
|
||||||
pkg_env$mo_previously_coerced <- unique(rbind(pkg_env$mo_previously_coerced,
|
AMR_env$mo_previously_coerced <- unique(rbind(AMR_env$mo_previously_coerced,
|
||||||
data.frame(
|
data.frame(
|
||||||
x = paste(x_search, minimum_matching_score),
|
x = paste(x_search, minimum_matching_score),
|
||||||
mo = result_mo,
|
mo = result_mo,
|
||||||
@ -334,21 +341,21 @@ as.mo <- function(x,
|
|||||||
out[is.na(out)] <- x_coerced[match(x[is.na(out)], x_unique)]
|
out[is.na(out)] <- x_coerced[match(x[is.na(out)], x_unique)]
|
||||||
|
|
||||||
# Throw note about uncertainties ----
|
# Throw note about uncertainties ----
|
||||||
if (isTRUE(info) && NROW(pkg_env$mo_uncertainties) > 0) {
|
if (isTRUE(info) && NROW(AMR_env$mo_uncertainties) > 0) {
|
||||||
if (message_not_thrown_before("as.mo", "uncertainties", pkg_env$mo_uncertainties$original_input)) {
|
if (message_not_thrown_before("as.mo", "uncertainties", AMR_env$mo_uncertainties$original_input)) {
|
||||||
plural <- c("", "this")
|
plural <- c("", "this")
|
||||||
if (length(pkg_env$mo_uncertainties$original_input) > 1) {
|
if (length(AMR_env$mo_uncertainties$original_input) > 1) {
|
||||||
plural <- c("s", "these uncertainties")
|
plural <- c("s", "these uncertainties")
|
||||||
}
|
}
|
||||||
if (length(pkg_env$mo_uncertainties$original_input) <= 3) {
|
if (length(AMR_env$mo_uncertainties$original_input) <= 3) {
|
||||||
examples <- vector_and(paste0(
|
examples <- vector_and(paste0(
|
||||||
'"', pkg_env$mo_uncertainties$original_input,
|
'"', AMR_env$mo_uncertainties$original_input,
|
||||||
'" (assumed ', font_italic(pkg_env$mo_uncertainties$fullname, collapse = NULL), ")"
|
'" (assumed ', font_italic(AMR_env$mo_uncertainties$fullname, collapse = NULL), ")"
|
||||||
),
|
),
|
||||||
quotes = FALSE
|
quotes = FALSE
|
||||||
)
|
)
|
||||||
} else {
|
} else {
|
||||||
examples <- paste0(nr2char(length(pkg_env$mo_uncertainties$original_input)), " microorganism", plural[1])
|
examples <- paste0(nr2char(length(AMR_env$mo_uncertainties$original_input)), " microorganism", plural[1])
|
||||||
}
|
}
|
||||||
msg <- paste0(
|
msg <- paste0(
|
||||||
"Microorganism translation was uncertain for ", examples,
|
"Microorganism translation was uncertain for ", examples,
|
||||||
@ -364,18 +371,18 @@ as.mo <- function(x,
|
|||||||
gbif_matches[!gbif_matches %in% AMR::microorganisms$gbif] <- NA
|
gbif_matches[!gbif_matches %in% AMR::microorganisms$gbif] <- NA
|
||||||
lpsn_matches <- AMR::microorganisms$lpsn_renamed_to[match(out, AMR::microorganisms$mo)]
|
lpsn_matches <- AMR::microorganisms$lpsn_renamed_to[match(out, AMR::microorganisms$mo)]
|
||||||
lpsn_matches[!lpsn_matches %in% AMR::microorganisms$lpsn] <- NA
|
lpsn_matches[!lpsn_matches %in% AMR::microorganisms$lpsn] <- NA
|
||||||
pkg_env$mo_renamed <- list(old = out[!is.na(gbif_matches) | !is.na(lpsn_matches)],
|
AMR_env$mo_renamed <- list(old = out[!is.na(gbif_matches) | !is.na(lpsn_matches)],
|
||||||
gbif_matches = gbif_matches[!is.na(gbif_matches) | !is.na(lpsn_matches)],
|
gbif_matches = gbif_matches[!is.na(gbif_matches) | !is.na(lpsn_matches)],
|
||||||
lpsn_matches = lpsn_matches[!is.na(gbif_matches) | !is.na(lpsn_matches)])
|
lpsn_matches = lpsn_matches[!is.na(gbif_matches) | !is.na(lpsn_matches)])
|
||||||
if (isFALSE(keep_synonyms)) {
|
if (isFALSE(keep_synonyms)) {
|
||||||
out[which(!is.na(gbif_matches))] <- AMR::microorganisms$mo[match(gbif_matches[which(!is.na(gbif_matches))], AMR::microorganisms$gbif)]
|
out[which(!is.na(gbif_matches))] <- AMR::microorganisms$mo[match(gbif_matches[which(!is.na(gbif_matches))], AMR::microorganisms$gbif)]
|
||||||
out[which(!is.na(lpsn_matches))] <- AMR::microorganisms$mo[match(lpsn_matches[which(!is.na(lpsn_matches))], AMR::microorganisms$lpsn)]
|
out[which(!is.na(lpsn_matches))] <- AMR::microorganisms$mo[match(lpsn_matches[which(!is.na(lpsn_matches))], AMR::microorganisms$lpsn)]
|
||||||
if (isTRUE(info) && length(pkg_env$mo_renamed$old) > 0) {
|
if (isTRUE(info) && length(AMR_env$mo_renamed$old) > 0) {
|
||||||
print(mo_renamed(), extra_txt = " (use `keep_synonyms = TRUE` to leave uncorrected)")
|
print(mo_renamed(), extra_txt = " (use `keep_synonyms = TRUE` to leave uncorrected)")
|
||||||
}
|
}
|
||||||
} else if (is.null(getOption("AMR_keep_synonyms")) && length(pkg_env$mo_renamed$old) > 0 && message_not_thrown_before("as.mo", "keep_synonyms_warning", entire_session = TRUE)) {
|
} else if (is.null(getOption("AMR_keep_synonyms")) && length(AMR_env$mo_renamed$old) > 0 && message_not_thrown_before("as.mo", "keep_synonyms_warning", entire_session = TRUE)) {
|
||||||
# keep synonyms is TRUE, so check if any do have synonyms
|
# keep synonyms is TRUE, so check if any do have synonyms
|
||||||
warning_("Function `as.mo()` returned some old taxonomic names. Use `as.mo(..., keep_synonyms = FALSE)` to clean the input to currently accepted taxonomic names, or set the R option `AMR_keep_synonyms` to `FALSE`. This warning will be shown once per session.")
|
warning_("Function `as.mo()` returned ", nr2char(length(unique(AMR_env$mo_renamed$old))), " old taxonomic name", ifelse(length(unique(AMR_env$mo_renamed$old)) > 1, "s", ""), ". Use `as.mo(..., keep_synonyms = FALSE)` to clean the input to currently accepted taxonomic names, or set the R option `AMR_keep_synonyms` to `FALSE`. This warning will be shown once per session.")
|
||||||
}
|
}
|
||||||
|
|
||||||
# Apply Becker ----
|
# Apply Becker ----
|
||||||
@ -432,7 +439,10 @@ as.mo <- function(x,
|
|||||||
|
|
||||||
# All unknowns ----
|
# All unknowns ----
|
||||||
out[is.na(out) & !is.na(x)] <- "UNKNOWN"
|
out[is.na(out) & !is.na(x)] <- "UNKNOWN"
|
||||||
pkg_env$mo_failures <- unique(x[out == "UNKNOWN" & x != "UNKNOWN" & !is.na(x)])
|
AMR_env$mo_failures <- unique(x[out == "UNKNOWN" & x != "UNKNOWN" & !is.na(x)])
|
||||||
|
if (length(AMR_env$mo_failures) > 0) {
|
||||||
|
warning_("The following input could not be coerced and was returned as \"UNKNOWN\": ", vector_and(AMR_env$mo_failures, quotes = TRUE), ".\nYou can retrieve this list with `mo_failures()`.")
|
||||||
|
}
|
||||||
|
|
||||||
# Return class ----
|
# Return class ----
|
||||||
set_clean_class(out,
|
set_clean_class(out,
|
||||||
@ -440,12 +450,73 @@ as.mo <- function(x,
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# OTHER DOCUMENTED FUNCTIONS ----------------------------------------------
|
||||||
|
|
||||||
#' @rdname as.mo
|
#' @rdname as.mo
|
||||||
#' @export
|
#' @export
|
||||||
is.mo <- function(x) {
|
is.mo <- function(x) {
|
||||||
inherits(x, "mo")
|
inherits(x, "mo")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#' @rdname as.mo
|
||||||
|
#' @export
|
||||||
|
mo_uncertainties <- function() {
|
||||||
|
set_clean_class(AMR_env$mo_uncertainties, new_class = c("mo_uncertainties", "data.frame"))
|
||||||
|
}
|
||||||
|
|
||||||
|
#' @rdname as.mo
|
||||||
|
#' @export
|
||||||
|
mo_renamed <- function() {
|
||||||
|
x <- AMR_env$mo_renamed
|
||||||
|
|
||||||
|
x$new <- synonym_mo_to_accepted_mo(x$old)
|
||||||
|
mo_old <- AMR::microorganisms$fullname[match(x$old, AMR::microorganisms$mo)]
|
||||||
|
mo_new <- AMR::microorganisms$fullname[match(x$new, AMR::microorganisms$mo)]
|
||||||
|
ref_old <- AMR::microorganisms$ref[match(x$old, AMR::microorganisms$mo)]
|
||||||
|
ref_new <- AMR::microorganisms$ref[match(x$new, AMR::microorganisms$mo)]
|
||||||
|
|
||||||
|
df_renamed <- data.frame(old = mo_old,
|
||||||
|
new = mo_new,
|
||||||
|
ref_old = ref_old,
|
||||||
|
ref_new = ref_new,
|
||||||
|
stringsAsFactors = FALSE)
|
||||||
|
df_renamed <- unique(df_renamed)
|
||||||
|
df_renamed <- df_renamed[order(df_renamed$old), , drop = FALSE]
|
||||||
|
set_clean_class(df_renamed, new_class = c("mo_renamed", "data.frame"))
|
||||||
|
}
|
||||||
|
|
||||||
|
#' @rdname as.mo
|
||||||
|
#' @export
|
||||||
|
mo_failures <- function() {
|
||||||
|
AMR_env$mo_failures
|
||||||
|
}
|
||||||
|
|
||||||
|
#' @rdname as.mo
|
||||||
|
#' @export
|
||||||
|
mo_reset_session <- function() {
|
||||||
|
if (NROW(AMR_env$mo_previously_coerced) > 0) {
|
||||||
|
message_("Reset ", nr2char(NROW(AMR_env$mo_previously_coerced)), " previously matched input value", ifelse(NROW(AMR_env$mo_previously_coerced) > 1, "s", ""), ".")
|
||||||
|
AMR_env$mo_previously_coerced <- AMR_env$mo_previously_coerced[0, , drop = FALSE]
|
||||||
|
AMR_env$mo_uncertainties <- AMR_env$mo_uncertainties[0, , drop = FALSE]
|
||||||
|
} else {
|
||||||
|
message_("No previously matched input values to reset.")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#' @rdname as.mo
|
||||||
|
#' @export
|
||||||
|
mo_cleaning_regex <- function() {
|
||||||
|
paste0(
|
||||||
|
"(",
|
||||||
|
"[^A-Za-z- \\(\\)\\[\\]{}]+",
|
||||||
|
"|",
|
||||||
|
"([({]|\\[).+([})]|\\])",
|
||||||
|
"|",
|
||||||
|
"(^| )(e?spp|e?ssp|e?ss|e?sp|e?subsp|sube?species|biovar|biotype|serovar|e?species)[.]*( |$))")
|
||||||
|
}
|
||||||
|
|
||||||
|
# UNDOCUMENTED METHODS ----------------------------------------------------
|
||||||
|
|
||||||
# will be exported using s3_register() in R/zzz.R
|
# will be exported using s3_register() in R/zzz.R
|
||||||
pillar_shaft.mo <- function(x, ...) {
|
pillar_shaft.mo <- function(x, ...) {
|
||||||
out <- format(x)
|
out <- format(x)
|
||||||
@ -675,18 +746,6 @@ rep.mo <- function(x, ...) {
|
|||||||
y
|
y
|
||||||
}
|
}
|
||||||
|
|
||||||
#' @rdname as.mo
|
|
||||||
#' @export
|
|
||||||
mo_failures <- function() {
|
|
||||||
pkg_env$mo_failures
|
|
||||||
}
|
|
||||||
|
|
||||||
#' @rdname as.mo
|
|
||||||
#' @export
|
|
||||||
mo_uncertainties <- function() {
|
|
||||||
set_clean_class(pkg_env$mo_uncertainties, new_class = c("mo_uncertainties", "data.frame"))
|
|
||||||
}
|
|
||||||
|
|
||||||
#' @method print mo_uncertainties
|
#' @method print mo_uncertainties
|
||||||
#' @export
|
#' @export
|
||||||
#' @noRd
|
#' @noRd
|
||||||
@ -768,7 +827,13 @@ print.mo_uncertainties <- function(x, ...) {
|
|||||||
),
|
),
|
||||||
collapse = "\n"
|
collapse = "\n"
|
||||||
),
|
),
|
||||||
|
# Add "Based on {input}" text if it differs from the original input
|
||||||
ifelse(x[i, ]$original_input != x[i, ]$input, paste0(strrep(" ", nchar(x[i, ]$original_input) + 6), "Based on input \"", x[i, ]$input, "\""), ""),
|
ifelse(x[i, ]$original_input != x[i, ]$input, paste0(strrep(" ", nchar(x[i, ]$original_input) + 6), "Based on input \"", x[i, ]$input, "\""), ""),
|
||||||
|
# Add note if result was coerced to accepted taxonomic name
|
||||||
|
ifelse(x[i, ]$keep_synonyms == FALSE & x[i, ]$mo %in% AMR::microorganisms$mo[which(AMR::microorganisms$status == "synonym")],
|
||||||
|
paste0(strrep(" ", nchar(x[i, ]$original_input) + 6),
|
||||||
|
font_red(paste0("This old taxonomic name was converted to ", font_italic(AMR::microorganisms$fullname[match(synonym_mo_to_accepted_mo(x[i, ]$mo), AMR::microorganisms$mo)], collapse = NULL), " (", synonym_mo_to_accepted_mo(x[i, ]$mo), ")."), collapse = NULL)),
|
||||||
|
""),
|
||||||
candidates,
|
candidates,
|
||||||
sep = "\n"
|
sep = "\n"
|
||||||
)
|
)
|
||||||
@ -777,30 +842,6 @@ print.mo_uncertainties <- function(x, ...) {
|
|||||||
cat(txt)
|
cat(txt)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#' @rdname as.mo
|
|
||||||
#' @export
|
|
||||||
mo_renamed <- function() {
|
|
||||||
x <- pkg_env$mo_renamed
|
|
||||||
|
|
||||||
x$new <- ifelse(is.na(x$lpsn_matches),
|
|
||||||
AMR::microorganisms$mo[match(x$gbif_matches, AMR::microorganisms$gbif)],
|
|
||||||
AMR::microorganisms$mo[match(x$lpsn_matches, AMR::microorganisms$lpsn)])
|
|
||||||
mo_old <- AMR::microorganisms$fullname[match(x$old, AMR::microorganisms$mo)]
|
|
||||||
mo_new <- AMR::microorganisms$fullname[match(x$new, AMR::microorganisms$mo)]
|
|
||||||
ref_old <- AMR::microorganisms$ref[match(x$old, AMR::microorganisms$mo)]
|
|
||||||
ref_new <- AMR::microorganisms$ref[match(x$new, AMR::microorganisms$mo)]
|
|
||||||
|
|
||||||
df_renamed <- data.frame(old = mo_old,
|
|
||||||
new = mo_new,
|
|
||||||
ref_old = ref_old,
|
|
||||||
ref_new = ref_new,
|
|
||||||
stringsAsFactors = FALSE)
|
|
||||||
df_renamed <- unique(df_renamed)
|
|
||||||
df_renamed <- df_renamed[order(df_renamed$old), , drop = FALSE]
|
|
||||||
set_clean_class(df_renamed, new_class = c("mo_renamed", "data.frame"))
|
|
||||||
}
|
|
||||||
|
|
||||||
#' @method print mo_renamed
|
#' @method print mo_renamed
|
||||||
#' @export
|
#' @export
|
||||||
#' @noRd
|
#' @noRd
|
||||||
@ -812,6 +853,8 @@ print.mo_renamed <- function(x, extra_txt = "", n = 25, ...) {
|
|||||||
|
|
||||||
x$ref_old[!is.na(x$ref_old)] <- paste0(" (", gsub("et al.", font_italic("et al."), x$ref_old[!is.na(x$ref_old)], fixed = TRUE), ")")
|
x$ref_old[!is.na(x$ref_old)] <- paste0(" (", gsub("et al.", font_italic("et al."), x$ref_old[!is.na(x$ref_old)], fixed = TRUE), ")")
|
||||||
x$ref_new[!is.na(x$ref_new)] <- paste0(" (", gsub("et al.", font_italic("et al."), x$ref_new[!is.na(x$ref_new)], fixed = TRUE), ")")
|
x$ref_new[!is.na(x$ref_new)] <- paste0(" (", gsub("et al.", font_italic("et al."), x$ref_new[!is.na(x$ref_new)], fixed = TRUE), ")")
|
||||||
|
x$ref_old[is.na(x$ref_old)] <- " (author unknown)"
|
||||||
|
x$ref_new[is.na(x$ref_new)] <- " (author unknown)"
|
||||||
|
|
||||||
rows <- seq_len(min(NROW(x), n))
|
rows <- seq_len(min(NROW(x), n))
|
||||||
|
|
||||||
@ -825,28 +868,57 @@ print.mo_renamed <- function(x, extra_txt = "", n = 25, ...) {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
#' @rdname as.mo
|
# UNDOCUMENTED HELPER FUNCTIONS -------------------------------------------
|
||||||
#' @export
|
|
||||||
mo_reset_session <- function() {
|
|
||||||
if (NROW(pkg_env$mo_previously_coerced) > 0) {
|
|
||||||
message_("Reset ", NROW(pkg_env$mo_previously_coerced), " previously matched input values.")
|
|
||||||
pkg_env$mo_previously_coerced <- pkg_env$mo_previously_coerced[0, , drop = FALSE]
|
|
||||||
pkg_env$mo_uncertainties <- pkg_env$mo_uncertainties[0, , drop = FALSE]
|
|
||||||
} else {
|
|
||||||
message_("No previously matched input values to reset.")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#' @rdname as.mo
|
convert_colloquial_input <- function(x) {
|
||||||
#' @export
|
x.bak <- trimws2(x)
|
||||||
mo_cleaning_regex <- function() {
|
x <- trimws2(tolower(x))
|
||||||
paste0(
|
out <- rep(NA_character_, length(x))
|
||||||
"(",
|
|
||||||
"[^A-Za-z- \\(\\)\\[\\]{}]+",
|
# Streptococci, like GBS = Group B Streptococci (B_STRPT_GRPB)
|
||||||
"|",
|
out[x %like_case% "^g[abcdfghkl]s$"] <- gsub("g([abcdfghkl])s",
|
||||||
"([({]|\\[).+([})]|\\])",
|
"B_STRPT_GRP\\U\\1",
|
||||||
"|",
|
x[x %like_case% "^g[abcdfghkl]s$"],
|
||||||
"(^| )(e?spp|e?ssp|e?ss|e?sp|e?subsp|sube?species|biovar|biotype|serovar|e?species)( |$))")
|
perl = TRUE)
|
||||||
|
# Streptococci in different languages, like "estreptococos grupo B"
|
||||||
|
out[x %like_case% "strepto[ck]o[ck].* [abcdfghkl]$"] <- gsub(".*e?strepto[ck]o[ck].* ([abcdfghkl])$",
|
||||||
|
"B_STRPT_GRP\\U\\1",
|
||||||
|
x[x %like_case% "strepto[ck]o[ck].* [abcdfghkl]$"],
|
||||||
|
perl = TRUE)
|
||||||
|
out[x %like_case% "group [abcdfghkl] strepto[ck]o[ck]"] <- gsub(".*group ([abcdfghkl]) strepto[ck]o[ck].*",
|
||||||
|
"B_STRPT_GRP\\U\\1",
|
||||||
|
x[x %like_case% "group [abcdfghkl] strepto[ck]o[ck]"],
|
||||||
|
perl = TRUE)
|
||||||
|
out[x %like_case% "ha?emoly.*strep"] <- "B_STRPT_HAEM"
|
||||||
|
out[x %like_case% "(strepto.* mil+er+i|^mgs[^a-z]*$)"] <- "B_STRPT_MILL"
|
||||||
|
out[x %like_case% "mil+er+i gr"] <- "B_STRPT_MILL"
|
||||||
|
out[x %like_case% "((strepto|^s).* viridans|^vgs[^a-z]*$)"] <- "B_STRPT_VIRI"
|
||||||
|
|
||||||
|
# CoNS/CoPS in different languages (support for German, Dutch, Spanish, Portuguese)
|
||||||
|
out[x %like_case% "([ck]oagulas[ea].negatie?[vf]|^[ck]o?ns[^a-z]*$)"] <- "B_STPHY_CONS"
|
||||||
|
out[x %like_case% "([ck]oagulas[ea].positie?[vf]|^[ck]o?ps[^a-z]*$)"] <- "B_STPHY_COPS"
|
||||||
|
|
||||||
|
# Gram stains
|
||||||
|
out[x %like_case% "gram[ -]?neg.*|negatie?[vf]"] <- "B_GRAMN"
|
||||||
|
out[x %like_case% "gram[ -]?pos.*|positie?[vf]"] <- "B_GRAMP"
|
||||||
|
|
||||||
|
# yeasts and fungi
|
||||||
|
out[x %like_case% "^yeast?"] <- "F_YEAST"
|
||||||
|
out[x %like_case% "^fung(us|i)"] <- "F_FUNGUS"
|
||||||
|
|
||||||
|
# Salmonella city names, starting with capital species name - they are all S. enterica
|
||||||
|
out[x.bak %like_case% "[sS]almonella [A-Z][a-z]+ ?.*" & x %unlike% "typhi"] <- "B_SLMNL_ENTR"
|
||||||
|
out[x %like_case% "salmonella group"] <- "B_SLMNL"
|
||||||
|
|
||||||
|
# trivial names known to the field
|
||||||
|
out[x %like_case% "meningo[ck]o[ck]"] <- "B_NESSR_MNNG"
|
||||||
|
out[x %like_case% "gono[ck]o[ck]"] <- "B_NESSR_GNRR"
|
||||||
|
out[x %like_case% "pneumo[ck]o[ck]"] <- "B_STRPT_PNMN"
|
||||||
|
|
||||||
|
# unexisting names (xxx and con are WHONET codes)
|
||||||
|
out[x %in% c("con", "other", "none", "unknown") | x %like_case% "virus"] <- "UNKNOWN"
|
||||||
|
|
||||||
|
out
|
||||||
}
|
}
|
||||||
|
|
||||||
nr2char <- function(x) {
|
nr2char <- function(x) {
|
||||||
@ -861,17 +933,6 @@ nr2char <- function(x) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
get_mo_uncertainties <- function() {
|
|
||||||
remember <- list(uncertainties = pkg_env$mo_uncertainties)
|
|
||||||
# empty them, otherwise e.g. mo_shortname("Chlamydophila psittaci") will give 3 notes
|
|
||||||
pkg_env$mo_uncertainties <- NULL
|
|
||||||
remember
|
|
||||||
}
|
|
||||||
|
|
||||||
load_mo_uncertainties <- function(metadata) {
|
|
||||||
pkg_env$mo_uncertainties <- metadata$uncertainties
|
|
||||||
}
|
|
||||||
|
|
||||||
parse_and_convert <- function(x) {
|
parse_and_convert <- function(x) {
|
||||||
if (tryCatch(is.character(x) && all(Encoding(x) == "unknown", na.rm = TRUE), error = function(e) FALSE)) {
|
if (tryCatch(is.character(x) && all(Encoding(x) == "unknown", na.rm = TRUE), error = function(e) FALSE)) {
|
||||||
return(trimws2(x))
|
return(trimws2(x))
|
||||||
@ -1008,51 +1069,24 @@ repair_reference_df <- function(reference_df) {
|
|||||||
reference_df
|
reference_df
|
||||||
}
|
}
|
||||||
|
|
||||||
convert_colloquial_input <- function(x) {
|
get_mo_uncertainties <- function() {
|
||||||
x.bak <- trimws2(x)
|
remember <- list(uncertainties = AMR_env$mo_uncertainties)
|
||||||
x <- trimws2(tolower(x))
|
# empty them, otherwise e.g. mo_shortname("Chlamydophila psittaci") will give 3 notes
|
||||||
out <- rep(NA_character_, length(x))
|
AMR_env$mo_uncertainties <- NULL
|
||||||
|
remember
|
||||||
# Streptococci, like GBS = Group B Streptococci (B_STRPT_GRPB)
|
}
|
||||||
out[x %like_case% "^g[abcdfghkl]s$"] <- gsub("g([abcdfghkl])s",
|
|
||||||
"B_STRPT_GRP\\U\\1",
|
load_mo_uncertainties <- function(metadata) {
|
||||||
x[x %like_case% "^g[abcdfghkl]s$"],
|
AMR_env$mo_uncertainties <- metadata$uncertainties
|
||||||
perl = TRUE)
|
}
|
||||||
# Streptococci in different languages, like "estreptococos grupo B"
|
|
||||||
out[x %like_case% "strepto[ck]o[ck].* [abcdfghkl]$"] <- gsub(".*e?strepto[ck]o[ck].* ([abcdfghkl])$",
|
synonym_mo_to_accepted_mo <- function(x) {
|
||||||
"B_STRPT_GRP\\U\\1",
|
x_gbif <- AMR::microorganisms$gbif_renamed_to[match(x, AMR::microorganisms$mo)]
|
||||||
x[x %like_case% "strepto[ck]o[ck].* [abcdfghkl]$"],
|
x_lpsn <- AMR::microorganisms$lpsn_renamed_to[match(x, AMR::microorganisms$mo)]
|
||||||
perl = TRUE)
|
x_gbif[!x_gbif %in% AMR::microorganisms$gbif] <- NA
|
||||||
out[x %like_case% "group [abcdfghkl] strepto[ck]o[ck]"] <- gsub(".*group ([abcdfghkl]) strepto[ck]o[ck].*",
|
x_lpsn[!x_lpsn %in% AMR::microorganisms$lpsn] <- NA
|
||||||
"B_STRPT_GRP\\U\\1",
|
|
||||||
x[x %like_case% "group [abcdfghkl] strepto[ck]o[ck]"],
|
ifelse(is.na(x_lpsn),
|
||||||
perl = TRUE)
|
AMR::microorganisms$mo[match(x_gbif, AMR::microorganisms$gbif)],
|
||||||
out[x %like_case% "ha?emoly.*strep"] <- "B_STRPT_HAEM"
|
AMR::microorganisms$mo[match(x_lpsn, AMR::microorganisms$lpsn)])
|
||||||
out[x %like_case% "(strepto.* mil+er+i|^mgs[^a-z]*$)"] <- "B_STRPT_MILL"
|
|
||||||
out[x %like_case% "((strepto|^s).* viridans|^vgs[^a-z]*$)"] <- "B_STRPT_VIRI"
|
|
||||||
|
|
||||||
# CoNS/CoPS in different languages (support for German, Dutch, Spanish, Portuguese)
|
|
||||||
out[x %like_case% "([ck]oagulas[ea].negatie?[vf]|^[ck]o?ns[^a-z]*$)"] <- "B_STPHY_CONS"
|
|
||||||
out[x %like_case% "([ck]oagulas[ea].positie?[vf]|^[ck]o?ps[^a-z]*$)"] <- "B_STPHY_COPS"
|
|
||||||
|
|
||||||
# Gram stains
|
|
||||||
out[x %like_case% "gram[ -]?neg.*|negatie?[vf]"] <- "B_GRAMN"
|
|
||||||
out[x %like_case% "gram[ -]?pos.*|positie?[vf]"] <- "B_GRAMP"
|
|
||||||
|
|
||||||
# yeasts and fungi
|
|
||||||
out[x %like_case% "^yeast?"] <- "F_YEAST"
|
|
||||||
out[x %like_case% "^fung(us|i)"] <- "F_FUNGUS"
|
|
||||||
|
|
||||||
# Salmonella city names, starting with capital species name - they are all S. enterica
|
|
||||||
out[x.bak %like_case% "[sS]almonella [A-Z][a-z]+ ?.*" & x %unlike% "typhi"] <- "B_SLMNL_ENTR"
|
|
||||||
|
|
||||||
# trivial names known to the field
|
|
||||||
out[x %like_case% "meningo[ck]o[ck]"] <- "B_NESSR_MNNG"
|
|
||||||
out[x %like_case% "gono[ck]o[ck]"] <- "B_NESSR_GNRR"
|
|
||||||
out[x %like_case% "pneumo[ck]o[ck]"] <- "B_STRPT_PNMN"
|
|
||||||
|
|
||||||
# unexisting names (xxx and con are WHONET codes)
|
|
||||||
out[x %in% c("con", "other", "none", "unknown") | x %like_case% "virus"] <- "UNKNOWN"
|
|
||||||
|
|
||||||
out
|
|
||||||
}
|
}
|
||||||
|
@ -80,6 +80,9 @@ mo_matching_score <- function(x, n) {
|
|||||||
# only keep one space
|
# only keep one space
|
||||||
x <- gsub(" +", " ", x)
|
x <- gsub(" +", " ", x)
|
||||||
|
|
||||||
|
# start with a capital letter
|
||||||
|
substr(x, 1, 1) <- toupper(substr(x, 1, 1))
|
||||||
|
|
||||||
# n is always a taxonomically valid full name
|
# n is always a taxonomically valid full name
|
||||||
if (length(n) == 1) {
|
if (length(n) == 1) {
|
||||||
n <- rep(n, length(x))
|
n <- rep(n, length(x))
|
||||||
|
@ -33,13 +33,13 @@
|
|||||||
#' @param x any [character] (vector) that can be coerced to a valid microorganism code with [as.mo()]. Can be left blank for auto-guessing the column containing microorganism codes if used in a data set, see *Examples*.
|
#' @param x any [character] (vector) that can be coerced to a valid microorganism code with [as.mo()]. Can be left blank for auto-guessing the column containing microorganism codes if used in a data set, see *Examples*.
|
||||||
#' @param property one of the column names of the [microorganisms] data set: `r vector_or(colnames(microorganisms), sort = FALSE, quotes = TRUE)`, or must be `"shortname"`
|
#' @param property one of the column names of the [microorganisms] data set: `r vector_or(colnames(microorganisms), sort = FALSE, quotes = TRUE)`, or must be `"shortname"`
|
||||||
#' @inheritParams as.mo
|
#' @inheritParams as.mo
|
||||||
#' @param ... other arguments passed on to [as.mo()], such as 'allow_uncertain' and 'ignore_pattern'
|
#' @param ... other arguments passed on to [as.mo()], such as 'minimum_matching_score', 'ignore_pattern', and 'remove_from_input'
|
||||||
#' @param ab any (vector of) text that can be coerced to a valid antibiotic code with [as.ab()]
|
#' @param ab any (vector of) text that can be coerced to a valid antibiotic code with [as.ab()]
|
||||||
#' @param open browse the URL using [`browseURL()`][utils::browseURL()]
|
#' @param open browse the URL using [`browseURL()`][utils::browseURL()]
|
||||||
#' @details All functions will, at default, keep old taxonomic properties. Please refer to this example, knowing that *Escherichia blattae* was renamed to *Shimwellia blattae* in 2010:
|
#' @details All functions will, at default, keep old taxonomic properties. Please refer to this example, knowing that *Escherichia blattae* was renamed to *Shimwellia blattae* in 2010:
|
||||||
#' - `mo_name("Escherichia blattae")` will return `"Shimwellia blattae"` (with a message about the renaming)
|
#' - `mo_name("Escherichia blattae")` will return `"Shimwellia blattae"` (with a message about the renaming)
|
||||||
#' - `mo_ref("Escherichia blattae")` will return `"Burgess et al., 1973"` (with a message about the renaming)
|
#' - `mo_ref("Escherichia blattae", keep_synonyms = TRUE)` will return `"Burgess et al., 1973"` (with a warning about the renaming)
|
||||||
#' - `mo_ref("Shimwellia blattae")` will return `"Priest et al., 2010"` (without a message)
|
#' - `mo_ref("Shimwellia blattae", keep_synonyms = FALSE)` will return `"Priest et al., 2010"` (without a message)
|
||||||
#'
|
#'
|
||||||
#' The short name - [mo_shortname()] - almost always returns the first character of the genus and the full species, like `"E. coli"`. Exceptions are abbreviations of staphylococci (such as *"CoNS"*, Coagulase-Negative Staphylococci) and beta-haemolytic streptococci (such as *"GBS"*, Group B Streptococci). Please bear in mind that e.g. *E. coli* could mean *Escherichia coli* (kingdom of Bacteria) as well as *Entamoeba coli* (kingdom of Protozoa). Returning to the full name will be done using [as.mo()] internally, giving priority to bacteria and human pathogens, i.e. `"E. coli"` will be considered *Escherichia coli*. In other words, `mo_fullname(mo_shortname("Entamoeba coli"))` returns `"Escherichia coli"`.
|
#' The short name - [mo_shortname()] - almost always returns the first character of the genus and the full species, like `"E. coli"`. Exceptions are abbreviations of staphylococci (such as *"CoNS"*, Coagulase-Negative Staphylococci) and beta-haemolytic streptococci (such as *"GBS"*, Group B Streptococci). Please bear in mind that e.g. *E. coli* could mean *Escherichia coli* (kingdom of Bacteria) as well as *Entamoeba coli* (kingdom of Protozoa). Returning to the full name will be done using [as.mo()] internally, giving priority to bacteria and human pathogens, i.e. `"E. coli"` will be considered *Escherichia coli*. In other words, `mo_fullname(mo_shortname("Entamoeba coli"))` returns `"Escherichia coli"`.
|
||||||
#'
|
#'
|
||||||
@ -504,7 +504,7 @@ mo_is_intrinsic_resistant <- function(x, ab, language = get_AMR_locale(), keep_s
|
|||||||
stop_("length of `x` and `ab` must be equal, or one of them must be of length 1.")
|
stop_("length of `x` and `ab` must be equal, or one of them must be of length 1.")
|
||||||
}
|
}
|
||||||
|
|
||||||
# show used version number once per session (pkg_env will reload every session)
|
# show used version number once per session (AMR_env will reload every session)
|
||||||
if (message_not_thrown_before("mo_is_intrinsic_resistant", "version.mo", entire_session = TRUE)) {
|
if (message_not_thrown_before("mo_is_intrinsic_resistant", "version.mo", entire_session = TRUE)) {
|
||||||
message_(
|
message_(
|
||||||
"Determining intrinsic resistance based on ",
|
"Determining intrinsic resistance based on ",
|
||||||
|
@ -134,7 +134,7 @@ set_mo_source <- function(path, destination = getOption("AMR_mo_source", "~/mo_s
|
|||||||
stop_ifnot(interactive(), "this function can only be used in interactive mode, since it must ask for the user's permission to write a file to their file system.")
|
stop_ifnot(interactive(), "this function can only be used in interactive mode, since it must ask for the user's permission to write a file to their file system.")
|
||||||
|
|
||||||
if (is.null(path) || path %in% c(FALSE, "")) {
|
if (is.null(path) || path %in% c(FALSE, "")) {
|
||||||
pkg_env$mo_source <- NULL
|
AMR_env$mo_source <- NULL
|
||||||
if (file.exists(mo_source_destination)) {
|
if (file.exists(mo_source_destination)) {
|
||||||
unlink(mo_source_destination)
|
unlink(mo_source_destination)
|
||||||
message_("Removed mo_source file '", font_bold(mo_source_destination), "'",
|
message_("Removed mo_source file '", font_bold(mo_source_destination), "'",
|
||||||
@ -227,7 +227,7 @@ set_mo_source <- function(path, destination = getOption("AMR_mo_source", "~/mo_s
|
|||||||
attr(df, "mo_source_destination") <- mo_source_destination
|
attr(df, "mo_source_destination") <- mo_source_destination
|
||||||
attr(df, "mo_source_timestamp") <- file.mtime(path)
|
attr(df, "mo_source_timestamp") <- file.mtime(path)
|
||||||
saveRDS(df, mo_source_destination)
|
saveRDS(df, mo_source_destination)
|
||||||
pkg_env$mo_source <- df
|
AMR_env$mo_source <- df
|
||||||
message_(
|
message_(
|
||||||
action, " mo_source file '", font_bold(mo_source_destination),
|
action, " mo_source file '", font_bold(mo_source_destination),
|
||||||
"' (", formatted_filesize(mo_source_destination),
|
"' (", formatted_filesize(mo_source_destination),
|
||||||
@ -247,24 +247,24 @@ get_mo_source <- function(destination = getOption("AMR_mo_source", "~/mo_source.
|
|||||||
}
|
}
|
||||||
return(NULL)
|
return(NULL)
|
||||||
}
|
}
|
||||||
if (is.null(pkg_env$mo_source)) {
|
if (is.null(AMR_env$mo_source)) {
|
||||||
pkg_env$mo_source <- readRDS(path.expand(destination))
|
AMR_env$mo_source <- readRDS(path.expand(destination))
|
||||||
}
|
}
|
||||||
|
|
||||||
old_time <- attributes(pkg_env$mo_source)$mo_source_timestamp
|
old_time <- attributes(AMR_env$mo_source)$mo_source_timestamp
|
||||||
new_time <- file.mtime(attributes(pkg_env$mo_source)$mo_source_location)
|
new_time <- file.mtime(attributes(AMR_env$mo_source)$mo_source_location)
|
||||||
if (interactive() && !identical(old_time, new_time)) {
|
if (interactive() && !identical(old_time, new_time)) {
|
||||||
# source file was updated, also update reference
|
# source file was updated, also update reference
|
||||||
set_mo_source(attributes(pkg_env$mo_source)$mo_source_location)
|
set_mo_source(attributes(AMR_env$mo_source)$mo_source_location)
|
||||||
}
|
}
|
||||||
pkg_env$mo_source
|
AMR_env$mo_source
|
||||||
}
|
}
|
||||||
|
|
||||||
check_validity_mo_source <- function(x, refer_to_name = "`reference_df`", stop_on_error = TRUE) {
|
check_validity_mo_source <- function(x, refer_to_name = "`reference_df`", stop_on_error = TRUE) {
|
||||||
if (paste(deparse(substitute(x)), collapse = "") == "get_mo_source()") {
|
if (paste(deparse(substitute(x)), collapse = "") == "get_mo_source()") {
|
||||||
return(TRUE)
|
return(TRUE)
|
||||||
}
|
}
|
||||||
if (is.null(pkg_env$mo_source) && (identical(x, get_mo_source()))) {
|
if (is.null(AMR_env$mo_source) && (identical(x, get_mo_source()))) {
|
||||||
return(TRUE)
|
return(TRUE)
|
||||||
}
|
}
|
||||||
if (is.null(x)) {
|
if (is.null(x)) {
|
||||||
|
10
R/rsi.R
10
R/rsi.R
@ -906,8 +906,8 @@ as_rsi_method <- function(method_short,
|
|||||||
}
|
}
|
||||||
|
|
||||||
# write to verbose output
|
# write to verbose output
|
||||||
pkg_env$rsi_interpretation_history <- rbind(
|
AMR_env$rsi_interpretation_history <- rbind(
|
||||||
pkg_env$rsi_interpretation_history,
|
AMR_env$rsi_interpretation_history,
|
||||||
data.frame(
|
data.frame(
|
||||||
datetime = Sys.time(),
|
datetime = Sys.time(),
|
||||||
index = i,
|
index = i,
|
||||||
@ -964,7 +964,7 @@ as_rsi_method <- function(method_short,
|
|||||||
rsi_interpretation_history <- function(clean = FALSE) {
|
rsi_interpretation_history <- function(clean = FALSE) {
|
||||||
meet_criteria(clean, allow_class = "logical", has_length = 1)
|
meet_criteria(clean, allow_class = "logical", has_length = 1)
|
||||||
|
|
||||||
out.bak <- pkg_env$rsi_interpretation_history
|
out.bak <- AMR_env$rsi_interpretation_history
|
||||||
out <- out.bak
|
out <- out.bak
|
||||||
if (NROW(out) == 0) {
|
if (NROW(out) == 0) {
|
||||||
message_("No results to return. Run `as.rsi()` on MIC values or disk diffusion zones first to see a 'logbook' data set here.")
|
message_("No results to return. Run `as.rsi()` on MIC values or disk diffusion zones first to see a 'logbook' data set here.")
|
||||||
@ -975,9 +975,9 @@ rsi_interpretation_history <- function(clean = FALSE) {
|
|||||||
out$interpretation <- as.rsi(out$interpretation)
|
out$interpretation <- as.rsi(out$interpretation)
|
||||||
# keep stored for next use
|
# keep stored for next use
|
||||||
if (isTRUE(clean)) {
|
if (isTRUE(clean)) {
|
||||||
pkg_env$rsi_interpretation_history <- pkg_env$rsi_interpretation_history[0, , drop = FALSE]
|
AMR_env$rsi_interpretation_history <- AMR_env$rsi_interpretation_history[0, , drop = FALSE]
|
||||||
} else {
|
} else {
|
||||||
pkg_env$rsi_interpretation_history <- out.bak
|
AMR_env$rsi_interpretation_history <- out.bak
|
||||||
}
|
}
|
||||||
|
|
||||||
if (pkg_is_available("tibble", also_load = FALSE)) {
|
if (pkg_is_available("tibble", also_load = FALSE)) {
|
||||||
|
19
R/zzz.R
19
R/zzz.R
@ -28,23 +28,24 @@
|
|||||||
# ==================================================================== #
|
# ==================================================================== #
|
||||||
|
|
||||||
# set up package environment, used by numerous AMR functions
|
# set up package environment, used by numerous AMR functions
|
||||||
pkg_env <- new.env(hash = FALSE)
|
AMR_env <- new.env(hash = FALSE)
|
||||||
pkg_env$mo_uncertainties <- data.frame(
|
AMR_env$mo_uncertainties <- data.frame(
|
||||||
uncertainty = integer(0),
|
|
||||||
original_input = character(0),
|
original_input = character(0),
|
||||||
input = character(0),
|
input = character(0),
|
||||||
fullname = character(0),
|
fullname = character(0),
|
||||||
mo = character(0),
|
mo = character(0),
|
||||||
candidates = character(0),
|
candidates = character(0),
|
||||||
|
minimum_matching_score = integer(0),
|
||||||
|
keep_synonyms = logical(0),
|
||||||
stringsAsFactors = FALSE
|
stringsAsFactors = FALSE
|
||||||
)
|
)
|
||||||
pkg_env$mo_renamed <- list()
|
AMR_env$mo_renamed <- list()
|
||||||
pkg_env$mo_previously_coerced <- data.frame(
|
AMR_env$mo_previously_coerced <- data.frame(
|
||||||
x = character(0),
|
x = character(0),
|
||||||
mo = character(0),
|
mo = character(0),
|
||||||
stringsAsFactors = FALSE
|
stringsAsFactors = FALSE
|
||||||
)
|
)
|
||||||
pkg_env$rsi_interpretation_history <- data.frame(
|
AMR_env$rsi_interpretation_history <- data.frame(
|
||||||
datetime = Sys.time()[0],
|
datetime = Sys.time()[0],
|
||||||
index = integer(0),
|
index = integer(0),
|
||||||
ab_input = character(0),
|
ab_input = character(0),
|
||||||
@ -60,7 +61,7 @@ pkg_env$rsi_interpretation_history <- data.frame(
|
|||||||
interpretation = character(0),
|
interpretation = character(0),
|
||||||
stringsAsFactors = FALSE
|
stringsAsFactors = FALSE
|
||||||
)
|
)
|
||||||
pkg_env$has_data.table <- pkg_is_available("data.table", also_load = FALSE)
|
AMR_env$has_data.table <- pkg_is_available("data.table", also_load = FALSE)
|
||||||
|
|
||||||
# determine info icon for messages
|
# determine info icon for messages
|
||||||
utf8_supported <- isTRUE(base::l10n_info()$`UTF-8`)
|
utf8_supported <- isTRUE(base::l10n_info()$`UTF-8`)
|
||||||
@ -69,9 +70,9 @@ is_latex <- tryCatch(import_fn("is_latex_output", "knitr", error_on_fail = FALSE
|
|||||||
)
|
)
|
||||||
if (utf8_supported && !is_latex) {
|
if (utf8_supported && !is_latex) {
|
||||||
# \u2139 is a symbol officially named 'information source'
|
# \u2139 is a symbol officially named 'information source'
|
||||||
pkg_env$info_icon <- "\u2139"
|
AMR_env$info_icon <- "\u2139"
|
||||||
} else {
|
} else {
|
||||||
pkg_env$info_icon <- "i"
|
AMR_env$info_icon <- "i"
|
||||||
}
|
}
|
||||||
|
|
||||||
.onLoad <- function(lib, pkg) {
|
.onLoad <- function(lib, pkg) {
|
||||||
|
@ -109,7 +109,7 @@ read_EUCAST <- function(sheet, file, guideline_name) {
|
|||||||
for (i in seq_len(length(x))) {
|
for (i in seq_len(length(x))) {
|
||||||
y <- trimws2(unlist(strsplit(x[i], "(,|and)")))
|
y <- trimws2(unlist(strsplit(x[i], "(,|and)")))
|
||||||
y <- trimws2(gsub("[(].*[)]", "", y))
|
y <- trimws2(gsub("[(].*[)]", "", y))
|
||||||
y <- suppressWarnings(suppressMessages(as.mo(y, allow_uncertain = FALSE)))
|
y <- suppressWarnings(suppressMessages(as.mo(y)))
|
||||||
if (!is.null(mo_uncertainties())) uncertainties <<- add_uncertainties(uncertainties, mo_uncertainties())
|
if (!is.null(mo_uncertainties())) uncertainties <<- add_uncertainties(uncertainties, mo_uncertainties())
|
||||||
y <- y[!is.na(y) & y != "UNKNOWN"]
|
y <- y[!is.na(y) & y != "UNKNOWN"]
|
||||||
x[i] <- paste(y, collapse = "|")
|
x[i] <- paste(y, collapse = "|")
|
||||||
|
Binary file not shown.
@ -40,7 +40,7 @@ expect_equal(as.character(as.mo("Escherichia coli")), "B_ESCHR_COLI")
|
|||||||
expect_equal(as.character(as.mo(112283007)), "B_ESCHR_COLI")
|
expect_equal(as.character(as.mo(112283007)), "B_ESCHR_COLI")
|
||||||
expect_equal(as.character(as.mo("Escherichia species")), "B_ESCHR")
|
expect_equal(as.character(as.mo("Escherichia species")), "B_ESCHR")
|
||||||
expect_equal(as.character(as.mo("Escherichia")), "B_ESCHR")
|
expect_equal(as.character(as.mo("Escherichia")), "B_ESCHR")
|
||||||
expect_equal(as.character(as.mo("Esch spp.")), "B_ESCHR")
|
expect_equal(as.character(as.mo("Eschr spp.")), "B_ESCHR")
|
||||||
expect_equal(as.character(as.mo(" B_ESCHR_COLI ")), "B_ESCHR_COLI")
|
expect_equal(as.character(as.mo(" B_ESCHR_COLI ")), "B_ESCHR_COLI")
|
||||||
expect_equal(as.character(as.mo("e coli")), "B_ESCHR_COLI") # not Campylobacter
|
expect_equal(as.character(as.mo("e coli")), "B_ESCHR_COLI") # not Campylobacter
|
||||||
expect_equal(as.character(as.mo("klpn")), "B_KLBSL_PNMN")
|
expect_equal(as.character(as.mo("klpn")), "B_KLBSL_PNMN")
|
||||||
@ -53,9 +53,8 @@ expect_equal(as.character(as.mo("Strepto")), "B_STRPT")
|
|||||||
expect_equal(as.character(as.mo("Streptococcus")), "B_STRPT") # not Peptostreptoccus
|
expect_equal(as.character(as.mo("Streptococcus")), "B_STRPT") # not Peptostreptoccus
|
||||||
expect_equal(as.character(as.mo("Estreptococos grupo B")), "B_STRPT_GRPB")
|
expect_equal(as.character(as.mo("Estreptococos grupo B")), "B_STRPT_GRPB")
|
||||||
expect_equal(as.character(as.mo("Group B Streptococci")), "B_STRPT_GRPB")
|
expect_equal(as.character(as.mo("Group B Streptococci")), "B_STRPT_GRPB")
|
||||||
expect_equal(as.character(as.mo(c("mycobacterie", "mycobakterium"))), c("B_MYCBC", "B_MYCBC"))
|
|
||||||
|
|
||||||
expect_equal(as.character(as.mo(c("GAS", "GBS", "a MGS", "haemoly strep"))), c("B_STRPT_GRPA", "B_STRPT_GRPB", "B_STRPT_MILL", "B_STRPT_HAEM"))
|
expect_equal(as.character(as.mo(c("GAS", "GBS", "haemoly strep"))), c("B_STRPT_GRPA", "B_STRPT_GRPB", "B_STRPT_HAEM"))
|
||||||
|
|
||||||
|
|
||||||
expect_equal(as.character(as.mo("S. pyo")), "B_STRPT_PYGN") # not Actinomyces pyogenes
|
expect_equal(as.character(as.mo("S. pyo")), "B_STRPT_PYGN") # not Actinomyces pyogenes
|
||||||
@ -90,14 +89,13 @@ expect_identical(
|
|||||||
"staaur",
|
"staaur",
|
||||||
"S. aureus",
|
"S. aureus",
|
||||||
"S aureus",
|
"S aureus",
|
||||||
"Sthafilokkockus aureeuzz",
|
"Sthafilokkockus aureus",
|
||||||
"Staphylococcus aureus",
|
"Staphylococcus aureus",
|
||||||
"MRSA",
|
"MRSA",
|
||||||
"VISA",
|
"VISA"
|
||||||
"meth.-resis. S. aureus (MRSA)"
|
), minimum_matching_score = 0)
|
||||||
))
|
|
||||||
)),
|
)),
|
||||||
rep("B_STPHY_AURS", 10)
|
rep("B_STPHY_AURS", 9)
|
||||||
)
|
)
|
||||||
expect_identical(
|
expect_identical(
|
||||||
as.character(
|
as.character(
|
||||||
@ -148,8 +146,8 @@ expect_identical(as.character(as.mo("STCPYO", Lancefield = TRUE)), "B_STRPT_GRPA
|
|||||||
expect_identical(as.character(as.mo("S. agalactiae", Lancefield = FALSE)), "B_STRPT_AGLC")
|
expect_identical(as.character(as.mo("S. agalactiae", Lancefield = FALSE)), "B_STRPT_AGLC")
|
||||||
expect_identical(as.character(as.mo("S. agalactiae", Lancefield = TRUE)), "B_STRPT_GRPB") # group B
|
expect_identical(as.character(as.mo("S. agalactiae", Lancefield = TRUE)), "B_STRPT_GRPB") # group B
|
||||||
expect_identical(as.character(suppressWarnings(as.mo("estreptococos grupo B"))), "B_STRPT_GRPB")
|
expect_identical(as.character(suppressWarnings(as.mo("estreptococos grupo B"))), "B_STRPT_GRPB")
|
||||||
expect_identical(as.character(as.mo("S. equisimilis", Lancefield = FALSE)), "B_STRPT_DYSG_EQSM")
|
expect_identical(as.character(as.mo("S. equi", Lancefield = FALSE)), "B_STRPT_EQUI")
|
||||||
expect_identical(as.character(as.mo("S. equisimilis", Lancefield = TRUE)), "B_STRPT_GRPC") # group C
|
expect_identical(as.character(as.mo("S. equi", Lancefield = TRUE)), "B_STRPT_GRPC") # group C
|
||||||
# Enterococci must only be influenced if Lancefield = "all"
|
# Enterococci must only be influenced if Lancefield = "all"
|
||||||
expect_identical(as.character(as.mo("E. faecium", Lancefield = FALSE)), "B_ENTRC_FACM")
|
expect_identical(as.character(as.mo("E. faecium", Lancefield = FALSE)), "B_ENTRC_FACM")
|
||||||
expect_identical(as.character(as.mo("E. faecium", Lancefield = TRUE)), "B_ENTRC_FACM")
|
expect_identical(as.character(as.mo("E. faecium", Lancefield = TRUE)), "B_ENTRC_FACM")
|
||||||
@ -213,19 +211,17 @@ expect_equal(
|
|||||||
|
|
||||||
# check empty values
|
# check empty values
|
||||||
expect_equal(
|
expect_equal(
|
||||||
as.character(suppressWarnings(as.mo(""))),
|
as.character(as.mo("")),
|
||||||
NA_character_
|
NA_character_
|
||||||
)
|
)
|
||||||
|
|
||||||
# check less prevalent MOs
|
# check less prevalent MOs
|
||||||
expect_equal(as.character(as.mo("Gomphosphaeria aponina delicatula")), "B_GMPHS_APNN_DLCT")
|
expect_equal(as.character(as.mo("Actinosynnema pretiosum auranticum")), "B_ANNMA_PRTS_ARNT")
|
||||||
expect_equal(as.character(as.mo("Gomphosphaeria apo del")), "B_GMPHS_APNN_DLCT")
|
expect_equal(as.character(as.mo("Actinosynnema preti aura")), "B_ANNMA_PRTS_ARNT")
|
||||||
expect_equal(as.character(as.mo("G apo deli")), "B_GMPHS_APNN_DLCT")
|
expect_equal(as.character(as.mo("A pre aur")), "B_ANNMA_PRTS_ARNT")
|
||||||
expect_equal(as.character(as.mo("Gomphosphaeria aponina")), "B_GMPHS_APNN")
|
expect_equal(as.character(as.mo("Actinosynnema pretiosum")), "B_ANNMA_PRTS")
|
||||||
expect_equal(as.character(as.mo("Gomphosphaeria species")), "B_GMPHS")
|
expect_equal(as.character(as.mo("Actinosynnema")), "B_ANNMA")
|
||||||
expect_equal(as.character(as.mo("Gomphosphaeria")), "B_GMPHS")
|
expect_equal(as.character(as.mo(" B_ANNMA_PRTS ")), "B_ANNMA_PRTS")
|
||||||
expect_equal(as.character(as.mo(" B_GMPHS_APNN ")), "B_GMPHS_APNN")
|
|
||||||
expect_equal(as.character(as.mo("g aponina")), "B_GMPHS_APNN")
|
|
||||||
|
|
||||||
# check old names
|
# check old names
|
||||||
expect_equal(suppressMessages(as.character(as.mo("Escherichia blattae"))), "B_SHMWL_BLTT")
|
expect_equal(suppressMessages(as.character(as.mo("Escherichia blattae"))), "B_SHMWL_BLTT")
|
||||||
@ -250,7 +246,7 @@ expect_error(as.mo("E. coli", reference_df = data.frame(mycol = "TestingOwnID"))
|
|||||||
|
|
||||||
# combination of existing mo and other code
|
# combination of existing mo and other code
|
||||||
expect_identical(
|
expect_identical(
|
||||||
as.character(as.mo(c("B_ESCHR_COL", "ESCCOL"))),
|
suppressWarnings(as.character(as.mo(c("B_ESCHR_COL", "ESCCOL")))),
|
||||||
c("B_ESCHR_COLI", "B_ESCHR_COLI")
|
c("B_ESCHR_COLI", "B_ESCHR_COLI")
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -274,7 +270,7 @@ expect_equal(
|
|||||||
c("B_MCRBC_PRXY", "B_STRPT_SUIS", "B_RLTLL_TRRG")
|
c("B_MCRBC_PRXY", "B_STRPT_SUIS", "B_RLTLL_TRRG")
|
||||||
)
|
)
|
||||||
expect_stdout(print(mo_uncertainties()))
|
expect_stdout(print(mo_uncertainties()))
|
||||||
x <- as.mo("S. aur")
|
x <- as.mo("Sta. aur")
|
||||||
# many hits
|
# many hits
|
||||||
expect_stdout(print(mo_uncertainties()))
|
expect_stdout(print(mo_uncertainties()))
|
||||||
|
|
||||||
|
@ -4,9 +4,9 @@
|
|||||||
\alias{as.mo}
|
\alias{as.mo}
|
||||||
\alias{mo}
|
\alias{mo}
|
||||||
\alias{is.mo}
|
\alias{is.mo}
|
||||||
\alias{mo_failures}
|
|
||||||
\alias{mo_uncertainties}
|
\alias{mo_uncertainties}
|
||||||
\alias{mo_renamed}
|
\alias{mo_renamed}
|
||||||
|
\alias{mo_failures}
|
||||||
\alias{mo_reset_session}
|
\alias{mo_reset_session}
|
||||||
\alias{mo_cleaning_regex}
|
\alias{mo_cleaning_regex}
|
||||||
\title{Transform Input to a Microorganism Code}
|
\title{Transform Input to a Microorganism Code}
|
||||||
@ -27,12 +27,12 @@ as.mo(
|
|||||||
|
|
||||||
is.mo(x)
|
is.mo(x)
|
||||||
|
|
||||||
mo_failures()
|
|
||||||
|
|
||||||
mo_uncertainties()
|
mo_uncertainties()
|
||||||
|
|
||||||
mo_renamed()
|
mo_renamed()
|
||||||
|
|
||||||
|
mo_failures()
|
||||||
|
|
||||||
mo_reset_session()
|
mo_reset_session()
|
||||||
|
|
||||||
mo_cleaning_regex()
|
mo_cleaning_regex()
|
||||||
|
@ -3,9 +3,9 @@
|
|||||||
\docType{data}
|
\docType{data}
|
||||||
\name{microorganisms.codes}
|
\name{microorganisms.codes}
|
||||||
\alias{microorganisms.codes}
|
\alias{microorganisms.codes}
|
||||||
\title{Data Set with 5,508 Common Microorganism Codes}
|
\title{Data Set with 5,411 Common Microorganism Codes}
|
||||||
\format{
|
\format{
|
||||||
A \link[tibble:tibble]{tibble} with 5,508 observations and 2 variables:
|
A \link[tibble:tibble]{tibble} with 5,411 observations and 2 variables:
|
||||||
\itemize{
|
\itemize{
|
||||||
\item \code{code}\cr Commonly used code of a microorganism
|
\item \code{code}\cr Commonly used code of a microorganism
|
||||||
\item \code{mo}\cr ID of the microorganism in the \link{microorganisms} data set
|
\item \code{mo}\cr ID of the microorganism in the \link{microorganisms} data set
|
||||||
|
@ -261,7 +261,7 @@ mo_property(
|
|||||||
|
|
||||||
\item{keep_synonyms}{a \link{logical} to indicate if old, previously valid taxonomic names must be preserved and not be corrected to currently accepted names. The default is \code{FALSE}, which will return a note if old taxonomic names were processed. The default can be set with \code{options(AMR_keep_synonyms = TRUE)} or \code{options(AMR_keep_synonyms = FALSE)}.}
|
\item{keep_synonyms}{a \link{logical} to indicate if old, previously valid taxonomic names must be preserved and not be corrected to currently accepted names. The default is \code{FALSE}, which will return a note if old taxonomic names were processed. The default can be set with \code{options(AMR_keep_synonyms = TRUE)} or \code{options(AMR_keep_synonyms = FALSE)}.}
|
||||||
|
|
||||||
\item{...}{other arguments passed on to \code{\link[=as.mo]{as.mo()}}, such as 'allow_uncertain' and 'ignore_pattern'}
|
\item{...}{other arguments passed on to \code{\link[=as.mo]{as.mo()}}, such as 'minimum_matching_score', 'ignore_pattern', and 'remove_from_input'}
|
||||||
|
|
||||||
\item{ab}{any (vector of) text that can be coerced to a valid antibiotic code with \code{\link[=as.ab]{as.ab()}}}
|
\item{ab}{any (vector of) text that can be coerced to a valid antibiotic code with \code{\link[=as.ab]{as.ab()}}}
|
||||||
|
|
||||||
@ -285,8 +285,8 @@ Use these functions to return a specific property of a microorganism based on th
|
|||||||
All functions will, at default, keep old taxonomic properties. Please refer to this example, knowing that \emph{Escherichia blattae} was renamed to \emph{Shimwellia blattae} in 2010:
|
All functions will, at default, keep old taxonomic properties. Please refer to this example, knowing that \emph{Escherichia blattae} was renamed to \emph{Shimwellia blattae} in 2010:
|
||||||
\itemize{
|
\itemize{
|
||||||
\item \code{mo_name("Escherichia blattae")} will return \code{"Shimwellia blattae"} (with a message about the renaming)
|
\item \code{mo_name("Escherichia blattae")} will return \code{"Shimwellia blattae"} (with a message about the renaming)
|
||||||
\item \code{mo_ref("Escherichia blattae")} will return \code{"Burgess et al., 1973"} (with a message about the renaming)
|
\item \code{mo_ref("Escherichia blattae", keep_synonyms = TRUE)} will return \code{"Burgess et al., 1973"} (with a warning about the renaming)
|
||||||
\item \code{mo_ref("Shimwellia blattae")} will return \code{"Priest et al., 2010"} (without a message)
|
\item \code{mo_ref("Shimwellia blattae", keep_synonyms = FALSE)} will return \code{"Priest et al., 2010"} (without a message)
|
||||||
}
|
}
|
||||||
|
|
||||||
The short name - \code{\link[=mo_shortname]{mo_shortname()}} - almost always returns the first character of the genus and the full species, like \code{"E. coli"}. Exceptions are abbreviations of staphylococci (such as \emph{"CoNS"}, Coagulase-Negative Staphylococci) and beta-haemolytic streptococci (such as \emph{"GBS"}, Group B Streptococci). Please bear in mind that e.g. \emph{E. coli} could mean \emph{Escherichia coli} (kingdom of Bacteria) as well as \emph{Entamoeba coli} (kingdom of Protozoa). Returning to the full name will be done using \code{\link[=as.mo]{as.mo()}} internally, giving priority to bacteria and human pathogens, i.e. \code{"E. coli"} will be considered \emph{Escherichia coli}. In other words, \code{mo_fullname(mo_shortname("Entamoeba coli"))} returns \code{"Escherichia coli"}.
|
The short name - \code{\link[=mo_shortname]{mo_shortname()}} - almost always returns the first character of the genus and the full species, like \code{"E. coli"}. Exceptions are abbreviations of staphylococci (such as \emph{"CoNS"}, Coagulase-Negative Staphylococci) and beta-haemolytic streptococci (such as \emph{"GBS"}, Group B Streptococci). Please bear in mind that e.g. \emph{E. coli} could mean \emph{Escherichia coli} (kingdom of Bacteria) as well as \emph{Entamoeba coli} (kingdom of Protozoa). Returning to the full name will be done using \code{\link[=as.mo]{as.mo()}} internally, giving priority to bacteria and human pathogens, i.e. \code{"E. coli"} will be considered \emph{Escherichia coli}. In other words, \code{mo_fullname(mo_shortname("Entamoeba coli"))} returns \code{"Escherichia coli"}.
|
||||||
|
Loading…
Reference in New Issue
Block a user