mirror of
https://github.com/msberends/AMR.git
synced 2025-07-09 19:01:51 +02:00
styled, unit test fix
This commit is contained in:
181
R/guess_ab_col.R
181
R/guess_ab_col.R
@ -9,7 +9,7 @@
|
||||
# (c) 2018-2022 Berends MS, Luz CF et al. #
|
||||
# Developed at the University of Groningen, the Netherlands, in #
|
||||
# collaboration with non-profit organisations Certe Medical #
|
||||
# Diagnostics & Advice, and University Medical Center Groningen. #
|
||||
# Diagnostics & Advice, and University Medical Center Groningen. #
|
||||
# #
|
||||
# This R package is free software; you can freely use and distribute #
|
||||
# it for both personal and commercial purposes under the terms of the #
|
||||
@ -34,8 +34,10 @@
|
||||
#' @return A column name of `x`, or `NULL` when no result is found.
|
||||
#' @export
|
||||
#' @examples
|
||||
#' df <- data.frame(amox = "S",
|
||||
#' tetr = "R")
|
||||
#' df <- data.frame(
|
||||
#' amox = "S",
|
||||
#' tetr = "R"
|
||||
#' )
|
||||
#'
|
||||
#' guess_ab_col(df, "amoxicillin")
|
||||
#' # [1] "amox"
|
||||
@ -47,8 +49,10 @@
|
||||
#' # [1] "tetr"
|
||||
#'
|
||||
#' # WHONET codes
|
||||
#' df <- data.frame(AMP_ND10 = "R",
|
||||
#' AMC_ED20 = "S")
|
||||
#' df <- data.frame(
|
||||
#' AMP_ND10 = "R",
|
||||
#' AMC_ED20 = "S"
|
||||
#' )
|
||||
#' guess_ab_col(df, "ampicillin")
|
||||
#' # [1] "AMP_ND10"
|
||||
#' guess_ab_col(df, "J01CR02")
|
||||
@ -57,8 +61,10 @@
|
||||
#' # [1] "AMC_ED20"
|
||||
#'
|
||||
#' # Longer names take precedence:
|
||||
#' df <- data.frame(AMP_ED2 = "S",
|
||||
#' AMP_ED20 = "S")
|
||||
#' df <- data.frame(
|
||||
#' AMP_ED2 = "S",
|
||||
#' AMP_ED20 = "S"
|
||||
#' )
|
||||
#' guess_ab_col(df, "ampicillin")
|
||||
#' # [1] "AMP_ED20"
|
||||
guess_ab_col <- function(x = NULL, search_string = NULL, verbose = FALSE, only_rsi_columns = FALSE) {
|
||||
@ -66,30 +72,35 @@ guess_ab_col <- function(x = NULL, search_string = NULL, verbose = FALSE, only_r
|
||||
meet_criteria(search_string, allow_class = "character", has_length = 1, allow_NULL = TRUE)
|
||||
meet_criteria(verbose, allow_class = "logical", has_length = 1)
|
||||
meet_criteria(only_rsi_columns, allow_class = "logical", has_length = 1)
|
||||
|
||||
|
||||
if (is.null(x) & is.null(search_string)) {
|
||||
return(as.name("guess_ab_col"))
|
||||
} else {
|
||||
meet_criteria(search_string, allow_class = "character", has_length = 1, allow_NULL = FALSE)
|
||||
}
|
||||
|
||||
all_found <- get_column_abx(x, info = verbose, only_rsi_columns = only_rsi_columns,
|
||||
verbose = verbose, fn = "guess_ab_col")
|
||||
|
||||
all_found <- get_column_abx(x,
|
||||
info = verbose, only_rsi_columns = only_rsi_columns,
|
||||
verbose = verbose, fn = "guess_ab_col"
|
||||
)
|
||||
search_string.ab <- suppressWarnings(as.ab(search_string))
|
||||
ab_result <- unname(all_found[names(all_found) == search_string.ab])
|
||||
|
||||
|
||||
if (length(ab_result) == 0) {
|
||||
if (verbose == TRUE) {
|
||||
message_("No column found as input for ", search_string,
|
||||
" (", ab_name(search_string, language = NULL, tolower = TRUE), ").",
|
||||
add_fn = font_black,
|
||||
as_note = FALSE)
|
||||
" (", ab_name(search_string, language = NULL, tolower = TRUE), ").",
|
||||
add_fn = font_black,
|
||||
as_note = FALSE
|
||||
)
|
||||
}
|
||||
return(NULL)
|
||||
} else {
|
||||
if (verbose == TRUE) {
|
||||
message_("Using column '", font_bold(ab_result), "' as input for ", search_string,
|
||||
" (", ab_name(search_string, language = NULL, tolower = TRUE), ").")
|
||||
message_(
|
||||
"Using column '", font_bold(ab_result), "' as input for ", search_string,
|
||||
" (", ab_name(search_string, language = NULL, tolower = TRUE), ")."
|
||||
)
|
||||
}
|
||||
return(ab_result)
|
||||
}
|
||||
@ -106,16 +117,20 @@ get_column_abx <- function(x,
|
||||
reuse_previous_result = TRUE,
|
||||
fn = NULL) {
|
||||
# check if retrieved before, then get it from package environment
|
||||
if (isTRUE(reuse_previous_result) && identical(unique_call_id(entire_session = FALSE,
|
||||
match_fn = fn),
|
||||
pkg_env$get_column_abx.call)) {
|
||||
if (isTRUE(reuse_previous_result) && identical(
|
||||
unique_call_id(
|
||||
entire_session = FALSE,
|
||||
match_fn = fn
|
||||
),
|
||||
pkg_env$get_column_abx.call
|
||||
)) {
|
||||
# so within the same call, within the same environment, we got here again.
|
||||
# but we could've come from another function within the same call, so now only check the columns that changed
|
||||
|
||||
|
||||
# first remove the columns that no longer exist
|
||||
previous <- pkg_env$get_column_abx.out
|
||||
current <- previous[previous %in% colnames(x)]
|
||||
|
||||
|
||||
# then compare columns in current call with columns in original call
|
||||
new_cols <- colnames(x)[!colnames(x) %in% pkg_env$get_column_abx.checked_cols]
|
||||
if (length(new_cols) > 0) {
|
||||
@ -125,7 +140,7 @@ get_column_abx <- function(x,
|
||||
# order according to columns in current call
|
||||
current <- current[match(colnames(x)[colnames(x) %in% current], current)]
|
||||
}
|
||||
|
||||
|
||||
# update pkg environment to improve speed on next run
|
||||
pkg_env$get_column_abx.out <- current
|
||||
pkg_env$get_column_abx.checked_cols <- colnames(x)
|
||||
@ -133,7 +148,7 @@ get_column_abx <- function(x,
|
||||
# and return right values
|
||||
return(pkg_env$get_column_abx.out)
|
||||
}
|
||||
|
||||
|
||||
meet_criteria(x, allow_class = "data.frame")
|
||||
meet_criteria(soft_dependencies, allow_class = "character", allow_NULL = TRUE)
|
||||
meet_criteria(hard_dependencies, allow_class = "character", allow_NULL = TRUE)
|
||||
@ -141,11 +156,11 @@ get_column_abx <- function(x,
|
||||
meet_criteria(info, allow_class = "logical", has_length = 1)
|
||||
meet_criteria(only_rsi_columns, allow_class = "logical", has_length = 1)
|
||||
meet_criteria(sort, allow_class = "logical", has_length = 1)
|
||||
|
||||
|
||||
if (info == TRUE) {
|
||||
message_("Auto-guessing columns suitable for analysis", appendLF = FALSE, as_note = FALSE)
|
||||
}
|
||||
|
||||
|
||||
x <- as.data.frame(x, stringsAsFactors = FALSE)
|
||||
x.bak <- x
|
||||
if (only_rsi_columns == TRUE) {
|
||||
@ -156,8 +171,9 @@ get_column_abx <- function(x,
|
||||
# only test maximum of 10,000 values per column
|
||||
if (info == TRUE) {
|
||||
message_(" (using only ", font_bold("the first 10,000 rows"), ")...",
|
||||
appendLF = FALSE,
|
||||
as_note = FALSE)
|
||||
appendLF = FALSE,
|
||||
as_note = FALSE
|
||||
)
|
||||
}
|
||||
x <- x[1:10000, , drop = FALSE]
|
||||
} else if (info == TRUE) {
|
||||
@ -165,32 +181,36 @@ get_column_abx <- function(x,
|
||||
}
|
||||
|
||||
# only check columns that are a valid AB code, ATC code, name, abbreviation or synonym,
|
||||
# or already have the <rsi> class (as.rsi)
|
||||
# or already have the <rsi> class (as.rsi)
|
||||
# and that they have no more than 50% invalid values
|
||||
vectr_antibiotics <- unlist(AB_lookup$generalised_all)
|
||||
vectr_antibiotics <- vectr_antibiotics[!is.na(vectr_antibiotics) & nchar(vectr_antibiotics) >= 3]
|
||||
x_columns <- vapply(FUN.VALUE = character(1),
|
||||
colnames(x),
|
||||
function(col, df = x) {
|
||||
if (generalise_antibiotic_name(col) %in% vectr_antibiotics ||
|
||||
is.rsi(x[, col, drop = TRUE]) ||
|
||||
is.rsi.eligible(x[, col, drop = TRUE], threshold = 0.5)
|
||||
) {
|
||||
return(col)
|
||||
} else {
|
||||
return(NA_character_)
|
||||
}
|
||||
}, USE.NAMES = FALSE)
|
||||
|
||||
x_columns <- vapply(
|
||||
FUN.VALUE = character(1),
|
||||
colnames(x),
|
||||
function(col, df = x) {
|
||||
if (generalise_antibiotic_name(col) %in% vectr_antibiotics ||
|
||||
is.rsi(x[, col, drop = TRUE]) ||
|
||||
is.rsi.eligible(x[, col, drop = TRUE], threshold = 0.5)
|
||||
) {
|
||||
return(col)
|
||||
} else {
|
||||
return(NA_character_)
|
||||
}
|
||||
}, USE.NAMES = FALSE
|
||||
)
|
||||
|
||||
x_columns <- x_columns[!is.na(x_columns)]
|
||||
x <- x[, x_columns, drop = FALSE] # without drop = FALSE, x will become a vector when x_columns is length 1
|
||||
df_trans <- data.frame(colnames = colnames(x),
|
||||
abcode = suppressWarnings(as.ab(colnames(x), info = FALSE)),
|
||||
stringsAsFactors = FALSE)
|
||||
df_trans <- data.frame(
|
||||
colnames = colnames(x),
|
||||
abcode = suppressWarnings(as.ab(colnames(x), info = FALSE)),
|
||||
stringsAsFactors = FALSE
|
||||
)
|
||||
df_trans <- df_trans[!is.na(df_trans$abcode), , drop = FALSE]
|
||||
out <- as.character(df_trans$colnames)
|
||||
names(out) <- df_trans$abcode
|
||||
|
||||
|
||||
# add from self-defined dots (...):
|
||||
# such as get_column_abx(example_isolates %>% rename(thisone = AMX), amox = "thisone")
|
||||
all_okay <- TRUE
|
||||
@ -204,8 +224,9 @@ get_column_abx <- function(x,
|
||||
message_(" WARNING", add_fn = list(font_yellow, font_bold), as_note = FALSE)
|
||||
}
|
||||
warning_("Invalid antibiotic reference(s): ", vector_and(names(dots)[is.na(newnames)], quotes = FALSE),
|
||||
call = FALSE,
|
||||
immediate = TRUE)
|
||||
call = FALSE,
|
||||
immediate = TRUE
|
||||
)
|
||||
all_okay <- FALSE
|
||||
}
|
||||
unexisting_cols <- which(!vapply(FUN.VALUE = logical(1), dots, function(col) all(col %in% x_columns)))
|
||||
@ -214,7 +235,8 @@ get_column_abx <- function(x,
|
||||
message_(" ERROR", add_fn = list(font_red, font_bold), as_note = FALSE)
|
||||
}
|
||||
stop_("Column(s) not found: ", vector_and(unlist(dots[[unexisting_cols]]), quotes = FALSE),
|
||||
call = FALSE)
|
||||
call = FALSE
|
||||
)
|
||||
all_okay <- FALSE
|
||||
}
|
||||
# turn all NULLs to NAs
|
||||
@ -226,7 +248,7 @@ get_column_abx <- function(x,
|
||||
# delete NAs, this will make e.g. eucast_rules(... TMP = NULL) work to prevent TMP from being used
|
||||
out <- out[!is.na(out)]
|
||||
}
|
||||
|
||||
|
||||
if (length(out) == 0) {
|
||||
if (info == TRUE & all_okay == TRUE) {
|
||||
message_("No columns found.")
|
||||
@ -236,7 +258,7 @@ get_column_abx <- function(x,
|
||||
pkg_env$get_column_abx.out <- out
|
||||
return(out)
|
||||
}
|
||||
|
||||
|
||||
# sort on name
|
||||
if (sort == TRUE) {
|
||||
out <- out[order(names(out), out)]
|
||||
@ -246,7 +268,7 @@ get_column_abx <- function(x,
|
||||
if (length(duplicates) > 0) {
|
||||
all_okay <- FALSE
|
||||
}
|
||||
|
||||
|
||||
if (info == TRUE) {
|
||||
if (all_okay == TRUE) {
|
||||
message_(" OK.", add_fn = list(font_green, font_bold), as_note = FALSE)
|
||||
@ -255,27 +277,32 @@ get_column_abx <- function(x,
|
||||
}
|
||||
for (i in seq_len(length(out))) {
|
||||
if (verbose == TRUE & !names(out[i]) %in% names(duplicates)) {
|
||||
message_("Using column '", font_bold(out[i]), "' as input for ", names(out)[i],
|
||||
" (", ab_name(names(out)[i], tolower = TRUE, language = NULL), ").")
|
||||
message_(
|
||||
"Using column '", font_bold(out[i]), "' as input for ", names(out)[i],
|
||||
" (", ab_name(names(out)[i], tolower = TRUE, language = NULL), ")."
|
||||
)
|
||||
}
|
||||
if (names(out[i]) %in% names(duplicates)) {
|
||||
already_set_as <- out[unname(out) == unname(out[i])][1L]
|
||||
warning_(paste0("Column '", font_bold(out[i]), "' will not be used for ",
|
||||
names(out)[i], " (", ab_name(names(out)[i], tolower = TRUE, language = NULL), ")",
|
||||
", as it is already set for ",
|
||||
names(already_set_as), " (", ab_name(names(already_set_as), tolower = TRUE, language = NULL), ")"),
|
||||
add_fn = font_red,
|
||||
immediate = verbose)
|
||||
warning_(paste0(
|
||||
"Column '", font_bold(out[i]), "' will not be used for ",
|
||||
names(out)[i], " (", ab_name(names(out)[i], tolower = TRUE, language = NULL), ")",
|
||||
", as it is already set for ",
|
||||
names(already_set_as), " (", ab_name(names(already_set_as), tolower = TRUE, language = NULL), ")"
|
||||
),
|
||||
add_fn = font_red,
|
||||
immediate = verbose
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
out <- out[!duplicated(names(out))]
|
||||
out <- out[!duplicated(unname(out))]
|
||||
if (sort == TRUE) {
|
||||
out <- out[order(names(out), out)]
|
||||
}
|
||||
|
||||
|
||||
if (!is.null(hard_dependencies)) {
|
||||
hard_dependencies <- unique(hard_dependencies)
|
||||
if (!all(hard_dependencies %in% names(out))) {
|
||||
@ -290,14 +317,19 @@ get_column_abx <- function(x,
|
||||
if (info == TRUE & !all(soft_dependencies %in% names(out))) {
|
||||
# missing a soft dependency may lower the reliability
|
||||
missing <- soft_dependencies[!soft_dependencies %in% names(out)]
|
||||
missing_msg <- vector_and(paste0(ab_name(missing, tolower = TRUE, language = NULL),
|
||||
" (", font_bold(missing, collapse = NULL), ")"),
|
||||
quotes = FALSE)
|
||||
message_("Reliability would be improved if these antimicrobial results would be available too: ",
|
||||
missing_msg)
|
||||
missing_msg <- vector_and(paste0(
|
||||
ab_name(missing, tolower = TRUE, language = NULL),
|
||||
" (", font_bold(missing, collapse = NULL), ")"
|
||||
),
|
||||
quotes = FALSE
|
||||
)
|
||||
message_(
|
||||
"Reliability would be improved if these antimicrobial results would be available too: ",
|
||||
missing_msg
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
pkg_env$get_column_abx.call <- unique_call_id(entire_session = FALSE, match_fn = fn)
|
||||
pkg_env$get_column_abx.checked_cols <- colnames(x.bak)
|
||||
pkg_env$get_column_abx.out <- out
|
||||
@ -306,12 +338,12 @@ get_column_abx <- function(x,
|
||||
|
||||
get_ab_from_namespace <- function(x, cols_ab) {
|
||||
# cols_ab comes from get_column_abx()
|
||||
|
||||
|
||||
x <- trimws(unique(toupper(unlist(strsplit(x, ",")))))
|
||||
x_new <- character()
|
||||
for (val in x) {
|
||||
if (paste0("AB_", val) %in% ls(envir = asNamespace("AMR"))) {
|
||||
# antibiotic group names, as defined in data-raw/pre-commit-hook.R, such as `AB_CARBAPENEMS`
|
||||
# antibiotic group names, as defined in data-raw/_pre_commit_hook.R, such as `AB_CARBAPENEMS`
|
||||
val <- eval(parse(text = paste0("AB_", val)), envir = asNamespace("AMR"))
|
||||
} else if (val %in% AB_lookup$ab) {
|
||||
# separate drugs, such as `AMX`
|
||||
@ -333,7 +365,10 @@ generate_warning_abs_missing <- function(missing, any = FALSE) {
|
||||
} else {
|
||||
any_txt <- c("", "are")
|
||||
}
|
||||
warning_(paste0("Introducing NAs since", any_txt[1], " these antimicrobials ", any_txt[2], " required: ",
|
||||
vector_and(missing, quotes = FALSE)),
|
||||
immediate = TRUE)
|
||||
warning_(paste0(
|
||||
"Introducing NAs since", any_txt[1], " these antimicrobials ", any_txt[2], " required: ",
|
||||
vector_and(missing, quotes = FALSE)
|
||||
),
|
||||
immediate = TRUE
|
||||
)
|
||||
}
|
||||
|
Reference in New Issue
Block a user