mirror of
https://github.com/msberends/AMR.git
synced 2025-07-09 04:02:19 +02:00
(v1.5.0.9014) only_rsi_columns, is.rsi.eligible improvement
This commit is contained in:
@ -1,6 +1,6 @@
|
||||
# ==================================================================== #
|
||||
# TITLE #
|
||||
# Antimicrobial Resistance (AMR) Analysis for R #
|
||||
# Antimicrobial Resistance (AMR) Data Analysis for R #
|
||||
# #
|
||||
# SOURCE #
|
||||
# https://github.com/msberends/AMR #
|
||||
@ -20,7 +20,7 @@
|
||||
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
|
||||
# #
|
||||
# Visit our website for the full manual and a complete tutorial about #
|
||||
# how to conduct AMR analysis: https://msberends.github.io/AMR/ #
|
||||
# how to conduct AMR data analysis: https://msberends.github.io/AMR/ #
|
||||
# ==================================================================== #
|
||||
|
||||
#' Guess Antibiotic Column
|
||||
@ -30,6 +30,7 @@
|
||||
#' @param x a [data.frame]
|
||||
#' @param search_string a text to search `x` for, will be checked with [as.ab()] if this value is not a column in `x`
|
||||
#' @param verbose a logical to indicate whether additional info should be printed
|
||||
#' @param only_rsi_columns a logical to indicate whether only antibiotic columns must be detected that were [transformed to class `<rsi>`]([rsi]) on beforehand. Defaults to `TRUE` if any column of `x` is of class `<rsi>`.
|
||||
#' @details You can look for an antibiotic (trade) name or abbreviation and it will search `x` and the [antibiotics] data set for any column containing a name or code of that antibiotic. **Longer columns names take precedence over shorter column names.**
|
||||
#' @return A column name of `x`, or `NULL` when no result is found.
|
||||
#' @export
|
||||
@ -62,35 +63,20 @@
|
||||
#' AMP_ED20 = "S")
|
||||
#' guess_ab_col(df, "ampicillin")
|
||||
#' # [1] "AMP_ED20"
|
||||
guess_ab_col <- function(x = NULL, search_string = NULL, verbose = FALSE) {
|
||||
guess_ab_col <- function(x = NULL, search_string = NULL, verbose = FALSE, only_rsi_columns = any(is.rsi(x))) {
|
||||
meet_criteria(x, allow_class = "data.frame", allow_NULL = TRUE)
|
||||
meet_criteria(search_string, allow_class = "character", has_length = 1, allow_NULL = TRUE)
|
||||
meet_criteria(verbose, allow_class = "logical", has_length = 1)
|
||||
|
||||
if (is.null(x) & is.null(search_string)) {
|
||||
return(as.name("guess_ab_col"))
|
||||
} else {
|
||||
meet_criteria(search_string, allow_class = "character", has_length = 1, allow_NULL = FALSE)
|
||||
}
|
||||
|
||||
if (search_string %in% colnames(x)) {
|
||||
ab_result <- search_string
|
||||
} else {
|
||||
search_string.ab <- suppressWarnings(as.ab(search_string))
|
||||
if (search_string.ab %in% colnames(x)) {
|
||||
ab_result <- colnames(x)[colnames(x) == search_string.ab][1L]
|
||||
|
||||
} else if (any(tolower(colnames(x)) %in% tolower(unlist(ab_property(search_string.ab, "abbreviations", language = NULL))))) {
|
||||
ab_result <- colnames(x)[tolower(colnames(x)) %in% tolower(unlist(ab_property(search_string.ab, "abbreviations", language = NULL)))][1L]
|
||||
|
||||
} else {
|
||||
# sort colnames on length - longest first
|
||||
cols <- colnames(x[, x %pm>% colnames() %pm>% nchar() %pm>% order() %pm>% rev()])
|
||||
df_trans <- data.frame(cols = cols,
|
||||
abs = suppressWarnings(as.ab(cols)),
|
||||
stringsAsFactors = FALSE)
|
||||
ab_result <- df_trans[which(df_trans$abs == search_string.ab), "cols"]
|
||||
ab_result <- ab_result[!is.na(ab_result)][1L]
|
||||
}
|
||||
}
|
||||
all_found <- get_column_abx(x, info = verbose, only_rsi_columns = only_rsi_columns, verbose = verbose)
|
||||
search_string.ab <- suppressWarnings(as.ab(search_string))
|
||||
ab_result <- unname(all_found[names(all_found) == search_string.ab])
|
||||
|
||||
if (length(ab_result) == 0) {
|
||||
if (verbose == TRUE) {
|
||||
@ -114,18 +100,24 @@ get_column_abx <- function(x,
|
||||
hard_dependencies = NULL,
|
||||
verbose = FALSE,
|
||||
info = TRUE,
|
||||
only_rsi_columns = FALSE,
|
||||
...) {
|
||||
meet_criteria(x, allow_class = "data.frame")
|
||||
meet_criteria(soft_dependencies, allow_class = "character", allow_NULL = TRUE)
|
||||
meet_criteria(hard_dependencies, allow_class = "character", allow_NULL = TRUE)
|
||||
meet_criteria(verbose, allow_class = "logical", has_length = 1)
|
||||
meet_criteria(info, allow_class = "logical", has_length = 1)
|
||||
meet_criteria(only_rsi_columns, allow_class = "logical", has_length = 1)
|
||||
|
||||
if (info == TRUE) {
|
||||
message_("Auto-guessing columns suitable for analysis", appendLF = FALSE, as_note = FALSE)
|
||||
}
|
||||
|
||||
x <- as.data.frame(x, stringsAsFactors = FALSE)
|
||||
if (only_rsi_columns == TRUE) {
|
||||
x <- x[, which(is.rsi(x)), drop = FALSE]
|
||||
}
|
||||
|
||||
if (NROW(x) > 10000) {
|
||||
# only test maximum of 10,000 values per column
|
||||
if (info == TRUE) {
|
||||
@ -141,21 +133,23 @@ get_column_abx <- function(x,
|
||||
# only check columns that are a valid AB code, ATC code, name, abbreviation or synonym,
|
||||
# or already have the <rsi> class (as.rsi)
|
||||
# and that they have no more than 50% invalid values
|
||||
vectr_antibiotics <- unique(toupper(unlist(antibiotics[, c("ab", "atc", "name", "abbreviations", "synonyms")])))
|
||||
vectr_antibiotics <- unlist(AB_lookup$generalised_all)
|
||||
vectr_antibiotics <- vectr_antibiotics[!is.na(vectr_antibiotics) & nchar(vectr_antibiotics) >= 3]
|
||||
x_columns <- vapply(FUN.VALUE = character(1), colnames(x), function(col, df = x) {
|
||||
if (toupper(col) %in% vectr_antibiotics ||
|
||||
is.rsi(x[, col, drop = TRUE]) ||
|
||||
is.rsi.eligible(x[, col, drop = TRUE], threshold = 0.5)
|
||||
) {
|
||||
return(col)
|
||||
} else {
|
||||
return(NA_character_)
|
||||
}
|
||||
})
|
||||
x_columns <- vapply(FUN.VALUE = character(1),
|
||||
colnames(x),
|
||||
function(col, df = x) {
|
||||
if (generalise_antibiotic_name(col) %in% vectr_antibiotics ||
|
||||
is.rsi(x[, col, drop = TRUE]) ||
|
||||
is.rsi.eligible(x[, col, drop = TRUE], threshold = 0.5)
|
||||
) {
|
||||
return(col)
|
||||
} else {
|
||||
return(NA_character_)
|
||||
}
|
||||
})
|
||||
|
||||
x_columns <- x_columns[!is.na(x_columns)]
|
||||
x <- x[, x_columns, drop = FALSE] # without drop = TRUE, x will become a vector when x_columns is length 1
|
||||
x <- x[, x_columns, drop = FALSE] # without drop = FALSE, x will become a vector when x_columns is length 1
|
||||
df_trans <- data.frame(colnames = colnames(x),
|
||||
abcode = suppressWarnings(as.ab(colnames(x), info = FALSE)),
|
||||
stringsAsFactors = FALSE)
|
||||
@ -164,7 +158,7 @@ get_column_abx <- function(x,
|
||||
names(x) <- df_trans$abcode
|
||||
|
||||
# add from self-defined dots (...):
|
||||
# such as get_column_abx(example_isolates %pm>% rename(thisone = AMX), amox = "thisone")
|
||||
# such as get_column_abx(example_isolates %>% rename(thisone = AMX), amox = "thisone")
|
||||
dots <- list(...)
|
||||
if (length(dots) > 0) {
|
||||
newnames <- suppressWarnings(as.ab(names(dots), info = FALSE))
|
||||
|
Reference in New Issue
Block a user