1
0
mirror of https://github.com/msberends/AMR.git synced 2025-07-08 16:42:10 +02:00

(v1.5.0.9008) Internal data sets to pkg, speed for auto col determination

This commit is contained in:
2021-01-22 10:20:41 +01:00
parent 27f084d819
commit 1ba44776a1
87 changed files with 408 additions and 292 deletions

View File

@ -585,7 +585,7 @@ get_current_data <- function(arg_name, call) {
stop_("argument `", arg_name, "` is missing with no default", call = call)
}
}
# try a (base R) method, by going over the complete system call stack with sys.frames()
not_set <- TRUE
frms <- lapply(sys.frames(), function(el) {
@ -615,6 +615,7 @@ get_current_data <- function(arg_name, call) {
NULL
}
})
vars_df <- tryCatch(frms[[which(!vapply(FUN.VALUE = logical(1), frms, is.null))]], error = function(e) NULL)
if (is.data.frame(vars_df)) {
return(vars_df)

View File

@ -173,7 +173,15 @@ ab_selector <- function(ab_class, function_name) {
}
vars_df <- get_current_data(arg_name = NA, call = -3)
ab_in_data <- get_column_abx(vars_df, info = FALSE)
# improve speed here so it will only run once when e.g. in one select call
if (!identical(pkg_env$ab_selector, unique_call_id())) {
ab_in_data <- get_column_abx(vars_df, info = FALSE)
pkg_env$ab_selector <- unique_call_id()
pkg_env$ab_selector_cols <- ab_in_data
} else {
ab_in_data <- pkg_env$ab_selector_cols
}
if (length(ab_in_data) == 0) {
message_("No antimicrobial agents found.")
@ -199,13 +207,14 @@ ab_selector <- function(ab_class, function_name) {
} else {
agents_formatted <- paste0("column '", font_bold(agents, collapse = NULL), "'")
agents_names <- ab_name(names(agents), tolower = TRUE, language = NULL)
agents_formatted[agents != agents_names] <- paste0(agents_formatted[agents != agents_names],
" (", agents_names[agents != agents_names], ")")
need_name <- tolower(agents) != tolower(agents_names)
agents_formatted[need_name] <- paste0(agents_formatted[need_name],
" (", agents_names[need_name], ")")
message_("Selecting ", ab_group, ": ", paste(agents_formatted, collapse = ", "),
as_note = FALSE,
extra_indent = nchar(paste0("Selecting ", ab_group, ": ")))
}
remember_thrown_message(function_name)
}
remember_thrown_message(function_name)
}
unname(agents)
}

View File

@ -1049,6 +1049,7 @@ eucast_rules <- function(x,
warn_lacking_rsi_class <- unique(warn_lacking_rsi_class)
warning_("Not all columns with antimicrobial results are of class <rsi>. Transform them on beforehand, with e.g.:\n",
" ", x_deparsed, " %>% mutate_if(is.rsi.eligible, as.rsi)\n",
" ", x_deparsed, " %>% mutate(across((is.rsi.eligible), as.rsi))\n",
" ", x_deparsed, " %>% as.rsi(", ifelse(length(warn_lacking_rsi_class) == 1,
warn_lacking_rsi_class,
paste0(warn_lacking_rsi_class[1], ":", warn_lacking_rsi_class[length(warn_lacking_rsi_class)])),

View File

@ -137,25 +137,25 @@ get_column_abx <- function(x,
} else if (info == TRUE) {
message_("...", appendLF = FALSE, as_note = FALSE)
}
x_bak <- x
# only check columns that are a valid AB code, ATC code, name, abbreviation or synonym,
# or already have the <rsi> class (as.rsi)
# and that they have no more than 50% invalid values
vectr_antibiotics <- unique(toupper(unlist(antibiotics[, c("ab", "atc", "name", "abbreviations", "synonyms")])))
vectr_antibiotics <- vectr_antibiotics[!is.na(vectr_antibiotics) & nchar(vectr_antibiotics) >= 3]
x_columns <- vapply(FUN.VALUE = character(1), colnames(x), function(col, df = x_bak) {
x_columns <- vapply(FUN.VALUE = character(1), colnames(x), function(col, df = x) {
if (toupper(col) %in% vectr_antibiotics ||
is.rsi(as.data.frame(df, stringsAsFactors = FALSE)[, col, drop = TRUE]) ||
is.rsi.eligible(as.data.frame(df, stringsAsFactors = FALSE)[, col, drop = TRUE],
threshold = 0.5)) {
is.rsi(x[, col, drop = TRUE]) ||
is.rsi.eligible(x[, col, drop = TRUE], threshold = 0.5)
) {
return(col)
} else {
return(NA_character_)
}
})
x_columns <- x_columns[!is.na(x_columns)]
x <- x[, x_columns, drop = FALSE] # without drop = TRUE, x will become a vector when x_columns is length 1
df_trans <- data.frame(colnames = colnames(x),
abcode = suppressWarnings(as.ab(colnames(x), info = FALSE)),
stringsAsFactors = FALSE)
@ -217,7 +217,6 @@ get_column_abx <- function(x,
}
}
if (!is.null(hard_dependencies)) {
hard_dependencies <- unique(hard_dependencies)
if (!all(hard_dependencies %in% names(x))) {

View File

@ -26,9 +26,10 @@
#' Create Identifier of an Isolate
#'
#' This function will paste the microorganism code with all antimicrobial results into one string for each row in a data set. This is useful to compare isolates, e.g. between institutions or regions, when there is no genotyping available.
#' @inheritSection lifecycle Maturing Lifecycle
#' @inheritSection lifecycle Experimental Lifecycle
#' @inheritParams eucast_rules
#' @param cols_ab a character vector of column names of `x`, or (a combination with) an [antibiotic selector function]([ab_class()]), such as [carbapenems()] and [aminoglycosides()]
#' @rdname isolate_identifier
#' @export
#' @inheritSection AMR Read more on Our Website!
#' @examples
@ -43,7 +44,12 @@
isolate_identifier <- function(x, col_mo = NULL, cols_ab = NULL) {
if (is.null(col_mo)) {
col_mo <- search_type_in_df(x, "mo")
if (is.null(col_mo)) {
# no column found, then ignore the argument
col_mo <- FALSE
}
}
if (isFALSE(col_mo)) {
# is FALSE then ignore mo column
x$col_mo <- ""
@ -60,14 +66,77 @@ isolate_identifier <- function(x, col_mo = NULL, cols_ab = NULL) {
# tryCatch adds 4 calls, so total is -5
error = function(e) stop_(e$message, call = -5))
}
if (length(cols_ab) == 0) {
warning_("no columns with antimicrobial agents found", call = TRUE)
# cope with empty values
if (length(cols_ab) == 0 && all(x[, col_mo, drop = TRUE] == "", na.rm = TRUE)) {
warning_("in isolate_identifier(): no column with microorganisms and no columns with antimicrobial agents found", call = FALSE)
} else if (length(cols_ab) == 0) {
warning_("in isolate_identifier(): no columns with antimicrobial agents found", call = FALSE)
}
out <- x[, c(col_mo, cols_ab), drop = FALSE]
out <- do.call(paste, c(out, sep = ""))
out <- gsub("NA", ".", out, fixed = TRUE)
set_clean_class(out, new_class = c("isolate_identifier", "character"))
out <- set_clean_class(out, new_class = c("isolate_identifier", "character"))
attr(out, "ab") <- cols_ab
out
}
#' @method all.equal isolate_identifier
#' @rdname isolate_identifier
#' @export
all.equal.isolate_identifier <- function(target, current, ignore_empty_results = TRUE, ...) {
if (isTRUE(all.equal.character(target, current))) {
return(TRUE)
}
# vectorise over both target and current
if (length(target) > 1 && length(current) == 1) {
current <- rep(current, length(target))
} else if (length(current) > 1 && length(target) == 1) {
target <- rep(target, length(current))
}
stop_if(length(target) != length(current),
"length of `target` and `current` must be the same, or one must be 1")
get_vector <- function(x) {
if (grepl("|", x, fixed = TRUE)) {
mo <- gsub("(.*)\\|.*", "\\1", x)
} else {
mo <- NULL
}
if (grepl("|", x, fixed = TRUE)) {
ab <- gsub(".*\\|(.*)", "\\1", x)
} else {
ab <- x
}
ab <- strsplit(ab, "")[[1L]]
if (is.null(mo)) {
out <- as.character(ab)
names(out) <- attributes(x)$ab
} else {
out <- as.character(c(mo, ab))
names(out) <- c("mo", attributes(x)$ab)
}
out
}
# run it
for (i in seq_len(length(target))) {
if (i == 1) {
df <- data.frame(object = paste0(c("target[", "current["), i, "]"))
}
trgt <- get_vector(target[i])
crnt <- get_vector(current[i])
if (ignore_empty_results == TRUE) {
diff <- names(trgt[trgt != crnt & trgt != "." & crnt != "."])
} else {
diff <- names(trgt[trgt != crnt])
}
}
stop("THIS FUNCTION IS WORK IN PROGRESS AND NOT AVAILABLE IN THIS BETA VERSION")
}
#' @method print isolate_identifier

23
R/rsi.R
View File

@ -49,16 +49,16 @@
#' 2. For **interpreting minimum inhibitory concentration (MIC) values** according to EUCAST or CLSI. You must clean your MIC values first using [as.mic()], that also gives your columns the new data class [`mic`]. Also, be sure to have a column with microorganism names or codes. It will be found automatically, but can be set manually using the `mo` argument.
#' * Using `dplyr`, R/SI interpretation can be done very easily with either:
#' ```
#' your_data %>% mutate_if(is.mic, as.rsi) # until dplyr 1.0.0
#' your_data %>% mutate(across(where(is.mic), as.rsi)) # since dplyr 1.0.0
#' your_data %>% mutate_if(is.mic, as.rsi) # until dplyr 1.0.0
#' your_data %>% mutate(across((is.mic), as.rsi)) # since dplyr 1.0.0
#' ```
#' * Operators like "<=" will be stripped before interpretation. When using `conserve_capped_values = TRUE`, an MIC value of e.g. ">2" will always return "R", even if the breakpoint according to the chosen guideline is ">=4". This is to prevent that capped values from raw laboratory data would not be treated conservatively. The default behaviour (`conserve_capped_values = FALSE`) considers ">2" to be lower than ">=4" and might in this case return "S" or "I".
#'
#' 3. For **interpreting disk diffusion diameters** according to EUCAST or CLSI. You must clean your disk zones first using [as.disk()], that also gives your columns the new data class [`disk`]. Also, be sure to have a column with microorganism names or codes. It will be found automatically, but can be set manually using the `mo` argument.
#' * Using `dplyr`, R/SI interpretation can be done very easily with either:
#' ```
#' your_data %>% mutate_if(is.disk, as.rsi) # until dplyr 1.0.0
#' your_data %>% mutate(across(where(is.disk), as.rsi)) # since dplyr 1.0.0
#' your_data %>% mutate_if(is.disk, as.rsi) # until dplyr 1.0.0
#' your_data %>% mutate(across((is.disk), as.rsi)) # since dplyr 1.0.0
#' ```
#'
#' 4. For **interpreting a complete data set**, with automatic determination of MIC values, disk diffusion diameters, microorganism names or codes, and antimicrobial test results. This is done very simply by running `as.rsi(data)`.
@ -133,7 +133,7 @@
#' if (require("dplyr")) {
#' df %>% mutate_if(is.mic, as.rsi)
#' df %>% mutate_if(function(x) is.mic(x) | is.disk(x), as.rsi)
#' df %>% mutate(across(where(is.mic), as.rsi))
#' df %>% mutate(across((is.mic), as.rsi))
#' df %>% mutate_at(vars(AMP:TOB), as.rsi)
#' df %>% mutate(across(AMP:TOB, as.rsi))
#'
@ -179,7 +179,7 @@
#'
#' # note: from dplyr 1.0.0 on, this will be:
#' # example_isolates %>%
#' # mutate(across(where(is.rsi.eligible), as.rsi))
#' # mutate(across((is.rsi.eligible), as.rsi))
#' }
#' }
as.rsi <- function(x, ...) {
@ -202,14 +202,19 @@ is.rsi.eligible <- function(x, threshold = 0.05) {
"numeric",
"integer",
"mo",
"ab",
"Date",
"POSIXct",
"POSIXt",
"rsi",
"raw",
"hms")
"hms",
"mic",
"disk")
%in% class(x))) {
# no transformation needed
FALSE
return(FALSE)
} else if (!any(c("R", "S", "I") %in% x, na.rm = TRUE)) {
return(FALSE)
} else {
x <- x[!is.na(x) & !is.null(x) & !identical(x, "")]
if (length(x) == 0) {

View File

@ -148,7 +148,9 @@ rsi_calc <- function(...,
if (print_warning == TRUE) {
if (message_not_thrown_before("rsi_calc")) {
warning_("Increase speed by transforming to class <rsi> on beforehand: your_data %>% mutate_if(is.rsi.eligible, as.rsi)",
warning_("Increase speed by transforming to class <rsi> on beforehand:\n",
" your_data %>% mutate_if(is.rsi.eligible, as.rsi)\n",
" your_data %>% mutate(across((is.rsi.eligible), as.rsi))",
call = FALSE)
remember_thrown_message("rsi_calc")
}

Binary file not shown.

113
R/zzz.R
View File

@ -28,35 +28,6 @@ pkg_env <- new.env(hash = FALSE)
pkg_env$mo_failed <- character(0)
.onLoad <- function(libname, pkgname) {
assign(x = "AB_lookup",
value = create_AB_lookup(),
envir = asNamespace("AMR"))
assign(x = "MO_lookup",
value = create_MO_lookup(),
envir = asNamespace("AMR"))
assign(x = "MO.old_lookup",
value = create_MO.old_lookup(),
envir = asNamespace("AMR"))
assign(x = "INTRINSIC_R",
value = create_intr_resistance(),
envir = asNamespace("AMR"))
assign(x = "LANGUAGES_SUPPORTED",
value = sort(c("en", unique(translations_file$lang))),
envir = asNamespace("AMR"))
assign(x = "MO_CONS",
value = create_species_cons_cops("CoNS"),
envir = asNamespace("AMR"))
assign(x = "MO_COPS",
value = create_species_cons_cops("CoPS"),
envir = asNamespace("AMR"))
# Support for tibble headers (type_sum) and tibble columns content (pillar_shaft)
# without the need to depend on other packages. This was suggested by the
# developers of the vctrs package:
@ -102,89 +73,5 @@ pkg_env$mo_failed <- character(0)
font_bold("options(AMR_silentstart = TRUE)"), "]"))
}
create_intr_resistance <- function() {
# for mo_is_intrinsic_resistant() - saves a lot of time when executed on this vector
paste(AMR::microorganisms[match(AMR::intrinsic_resistant$microorganism, AMR::microorganisms$fullname), "mo", drop = TRUE],
AMR::antibiotics[match(AMR::intrinsic_resistant$antibiotic, AMR::antibiotics$name), "ab", drop = TRUE])
}
create_species_cons_cops <- function(type = c("CoNS", "CoPS")) {
# Determination of which staphylococcal species are CoNS/CoPS according to:
# - Becker et al. 2014, PMID 25278577
# - Becker et al. 2019, PMID 30872103
# - Becker et al. 2020, PMID 32056452
# this function returns class <mo>
MO_staph <- AMR::microorganisms
MO_staph <- MO_staph[which(MO_staph$genus == "Staphylococcus"), , drop = FALSE]
if (type == "CoNS") {
MO_staph[which(MO_staph$species %in% c("coagulase-negative", "argensis", "arlettae",
"auricularis", "caeli", "capitis", "caprae",
"carnosus", "chromogenes", "cohnii", "condimenti",
"debuckii", "devriesei", "edaphicus", "epidermidis",
"equorum", "felis", "fleurettii", "gallinarum",
"haemolyticus", "hominis", "jettensis", "kloosii",
"lentus", "lugdunensis", "massiliensis", "microti",
"muscae", "nepalensis", "pasteuri", "petrasii",
"pettenkoferi", "piscifermentans", "pseudoxylosus",
"rostri", "saccharolyticus", "saprophyticus",
"sciuri", "simulans", "stepanovicii", "succinus",
"vitulinus", "warneri", "xylosus")
| (MO_staph$species == "schleiferi" & MO_staph$subspecies %in% c("schleiferi", ""))),
"mo", drop = TRUE]
} else if (type == "CoPS") {
MO_staph[which(MO_staph$species %in% c("coagulase-positive",
"simiae", "agnetis",
"delphini", "lutrae",
"hyicus", "intermedius",
"pseudintermedius", "pseudointermedius",
"schweitzeri", "argenteus")
| (MO_staph$species == "schleiferi" & MO_staph$subspecies == "coagulans")),
"mo", drop = TRUE]
}
}
create_AB_lookup <- function() {
AB_lookup <- AMR::antibiotics
AB_lookup$generalised_name <- generalise_antibiotic_name(AB_lookup$name)
AB_lookup$generalised_synonyms <- lapply(AB_lookup$synonyms, generalise_antibiotic_name)
AB_lookup$generalised_abbreviations <- lapply(AB_lookup$abbreviations, generalise_antibiotic_name)
AB_lookup$generalised_loinc <- lapply(AB_lookup$loinc, generalise_antibiotic_name)
AB_lookup
}
create_MO_lookup <- function() {
MO_lookup <- AMR::microorganisms
MO_lookup$kingdom_index <- NA_real_
MO_lookup[which(MO_lookup$kingdom == "Bacteria" | MO_lookup$mo == "UNKNOWN"), "kingdom_index"] <- 1
MO_lookup[which(MO_lookup$kingdom == "Fungi"), "kingdom_index"] <- 2
MO_lookup[which(MO_lookup$kingdom == "Protozoa"), "kingdom_index"] <- 3
MO_lookup[which(MO_lookup$kingdom == "Archaea"), "kingdom_index"] <- 4
# all the rest
MO_lookup[which(is.na(MO_lookup$kingdom_index)), "kingdom_index"] <- 5
# use this paste instead of `fullname` to work with Viridans Group Streptococci, etc.
MO_lookup$fullname_lower <- tolower(trimws(paste(MO_lookup$genus,
MO_lookup$species,
MO_lookup$subspecies)))
ind <- MO_lookup$genus == "" | grepl("^[(]unknown ", MO_lookup$fullname)
MO_lookup[ind, "fullname_lower"] <- tolower(MO_lookup[ind, "fullname"])
MO_lookup$fullname_lower <- trimws(gsub("[^.a-z0-9/ \\-]+", "", MO_lookup$fullname_lower, perl = TRUE))
# add a column with only "e coli" like combinations
MO_lookup$g_species <- gsub("^([a-z])[a-z]+ ([a-z]+) ?.*", "\\1 \\2", MO_lookup$fullname_lower, perl = TRUE)
# so arrange data on prevalence first, then kingdom, then full name
MO_lookup[order(MO_lookup$prevalence, MO_lookup$kingdom_index, MO_lookup$fullname_lower), ]
}
create_MO.old_lookup <- function() {
MO.old_lookup <- AMR::microorganisms.old
MO.old_lookup$fullname_lower <- trimws(gsub("[^.a-z0-9/ \\-]+", "", tolower(trimws(MO.old_lookup$fullname))))
# add a column with only "e coli"-like combinations
MO.old_lookup$g_species <- trimws(gsub("^([a-z])[a-z]+ ([a-z]+) ?.*", "\\1 \\2", MO.old_lookup$fullname_lower))
# so arrange data on prevalence first, then full name
MO.old_lookup[order(MO.old_lookup$prevalence, MO.old_lookup$fullname_lower), ]
}