mirror of
https://github.com/msberends/AMR.git
synced 2025-05-01 07:03:48 +02:00
(v2.1.1.9253) parallel computing
This commit is contained in:
parent
abee2a954e
commit
4e7fca3b38
3
.github/workflows/lintr.yaml
vendored
3
.github/workflows/lintr.yaml
vendored
@ -69,8 +69,7 @@ jobs:
|
||||
|
||||
- name: Lint
|
||||
run: |
|
||||
# old: lintr::lint_package(linters = lintr::with_defaults(line_length_linter = NULL, trailing_whitespace_linter = NULL, object_name_linter = NULL, cyclocomp_linter = NULL, object_length_linter = lintr::object_length_linter(length = 50L)), exclusions = list("R/aa_helper_pm_functions.R"))
|
||||
# now get ALL linters, not just default ones
|
||||
# get ALL linters, not just default ones
|
||||
linters <- getNamespaceExports(asNamespace("lintr"))
|
||||
linters <- sort(linters[grepl("_linter$", linters)])
|
||||
# lose deprecated
|
||||
|
@ -1,6 +1,6 @@
|
||||
Package: AMR
|
||||
Version: 2.1.1.9252
|
||||
Date: 2025-04-25
|
||||
Version: 2.1.1.9253
|
||||
Date: 2025-04-26
|
||||
Title: Antimicrobial Resistance Data Analysis
|
||||
Description: Functions to simplify and standardise antimicrobial resistance (AMR)
|
||||
data analysis and to work with microbial and antimicrobial properties by
|
||||
@ -47,6 +47,7 @@ Suggests:
|
||||
ggplot2,
|
||||
knitr,
|
||||
openxlsx,
|
||||
parallelly,
|
||||
pillar,
|
||||
progress,
|
||||
readxl,
|
||||
|
3
NEWS.md
3
NEWS.md
@ -1,4 +1,4 @@
|
||||
# AMR 2.1.1.9252
|
||||
# AMR 2.1.1.9253
|
||||
|
||||
*(this beta version will eventually become v3.0. We're happy to reach a new major milestone soon, which will be all about the new One Health support! Install this beta using [the instructions here](https://amr-for-r.org/#get-this-package).)*
|
||||
|
||||
@ -47,6 +47,7 @@ This package now supports not only tools for AMR data analysis in clinical setti
|
||||
|
||||
## Changed
|
||||
* SIR interpretation
|
||||
* Support for parallel computing using the `parallel` package (part of base R). Use `as.sir(your_data, parallel = TRUE)` to run SIR interpretation using multiple cores.
|
||||
* It is now possible to use column names for arguments `guideline`, `ab`, `mo`, and `uti`: `as.sir(..., ab = "column1", mo = "column2", uti = "column3")`. This greatly improves the flexibility for users.
|
||||
* Users can now set their own criteria (using regular expressions) as to what should be considered S, I, R, SDD, and NI.
|
||||
* To get quantitative values, `as.double()` on a `sir` object will return 1 for S, 2 for SDD/I, and 3 for R (NI will become `NA`). Other functions using `sir` classes (e.g., `summary()`) are updated to reflect the change to contain NI and SDD.
|
||||
|
@ -1589,6 +1589,28 @@ readRDS_AMR <- function(file, refhook = NULL) {
|
||||
readRDS(con, refhook = refhook)
|
||||
}
|
||||
|
||||
get_n_cores <- function(max_cores = Inf) {
|
||||
if (pkg_is_available("parallelly", min_version = "0.8.0", also_load = FALSE)) {
|
||||
available_cores <- import_fn("availableCores", "parallelly")
|
||||
n_cores <- min(available_cores(), na.rm = TRUE)
|
||||
} else {
|
||||
# `parallel` is part of base R since 2.14.0, but detectCores() is not very precise on exotic systems like Docker and quota-set Linux environments
|
||||
n_cores <- parallel::detectCores()[1]
|
||||
if (is.na(n_cores)) {
|
||||
n_cores <- 1
|
||||
}
|
||||
}
|
||||
max_cores <- floor(max_cores)
|
||||
if (max_cores == 0) {
|
||||
n_cores <- 1
|
||||
} else if (max_cores < 0) {
|
||||
n_cores <- max(1, n_cores - abs(max_cores))
|
||||
} else if (max_cores > 0) {
|
||||
n_cores <- min(n_cores, max_cores)
|
||||
}
|
||||
n_cores
|
||||
}
|
||||
|
||||
# Faster data.table implementations ----
|
||||
|
||||
match <- function(x, table, ...) {
|
||||
|
2
R/ab.R
2
R/ab.R
@ -139,7 +139,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, language = get_AMR_locale(),
|
||||
if (toupper(paste(abnames, collapse = " ")) %in% AMR_env$AB_lookup$generalised_name) {
|
||||
# if the found values combined is a valid AB, return that
|
||||
found <- AMR_env$AB_lookup$ab[match(toupper(paste(abnames, collapse = " ")), AMR_env$AB_lookup$generalised_name)][1]
|
||||
} else {
|
||||
} else if (isTRUE(info)) {
|
||||
message_(
|
||||
"More than one result was found for item ", index, ": ",
|
||||
vector_and(abnames, quotes = FALSE)
|
||||
|
@ -573,6 +573,8 @@ antibiogram.default <- function(x,
|
||||
for (ab in abx) {
|
||||
# make sure they are SIR columns
|
||||
x[, ab] <- as.sir(x[, ab, drop = TRUE])
|
||||
# set NI as NA
|
||||
x[[ab]][x[[ab]] == "NI"] <- NA_sir_
|
||||
}
|
||||
new_colname <- paste0(trimws(abx), collapse = sep)
|
||||
if (length(abx) == 1) {
|
||||
@ -584,7 +586,7 @@ antibiogram.default <- function(x,
|
||||
} else {
|
||||
S_values <- "S"
|
||||
}
|
||||
other_values <- setdiff(c("S", "SDD", "I", "R", "NI"), S_values)
|
||||
other_values <- setdiff(c("S", "SDD", "I", "R"), S_values)
|
||||
x_transposed <- as.list(as.data.frame(t(x[, abx, drop = FALSE]), stringsAsFactors = FALSE))
|
||||
if (isTRUE(only_all_tested)) {
|
||||
x[new_colname] <- as.sir(vapply(FUN.VALUE = character(1), x_transposed, function(x) ifelse(anyNA(x), NA_character_, ifelse(any(x %in% S_values), "S", "R")), USE.NAMES = FALSE))
|
||||
|
150
R/sir.R
150
R/sir.R
@ -88,6 +88,9 @@
|
||||
#'
|
||||
#' # for veterinary breakpoints, also set `host`:
|
||||
#' your_data %>% mutate_if(is.mic, as.sir, host = "column_with_animal_species", guideline = "CLSI")
|
||||
#'
|
||||
#' # fast processing with parallel computing:
|
||||
#' as.sir(your_data, ..., parallel = TRUE)
|
||||
#' ```
|
||||
#' * Operators like "<=" will be stripped before interpretation. When using `capped_mic_handling = "conservative"`, an MIC value of e.g. ">2" will always return "R", even if the breakpoint according to the chosen guideline is ">=4". This is to prevent that capped values from raw laboratory data would not be treated conservatively. The default behaviour (`capped_mic_handling = "standard"`) considers ">2" to be lower than ">=4" and might in this case return "S" or "I".
|
||||
#' * **Note:** When using CLSI as the guideline, MIC values must be log2-based doubling dilutions. Values not in this format, will be automatically rounded up to the nearest log2 level as CLSI instructs, and a warning will be thrown.
|
||||
@ -102,6 +105,9 @@
|
||||
#'
|
||||
#' # for veterinary breakpoints, also set `host`:
|
||||
#' your_data %>% mutate_if(is.disk, as.sir, host = "column_with_animal_species", guideline = "CLSI")
|
||||
#'
|
||||
#' # fast processing with parallel computing:
|
||||
#' as.sir(your_data, ..., parallel = TRUE)
|
||||
#' ```
|
||||
#'
|
||||
#' 4. For **interpreting a complete data set**, with automatic determination of MIC values, disk diffusion diameters, microorganism names or codes, and antimicrobial test results. This is done very simply by running `as.sir(your_data)`.
|
||||
@ -129,14 +135,18 @@
|
||||
#' options(AMR_guideline = NULL)
|
||||
#' ```
|
||||
#'
|
||||
#' For veterinary guidelines, these might be the best options:
|
||||
#' ### Working with Veterinary Breakpoints
|
||||
#'
|
||||
#' When using veterinary breakpoints (i.e., setting `breakpoint_type = "animal"`), a column with animal species must be available or set manually using the `host` argument. The column must contain names like "dogs", "cats", "cattle", "swine", "horses", "poultry", or "aquatic". Other animal names like "goats", "rabbits", or "monkeys" are also recognised but may not be available in all guidelines. Matching is case-insensitive and accepts Latin-based synonyms (e.g., "bovine" for cattle and "canine" for dogs).
|
||||
#'
|
||||
#' Regarding choice of veterinary guidelines, these might be the best options to set before analysis:
|
||||
#'
|
||||
#' ```
|
||||
#' options(AMR_guideline = "CLSI")
|
||||
#' options(AMR_breakpoint_type = "animal")
|
||||
#' ```
|
||||
#'
|
||||
###### TODO #187 When applying veterinary breakpoints (by setting `host` or by setting `breakpoint_type = "animal"`), the [CLSI VET09 guideline](https://clsi.org/standards/products/veterinary-medicine/documents/vet09/) will be applied to cope with missing animal species-specific breakpoints.
|
||||
#' ###### TODO #187 When applying veterinary breakpoints (by setting `host` or by setting `breakpoint_type = "animal"`), the [CLSI VET09 guideline](https://clsi.org/standards/products/veterinary-medicine/documents/vet09/) will be applied to cope with missing animal species-specific breakpoints.
|
||||
#'
|
||||
#' ### After Interpretation
|
||||
#'
|
||||
@ -657,6 +667,8 @@ as.sir.disk <- function(x,
|
||||
}
|
||||
|
||||
#' @rdname as.sir
|
||||
#' @param parallel A [logical] to indicate if parallel computing must be used, defaults to `FALSE`.
|
||||
#' @param max_cores Maximum number of cores to use if `parallel = TRUE`. Use a negative value to subtract that number from the available number of cores, e.g. a value of `-2` on an 8-core machine means that 6 cores will be used. Defaults to `-1`. The available number of cores are detected using [parallelly::availableCores()] if that package is installed, and base \R's [parallel::detectCores()] otherwise.
|
||||
#' @export
|
||||
as.sir.data.frame <- function(x,
|
||||
...,
|
||||
@ -673,6 +685,8 @@ as.sir.data.frame <- function(x,
|
||||
host = NULL,
|
||||
verbose = FALSE,
|
||||
info = TRUE,
|
||||
parallel = FALSE,
|
||||
max_cores = -1,
|
||||
conserve_capped_values = NULL) {
|
||||
meet_criteria(x, allow_class = "data.frame") # will also check for dimensions > 0
|
||||
meet_criteria(col_mo, allow_class = "character", is_in = colnames(x), allow_NULL = TRUE)
|
||||
@ -688,6 +702,9 @@ as.sir.data.frame <- function(x,
|
||||
meet_criteria(host, allow_class = c("character", "factor"), allow_NULL = TRUE, allow_NA = TRUE)
|
||||
meet_criteria(verbose, allow_class = "logical", has_length = 1)
|
||||
meet_criteria(info, allow_class = "logical", has_length = 1)
|
||||
meet_criteria(parallel, allow_class = "logical", has_length = 1)
|
||||
meet_criteria(max_cores, allow_class = c("numeric", "integer"), has_length = 1)
|
||||
|
||||
x.bak <- x
|
||||
for (i in seq_len(ncol(x))) {
|
||||
# don't keep factors, overwriting them is hard
|
||||
@ -788,7 +805,7 @@ as.sir.data.frame <- function(x,
|
||||
return(FALSE)
|
||||
}
|
||||
if (length(sel) == 0 || (length(sel) > 0 && ab %in% sel)) {
|
||||
ab_coerced <- suppressWarnings(as.ab(ab))
|
||||
ab_coerced <- suppressWarnings(as.ab(ab, info = info))
|
||||
if (is.na(ab_coerced) || (length(sel) > 0 & !ab %in% sel)) {
|
||||
# not even a valid AB code
|
||||
return(FALSE)
|
||||
@ -818,19 +835,25 @@ as.sir.data.frame <- function(x,
|
||||
if (is.null(col_mo.bak)) {
|
||||
col_mo <- search_type_in_df(x = x, type = "mo")
|
||||
}
|
||||
x_mo <- as.mo(x[, col_mo, drop = TRUE])
|
||||
x_mo <- as.mo(x[, col_mo, drop = TRUE], info = info)
|
||||
}
|
||||
|
||||
for (i in seq_along(ab_cols)) {
|
||||
# set up parallel computing
|
||||
n_cores <- get_n_cores(max_cores = max_cores)
|
||||
|
||||
run_as_sir_column <- function(i) {
|
||||
ab_col <- ab_cols[i]
|
||||
out <- list(result = NULL, log = NULL)
|
||||
|
||||
if (types[i] == "mic") {
|
||||
x[, ab_cols[i]] <- x %pm>%
|
||||
pm_pull(ab_cols[i]) %pm>%
|
||||
result <- x %pm>%
|
||||
pm_pull(ab_col) %pm>%
|
||||
as.character() %pm>%
|
||||
as.mic() %pm>%
|
||||
as.sir(
|
||||
mo = x_mo,
|
||||
mo.bak = x[, col_mo, drop = TRUE],
|
||||
ab = ab_cols[i],
|
||||
ab = ab_col,
|
||||
guideline = guideline,
|
||||
uti = uti,
|
||||
capped_mic_handling = capped_mic_handling,
|
||||
@ -846,15 +869,19 @@ as.sir.data.frame <- function(x,
|
||||
conserve_capped_values = conserve_capped_values,
|
||||
is_data.frame = TRUE
|
||||
)
|
||||
out$result <- result
|
||||
out$log <- AMR_env$sir_interpretation_history
|
||||
AMR_env$sir_interpretation_history <- AMR_env$sir_interpretation_history[0, , drop = FALSE] # reset log
|
||||
return(out)
|
||||
} else if (types[i] == "disk") {
|
||||
x[, ab_cols[i]] <- x %pm>%
|
||||
pm_pull(ab_cols[i]) %pm>%
|
||||
result <- x %pm>%
|
||||
pm_pull(ab_col) %pm>%
|
||||
as.character() %pm>%
|
||||
as.disk() %pm>%
|
||||
as.sir(
|
||||
mo = x_mo,
|
||||
mo.bak = x[, col_mo, drop = TRUE],
|
||||
ab = ab_cols[i],
|
||||
ab = ab_col,
|
||||
guideline = guideline,
|
||||
uti = uti,
|
||||
add_intrinsic_resistance = add_intrinsic_resistance,
|
||||
@ -868,38 +895,99 @@ as.sir.data.frame <- function(x,
|
||||
info = info,
|
||||
is_data.frame = TRUE
|
||||
)
|
||||
out$result <- result
|
||||
out$log <- AMR_env$sir_interpretation_history
|
||||
AMR_env$sir_interpretation_history <- AMR_env$sir_interpretation_history[0, , drop = FALSE]
|
||||
return(out)
|
||||
} else if (types[i] == "sir") {
|
||||
ab <- ab_col
|
||||
ab_coerced <- suppressWarnings(as.ab(ab, info = info))
|
||||
show_message <- FALSE
|
||||
ab <- ab_cols[i]
|
||||
ab_coerced <- suppressWarnings(as.ab(ab))
|
||||
if (!all(x[, ab_cols[i], drop = TRUE] %in% c("S", "SDD", "I", "R", "NI", NA), na.rm = TRUE)) {
|
||||
if (!all(x[, ab, drop = TRUE] %in% c("S", "SDD", "I", "R", "NI", NA), na.rm = TRUE)) {
|
||||
show_message <- TRUE
|
||||
# only print message if values are not already clean
|
||||
if (isTRUE(info)) {
|
||||
message_("Cleaning values in column '", font_bold(ab), "' (",
|
||||
ifelse(ab_coerced != toupper(ab), paste0(ab_coerced, ", "), ""),
|
||||
ab_name(ab_coerced, tolower = TRUE), ")... ",
|
||||
ab_name(ab_coerced, tolower = TRUE, info = info), ")... ",
|
||||
appendLF = FALSE,
|
||||
as_note = FALSE
|
||||
)
|
||||
}
|
||||
} else if (!is.sir(x.bak[, ab_cols[i], drop = TRUE])) {
|
||||
} else if (!is.sir(x.bak[, ab, drop = TRUE])) {
|
||||
show_message <- TRUE
|
||||
# only print message if class not already set
|
||||
if (isTRUE(info)) {
|
||||
message_("Assigning class 'sir' to already clean column '", font_bold(ab), "' (",
|
||||
ifelse(ab_coerced != toupper(ab), paste0(ab_coerced, ", "), ""),
|
||||
ab_name(ab_coerced, tolower = TRUE, language = NULL), ")... ",
|
||||
ab_name(ab_coerced, tolower = TRUE, language = NULL, info = info), ")... ",
|
||||
appendLF = FALSE,
|
||||
as_note = FALSE
|
||||
)
|
||||
}
|
||||
}
|
||||
x[, ab_cols[i]] <- as.sir.default(x = as.character(x[, ab_cols[i], drop = TRUE]))
|
||||
result <- as.sir.default(x = as.character(x[, ab, drop = TRUE]))
|
||||
if (show_message == TRUE && isTRUE(info)) {
|
||||
message(font_green_bg(" OK "))
|
||||
}
|
||||
out$result <- result
|
||||
out$log <- NULL
|
||||
return(out)
|
||||
}
|
||||
|
||||
return(out)
|
||||
}
|
||||
|
||||
if (isTRUE(parallel) && n_cores > 1 && length(ab_cols) > 1) {
|
||||
if (isTRUE(info)) {
|
||||
message()
|
||||
message_("Running SIR interpretation in parallel mode on ", nr2char(length(ab_cols)), " columns, using ", n_cores, " out of ", get_n_cores(Inf), " cores...", as_note = FALSE, appendLF = FALSE, add_fn = font_red)
|
||||
}
|
||||
if (.Platform$OS.type == "windows") {
|
||||
cl <- parallel::makeCluster(n_cores, type = "PSOCK")
|
||||
on.exit(parallel::stopCluster(cl), add = TRUE)
|
||||
parallel::clusterExport(cl, varlist = c(
|
||||
"x", "x.bak", "x_mo", "ab_cols", "types",
|
||||
"capped_mic_handling", "add_intrinsic_resistance",
|
||||
"reference_data", "substitute_missing_r_breakpoint", "include_screening", "include_PKPD",
|
||||
"breakpoint_type", "guideline", "host", "uti", "info", "verbose",
|
||||
"col_mo", "AMR_env", "conserve_capped_values",
|
||||
"run_as_sir_column"
|
||||
), envir = environment())
|
||||
result_list <- parallel::parLapply(cl, seq_along(ab_cols), run_as_sir_column)
|
||||
} else {
|
||||
result_list <- parallel::mclapply(seq_along(ab_cols), run_as_sir_column, mc.cores = n_cores)
|
||||
}
|
||||
if (isTRUE(info)) {
|
||||
message_(" Done.", appendLF = TRUE, as_note = FALSE, add_fn = font_red)
|
||||
message()
|
||||
message_("Run `sir_interpretation_history()` to retrieve a logbook with all the details of the breakpoint interpretations.", add_fn = font_green)
|
||||
}
|
||||
} else {
|
||||
# sequential mode (non-parallel)
|
||||
if (n_cores > 1 && isTRUE(info) && (NROW(x) > 2500 || length(ab_cols) >= 5)) {
|
||||
# give a note that parallel mode might be better
|
||||
message()
|
||||
message_("Running SIR interpretation in sequential mode. Consider setting `parallel = TRUE` to speed up processing on multiple cores.\n", add_fn = font_red)
|
||||
}
|
||||
# this will contain a progress bar already
|
||||
result_list <- lapply(seq_along(ab_cols), run_as_sir_column)
|
||||
}
|
||||
|
||||
# bind results back to x
|
||||
for (i in seq_along(ab_cols)) {
|
||||
x[, ab_cols[i]] <- result_list[[i]]$result
|
||||
}
|
||||
|
||||
# combine all sir_interpretation_history
|
||||
sir_logs_all <- lapply(result_list, function(x) x$log)
|
||||
sir_logs_all <- Filter(Negate(is.null), sir_logs_all) # remove NULLs early
|
||||
if (length(sir_logs_all) > 0) {
|
||||
rbindlist <- import_fn("rbindlist", "data.table", error_on_fail = FALSE)
|
||||
if (!is.null(rbindlist)) {
|
||||
sir_logs_all <- rbindlist(sir_logs_all, fill = TRUE, ignore.attr = TRUE)
|
||||
} else {
|
||||
sir_logs_all <- do.call(rbind, sir_logs_all)
|
||||
}
|
||||
AMR_env$sir_interpretation_history <- rbind_AMR(AMR_env$sir_interpretation_history, sir_logs_all)
|
||||
}
|
||||
|
||||
x
|
||||
@ -1155,7 +1243,7 @@ as_sir_method <- function(method_short,
|
||||
}
|
||||
|
||||
ab.bak <- trimws2(ab)
|
||||
ab <- suppressWarnings(as.ab(ab))
|
||||
ab <- suppressWarnings(as.ab(ab, info = info))
|
||||
if (!is.null(list(...)$mo.bak)) {
|
||||
mo.bak <- list(...)$mo.bak
|
||||
} else {
|
||||
@ -1200,7 +1288,7 @@ as_sir_method <- function(method_short,
|
||||
|
||||
# format agents ----
|
||||
agent_formatted <- paste0("'", font_bold(ab.bak, collapse = NULL), "'")
|
||||
agent_name <- ab_name(ab, tolower = TRUE, language = NULL)
|
||||
agent_name <- ab_name(ab, tolower = TRUE, language = NULL, info = info)
|
||||
same_ab <- generalise_antibiotic_name(ab) == generalise_antibiotic_name(agent_name)
|
||||
same_ab.bak <- generalise_antibiotic_name(ab.bak) == generalise_antibiotic_name(agent_name)
|
||||
agent_formatted[same_ab.bak] <- paste0(agent_formatted[same_ab.bak], " (", ab[same_ab.bak], ")")
|
||||
@ -1233,7 +1321,7 @@ as_sir_method <- function(method_short,
|
||||
rise_warning <- FALSE
|
||||
rise_notes <- FALSE
|
||||
method_coerced <- toupper(method)
|
||||
ab_coerced <- as.ab(ab)
|
||||
ab_coerced <- as.ab(ab, info = info)
|
||||
|
||||
if (identical(reference_data, AMR::clinical_breakpoints)) {
|
||||
breakpoints <- reference_data %pm>%
|
||||
@ -1266,7 +1354,7 @@ as_sir_method <- function(method_short,
|
||||
# CLSI in log 2 ----
|
||||
# CLSI says: if MIC is not a log2 value it must be rounded up to the nearest log2 value
|
||||
log2_levels <- as.double(VALID_MIC_LEVELS[which(VALID_MIC_LEVELS %in% 2^c(-20:20))])
|
||||
test_values <- df$values[which(df$guideline %like% "CLSI")]
|
||||
test_values <- df$values
|
||||
test_values_dbl <- as.double(test_values)
|
||||
test_values_dbl[test_values %like% "^>[0-9]"] <- test_values_dbl[test_values %like% "^>[0-9]"] + 0.0000001
|
||||
test_values_dbl[test_values %like% "^<[0-9]"] <- test_values_dbl[test_values %like% "^>[0-9]"] - 0.0000001
|
||||
@ -1275,7 +1363,7 @@ as_sir_method <- function(method_short,
|
||||
test_values_dbl,
|
||||
function(mic_val) {
|
||||
if (is.na(mic_val)) {
|
||||
return(NA_character_)
|
||||
return(NA_real_)
|
||||
} else {
|
||||
# find the smallest log2 level that is >= mic_val
|
||||
log2_val <- log2_levels[which(log2_levels >= as.double(mic_val))][1]
|
||||
@ -1290,7 +1378,7 @@ as_sir_method <- function(method_short,
|
||||
}
|
||||
}
|
||||
)
|
||||
df$values[which(df$guideline %like% "CLSI" & test_values != test_outcome)] <- test_outcome[which(test_values != test_outcome)]
|
||||
df$values[which(df$guideline %like% "CLSI" & test_values != test_outcome)] <- test_outcome[which(df$guideline %like% "CLSI" & test_values != test_outcome)]
|
||||
}
|
||||
df$values <- as.mic(df$values)
|
||||
} else if (method == "disk") {
|
||||
@ -1342,7 +1430,7 @@ as_sir_method <- function(method_short,
|
||||
paste0(font_rose_bg(" WARNING "), "\n"),
|
||||
font_black(paste0(
|
||||
" ", AMR_env$bullet_icon, " No ", method_coerced, " breakpoints available for ",
|
||||
suppressMessages(suppressWarnings(ab_name(unique(ab_coerced), language = NULL, tolower = TRUE))),
|
||||
suppressMessages(suppressWarnings(ab_name(unique(ab_coerced), language = NULL, tolower = TRUE, info = info))),
|
||||
" (", unique(ab_coerced), ")."
|
||||
), collapse = "\n")
|
||||
)
|
||||
@ -1420,7 +1508,7 @@ as_sir_method <- function(method_short,
|
||||
mo_formatted <- font_italic(mo_formatted, collapse = NULL)
|
||||
}
|
||||
ab_formatted <- paste0(
|
||||
suppressMessages(suppressWarnings(ab_name(ab_current, language = NULL, tolower = TRUE))),
|
||||
suppressMessages(suppressWarnings(ab_name(ab_current, language = NULL, tolower = TRUE, info = info))),
|
||||
" (", ab_current, ")"
|
||||
)
|
||||
|
||||
@ -1813,7 +1901,7 @@ freq.sir <- function(x, ...) {
|
||||
}
|
||||
))[1L]
|
||||
}
|
||||
ab <- suppressMessages(suppressWarnings(as.ab(x_name)))
|
||||
ab <- suppressMessages(suppressWarnings(as.ab(x_name, info = FALSE)))
|
||||
digits <- list(...)$digits
|
||||
if (is.null(digits)) {
|
||||
digits <- 2
|
||||
@ -1822,7 +1910,7 @@ freq.sir <- function(x, ...) {
|
||||
cleaner::freq.default(
|
||||
x = x, ...,
|
||||
.add_header = list(
|
||||
Drug = paste0(ab_name(ab, language = NULL), " (", ab, ", ", paste(ab_atc(ab), collapse = "/"), ")"),
|
||||
Drug = paste0(ab_name(ab, language = NULL, info = info), " (", ab, ", ", paste(ab_atc(ab, info = info), collapse = "/"), ")"),
|
||||
`Drug group` = ab_group(ab, language = NULL),
|
||||
`%SI` = trimws(percentage(susceptibility(x, minimum = 0, as_percent = FALSE),
|
||||
digits = digits
|
||||
@ -1854,7 +1942,7 @@ get_skimmers.sir <- function(column) {
|
||||
vars <- tryCatch(eval(parse(text = ".data$skim_variable$sir"), envir = frms[[ind]]),
|
||||
error = function(e) NULL
|
||||
)
|
||||
tryCatch(ab_name(as.character(calls[[length(calls)]][[2]]), language = NULL),
|
||||
tryCatch(ab_name(as.character(calls[[length(calls)]][[2]]), language = NULL, info = FALSE),
|
||||
error = function(e) NA_character_
|
||||
)
|
||||
} else {
|
||||
|
@ -32,7 +32,7 @@
|
||||
#' This function filters a data set to include only the top *n* microorganisms based on a specified property, such as taxonomic family or genus. For example, it can filter a data set to the top 3 species, or to any species in the top 5 genera, or to the top 3 species in each of the top 5 genera.
|
||||
#' @param x A data frame containing microbial data.
|
||||
#' @param n An integer specifying the maximum number of unique values of the `property` to include in the output.
|
||||
#' @param property A character string indicating the microorganism property to use for filtering. Must be one of the column names of the [microorganisms] data set: `r vector_or(colnames(microorganisms), sort = FALSE, quotes = TRUE)`. If `NULL`, the raw values from `col_mo` will be used without transformation.
|
||||
#' @param property A character string indicating the microorganism property to use for filtering. Must be one of the column names of the [microorganisms] data set: `r vector_or(colnames(microorganisms), sort = FALSE, quotes = TRUE)`. If `NULL`, the raw values from `col_mo` will be used without transformation. When using `"species"` (default) or `"subpecies"`, the genus will be added to make sure each (sub)species still belongs to the right genus.
|
||||
#' @param n_for_each An optional integer specifying the maximum number of rows to retain for each value of the selected property. If `NULL`, all rows within the top *n* groups will be included.
|
||||
#' @param col_mo A character string indicating the column in `x` that contains microorganism names or codes. Defaults to the first column of class [`mo`]. Values will be coerced using [as.mo()].
|
||||
#' @param ... Additional arguments passed on to [mo_property()] when `property` is not `NULL`.
|
||||
@ -54,7 +54,7 @@
|
||||
#' top_n_microorganisms(example_isolates,
|
||||
#' n = 5, property = "genus", n_for_each = 3
|
||||
#' )
|
||||
top_n_microorganisms <- function(x, n, property = "fullname", n_for_each = NULL, col_mo = NULL, ...) {
|
||||
top_n_microorganisms <- function(x, n, property = "species", n_for_each = NULL, col_mo = NULL, ...) {
|
||||
meet_criteria(x, allow_class = "data.frame") # also checks dimensions to be >0
|
||||
meet_criteria(n, allow_class = c("numeric", "integer"), has_length = 1, is_finite = TRUE, is_positive = TRUE)
|
||||
meet_criteria(property, allow_class = "character", has_length = 1, is_in = colnames(AMR::microorganisms))
|
||||
@ -71,6 +71,10 @@ top_n_microorganisms <- function(x, n, property = "fullname", n_for_each = NULL,
|
||||
|
||||
if (is.null(property)) {
|
||||
x$prop_val <- x[[col_mo]]
|
||||
} else if (property == "species") {
|
||||
x$prop_val <- paste(mo_genus(x[[col_mo]], ...), mo_species(x[[col_mo]], ...))
|
||||
} else if (property == "subspecies") {
|
||||
x$prop_val <- paste(mo_genus(x[[col_mo]], ...), mo_species(x[[col_mo]], ...), mo_subspecies(x[[col_mo]], ...))
|
||||
} else {
|
||||
x$prop_val <- mo_property(x[[col_mo]], property = property, ...)
|
||||
}
|
||||
|
1
R/zzz.R
1
R/zzz.R
@ -73,6 +73,7 @@ AMR_env$sir_interpretation_history <- data.frame(
|
||||
ref_table = character(0),
|
||||
uti = logical(0),
|
||||
breakpoint_S_R = character(0),
|
||||
site = character(0),
|
||||
stringsAsFactors = FALSE
|
||||
)
|
||||
|
||||
|
2128
data-raw/parallel_test_sir.R
Normal file
2128
data-raw/parallel_test_sir.R
Normal file
File diff suppressed because it is too large
Load Diff
@ -65,7 +65,8 @@ is_sir_eligible(x, threshold = 0.05)
|
||||
FALSE), include_screening = getOption("AMR_include_screening", FALSE),
|
||||
include_PKPD = getOption("AMR_include_PKPD", TRUE),
|
||||
breakpoint_type = getOption("AMR_breakpoint_type", "human"), host = NULL,
|
||||
verbose = FALSE, info = TRUE, conserve_capped_values = NULL)
|
||||
verbose = FALSE, info = TRUE, parallel = FALSE, max_cores = -1,
|
||||
conserve_capped_values = NULL)
|
||||
|
||||
sir_interpretation_history(clean = FALSE)
|
||||
}
|
||||
@ -136,6 +137,10 @@ The default \code{"standard"} setting ensures cautious handling of uncertain val
|
||||
|
||||
\item{col_mo}{Column name of the names or codes of the microorganisms (see \code{\link[=as.mo]{as.mo()}}) - the default is the first column of class \code{\link{mo}}. Values will be coerced using \code{\link[=as.mo]{as.mo()}}.}
|
||||
|
||||
\item{parallel}{A \link{logical} to indicate if parallel computing must be used, defaults to \code{FALSE}.}
|
||||
|
||||
\item{max_cores}{Maximum number of cores to use if \code{parallel = TRUE}. Use a negative value to subtract that number from the available number of cores, e.g. a value of \code{-2} on an 8-core machine means that 6 cores will be used. Defaults to \code{-1}. The available number of cores are detected using \code{\link[parallelly:availableCores]{parallelly::availableCores()}} if that package is installed, and base \R's \code{\link[parallel:detectCores]{parallel::detectCores()}} otherwise.}
|
||||
|
||||
\item{clean}{A \link{logical} to indicate whether previously stored results should be forgotten after returning the 'logbook' with results.}
|
||||
}
|
||||
\value{
|
||||
@ -164,6 +169,9 @@ your_data \%>\% mutate_if(is.mic, as.sir, ab = c("cipro", "ampicillin", ...), mo
|
||||
|
||||
# for veterinary breakpoints, also set `host`:
|
||||
your_data \%>\% mutate_if(is.mic, as.sir, host = "column_with_animal_species", guideline = "CLSI")
|
||||
|
||||
# fast processing with parallel computing:
|
||||
as.sir(your_data, ..., parallel = TRUE)
|
||||
}\if{html}{\out{</div>}}
|
||||
\item Operators like "<=" will be stripped before interpretation. When using \code{capped_mic_handling = "conservative"}, an MIC value of e.g. ">2" will always return "R", even if the breakpoint according to the chosen guideline is ">=4". This is to prevent that capped values from raw laboratory data would not be treated conservatively. The default behaviour (\code{capped_mic_handling = "standard"}) considers ">2" to be lower than ">=4" and might in this case return "S" or "I".
|
||||
\item \strong{Note:} When using CLSI as the guideline, MIC values must be log2-based doubling dilutions. Values not in this format, will be automatically rounded up to the nearest log2 level as CLSI instructs, and a warning will be thrown.
|
||||
@ -179,6 +187,9 @@ your_data \%>\% mutate_if(is.disk, as.sir, ab = c("cipro", "ampicillin", ...), m
|
||||
|
||||
# for veterinary breakpoints, also set `host`:
|
||||
your_data \%>\% mutate_if(is.disk, as.sir, host = "column_with_animal_species", guideline = "CLSI")
|
||||
|
||||
# fast processing with parallel computing:
|
||||
as.sir(your_data, ..., parallel = TRUE)
|
||||
}\if{html}{\out{</div>}}
|
||||
}
|
||||
\item For \strong{interpreting a complete data set}, with automatic determination of MIC values, disk diffusion diameters, microorganism names or codes, and antimicrobial test results. This is done very simply by running \code{as.sir(your_data)}.
|
||||
@ -208,12 +219,20 @@ It is also possible to set the default guideline with the package option \code{\
|
||||
# or to reset:
|
||||
options(AMR_guideline = NULL)
|
||||
}\if{html}{\out{</div>}}
|
||||
}
|
||||
|
||||
For veterinary guidelines, these might be the best options:
|
||||
\subsection{Working with Veterinary Breakpoints}{
|
||||
|
||||
When using veterinary breakpoints (i.e., setting \code{breakpoint_type = "animal"}), a column with animal species must be available or set manually using the \code{host} argument. The column must contain names like "dogs", "cats", "cattle", "swine", "horses", "poultry", or "aquatic". Other animal names like "goats", "rabbits", or "monkeys" are also recognised but may not be available in all guidelines. Matching is case-insensitive and accepts Latin-based synonyms (e.g., "bovine" for cattle and "canine" for dogs).
|
||||
|
||||
Regarding choice of veterinary guidelines, these might be the best options to set before analysis:
|
||||
|
||||
\if{html}{\out{<div class="sourceCode">}}\preformatted{ options(AMR_guideline = "CLSI")
|
||||
options(AMR_breakpoint_type = "animal")
|
||||
}\if{html}{\out{</div>}}
|
||||
\subsection{TODO #187 When applying veterinary breakpoints (by setting \code{host} or by setting \code{breakpoint_type = "animal"}), the \href{https://clsi.org/standards/products/veterinary-medicine/documents/vet09/}{CLSI VET09 guideline} will be applied to cope with missing animal species-specific breakpoints.}{
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
\subsection{After Interpretation}{
|
||||
|
@ -4,7 +4,7 @@
|
||||
\alias{top_n_microorganisms}
|
||||
\title{Filter Top \emph{n} Microorganisms}
|
||||
\usage{
|
||||
top_n_microorganisms(x, n, property = "fullname", n_for_each = NULL,
|
||||
top_n_microorganisms(x, n, property = "species", n_for_each = NULL,
|
||||
col_mo = NULL, ...)
|
||||
}
|
||||
\arguments{
|
||||
@ -12,7 +12,7 @@ top_n_microorganisms(x, n, property = "fullname", n_for_each = NULL,
|
||||
|
||||
\item{n}{An integer specifying the maximum number of unique values of the \code{property} to include in the output.}
|
||||
|
||||
\item{property}{A character string indicating the microorganism property to use for filtering. Must be one of the column names of the \link{microorganisms} data set: "mo", "fullname", "status", "kingdom", "phylum", "class", "order", "family", "genus", "species", "subspecies", "rank", "ref", "oxygen_tolerance", "source", "lpsn", "lpsn_parent", "lpsn_renamed_to", "mycobank", "mycobank_parent", "mycobank_renamed_to", "gbif", "gbif_parent", "gbif_renamed_to", "prevalence", or "snomed". If \code{NULL}, the raw values from \code{col_mo} will be used without transformation.}
|
||||
\item{property}{A character string indicating the microorganism property to use for filtering. Must be one of the column names of the \link{microorganisms} data set: "mo", "fullname", "status", "kingdom", "phylum", "class", "order", "family", "genus", "species", "subspecies", "rank", "ref", "oxygen_tolerance", "source", "lpsn", "lpsn_parent", "lpsn_renamed_to", "mycobank", "mycobank_parent", "mycobank_renamed_to", "gbif", "gbif_parent", "gbif_renamed_to", "prevalence", or "snomed". If \code{NULL}, the raw values from \code{col_mo} will be used without transformation. When using \code{"species"} (default) or \code{"subpecies"}, the genus will be added to make sure each (sub)species still belongs to the right genus.}
|
||||
|
||||
\item{n_for_each}{An optional integer specifying the maximum number of rows to retain for each value of the selected property. If \code{NULL}, all rows within the top \emph{n} groups will be included.}
|
||||
|
||||
|
@ -118,6 +118,8 @@ test_that("test-zzz.R", {
|
||||
"kable" = "knitr",
|
||||
"knit_print" = "knitr",
|
||||
"opts_chunk" = "knitr",
|
||||
# parallelly
|
||||
"availableCores" = "parallelly",
|
||||
# pillar
|
||||
"pillar_shaft" = "pillar",
|
||||
"tbl_format_footer" = "pillar",
|
||||
|
Loading…
x
Reference in New Issue
Block a user