1
0
mirror of https://github.com/msberends/AMR.git synced 2025-05-01 07:03:48 +02:00

(v2.1.1.9253) parallel computing

This commit is contained in:
dr. M.S. (Matthijs) Berends 2025-04-26 15:47:00 +02:00
parent abee2a954e
commit 4e7fca3b38
No known key found for this signature in database
13 changed files with 2311 additions and 44 deletions

View File

@ -69,8 +69,7 @@ jobs:
- name: Lint
run: |
# old: lintr::lint_package(linters = lintr::with_defaults(line_length_linter = NULL, trailing_whitespace_linter = NULL, object_name_linter = NULL, cyclocomp_linter = NULL, object_length_linter = lintr::object_length_linter(length = 50L)), exclusions = list("R/aa_helper_pm_functions.R"))
# now get ALL linters, not just default ones
# get ALL linters, not just default ones
linters <- getNamespaceExports(asNamespace("lintr"))
linters <- sort(linters[grepl("_linter$", linters)])
# lose deprecated

View File

@ -1,6 +1,6 @@
Package: AMR
Version: 2.1.1.9252
Date: 2025-04-25
Version: 2.1.1.9253
Date: 2025-04-26
Title: Antimicrobial Resistance Data Analysis
Description: Functions to simplify and standardise antimicrobial resistance (AMR)
data analysis and to work with microbial and antimicrobial properties by
@ -47,6 +47,7 @@ Suggests:
ggplot2,
knitr,
openxlsx,
parallelly,
pillar,
progress,
readxl,

View File

@ -1,4 +1,4 @@
# AMR 2.1.1.9252
# AMR 2.1.1.9253
*(this beta version will eventually become v3.0. We're happy to reach a new major milestone soon, which will be all about the new One Health support! Install this beta using [the instructions here](https://amr-for-r.org/#get-this-package).)*
@ -47,6 +47,7 @@ This package now supports not only tools for AMR data analysis in clinical setti
## Changed
* SIR interpretation
* Support for parallel computing using the `parallel` package (part of base R). Use `as.sir(your_data, parallel = TRUE)` to run SIR interpretation using multiple cores.
* It is now possible to use column names for arguments `guideline`, `ab`, `mo`, and `uti`: `as.sir(..., ab = "column1", mo = "column2", uti = "column3")`. This greatly improves the flexibility for users.
* Users can now set their own criteria (using regular expressions) as to what should be considered S, I, R, SDD, and NI.
* To get quantitative values, `as.double()` on a `sir` object will return 1 for S, 2 for SDD/I, and 3 for R (NI will become `NA`). Other functions using `sir` classes (e.g., `summary()`) are updated to reflect the change to contain NI and SDD.

View File

@ -1589,6 +1589,28 @@ readRDS_AMR <- function(file, refhook = NULL) {
readRDS(con, refhook = refhook)
}
get_n_cores <- function(max_cores = Inf) {
if (pkg_is_available("parallelly", min_version = "0.8.0", also_load = FALSE)) {
available_cores <- import_fn("availableCores", "parallelly")
n_cores <- min(available_cores(), na.rm = TRUE)
} else {
# `parallel` is part of base R since 2.14.0, but detectCores() is not very precise on exotic systems like Docker and quota-set Linux environments
n_cores <- parallel::detectCores()[1]
if (is.na(n_cores)) {
n_cores <- 1
}
}
max_cores <- floor(max_cores)
if (max_cores == 0) {
n_cores <- 1
} else if (max_cores < 0) {
n_cores <- max(1, n_cores - abs(max_cores))
} else if (max_cores > 0) {
n_cores <- min(n_cores, max_cores)
}
n_cores
}
# Faster data.table implementations ----
match <- function(x, table, ...) {

2
R/ab.R
View File

@ -139,7 +139,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, language = get_AMR_locale(),
if (toupper(paste(abnames, collapse = " ")) %in% AMR_env$AB_lookup$generalised_name) {
# if the found values combined is a valid AB, return that
found <- AMR_env$AB_lookup$ab[match(toupper(paste(abnames, collapse = " ")), AMR_env$AB_lookup$generalised_name)][1]
} else {
} else if (isTRUE(info)) {
message_(
"More than one result was found for item ", index, ": ",
vector_and(abnames, quotes = FALSE)

View File

@ -573,6 +573,8 @@ antibiogram.default <- function(x,
for (ab in abx) {
# make sure they are SIR columns
x[, ab] <- as.sir(x[, ab, drop = TRUE])
# set NI as NA
x[[ab]][x[[ab]] == "NI"] <- NA_sir_
}
new_colname <- paste0(trimws(abx), collapse = sep)
if (length(abx) == 1) {
@ -584,7 +586,7 @@ antibiogram.default <- function(x,
} else {
S_values <- "S"
}
other_values <- setdiff(c("S", "SDD", "I", "R", "NI"), S_values)
other_values <- setdiff(c("S", "SDD", "I", "R"), S_values)
x_transposed <- as.list(as.data.frame(t(x[, abx, drop = FALSE]), stringsAsFactors = FALSE))
if (isTRUE(only_all_tested)) {
x[new_colname] <- as.sir(vapply(FUN.VALUE = character(1), x_transposed, function(x) ifelse(anyNA(x), NA_character_, ifelse(any(x %in% S_values), "S", "R")), USE.NAMES = FALSE))

150
R/sir.R
View File

@ -88,6 +88,9 @@
#'
#' # for veterinary breakpoints, also set `host`:
#' your_data %>% mutate_if(is.mic, as.sir, host = "column_with_animal_species", guideline = "CLSI")
#'
#' # fast processing with parallel computing:
#' as.sir(your_data, ..., parallel = TRUE)
#' ```
#' * Operators like "<=" will be stripped before interpretation. When using `capped_mic_handling = "conservative"`, an MIC value of e.g. ">2" will always return "R", even if the breakpoint according to the chosen guideline is ">=4". This is to prevent that capped values from raw laboratory data would not be treated conservatively. The default behaviour (`capped_mic_handling = "standard"`) considers ">2" to be lower than ">=4" and might in this case return "S" or "I".
#' * **Note:** When using CLSI as the guideline, MIC values must be log2-based doubling dilutions. Values not in this format, will be automatically rounded up to the nearest log2 level as CLSI instructs, and a warning will be thrown.
@ -102,6 +105,9 @@
#'
#' # for veterinary breakpoints, also set `host`:
#' your_data %>% mutate_if(is.disk, as.sir, host = "column_with_animal_species", guideline = "CLSI")
#'
#' # fast processing with parallel computing:
#' as.sir(your_data, ..., parallel = TRUE)
#' ```
#'
#' 4. For **interpreting a complete data set**, with automatic determination of MIC values, disk diffusion diameters, microorganism names or codes, and antimicrobial test results. This is done very simply by running `as.sir(your_data)`.
@ -129,14 +135,18 @@
#' options(AMR_guideline = NULL)
#' ```
#'
#' For veterinary guidelines, these might be the best options:
#' ### Working with Veterinary Breakpoints
#'
#' When using veterinary breakpoints (i.e., setting `breakpoint_type = "animal"`), a column with animal species must be available or set manually using the `host` argument. The column must contain names like "dogs", "cats", "cattle", "swine", "horses", "poultry", or "aquatic". Other animal names like "goats", "rabbits", or "monkeys" are also recognised but may not be available in all guidelines. Matching is case-insensitive and accepts Latin-based synonyms (e.g., "bovine" for cattle and "canine" for dogs).
#'
#' Regarding choice of veterinary guidelines, these might be the best options to set before analysis:
#'
#' ```
#' options(AMR_guideline = "CLSI")
#' options(AMR_breakpoint_type = "animal")
#' ```
#'
###### TODO #187 When applying veterinary breakpoints (by setting `host` or by setting `breakpoint_type = "animal"`), the [CLSI VET09 guideline](https://clsi.org/standards/products/veterinary-medicine/documents/vet09/) will be applied to cope with missing animal species-specific breakpoints.
#' ###### TODO #187 When applying veterinary breakpoints (by setting `host` or by setting `breakpoint_type = "animal"`), the [CLSI VET09 guideline](https://clsi.org/standards/products/veterinary-medicine/documents/vet09/) will be applied to cope with missing animal species-specific breakpoints.
#'
#' ### After Interpretation
#'
@ -657,6 +667,8 @@ as.sir.disk <- function(x,
}
#' @rdname as.sir
#' @param parallel A [logical] to indicate if parallel computing must be used, defaults to `FALSE`.
#' @param max_cores Maximum number of cores to use if `parallel = TRUE`. Use a negative value to subtract that number from the available number of cores, e.g. a value of `-2` on an 8-core machine means that 6 cores will be used. Defaults to `-1`. The available number of cores are detected using [parallelly::availableCores()] if that package is installed, and base \R's [parallel::detectCores()] otherwise.
#' @export
as.sir.data.frame <- function(x,
...,
@ -673,6 +685,8 @@ as.sir.data.frame <- function(x,
host = NULL,
verbose = FALSE,
info = TRUE,
parallel = FALSE,
max_cores = -1,
conserve_capped_values = NULL) {
meet_criteria(x, allow_class = "data.frame") # will also check for dimensions > 0
meet_criteria(col_mo, allow_class = "character", is_in = colnames(x), allow_NULL = TRUE)
@ -688,6 +702,9 @@ as.sir.data.frame <- function(x,
meet_criteria(host, allow_class = c("character", "factor"), allow_NULL = TRUE, allow_NA = TRUE)
meet_criteria(verbose, allow_class = "logical", has_length = 1)
meet_criteria(info, allow_class = "logical", has_length = 1)
meet_criteria(parallel, allow_class = "logical", has_length = 1)
meet_criteria(max_cores, allow_class = c("numeric", "integer"), has_length = 1)
x.bak <- x
for (i in seq_len(ncol(x))) {
# don't keep factors, overwriting them is hard
@ -788,7 +805,7 @@ as.sir.data.frame <- function(x,
return(FALSE)
}
if (length(sel) == 0 || (length(sel) > 0 && ab %in% sel)) {
ab_coerced <- suppressWarnings(as.ab(ab))
ab_coerced <- suppressWarnings(as.ab(ab, info = info))
if (is.na(ab_coerced) || (length(sel) > 0 & !ab %in% sel)) {
# not even a valid AB code
return(FALSE)
@ -818,19 +835,25 @@ as.sir.data.frame <- function(x,
if (is.null(col_mo.bak)) {
col_mo <- search_type_in_df(x = x, type = "mo")
}
x_mo <- as.mo(x[, col_mo, drop = TRUE])
x_mo <- as.mo(x[, col_mo, drop = TRUE], info = info)
}
for (i in seq_along(ab_cols)) {
# set up parallel computing
n_cores <- get_n_cores(max_cores = max_cores)
run_as_sir_column <- function(i) {
ab_col <- ab_cols[i]
out <- list(result = NULL, log = NULL)
if (types[i] == "mic") {
x[, ab_cols[i]] <- x %pm>%
pm_pull(ab_cols[i]) %pm>%
result <- x %pm>%
pm_pull(ab_col) %pm>%
as.character() %pm>%
as.mic() %pm>%
as.sir(
mo = x_mo,
mo.bak = x[, col_mo, drop = TRUE],
ab = ab_cols[i],
ab = ab_col,
guideline = guideline,
uti = uti,
capped_mic_handling = capped_mic_handling,
@ -846,15 +869,19 @@ as.sir.data.frame <- function(x,
conserve_capped_values = conserve_capped_values,
is_data.frame = TRUE
)
out$result <- result
out$log <- AMR_env$sir_interpretation_history
AMR_env$sir_interpretation_history <- AMR_env$sir_interpretation_history[0, , drop = FALSE] # reset log
return(out)
} else if (types[i] == "disk") {
x[, ab_cols[i]] <- x %pm>%
pm_pull(ab_cols[i]) %pm>%
result <- x %pm>%
pm_pull(ab_col) %pm>%
as.character() %pm>%
as.disk() %pm>%
as.sir(
mo = x_mo,
mo.bak = x[, col_mo, drop = TRUE],
ab = ab_cols[i],
ab = ab_col,
guideline = guideline,
uti = uti,
add_intrinsic_resistance = add_intrinsic_resistance,
@ -868,38 +895,99 @@ as.sir.data.frame <- function(x,
info = info,
is_data.frame = TRUE
)
out$result <- result
out$log <- AMR_env$sir_interpretation_history
AMR_env$sir_interpretation_history <- AMR_env$sir_interpretation_history[0, , drop = FALSE]
return(out)
} else if (types[i] == "sir") {
ab <- ab_col
ab_coerced <- suppressWarnings(as.ab(ab, info = info))
show_message <- FALSE
ab <- ab_cols[i]
ab_coerced <- suppressWarnings(as.ab(ab))
if (!all(x[, ab_cols[i], drop = TRUE] %in% c("S", "SDD", "I", "R", "NI", NA), na.rm = TRUE)) {
if (!all(x[, ab, drop = TRUE] %in% c("S", "SDD", "I", "R", "NI", NA), na.rm = TRUE)) {
show_message <- TRUE
# only print message if values are not already clean
if (isTRUE(info)) {
message_("Cleaning values in column '", font_bold(ab), "' (",
ifelse(ab_coerced != toupper(ab), paste0(ab_coerced, ", "), ""),
ab_name(ab_coerced, tolower = TRUE), ")... ",
ab_name(ab_coerced, tolower = TRUE, info = info), ")... ",
appendLF = FALSE,
as_note = FALSE
)
}
} else if (!is.sir(x.bak[, ab_cols[i], drop = TRUE])) {
} else if (!is.sir(x.bak[, ab, drop = TRUE])) {
show_message <- TRUE
# only print message if class not already set
if (isTRUE(info)) {
message_("Assigning class 'sir' to already clean column '", font_bold(ab), "' (",
ifelse(ab_coerced != toupper(ab), paste0(ab_coerced, ", "), ""),
ab_name(ab_coerced, tolower = TRUE, language = NULL), ")... ",
ab_name(ab_coerced, tolower = TRUE, language = NULL, info = info), ")... ",
appendLF = FALSE,
as_note = FALSE
)
}
}
x[, ab_cols[i]] <- as.sir.default(x = as.character(x[, ab_cols[i], drop = TRUE]))
result <- as.sir.default(x = as.character(x[, ab, drop = TRUE]))
if (show_message == TRUE && isTRUE(info)) {
message(font_green_bg(" OK "))
}
out$result <- result
out$log <- NULL
return(out)
}
return(out)
}
if (isTRUE(parallel) && n_cores > 1 && length(ab_cols) > 1) {
if (isTRUE(info)) {
message()
message_("Running SIR interpretation in parallel mode on ", nr2char(length(ab_cols)), " columns, using ", n_cores, " out of ", get_n_cores(Inf), " cores...", as_note = FALSE, appendLF = FALSE, add_fn = font_red)
}
if (.Platform$OS.type == "windows") {
cl <- parallel::makeCluster(n_cores, type = "PSOCK")
on.exit(parallel::stopCluster(cl), add = TRUE)
parallel::clusterExport(cl, varlist = c(
"x", "x.bak", "x_mo", "ab_cols", "types",
"capped_mic_handling", "add_intrinsic_resistance",
"reference_data", "substitute_missing_r_breakpoint", "include_screening", "include_PKPD",
"breakpoint_type", "guideline", "host", "uti", "info", "verbose",
"col_mo", "AMR_env", "conserve_capped_values",
"run_as_sir_column"
), envir = environment())
result_list <- parallel::parLapply(cl, seq_along(ab_cols), run_as_sir_column)
} else {
result_list <- parallel::mclapply(seq_along(ab_cols), run_as_sir_column, mc.cores = n_cores)
}
if (isTRUE(info)) {
message_(" Done.", appendLF = TRUE, as_note = FALSE, add_fn = font_red)
message()
message_("Run `sir_interpretation_history()` to retrieve a logbook with all the details of the breakpoint interpretations.", add_fn = font_green)
}
} else {
# sequential mode (non-parallel)
if (n_cores > 1 && isTRUE(info) && (NROW(x) > 2500 || length(ab_cols) >= 5)) {
# give a note that parallel mode might be better
message()
message_("Running SIR interpretation in sequential mode. Consider setting `parallel = TRUE` to speed up processing on multiple cores.\n", add_fn = font_red)
}
# this will contain a progress bar already
result_list <- lapply(seq_along(ab_cols), run_as_sir_column)
}
# bind results back to x
for (i in seq_along(ab_cols)) {
x[, ab_cols[i]] <- result_list[[i]]$result
}
# combine all sir_interpretation_history
sir_logs_all <- lapply(result_list, function(x) x$log)
sir_logs_all <- Filter(Negate(is.null), sir_logs_all) # remove NULLs early
if (length(sir_logs_all) > 0) {
rbindlist <- import_fn("rbindlist", "data.table", error_on_fail = FALSE)
if (!is.null(rbindlist)) {
sir_logs_all <- rbindlist(sir_logs_all, fill = TRUE, ignore.attr = TRUE)
} else {
sir_logs_all <- do.call(rbind, sir_logs_all)
}
AMR_env$sir_interpretation_history <- rbind_AMR(AMR_env$sir_interpretation_history, sir_logs_all)
}
x
@ -1155,7 +1243,7 @@ as_sir_method <- function(method_short,
}
ab.bak <- trimws2(ab)
ab <- suppressWarnings(as.ab(ab))
ab <- suppressWarnings(as.ab(ab, info = info))
if (!is.null(list(...)$mo.bak)) {
mo.bak <- list(...)$mo.bak
} else {
@ -1200,7 +1288,7 @@ as_sir_method <- function(method_short,
# format agents ----
agent_formatted <- paste0("'", font_bold(ab.bak, collapse = NULL), "'")
agent_name <- ab_name(ab, tolower = TRUE, language = NULL)
agent_name <- ab_name(ab, tolower = TRUE, language = NULL, info = info)
same_ab <- generalise_antibiotic_name(ab) == generalise_antibiotic_name(agent_name)
same_ab.bak <- generalise_antibiotic_name(ab.bak) == generalise_antibiotic_name(agent_name)
agent_formatted[same_ab.bak] <- paste0(agent_formatted[same_ab.bak], " (", ab[same_ab.bak], ")")
@ -1233,7 +1321,7 @@ as_sir_method <- function(method_short,
rise_warning <- FALSE
rise_notes <- FALSE
method_coerced <- toupper(method)
ab_coerced <- as.ab(ab)
ab_coerced <- as.ab(ab, info = info)
if (identical(reference_data, AMR::clinical_breakpoints)) {
breakpoints <- reference_data %pm>%
@ -1266,7 +1354,7 @@ as_sir_method <- function(method_short,
# CLSI in log 2 ----
# CLSI says: if MIC is not a log2 value it must be rounded up to the nearest log2 value
log2_levels <- as.double(VALID_MIC_LEVELS[which(VALID_MIC_LEVELS %in% 2^c(-20:20))])
test_values <- df$values[which(df$guideline %like% "CLSI")]
test_values <- df$values
test_values_dbl <- as.double(test_values)
test_values_dbl[test_values %like% "^>[0-9]"] <- test_values_dbl[test_values %like% "^>[0-9]"] + 0.0000001
test_values_dbl[test_values %like% "^<[0-9]"] <- test_values_dbl[test_values %like% "^>[0-9]"] - 0.0000001
@ -1275,7 +1363,7 @@ as_sir_method <- function(method_short,
test_values_dbl,
function(mic_val) {
if (is.na(mic_val)) {
return(NA_character_)
return(NA_real_)
} else {
# find the smallest log2 level that is >= mic_val
log2_val <- log2_levels[which(log2_levels >= as.double(mic_val))][1]
@ -1290,7 +1378,7 @@ as_sir_method <- function(method_short,
}
}
)
df$values[which(df$guideline %like% "CLSI" & test_values != test_outcome)] <- test_outcome[which(test_values != test_outcome)]
df$values[which(df$guideline %like% "CLSI" & test_values != test_outcome)] <- test_outcome[which(df$guideline %like% "CLSI" & test_values != test_outcome)]
}
df$values <- as.mic(df$values)
} else if (method == "disk") {
@ -1342,7 +1430,7 @@ as_sir_method <- function(method_short,
paste0(font_rose_bg(" WARNING "), "\n"),
font_black(paste0(
" ", AMR_env$bullet_icon, " No ", method_coerced, " breakpoints available for ",
suppressMessages(suppressWarnings(ab_name(unique(ab_coerced), language = NULL, tolower = TRUE))),
suppressMessages(suppressWarnings(ab_name(unique(ab_coerced), language = NULL, tolower = TRUE, info = info))),
" (", unique(ab_coerced), ")."
), collapse = "\n")
)
@ -1420,7 +1508,7 @@ as_sir_method <- function(method_short,
mo_formatted <- font_italic(mo_formatted, collapse = NULL)
}
ab_formatted <- paste0(
suppressMessages(suppressWarnings(ab_name(ab_current, language = NULL, tolower = TRUE))),
suppressMessages(suppressWarnings(ab_name(ab_current, language = NULL, tolower = TRUE, info = info))),
" (", ab_current, ")"
)
@ -1813,7 +1901,7 @@ freq.sir <- function(x, ...) {
}
))[1L]
}
ab <- suppressMessages(suppressWarnings(as.ab(x_name)))
ab <- suppressMessages(suppressWarnings(as.ab(x_name, info = FALSE)))
digits <- list(...)$digits
if (is.null(digits)) {
digits <- 2
@ -1822,7 +1910,7 @@ freq.sir <- function(x, ...) {
cleaner::freq.default(
x = x, ...,
.add_header = list(
Drug = paste0(ab_name(ab, language = NULL), " (", ab, ", ", paste(ab_atc(ab), collapse = "/"), ")"),
Drug = paste0(ab_name(ab, language = NULL, info = info), " (", ab, ", ", paste(ab_atc(ab, info = info), collapse = "/"), ")"),
`Drug group` = ab_group(ab, language = NULL),
`%SI` = trimws(percentage(susceptibility(x, minimum = 0, as_percent = FALSE),
digits = digits
@ -1854,7 +1942,7 @@ get_skimmers.sir <- function(column) {
vars <- tryCatch(eval(parse(text = ".data$skim_variable$sir"), envir = frms[[ind]]),
error = function(e) NULL
)
tryCatch(ab_name(as.character(calls[[length(calls)]][[2]]), language = NULL),
tryCatch(ab_name(as.character(calls[[length(calls)]][[2]]), language = NULL, info = FALSE),
error = function(e) NA_character_
)
} else {

View File

@ -32,7 +32,7 @@
#' This function filters a data set to include only the top *n* microorganisms based on a specified property, such as taxonomic family or genus. For example, it can filter a data set to the top 3 species, or to any species in the top 5 genera, or to the top 3 species in each of the top 5 genera.
#' @param x A data frame containing microbial data.
#' @param n An integer specifying the maximum number of unique values of the `property` to include in the output.
#' @param property A character string indicating the microorganism property to use for filtering. Must be one of the column names of the [microorganisms] data set: `r vector_or(colnames(microorganisms), sort = FALSE, quotes = TRUE)`. If `NULL`, the raw values from `col_mo` will be used without transformation.
#' @param property A character string indicating the microorganism property to use for filtering. Must be one of the column names of the [microorganisms] data set: `r vector_or(colnames(microorganisms), sort = FALSE, quotes = TRUE)`. If `NULL`, the raw values from `col_mo` will be used without transformation. When using `"species"` (default) or `"subpecies"`, the genus will be added to make sure each (sub)species still belongs to the right genus.
#' @param n_for_each An optional integer specifying the maximum number of rows to retain for each value of the selected property. If `NULL`, all rows within the top *n* groups will be included.
#' @param col_mo A character string indicating the column in `x` that contains microorganism names or codes. Defaults to the first column of class [`mo`]. Values will be coerced using [as.mo()].
#' @param ... Additional arguments passed on to [mo_property()] when `property` is not `NULL`.
@ -54,7 +54,7 @@
#' top_n_microorganisms(example_isolates,
#' n = 5, property = "genus", n_for_each = 3
#' )
top_n_microorganisms <- function(x, n, property = "fullname", n_for_each = NULL, col_mo = NULL, ...) {
top_n_microorganisms <- function(x, n, property = "species", n_for_each = NULL, col_mo = NULL, ...) {
meet_criteria(x, allow_class = "data.frame") # also checks dimensions to be >0
meet_criteria(n, allow_class = c("numeric", "integer"), has_length = 1, is_finite = TRUE, is_positive = TRUE)
meet_criteria(property, allow_class = "character", has_length = 1, is_in = colnames(AMR::microorganisms))
@ -71,6 +71,10 @@ top_n_microorganisms <- function(x, n, property = "fullname", n_for_each = NULL,
if (is.null(property)) {
x$prop_val <- x[[col_mo]]
} else if (property == "species") {
x$prop_val <- paste(mo_genus(x[[col_mo]], ...), mo_species(x[[col_mo]], ...))
} else if (property == "subspecies") {
x$prop_val <- paste(mo_genus(x[[col_mo]], ...), mo_species(x[[col_mo]], ...), mo_subspecies(x[[col_mo]], ...))
} else {
x$prop_val <- mo_property(x[[col_mo]], property = property, ...)
}

View File

@ -73,6 +73,7 @@ AMR_env$sir_interpretation_history <- data.frame(
ref_table = character(0),
uti = logical(0),
breakpoint_S_R = character(0),
site = character(0),
stringsAsFactors = FALSE
)

2128
data-raw/parallel_test_sir.R Normal file

File diff suppressed because it is too large Load Diff

View File

@ -65,7 +65,8 @@ is_sir_eligible(x, threshold = 0.05)
FALSE), include_screening = getOption("AMR_include_screening", FALSE),
include_PKPD = getOption("AMR_include_PKPD", TRUE),
breakpoint_type = getOption("AMR_breakpoint_type", "human"), host = NULL,
verbose = FALSE, info = TRUE, conserve_capped_values = NULL)
verbose = FALSE, info = TRUE, parallel = FALSE, max_cores = -1,
conserve_capped_values = NULL)
sir_interpretation_history(clean = FALSE)
}
@ -136,6 +137,10 @@ The default \code{"standard"} setting ensures cautious handling of uncertain val
\item{col_mo}{Column name of the names or codes of the microorganisms (see \code{\link[=as.mo]{as.mo()}}) - the default is the first column of class \code{\link{mo}}. Values will be coerced using \code{\link[=as.mo]{as.mo()}}.}
\item{parallel}{A \link{logical} to indicate if parallel computing must be used, defaults to \code{FALSE}.}
\item{max_cores}{Maximum number of cores to use if \code{parallel = TRUE}. Use a negative value to subtract that number from the available number of cores, e.g. a value of \code{-2} on an 8-core machine means that 6 cores will be used. Defaults to \code{-1}. The available number of cores are detected using \code{\link[parallelly:availableCores]{parallelly::availableCores()}} if that package is installed, and base \R's \code{\link[parallel:detectCores]{parallel::detectCores()}} otherwise.}
\item{clean}{A \link{logical} to indicate whether previously stored results should be forgotten after returning the 'logbook' with results.}
}
\value{
@ -164,6 +169,9 @@ your_data \%>\% mutate_if(is.mic, as.sir, ab = c("cipro", "ampicillin", ...), mo
# for veterinary breakpoints, also set `host`:
your_data \%>\% mutate_if(is.mic, as.sir, host = "column_with_animal_species", guideline = "CLSI")
# fast processing with parallel computing:
as.sir(your_data, ..., parallel = TRUE)
}\if{html}{\out{</div>}}
\item Operators like "<=" will be stripped before interpretation. When using \code{capped_mic_handling = "conservative"}, an MIC value of e.g. ">2" will always return "R", even if the breakpoint according to the chosen guideline is ">=4". This is to prevent that capped values from raw laboratory data would not be treated conservatively. The default behaviour (\code{capped_mic_handling = "standard"}) considers ">2" to be lower than ">=4" and might in this case return "S" or "I".
\item \strong{Note:} When using CLSI as the guideline, MIC values must be log2-based doubling dilutions. Values not in this format, will be automatically rounded up to the nearest log2 level as CLSI instructs, and a warning will be thrown.
@ -179,6 +187,9 @@ your_data \%>\% mutate_if(is.disk, as.sir, ab = c("cipro", "ampicillin", ...), m
# for veterinary breakpoints, also set `host`:
your_data \%>\% mutate_if(is.disk, as.sir, host = "column_with_animal_species", guideline = "CLSI")
# fast processing with parallel computing:
as.sir(your_data, ..., parallel = TRUE)
}\if{html}{\out{</div>}}
}
\item For \strong{interpreting a complete data set}, with automatic determination of MIC values, disk diffusion diameters, microorganism names or codes, and antimicrobial test results. This is done very simply by running \code{as.sir(your_data)}.
@ -208,12 +219,20 @@ It is also possible to set the default guideline with the package option \code{\
# or to reset:
options(AMR_guideline = NULL)
}\if{html}{\out{</div>}}
}
For veterinary guidelines, these might be the best options:
\subsection{Working with Veterinary Breakpoints}{
When using veterinary breakpoints (i.e., setting \code{breakpoint_type = "animal"}), a column with animal species must be available or set manually using the \code{host} argument. The column must contain names like "dogs", "cats", "cattle", "swine", "horses", "poultry", or "aquatic". Other animal names like "goats", "rabbits", or "monkeys" are also recognised but may not be available in all guidelines. Matching is case-insensitive and accepts Latin-based synonyms (e.g., "bovine" for cattle and "canine" for dogs).
Regarding choice of veterinary guidelines, these might be the best options to set before analysis:
\if{html}{\out{<div class="sourceCode">}}\preformatted{ options(AMR_guideline = "CLSI")
options(AMR_breakpoint_type = "animal")
}\if{html}{\out{</div>}}
\subsection{TODO #187 When applying veterinary breakpoints (by setting \code{host} or by setting \code{breakpoint_type = "animal"}), the \href{https://clsi.org/standards/products/veterinary-medicine/documents/vet09/}{CLSI VET09 guideline} will be applied to cope with missing animal species-specific breakpoints.}{
}
}
\subsection{After Interpretation}{

View File

@ -4,7 +4,7 @@
\alias{top_n_microorganisms}
\title{Filter Top \emph{n} Microorganisms}
\usage{
top_n_microorganisms(x, n, property = "fullname", n_for_each = NULL,
top_n_microorganisms(x, n, property = "species", n_for_each = NULL,
col_mo = NULL, ...)
}
\arguments{
@ -12,7 +12,7 @@ top_n_microorganisms(x, n, property = "fullname", n_for_each = NULL,
\item{n}{An integer specifying the maximum number of unique values of the \code{property} to include in the output.}
\item{property}{A character string indicating the microorganism property to use for filtering. Must be one of the column names of the \link{microorganisms} data set: "mo", "fullname", "status", "kingdom", "phylum", "class", "order", "family", "genus", "species", "subspecies", "rank", "ref", "oxygen_tolerance", "source", "lpsn", "lpsn_parent", "lpsn_renamed_to", "mycobank", "mycobank_parent", "mycobank_renamed_to", "gbif", "gbif_parent", "gbif_renamed_to", "prevalence", or "snomed". If \code{NULL}, the raw values from \code{col_mo} will be used without transformation.}
\item{property}{A character string indicating the microorganism property to use for filtering. Must be one of the column names of the \link{microorganisms} data set: "mo", "fullname", "status", "kingdom", "phylum", "class", "order", "family", "genus", "species", "subspecies", "rank", "ref", "oxygen_tolerance", "source", "lpsn", "lpsn_parent", "lpsn_renamed_to", "mycobank", "mycobank_parent", "mycobank_renamed_to", "gbif", "gbif_parent", "gbif_renamed_to", "prevalence", or "snomed". If \code{NULL}, the raw values from \code{col_mo} will be used without transformation. When using \code{"species"} (default) or \code{"subpecies"}, the genus will be added to make sure each (sub)species still belongs to the right genus.}
\item{n_for_each}{An optional integer specifying the maximum number of rows to retain for each value of the selected property. If \code{NULL}, all rows within the top \emph{n} groups will be included.}

View File

@ -118,6 +118,8 @@ test_that("test-zzz.R", {
"kable" = "knitr",
"knit_print" = "knitr",
"opts_chunk" = "knitr",
# parallelly
"availableCores" = "parallelly",
# pillar
"pillar_shaft" = "pillar",
"tbl_format_footer" = "pillar",