diff --git a/DESCRIPTION b/DESCRIPTION index d737e261c..b817b7a57 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR -Version: 3.0.1.9052 -Date: 2026-04-25 +Version: 3.0.1.9053 +Date: 2026-04-27 Title: Antimicrobial Resistance Data Analysis Description: Functions to simplify and standardise antimicrobial resistance (AMR) data analysis and to work with microbial and antimicrobial properties by @@ -37,17 +37,18 @@ Authors@R: c( person(given = c("Casper", "J."), family = "Albers", role = "ths", comment = c(ORCID = "0000-0002-9213-6743")), person(given = c("Corinna"), family = "Glasner", role = "ths", comment = c(ORCID = "0000-0003-1241-1328"))) Depends: R (>= 3.0.0) -Suggests: +Suggests: cleaner, cli, crayon, curl, data.table, dplyr, + future, + future.apply, ggplot2, knitr, openxlsx, - parallelly, pillar, progress, readxl, diff --git a/NEWS.md b/NEWS.md index 42ca28b1e..9ae3064af 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,8 +1,13 @@ -# AMR 3.0.1.9052 +# AMR 3.0.1.9053 + +This will become release v3.1.0, intended for launch end of May. ### New * Support for clinical breakpoints of 2026 of both CLSI and EUCAST, by adding all of their over 5,700 new clinical breakpoints to the `clinical_breakpoints` data set for usage in `as.sir()`. EUCAST 2026 is now the new default guideline for all MIC and disk diffusion interpretations. -* Integration with the **tidymodels** framework to allow seamless use of SIR, MIC and disk data in modelling pipelines via `recipes` +* Support for the [`future`](https://future.futureverse.org) package and its framework, as the previous implementation of parallel computing was slow + - **Breaking change**: `as.sir()` with `parallel = TRUE` now requires a non-sequential `future::plan()` to be active before the call — e.g., `future::plan(future::multisession)` — and throws an informative error if none is set. + - New all-core usage setup: when the number of AB columns is smaller than the number of available cores, rows are now split into batches so all cores stay active (row-batch mode). Previously, a 6-column dataset on a 16-core machine would only use 6 cores; now all 16 are used, with each worker processing a smaller row slice (lower per-worker memory pressure and processing time) +* Integration with the *tidymodels* framework to allow seamless use of SIR, MIC and disk data in modelling pipelines via `recipes` - `step_mic_log2()` to transform `` columns with log2, and `step_sir_numeric()` to convert `` columns to numeric - New `tidyselect` helpers: - `all_sir()`, `all_sir_predictors()` @@ -21,7 +26,6 @@ * Two new `NA` objects, `NA_ab_` and `NA_mo_`, analogous to base R's `NA_character_` and `NA_integer_`, for use in pipelines that require typed missing values ### Fixes -* Fixed multiple bugs in the `parallel = TRUE` mode of `as.sir()` for data frames * Fixed a bug in `as.sir()` where values that were purely numeric (e.g., `"1"`) and matched the broad SIR-matching regex would be incorrectly stripped of all content by the Unicode letter filter * Fixed a bug in `as.mic()` where MIC values in scientific notation (e.g., `"1e-3"`) were incorrectly handled because the letter `e` was removed along with other Unicode letters; scientific notation `e` is now preserved * Fixed a bug in `as.ab()` where certain AB codes containing "PH" or "TH" (such as `ETH`, `MTH`, `PHE`, `PHN`, `STH`, `THA`, `THI1`) would incorrectly return `NA` when combined in a vector with any untranslatable value (#245) @@ -37,8 +41,7 @@ * Fixed BRMO classification by including bacterial complexes (#275) * Fixed `as.sir()` for data frames silently deleting columns whose AB class was already `` when called a second time (re-running on already-converted data) (#278) * Fixed `as.sir()` for data frames incorrectly treating metadata columns (e.g. `patient`, `ward`) as antibiotic columns when their names coincidentally matched an antibiotic code; column content is now validated against AMR data patterns before inclusion -* Improved parallel computing in `as.sir()`: when the number of AB columns is smaller than the number of available cores, rows are now split into batches so all cores stay active (row-batch mode). Previously, a 6-column dataset on a 16-core machine would only use 6 cores; now all 16 are used, with each worker processing a smaller row slice (lower per-worker memory pressure) -* Fixed `as.sir()` ignoring `info = FALSE` for columns with no breakpoints (e.g. cefoxitin against *E. coli*): an operator-precedence bug (`&&`/`||`) caused the "Interpreting MIC values" intro message to fire unconditionally when `nrow(breakpoints) == 0`, regardless of `info`; the progress bar title was also not gated by `info` +* Fixed `as.sir()` ignoring `info = FALSE` for columns with no breakpoints (e.g. cefoxitin against *E. coli*) ### Updates * `as.sir()` with `reference_data`: custom guideline names now correctly classify values as R using EUCAST convention (`> breakpoint_R` for MIC, `< breakpoint_R` for disk); custom breakpoints with `host = NA` now serve as a host-agnostic fallback when no host-specific row matches (#239) @@ -56,7 +59,6 @@ * This results in more reliable behaviour compared to previous versions for capped MIC values * Removed the `"inverse"` option, which has now become redundant * `ab_group()` now returns values consist with the AMR selectors (#246) -* Added two new `NA` objects, `NA_ab_` and `NA_mo_`, analogous to base R's `NA_character_` and `NA_integer_`, for use in pipelines that require typed missing values # AMR 3.0.1 diff --git a/R/aa_helper_functions.R b/R/aa_helper_functions.R index bae37579f..d321816aa 100644 --- a/R/aa_helper_functions.R +++ b/R/aa_helper_functions.R @@ -1681,28 +1681,6 @@ readRDS_AMR <- function(file, refhook = NULL) { readRDS(con, refhook = refhook) } -get_n_cores <- function(max_cores = Inf) { - if (pkg_is_available("parallelly", min_version = "0.8.0", also_load = FALSE)) { - available_cores <- import_fn("availableCores", "parallelly") - n_cores <- min(available_cores(), na.rm = TRUE) - } else { - # `parallel` is part of base R since 2.14.0, but detectCores() is not very precise on exotic systems like Docker and quota-set Linux environments - n_cores <- parallel::detectCores()[1] - if (is.na(n_cores)) { - n_cores <- 1 - } - } - max_cores <- floor(max_cores) - if (max_cores == 0) { - n_cores <- 1 - } else if (max_cores < 0) { - n_cores <- max(1, n_cores - abs(max_cores)) - } else if (max_cores > 0) { - n_cores <- min(n_cores, max_cores) - } - n_cores -} - # Support `where()` if tidyselect not installed ---- if (!is.null(import_fn("where", "tidyselect", error_on_fail = FALSE))) { # tidyselect::where() exists, retrieve from their namespace to make `where()`s work across the package in default arguments diff --git a/R/antibiogram.R b/R/antibiogram.R index a614ef7ac..6a8d086e5 100755 --- a/R/antibiogram.R +++ b/R/antibiogram.R @@ -1206,7 +1206,7 @@ retrieve_wisca_parameters <- function(wisca_model, ...) { #' @rawNamespace if(getRversion() >= "3.0.0") S3method(pillar::tbl_sum, antibiogram) tbl_sum.antibiogram <- function(x, ...) { dims <- paste(format(NROW(x), big.mark = ","), AMR_env$cross_icon, format(NCOL(x), big.mark = ",")) - names(dims) <- "An Antibiogram" + names(dims) <- "An antibiogram" if (isTRUE(attributes(x)$wisca)) { dims <- c(dims, Type = paste0("WISCA with ", attributes(x)$conf_interval * 100, "% CI")) } else if (isTRUE(attributes(x)$formatting_type >= 13)) { @@ -1226,8 +1226,7 @@ tbl_format_footer.antibiogram <- function(x, ...) { } c(footer, font_subtle(paste0( "# Use `ggplot2::autoplot()` or base R `plot()` to create a plot of this antibiogram,\n", - "# or use it directly in R Markdown or ", - font_url("https://quarto.org", "Quarto"), ", see ", word_wrap("?antibiogram") + "# or use it directly in R Markdown or Quarto, see ", word_wrap("?antibiogram") ))) } diff --git a/R/bug_drug_combinations.R b/R/bug_drug_combinations.R index 6ab0f1f73..a2d78971a 100755 --- a/R/bug_drug_combinations.R +++ b/R/bug_drug_combinations.R @@ -129,16 +129,21 @@ bug_drug_combinations <- function(x, # turn and merge everything pivot <- lapply(x_mo_filter, function(x) { m <- as.matrix(table(as.sir(x), useNA = "always")) + na_idx <- which(is.na(rownames(m))) + get_row <- function(lbl) { + idx <- which(rownames(m) == lbl) + if (length(idx) == 1L) unname(m[idx, ]) else rep(0L, ncol(m)) + } data.frame( - S = m["S", ], - SDD = m["SDD", ], - I = m["I", ], - R = m["R", ], - NI = m["NI", ], - WT = m["WT", ], - NWT = m["NWT", ], - NS = m["NS", ], - na = m[which(is.na(rownames(m))), ], + S = get_row("S"), + SDD = get_row("SDD"), + I = get_row("I"), + R = get_row("R"), + NI = get_row("NI"), + WT = get_row("WT"), + NWT = get_row("NWT"), + NS = get_row("NS"), + na = if (length(na_idx) == 1L) unname(m[na_idx, ]) else rep(0L, ncol(m)), stringsAsFactors = FALSE ) }) diff --git a/R/sir.R b/R/sir.R index 2d7f38379..f316d179b 100755 --- a/R/sir.R +++ b/R/sir.R @@ -95,7 +95,7 @@ VALID_SIR_LEVELS <- c("S", "SDD", "I", "R", "NI", "WT", "NWT", "NS") #' # for veterinary breakpoints, also set `host`: #' your_data %>% mutate_if(is.mic, as.sir, host = "column_with_animal_species", guideline = "CLSI") #' -#' # fast processing with parallel computing: +#' # fast processing with parallel computing (requires future.apply): #' as.sir(your_data, ..., parallel = TRUE) #' ``` #' * Operators like "<=" will be considered according to the `capped_mic_handling` setting. At default, an MIC value of e.g. ">2" will return "NI" (non-interpretable) if the breakpoint is 4-8; the *true* MIC could be at either side of the breakpoint. This is to prevent that capped values from raw laboratory data would not be treated conservatively. @@ -112,7 +112,7 @@ VALID_SIR_LEVELS <- c("S", "SDD", "I", "R", "NI", "WT", "NWT", "NS") #' # for veterinary breakpoints, also set `host`: #' your_data %>% mutate_if(is.disk, as.sir, host = "column_with_animal_species", guideline = "CLSI") #' -#' # fast processing with parallel computing: +#' # fast processing with parallel computing (requires future.apply): #' as.sir(your_data, ..., parallel = TRUE) #' ``` #' @@ -220,9 +220,6 @@ VALID_SIR_LEVELS <- c("S", "SDD", "I", "R", "NI", "WT", "NWT", "NS") #' sir_interpretation_history() #' #' \donttest{ -#' # using parallel computing, which is available in base R: -#' as.sir(df_wide, parallel = TRUE, info = TRUE) -#' #' #' ## Using dplyr ------------------------------------------------- #' if (require("dplyr")) { @@ -716,8 +713,7 @@ as.sir.disk <- function(x, } #' @rdname as.sir -#' @param parallel A [logical] to indicate if parallel computing must be used, defaults to `FALSE`. The `parallel` package is part of base \R and no additional packages are required. On Unix/macOS with \R >= 4.0.0, [parallel::mclapply()] (fork-based) is used; on Windows and \R < 4.0.0, [parallel::parLapply()] with a PSOCK cluster is used (requires the AMR package to be installed, not just loaded via `devtools::load_all()`). Parallelism distributes columns across cores; it is most beneficial when there are many antibiotic columns and a large number of rows. -#' @param max_cores Maximum number of cores to use if `parallel = TRUE`. Use a negative value to subtract that number from the available number of cores, e.g. a value of `-2` on an 8-core machine means that at most 6 cores will be used. Defaults to `-1`. There will never be used more cores than variables to analyse. The available number of cores are detected using [parallelly::availableCores()] if that package is installed, and base \R's [parallel::detectCores()] otherwise. +#' @param parallel A [logical] to indicate if parallel computing must be used, defaults to `FALSE`. Requires the [`future.apply`][future.apply::future_lapply()] package. **A non-sequential [future::plan()] must already be active before setting `parallel = TRUE`** — for example, `future::plan(future::multisession)`. An error is thrown if `parallel = TRUE` is used without a plan set by the user. Parallelism distributes columns (and optionally row batches) across workers; it is most beneficial when there are many antibiotic columns and a large number of rows. #' @export as.sir.data.frame <- function(x, ..., @@ -737,7 +733,6 @@ as.sir.data.frame <- function(x, verbose = FALSE, info = interactive(), parallel = FALSE, - max_cores = -1, conserve_capped_values = NULL) { meet_criteria(x, allow_class = "data.frame") # will also check for dimensions > 0 meet_criteria(col_mo, allow_class = "character", is_in = colnames(x), allow_NULL = TRUE) @@ -756,7 +751,6 @@ as.sir.data.frame <- function(x, meet_criteria(verbose, allow_class = "logical", has_length = 1) meet_criteria(info, allow_class = "logical", has_length = 1) meet_criteria(parallel, allow_class = "logical", has_length = 1) - meet_criteria(max_cores, allow_class = c("numeric", "integer"), has_length = 1) x.bak <- x if (isTRUE(info) && message_not_thrown_before("as.sir", "sir_interpretation_history")) { @@ -911,40 +905,34 @@ as.sir.data.frame <- function(x, } # set up parallel computing - n_cores <- get_n_cores(max_cores = max_cores) - n_cores <- min(n_cores, length(ab_cols)) # never more cores than variables required - if (isTRUE(parallel) && (.Platform$OS.type == "windows" || getRversion() < "4.0.0")) { - cl <- tryCatch(parallel::makeCluster(n_cores, type = "PSOCK"), - error = function(e) { - if (isTRUE(info)) { - message_("Could not create parallel cluster, using single-core computation. Error message: ", conditionMessage(e)) - } - return(NULL) - } - ) - if (!is.null(cl)) { - # Each PSOCK worker is a fresh R session — the AMR package must be loaded there - # so all exported functions (as.sir, as.mic, as.disk, ...) are available. - amr_loaded_on_workers <- tryCatch({ - parallel::clusterEvalQ(cl, library(AMR, quietly = TRUE)) - TRUE - }, error = function(e) FALSE) - if (!amr_loaded_on_workers) { - if (isTRUE(info)) { - message_("Could not load AMR on parallel workers (package may not be installed); falling back to single-core computation.") - } - parallel::stopCluster(cl) - cl <- NULL - } - } - if (is.null(cl)) { - n_cores <- 1 + if (requireNamespace("future.apply", quietly = TRUE) && !inherits(future::plan(), "sequential")) { + if (isFALSE(parallel)) { + message_("Assuming {.code parallel = TRUE} since parallel computing has been set up using the {.pkg future} package before. Set {.help [{.fun plan}](future::plan)} to sequential to prevent this.") } + parallel <- TRUE } + if (isTRUE(parallel)) { + stop_ifnot( + requireNamespace("future.apply", quietly = TRUE), + "Setting {.code parallel = TRUE} requires the {.pkg future.apply} package.\n", + "Install it with {.code install.packages(\"future.apply\")}." + ) + stop_if(inherits(future::plan(), "sequential"), + "Setting {.code parallel = TRUE} requires a non-sequential {.help [{.fun future::plan}](future::plan)} to be active.\n", + "For your system, you could first run: {.code library(future); ", + ifelse(.Platform$OS.type == "windows" || in_rstudio(), + "plan(multisession)", + "plan(multicore)" + ), + "}", + call = FALSE + ) - if (isTRUE(info)) { - message_(as_note = FALSE) # empty line - message_("Processing columns:", as_note = FALSE) + n_workers <- future::nbrOfWorkers() + n_cores <- min(n_workers, length(ab_cols)) + } else { + n_workers <- 1L + n_cores <- 1L } # In parallel mode suppress per-column messages: workers print simultaneously and @@ -952,31 +940,23 @@ as.sir.data.frame <- function(x, is_parallel_run <- isTRUE(parallel) && n_cores > 1 && length(ab_cols) > 1 effective_info <- if (is_parallel_run) FALSE else info - # Row-batch mode: when n_cols < n_cores we would leave cores idle under plain - # column-parallel dispatch. Instead we split rows into pieces so every core - # gets work. pieces_per_col = ceil(n_cores / n_cols) gives ~n_cores jobs + # Row-batch mode: when n_cols < n_workers we would leave workers idle under plain + # column-parallel dispatch. Instead we split rows into pieces so every worker + # gets work. pieces_per_col = ceil(n_workers / n_cols) gives ~n_workers jobs # total; each job processes one column on one row slice, which also reduces # per-worker memory pressure (smaller breakpoints search space). - # Only used for the fork path (R >= 4.0, non-Windows); PSOCK clusters already - # incur high per-job serialisation overhead so we keep column-mode there. - use_fork <- is_parallel_run && - !(.Platform$OS.type == "windows" || getRversion() < "4.0.0") - pieces_per_col <- if (use_fork && length(ab_cols) < n_cores) { - ceiling(n_cores / length(ab_cols)) + if (is_parallel_run && length(ab_cols) < n_workers) { + pieces_per_col <- ceiling(n_workers / length(ab_cols)) } else { - 1L + pieces_per_col <- 1L } run_as_sir_column <- function(i, rows = NULL) { - # Always resolve AMR_env from the package namespace. This is essential for - # PSOCK workers (where the closure-captured AMR_env is a stale serialised copy - # while as.sir() writes to the live AMR:::AMR_env) and also avoids capturing - # pre-existing log entries from earlier in the session when forking. + # Always resolve AMR_env from the package namespace so workers get the live + # environment rather than a stale serialised copy from the closure. .amr_env <- get("AMR_env", envir = asNamespace("AMR"), inherits = FALSE) - # In parallel mode each worker (fork or PSOCK) has its own copy of the - # history; record the current length so we capture only the new rows added - # by the as.sir() call below, not any pre-existing entries inherited at fork - # time or carried over from earlier as.sir() calls. + # In parallel mode each worker has its own copy of the history; record the + # current length so we capture only the rows added by this as.sir() call. if (is_parallel_run) pre_log_n <- NROW(.amr_env$sir_interpretation_history) ab_col <- ab_cols[i] @@ -1057,7 +1037,7 @@ as.sir.data.frame <- function(x, ab <- ab_col ab_coerced <- suppressWarnings(as.ab(ab, info = FALSE)) show_message <- FALSE - if (!all(x[row_idx, ab, drop = TRUE] %in% c("S", "SDD", "I", "R", "NI", NA), na.rm = TRUE)) { + if (!all(x[row_idx, ab, drop = TRUE] %in% c(VALID_SIR_LEVELS, NA), na.rm = TRUE)) { show_message <- TRUE if (isTRUE(effective_info)) { message_("\u00a0\u00a0", .amr_env$bullet_icon, " Cleaning values in column ", paste0("{.field ", font_bold(ab), "}"), " (", @@ -1090,31 +1070,17 @@ as.sir.data.frame <- function(x, return(out) } - if (isTRUE(parallel) && n_cores > 1 && length(ab_cols) > 1) { + if (is_parallel_run) { if (isTRUE(info)) { message_(as_note = FALSE) if (pieces_per_col > 1L) { - message_("Running in parallel mode using ", n_cores, " out of ", get_n_cores(Inf), " cores, on columns ", vector_and(paste0("{.field ", font_bold(ab_cols, collapse = NULL), "}"), quotes = FALSE, sort = FALSE), " (", pieces_per_col, " row slices per column)...", as_note = FALSE, appendLF = FALSE) + message_("Running in parallel mode using ", n_cores, " workers, on columns ", vector_and(paste0("{.field ", font_bold(ab_cols, collapse = NULL), "}"), quotes = FALSE, sort = FALSE), " (", pieces_per_col, " row slices per column)...", as_note = FALSE, appendLF = FALSE) } else { - message_("Running in parallel mode using ", n_cores, " out of ", get_n_cores(Inf), " cores, on columns ", vector_and(paste0("{.field ", font_bold(ab_cols, collapse = NULL), "}"), quotes = FALSE, sort = FALSE), "...", as_note = FALSE, appendLF = FALSE) + message_("Running in parallel mode using ", n_cores, " workers, on columns ", vector_and(paste0("{.field ", font_bold(ab_cols, collapse = NULL), "}"), quotes = FALSE, sort = FALSE), "...", as_note = FALSE, appendLF = FALSE) } } - if (.Platform$OS.type == "windows" || getRversion() < "4.0.0") { - # PSOCK cluster: column-mode only (row-batch serialisation overhead not worth it) - on.exit(parallel::stopCluster(cl), add = TRUE) - parallel::clusterExport(cl, varlist = c( - "x", "x.bak", "x_mo", "ab_cols", "types", - "capped_mic_handling", "as_wt_nwt", "add_intrinsic_resistance", - "reference_data", "substitute_missing_r_breakpoint", "include_screening", "include_PKPD", - "breakpoint_type", "guideline", "host", "uti", "verbose", - "col_mo", "conserve_capped_values", - "effective_info", "is_parallel_run", - "run_as_sir_column" - ), envir = environment()) - result_list <- parallel::parLapply(cl, seq_along(ab_cols), run_as_sir_column) - } else if (pieces_per_col > 1L) { - # Row-batch mode (R >= 4.0, non-Windows, n_cols < n_cores): - # build (col, row_slice) job pairs so all cores stay active + if (pieces_per_col > 1L) { + # Row-batch mode: build (col, row_slice) job pairs so all workers stay active row_cuts <- unique(round(seq(0, nrow(x), length.out = pieces_per_col + 1L))) row_ranges <- lapply(seq_len(length(row_cuts) - 1L), function(p) { seq.int(row_cuts[p] + 1L, row_cuts[p + 1L]) @@ -1122,23 +1088,23 @@ as.sir.data.frame <- function(x, jobs <- do.call(c, lapply(seq_along(ab_cols), function(ci) { lapply(seq_along(row_ranges), function(p) list(col = ci, rows = row_ranges[[p]])) })) - flat <- parallel::mclapply(jobs, function(job) { + flat <- future.apply::future_lapply(jobs, function(job) { run_as_sir_column(job$col, job$rows) - }, mc.cores = n_cores) + }, future.seed = TRUE) # Reassemble: for each column concatenate row pieces in order result_list <- lapply(seq_along(ab_cols), function(ci) { pieces <- flat[vapply(jobs, function(j) j$col == ci, logical(1L))] list( result = as.sir(do.call(c, lapply(pieces, function(p) as.character(p$result)))), - log = { + log = { logs <- Filter(Negate(is.null), lapply(pieces, function(p) p$log)) if (length(logs) > 0L) do.call(rbind_AMR, logs) else NULL } ) }) } else { - # Column-parallel mode (R >= 4.0, non-Windows, n_cols >= n_cores) - result_list <- parallel::mclapply(seq_along(ab_cols), run_as_sir_column, mc.cores = n_cores) + # Column-parallel mode: one job per antibiotic column + result_list <- future.apply::future_lapply(seq_along(ab_cols), run_as_sir_column, future.seed = TRUE) } if (isTRUE(info)) { message_(font_green_bg("\u00a0DONE\u00a0"), as_note = FALSE) @@ -1148,9 +1114,16 @@ as.sir.data.frame <- function(x, } else { # sequential mode (non-parallel) if (isTRUE(info) && n_cores > 1 && NROW(x) * NCOL(x) > 10000) { - # give a note that parallel mode might be better + suggest <- ifelse(.Platform$OS.type == "windows" || in_rstudio(), + "plan(multisession)", + "plan(multicore)" + ) message_(as_note = FALSE) - message_("Running in sequential mode. Consider setting {.arg parallel} to {.code TRUE} to speed up processing on multiple cores.\n") + if (requireNamespace("future.apply", quietly = TRUE)) { + message_("Running in sequential mode. To speed up processing, set a parallel {.help [{.fun future::plan}](future::plan)} such as {.code ", suggest, "}.") + } else { + message_("Running in sequential mode. To speed up processing, install the {.pkg future.apply} package and then set {.code parallel = TRUE}.\n") + } } # this will contain a progress bar already result_list <- lapply(seq_along(ab_cols), run_as_sir_column) @@ -1280,7 +1253,7 @@ as_sir_method <- function(method_short, # backward compatibilty dots <- list(...) - dots <- dots[which(!names(dots) %in% c("warn", "mo.bak", "is_data.frame"))] + dots <- dots[which(!names(dots) %in% c("warn", "mo.bak", "is_data.frame", "as_wt_nwt"))] if (length(dots) != 0) { warning_("These arguments in {.help [{.fun as.sir}](AMR::as.sir)} are no longer used: ", vector_and(names(dots), quotes = "`"), ".", call = FALSE) } @@ -2121,7 +2094,7 @@ sir_interpretation_history <- function(clean = FALSE) { #' @noRd print.sir_log <- function(x, ...) { if (NROW(x) == 0) { - message_("No results to print. First run {.help [{.fun as.sir}](AMR::as.sir)} on MIC values or disk diffusion zones (or on a {.cls data.frame} containing any of these) to print a {.val logbook} data set here.") + message_("No results to print. First run {.help [{.fun as.sir}](AMR::as.sir)} on MIC values or disk diffusion zones (or on a {.cls data.frame} containing any of these) to print a 'logbook' data set here.") return(invisible(NULL)) } class(x) <- class(x)[class(x) != "sir_log"] @@ -2363,7 +2336,7 @@ coerce_reference_data_columns <- function(x) { ref <- AMR::clinical_breakpoints for (col in names(ref)) { col_ref <- ref[[col]] - col_x <- x[[col]] + col_x <- x[[col]] if (identical(class(col_ref), class(col_x))) next if (col == "mo") { x[[col]] <- suppressMessages(as.mo(col_x)) diff --git a/index.md b/index.md index c512a8175..78a02c0a6 100644 --- a/index.md +++ b/index.md @@ -26,12 +26,9 @@

- amr-for-r.org

-

- doi.org/10.18637/jss.v104.i03

@@ -174,24 +171,26 @@ example_isolates %>% #> ℹ Using column mo as input for `mo_fullname()` #> ℹ Using column mo as input for `mo_is_gram_negative()` #> ℹ Using column mo as input for `mo_is_intrinsic_resistant()` -#> ℹ Determining intrinsic resistance based on 'EUCAST Expected Resistant -#> Phenotypes' v1.2 (2023). This note will be shown once per session. -#> ℹ For `aminoglycosides()` using columns GEN (gentamicin), TOB (tobramycin), AMK -#> (amikacin), and KAN (kanamycin) -#> ℹ For `carbapenems()` using columns IPM (imipenem) and MEM (meropenem) +#> ℹ Determining intrinsic resistance based on 'EUCAST Expected +#> Resistant Phenotypes' v1.2 (2023). This note will be shown +#> once per session. +#> ℹ For `aminoglycosides()` using columns GEN (gentamicin), TOB +#> (tobramycin), AMK (amikacin), and KAN (kanamycin) +#> ℹ For `carbapenems()` using columns IPM (imipenem) and MEM +#> (meropenem) #> # A tibble: 35 × 7 -#> bacteria GEN TOB AMK KAN IPM MEM -#> -#> 1 Pseudomonas aeruginosa I S NA R S NA -#> 2 Pseudomonas aeruginosa I S NA R S NA -#> 3 Pseudomonas aeruginosa I S NA R S NA -#> 4 Pseudomonas aeruginosa S S S R NA S -#> 5 Pseudomonas aeruginosa S S S R S S -#> 6 Pseudomonas aeruginosa S S S R S S -#> 7 Stenotrophomonas maltophilia R R R R R R -#> 8 Pseudomonas aeruginosa S S S R NA S -#> 9 Pseudomonas aeruginosa S S S R NA S -#> 10 Pseudomonas aeruginosa S S S R S S +#> bacteria GEN TOB AMK KAN IPM MEM +#> +#> 1 Pseudomonas aer… I S NA R S NA +#> 2 Pseudomonas aer… I S NA R S NA +#> 3 Pseudomonas aer… I S NA R S NA +#> 4 Pseudomonas aer… S S S R NA S +#> 5 Pseudomonas aer… S S S R S S +#> 6 Pseudomonas aer… S S S R S S +#> 7 Stenotrophomona… R R R R R R +#> 8 Pseudomonas aer… S S S R NA S +#> 9 Pseudomonas aer… S S S R NA S +#> 10 Pseudomonas aer… S S S R S S #> # ℹ 25 more rows ``` @@ -215,23 +214,24 @@ output format automatically (such as markdown, LaTeX, HTML, etc.). ``` r antibiogram(example_isolates, antimicrobials = c(aminoglycosides(), carbapenems())) -#> ℹ For `aminoglycosides()` using columns GEN (gentamicin), TOB (tobramycin), AMK -#> (amikacin), and KAN (kanamycin) -#> ℹ For `carbapenems()` using columns IPM (imipenem) and MEM (meropenem) +#> ℹ For `aminoglycosides()` using columns GEN (gentamicin), TOB +#> (tobramycin), AMK (amikacin), and KAN (kanamycin) +#> ℹ For `carbapenems()` using columns IPM (imipenem) and MEM +#> (meropenem) ``` -| Pathogen | Amikacin | Gentamicin | Imipenem | Kanamycin | Meropenem | Tobramycin | -|:---|:---|:---|:---|:---|:---|:---| -| CoNS | 0% (0-8%,N=43) | 86% (82-90%,N=309) | 52% (37-67%,N=48) | 0% (0-8%,N=43) | 52% (37-67%,N=48) | 22% (12-35%,N=55) | -| *E. coli* | 100% (98-100%,N=171) | 98% (96-99%,N=460) | 100% (99-100%,N=422) | NA | 100% (99-100%,N=418) | 97% (96-99%,N=462) | -| *E. faecalis* | 0% (0-9%,N=39) | 0% (0-9%,N=39) | 100% (91-100%,N=38) | 0% (0-9%,N=39) | NA | 0% (0-9%,N=39) | -| *K. pneumoniae* | NA | 90% (79-96%,N=58) | 100% (93-100%,N=51) | NA | 100% (93-100%,N=53) | 90% (79-96%,N=58) | -| *P. aeruginosa* | NA | 100% (88-100%,N=30) | NA | 0% (0-12%,N=30) | NA | 100% (88-100%,N=30) | -| *P. mirabilis* | NA | 94% (80-99%,N=34) | 94% (79-99%,N=32) | NA | NA | 94% (80-99%,N=34) | -| *S. aureus* | NA | 99% (97-100%,N=233) | NA | NA | NA | 98% (92-100%,N=86) | -| *S. epidermidis* | 0% (0-8%,N=44) | 79% (71-85%,N=163) | NA | 0% (0-8%,N=44) | NA | 51% (40-61%,N=89) | -| *S. hominis* | NA | 92% (84-97%,N=80) | NA | NA | NA | 85% (74-93%,N=62) | -| *S. pneumoniae* | 0% (0-3%,N=117) | 0% (0-3%,N=117) | NA | 0% (0-3%,N=117) | NA | 0% (0-3%,N=117) | +| Pathogen | Amikacin | Gentamicin | Imipenem | Kanamycin | Meropenem | Tobramycin | +|:-----------------|:---------------------|:--------------------|:---------------------|:----------------|:---------------------|:--------------------| +| CoNS | 0% (0-8%,N=43) | 86% (82-90%,N=309) | 52% (37-67%,N=48) | 0% (0-8%,N=43) | 52% (37-67%,N=48) | 22% (12-35%,N=55) | +| *E. coli* | 100% (98-100%,N=171) | 98% (96-99%,N=460) | 100% (99-100%,N=422) | NA | 100% (99-100%,N=418) | 97% (96-99%,N=462) | +| *E. faecalis* | 0% (0-9%,N=39) | 0% (0-9%,N=39) | 100% (91-100%,N=38) | 0% (0-9%,N=39) | NA | 0% (0-9%,N=39) | +| *K. pneumoniae* | NA | 90% (79-96%,N=58) | 100% (93-100%,N=51) | NA | 100% (93-100%,N=53) | 90% (79-96%,N=58) | +| *P. aeruginosa* | NA | 100% (88-100%,N=30) | NA | 0% (0-12%,N=30) | NA | 100% (88-100%,N=30) | +| *P. mirabilis* | NA | 94% (80-99%,N=34) | 94% (79-99%,N=32) | NA | NA | 94% (80-99%,N=34) | +| *S. aureus* | NA | 99% (97-100%,N=233) | NA | NA | NA | 98% (92-100%,N=86) | +| *S. epidermidis* | 0% (0-8%,N=44) | 79% (71-85%,N=163) | NA | 0% (0-8%,N=44) | NA | 51% (40-61%,N=89) | +| *S. hominis* | NA | 92% (84-97%,N=80) | NA | NA | NA | 85% (74-93%,N=62) | +| *S. pneumoniae* | 0% (0-3%,N=117) | 0% (0-3%,N=117) | NA | 0% (0-3%,N=117) | NA | 0% (0-3%,N=117) | In combination antibiograms, it is clear that combined antimicrobials yield higher empiric coverage: @@ -242,10 +242,10 @@ antibiogram(example_isolates, mo_transform = "gramstain") ``` -| Pathogen | Piperacillin/tazobactam | Piperacillin/tazobactam + Gentamicin | Piperacillin/tazobactam + Tobramycin | -|:---|:---|:---|:---| -| Gram-negative | 88% (85-91%,N=641) | 99% (97-99%,N=691) | 98% (97-99%,N=693) | -| Gram-positive | 86% (82-89%,N=345) | 98% (96-98%,N=1044) | 95% (93-97%,N=550) | +| Pathogen | Piperacillin/tazobactam | Piperacillin/tazobactam + Gentamicin | Piperacillin/tazobactam + Tobramycin | +|:--------------|:------------------------|:-------------------------------------|:-------------------------------------| +| Gram-negative | 88% (85-91%,N=641) | 99% (97-99%,N=691) | 98% (97-99%,N=693) | +| Gram-positive | 86% (82-89%,N=345) | 98% (96-98%,N=1044) | 95% (93-97%,N=550) | Like many other functions in this package, `antibiogram()` comes with support for 28 languages that are often detected automatically based on @@ -318,16 +318,18 @@ example_isolates %>% summarise(across(c(GEN, TOB), list(total_R = resistance, conf_int = function(x) sir_confidence_interval(x, collapse = "-")))) -#> ℹ `resistance()` assumes the EUCAST guideline and thus considers the 'I' -#> category susceptible. Set the `guideline` argument or the `AMR_guideline` -#> option to either "CLSI" or "EUCAST", see `?AMR-options`. +#> ℹ `resistance()` assumes the EUCAST guideline and thus +#> considers the 'I' category susceptible. Set the `guideline` +#> argument or the `AMR_guideline` option to either "CLSI" or +#> "EUCAST", see `?AMR-options`. #> ℹ This message will be shown once per session. #> # A tibble: 3 × 5 -#> ward GEN_total_R GEN_conf_int TOB_total_R TOB_conf_int -#> -#> 1 Clinical 0.229 0.205-0.254 0.315 0.284-0.347 -#> 2 ICU 0.290 0.253-0.33 0.400 0.353-0.449 -#> 3 Outpatient 0.2 0.131-0.285 0.368 0.254-0.493 +#> ward GEN_total_R GEN_conf_int TOB_total_R +#> +#> 1 Clinical 0.229 0.205-0.254 0.315 +#> 2 ICU 0.290 0.253-0.33 0.400 +#> 3 Outpatient 0.2 0.131-0.285 0.368 +#> # ℹ 1 more variable: TOB_conf_int ``` Or use [antimicrobial @@ -344,15 +346,16 @@ out <- example_isolates %>% # calculate AMR using resistance(), over all aminoglycosides and polymyxins: summarise(across(c(aminoglycosides(), polymyxins()), resistance)) -#> ℹ For `aminoglycosides()` using columns GEN (gentamicin), TOB (tobramycin), AMK -#> (amikacin), and KAN (kanamycin) +#> ℹ For `aminoglycosides()` using columns GEN (gentamicin), TOB +#> (tobramycin), AMK (amikacin), and KAN (kanamycin) #> ℹ For `polymyxins()` using column COL (colistin) #> Warning: There was 1 warning in `summarise()`. -#> ℹ In argument: `across(c(aminoglycosides(), polymyxins()), resistance)`. +#> ℹ In argument: `across(c(aminoglycosides(), polymyxins()), +#> resistance)`. #> ℹ In group 3: `ward = "Outpatient"`. #> Caused by warning: -#> ! Introducing NA: only 23 results available for KAN in group: ward = "Outpatient" -#> (whilst `minimum = 30`). +#> ! Introducing NA: only 23 results available for KAN in group: +#> ward = "Outpatient" (whilst `minimum = 30`). out #> # A tibble: 3 × 6 #> ward GEN TOB AMK KAN COL @@ -366,11 +369,12 @@ out # transform the antibiotic columns to names: out %>% set_ab_names() #> # A tibble: 3 × 6 -#> ward gentamicin tobramycin amikacin kanamycin colistin -#> -#> 1 Clinical 0.229 0.315 0.626 1 0.780 -#> 2 ICU 0.290 0.400 0.662 1 0.857 -#> 3 Outpatient 0.2 0.368 0.605 NA 0.889 +#> ward gentamicin tobramycin amikacin kanamycin +#> +#> 1 Clinical 0.229 0.315 0.626 1 +#> 2 ICU 0.290 0.400 0.662 1 +#> 3 Outpatient 0.2 0.368 0.605 NA +#> # ℹ 1 more variable: colistin ``` ``` r diff --git a/man/as.sir.Rd b/man/as.sir.Rd index 1121f8ff9..c08986e2e 100644 --- a/man/as.sir.Rd +++ b/man/as.sir.Rd @@ -73,7 +73,7 @@ is_sir_eligible(x, threshold = 0.05) include_PKPD = getOption("AMR_include_PKPD", TRUE), breakpoint_type = getOption("AMR_breakpoint_type", "human"), host = NULL, language = get_AMR_locale(), verbose = FALSE, info = interactive(), - parallel = FALSE, max_cores = -1, conserve_capped_values = NULL) + parallel = FALSE, conserve_capped_values = NULL) sir_interpretation_history(clean = FALSE) } @@ -150,9 +150,7 @@ The default \code{"conservative"} setting ensures cautious handling of uncertain \item{col_mo}{Column name of the names or codes of the microorganisms (see \code{\link[=as.mo]{as.mo()}}) - the default is the first column of class \code{\link{mo}}. Values will be coerced using \code{\link[=as.mo]{as.mo()}}.} -\item{parallel}{A \link{logical} to indicate if parallel computing must be used, defaults to \code{FALSE}. The \code{parallel} package is part of base \R and no additional packages are required. On Unix/macOS with \R >= 4.0.0, \code{\link[parallel:mclapply]{parallel::mclapply()}} (fork-based) is used; on Windows and \R < 4.0.0, \code{\link[parallel:clusterApply]{parallel::parLapply()}} with a PSOCK cluster is used (requires the AMR package to be installed, not just loaded via \code{devtools::load_all()}). Parallelism distributes columns across cores; it is most beneficial when there are many antibiotic columns and a large number of rows.} - -\item{max_cores}{Maximum number of cores to use if \code{parallel = TRUE}. Use a negative value to subtract that number from the available number of cores, e.g. a value of \code{-2} on an 8-core machine means that at most 6 cores will be used. Defaults to \code{-1}. There will never be used more cores than variables to analyse. The available number of cores are detected using \code{\link[parallelly:availableCores]{parallelly::availableCores()}} if that package is installed, and base \R's \code{\link[parallel:detectCores]{parallel::detectCores()}} otherwise.} +\item{parallel}{A \link{logical} to indicate if parallel computing must be used, defaults to \code{FALSE}. Requires the \code{\link[future.apply:future_lapply]{future.apply}} package. \strong{A non-sequential \code{\link[future:plan]{future::plan()}} must already be active before setting \code{parallel = TRUE}} — for example, \code{future::plan(future::multisession)}. An error is thrown if \code{parallel = TRUE} is used without a plan set by the user. Parallelism distributes columns (and optionally row batches) across workers; it is most beneficial when there are many antibiotic columns and a large number of rows.} \item{clean}{A \link{logical} to indicate whether previously stored results should be forgotten after returning the 'logbook' with results.} } @@ -183,7 +181,7 @@ your_data \%>\% mutate_if(is.mic, as.sir, ab = c("cipro", "ampicillin", ...), mo # for veterinary breakpoints, also set `host`: your_data \%>\% mutate_if(is.mic, as.sir, host = "column_with_animal_species", guideline = "CLSI") -# fast processing with parallel computing: +# fast processing with parallel computing (requires future.apply): as.sir(your_data, ..., parallel = TRUE) }\if{html}{\out{
}} \item Operators like "<=" will be considered according to the \code{capped_mic_handling} setting. At default, an MIC value of e.g. ">2" will return "NI" (non-interpretable) if the breakpoint is 4-8; the \emph{true} MIC could be at either side of the breakpoint. This is to prevent that capped values from raw laboratory data would not be treated conservatively. @@ -201,7 +199,7 @@ your_data \%>\% mutate_if(is.disk, as.sir, ab = c("cipro", "ampicillin", ...), m # for veterinary breakpoints, also set `host`: your_data \%>\% mutate_if(is.disk, as.sir, host = "column_with_animal_species", guideline = "CLSI") -# fast processing with parallel computing: +# fast processing with parallel computing (requires future.apply): as.sir(your_data, ..., parallel = TRUE) }\if{html}{\out{}} } @@ -313,9 +311,6 @@ as.sir(df_wide) sir_interpretation_history() \donttest{ -# using parallel computing, which is available in base R: -as.sir(df_wide, parallel = TRUE, info = TRUE) - ## Using dplyr ------------------------------------------------- if (require("dplyr")) { diff --git a/tests/testthat/test-sir.R b/tests/testthat/test-sir.R index 2e877c8d2..a648a0e83 100644 --- a/tests/testthat/test-sir.R +++ b/tests/testthat/test-sir.R @@ -408,13 +408,13 @@ test_that("test-sir.R", { # Issue #278: re-running as.sir() on already- data must preserve columns df_already_sir <- data.frame( - mo = "B_ESCHR_COLI", + mo = "B_ESCHR_COLI", AMC = as.mic(c("1", "2", "4")), GEN = sample(c("S", "I", "R"), 3, replace = TRUE), stringsAsFactors = FALSE ) - first_pass <- suppressMessages(as.sir(df_already_sir, col_mo = "mo", info = FALSE)) - second_pass <- suppressMessages(as.sir(first_pass, col_mo = "mo", info = FALSE)) + first_pass <- suppressMessages(as.sir(df_already_sir, col_mo = "mo", info = FALSE)) + second_pass <- suppressMessages(as.sir(first_pass, col_mo = "mo", info = FALSE)) expect_equal(ncol(first_pass), ncol(second_pass)) expect_true(is.sir(second_pass[["AMC"]])) expect_true(is.sir(second_pass[["GEN"]])) @@ -424,15 +424,15 @@ test_that("test-sir.R", { # Issue #278: metadata columns whose names coincidentally match antibiotic # codes (e.g. 'patient' -> OXY, 'ward' -> PRU) must not be processed df_meta <- data.frame( - mo = "B_ESCHR_COLI", + mo = "B_ESCHR_COLI", patient = paste0("Pt_", 1:20), - ward = rep(c("ICU", "Surgery", "Outpatient", "ED"), 5), - AMC = as.mic(rep(c("1", "2", "4", "8"), 5)), + ward = rep(c("ICU", "Surgery", "Outpatient", "ED"), 5), + AMC = as.mic(rep(c("1", "2", "4", "8"), 5)), stringsAsFactors = FALSE ) df_meta_sir <- suppressMessages(as.sir(df_meta, col_mo = "mo", info = FALSE)) expect_true("patient" %in% colnames(df_meta_sir)) - expect_true("ward" %in% colnames(df_meta_sir)) + expect_true("ward" %in% colnames(df_meta_sir)) expect_false(is.sir(df_meta_sir[["patient"]])) expect_false(is.sir(df_meta_sir[["ward"]])) expect_true(is.sir(df_meta_sir[["AMC"]])) @@ -441,92 +441,111 @@ test_that("test-sir.R", { # Tests must pass even when only 1 core is available; parallel = TRUE then # silently falls back to sequential, but results must still be identical. - set.seed(42) - n_par <- 200 - df_par <- data.frame( - mo = "B_ESCHR_COLI", - AMC = as.mic(sample(c("0.25", "0.5", "1", "2", "4", "8", "16", "32"), n_par, TRUE)), - GEN = as.mic(sample(c("0.5", "1", "2", "4", "8", "16", "32", "64"), n_par, TRUE)), - CIP = as.mic(sample(c("0.001", "0.002", "0.004", "0.008", "0.016", "0.032"), n_par, TRUE)), - PEN = sample(c("S", "I", "R", NA_character_), n_par, TRUE), - stringsAsFactors = FALSE - ) + if (AMR:::pkg_is_available("future.apply")) { + set.seed(42) + n_par <- 200 + df_par <- data.frame( + mo = "B_ESCHR_COLI", + AMC = as.mic(sample(c("0.25", "0.5", "1", "2", "4", "8", "16", "32"), n_par, TRUE)), + GEN = as.mic(sample(c("0.5", "1", "2", "4", "8", "16", "32", "64"), n_par, TRUE)), + CIP = as.mic(sample(c("0.001", "0.002", "0.004", "0.008", "0.016", "0.032"), n_par, TRUE)), + PEN = sample(c("S", "I", "R", NA_character_), n_par, TRUE), + stringsAsFactors = FALSE + ) - # clear any existing history before comparing - sir_interpretation_history(clean = TRUE) - sir_seq <- suppressMessages(as.sir(df_par, col_mo = "mo", info = FALSE)) - log_seq <- sir_interpretation_history(clean = TRUE) + # clear any existing history before comparing + sir_interpretation_history(clean = TRUE) + sir_seq <- suppressMessages(as.sir(df_par, col_mo = "mo", info = FALSE)) + log_seq <- sir_interpretation_history(clean = TRUE) - sir_par <- suppressMessages(as.sir(df_par, col_mo = "mo", info = FALSE, parallel = TRUE)) - log_par <- sir_interpretation_history(clean = TRUE) + future::plan(future::multicore) + n_max_workers <- future::nbrOfWorkers() - # 1. parallel = TRUE gives identical SIR results to sequential - expect_identical(sir_seq[["AMC"]], sir_par[["AMC"]]) - expect_identical(sir_seq[["GEN"]], sir_par[["GEN"]]) - expect_identical(sir_seq[["CIP"]], sir_par[["CIP"]]) - expect_identical(sir_seq[["PEN"]], sir_par[["PEN"]]) + sir_par <- suppressMessages(as.sir(df_par, col_mo = "mo", info = FALSE, parallel = TRUE)) + log_par <- sir_interpretation_history(clean = TRUE) - # 2. same number of log rows as sequential - expect_equal(nrow(log_seq), nrow(log_par)) + # 1. parallel = TRUE gives identical SIR results to sequential + expect_identical(sir_seq[["AMC"]], sir_par[["AMC"]]) + expect_identical(sir_seq[["GEN"]], sir_par[["GEN"]]) + expect_identical(sir_seq[["CIP"]], sir_par[["CIP"]]) + expect_identical(sir_seq[["PEN"]], sir_par[["PEN"]]) - # 3. pre-existing log entries must not be duplicated - # run sequential once to populate the history, then run parallel and - # verify the new parallel run adds exactly as many rows as sequential - sir_interpretation_history(clean = TRUE) - suppressMessages(as.sir(df_par, col_mo = "mo", info = FALSE)) # populate history - pre_n <- nrow(sir_interpretation_history()) - suppressMessages(as.sir(df_par, col_mo = "mo", info = FALSE, parallel = TRUE)) - post_n <- nrow(sir_interpretation_history()) - expect_equal(post_n - pre_n, nrow(log_seq)) # exactly one run's worth of new rows - sir_interpretation_history(clean = TRUE) + # 2. same number of log rows as sequential + expect_equal(nrow(log_seq), nrow(log_par)) - # 4. two sequential runs and two parallel runs yield identical results - sir_par2 <- suppressMessages(as.sir(df_par, col_mo = "mo", info = FALSE, parallel = TRUE)) - expect_identical(sir_par[["AMC"]], sir_par2[["AMC"]]) - expect_identical(sir_par[["GEN"]], sir_par2[["GEN"]]) + # 3. pre-existing log entries must not be duplicated + # run sequential once to populate the history, then run parallel and + # verify the new parallel run adds exactly as many rows as sequential + sir_interpretation_history(clean = TRUE) + future::plan(future::sequential) + suppressMessages(as.sir(df_par, col_mo = "mo", info = FALSE)) # populate history + pre_n <- nrow(sir_interpretation_history()) + future::plan(future::multicore) + suppressMessages(as.sir(df_par, col_mo = "mo", info = FALSE, parallel = TRUE)) + post_n <- nrow(sir_interpretation_history()) + expect_equal(post_n - pre_n, nrow(log_seq)) # exactly one run's worth of new rows + sir_interpretation_history(clean = TRUE) - # 5. max_cores = 1 gives same results as default sequential - sir_mc1 <- suppressMessages(as.sir(df_par, col_mo = "mo", info = FALSE, parallel = TRUE, max_cores = 1L)) - expect_identical(sir_seq[["AMC"]], sir_mc1[["AMC"]]) - expect_identical(sir_seq[["GEN"]], sir_mc1[["GEN"]]) + # 4. two sequential runs and two parallel runs yield identical results + sir_par2 <- suppressMessages(as.sir(df_par, col_mo = "mo", info = FALSE, parallel = TRUE)) + expect_identical(sir_par[["AMC"]], sir_par2[["AMC"]]) + expect_identical(sir_par[["GEN"]], sir_par2[["GEN"]]) - # 6. max_cores = 2 and max_cores = 3 give same results as sequential - sir_mc2 <- suppressMessages(as.sir(df_par, col_mo = "mo", info = FALSE, parallel = TRUE, max_cores = 2L)) - sir_mc3 <- suppressMessages(as.sir(df_par, col_mo = "mo", info = FALSE, parallel = TRUE, max_cores = 3L)) - expect_identical(sir_seq[["AMC"]], sir_mc2[["AMC"]]) - expect_identical(sir_seq[["GEN"]], sir_mc3[["GEN"]]) + # 5. used cores = 1 gives same results as default sequential + future::plan(future::multicore, workers = 1) + sir_mc1 <- suppressMessages(as.sir(df_par, col_mo = "mo", info = FALSE, parallel = TRUE)) + expect_identical(sir_seq[["AMC"]], sir_mc1[["AMC"]]) + expect_identical(sir_seq[["GEN"]], sir_mc1[["GEN"]]) - # 7. single-column data frame falls back silently to sequential - df_single <- df_par[, c("mo", "AMC")] - sir_single_seq <- suppressMessages(as.sir(df_single, col_mo = "mo", info = FALSE)) - sir_single_par <- suppressMessages(as.sir(df_single, col_mo = "mo", info = FALSE, parallel = TRUE)) - expect_identical(sir_single_seq[["AMC"]], sir_single_par[["AMC"]]) + # 6. used cores = 2 and used cores = 3 give same results as sequential + if (n_max_workers >= 3) { + future::plan(future::multicore, workers = 2) + sir_mc2 <- suppressMessages(as.sir(df_par, col_mo = "mo", info = FALSE, parallel = TRUE)) + future::plan(future::multicore, workers = 3) + sir_mc3 <- suppressMessages(as.sir(df_par, col_mo = "mo", info = FALSE, parallel = TRUE)) + expect_identical(sir_seq[["AMC"]], sir_mc2[["AMC"]]) + expect_identical(sir_seq[["GEN"]], sir_mc3[["GEN"]]) + } - # 9. row-batch mode (n_cols < n_cores): force row splitting via max_cores and - # verify identical output to sequential for a dataset with 2 AB columns so - # pieces_per_col = ceiling(max_cores / 2) >= 2 and row batching activates - df_wide <- data.frame( - mo = "B_ESCHR_COLI", - AMC = as.mic(sample(c("1", "2", "4", "8"), n_par, TRUE)), - GEN = as.mic(sample(c("1", "2", "4", "8"), n_par, TRUE)), - stringsAsFactors = FALSE - ) - sir_wide_seq <- suppressMessages(as.sir(df_wide, col_mo = "mo", info = FALSE)) - sir_wide_par <- suppressMessages(as.sir(df_wide, col_mo = "mo", info = FALSE, - parallel = TRUE, max_cores = 8L)) - expect_identical(sir_wide_seq[["AMC"]], sir_wide_par[["AMC"]]) - expect_identical(sir_wide_seq[["GEN"]], sir_wide_par[["GEN"]]) + # 7. single-column data frame falls back silently to sequential + df_single <- df_par[, c("mo", "AMC")] + future::plan(future::sequential) + sir_single_seq <- suppressMessages(as.sir(df_single, col_mo = "mo", info = FALSE)) + future::plan(future::multicore) + sir_single_par <- suppressMessages(as.sir(df_single, col_mo = "mo", info = FALSE, parallel = TRUE)) + expect_identical(sir_single_seq[["AMC"]], sir_single_par[["AMC"]]) - # 8. info = TRUE with parallel does not produce per-column worker messages - # (messages should only appear in the main process, not duplicated from workers) - msgs <- capture.output( - suppressWarnings(as.sir(df_par, col_mo = "mo", info = TRUE, parallel = TRUE)), - type = "message" - ) - # each AB column name should appear at most once in all messages combined - for (ab_nm in c("AMC", "GEN", "CIP", "PEN")) { - n_mentions <- sum(grepl(ab_nm, msgs, fixed = TRUE)) - expect_lte(n_mentions, 1L) + # 8. row-batch mode (n_cols < n_cores): force row splitting via used cores and + # verify identical output to sequential for a dataset with 2 AB columns so + # pieces_per_col = ceiling(used cores / 2) >= 2 and row batching activates + df_wide <- data.frame( + mo = "B_ESCHR_COLI", + AMC = as.mic(sample(c("1", "2", "4", "8"), n_par, TRUE)), + GEN = as.mic(sample(c("1", "2", "4", "8"), n_par, TRUE)), + stringsAsFactors = FALSE + ) + future::plan(future::sequential) + sir_wide_seq <- suppressMessages(as.sir(df_wide, col_mo = "mo", info = FALSE)) + future::plan(future::multicore) + sir_wide_par <- suppressMessages(as.sir(df_wide, + col_mo = "mo", info = FALSE, + parallel = TRUE + )) + expect_identical(sir_wide_seq[["AMC"]], sir_wide_par[["AMC"]]) + expect_identical(sir_wide_seq[["GEN"]], sir_wide_par[["GEN"]]) + + # 8. info = TRUE with parallel does not produce per-column worker messages + # (messages should only appear in the main process, not duplicated from workers) + msgs <- capture.output( + suppressWarnings(as.sir(df_par, col_mo = "mo", info = TRUE, parallel = TRUE)), + type = "message" + ) + # each AB column name should appear at most once in all messages combined + for (ab_nm in c("AMC", "GEN", "CIP", "PEN")) { + n_mentions <- sum(grepl(ab_nm, msgs, fixed = TRUE)) + expect_lte(n_mentions, 1L) + } + future::plan(future::sequential) } }) @@ -536,9 +555,9 @@ test_that("custom reference_data: non-EUCAST/CLSI guideline produces R", { # coerce_reference_data_columns() will coerce mo/ab to the right class. my_bp <- clinical_breakpoints[clinical_breakpoints$method == "MIC" & clinical_breakpoints$type == "human", ][1, ] - my_bp$guideline <- "MyLab 2025" - my_bp$mo <- "B_ACHRMB_XYLS" # plain character — coerced to - my_bp$ab <- "MEM" # plain character — coerced to + my_bp$guideline <- "MyLab 2025" + my_bp$mo <- "B_ACHRMB_XYLS" # plain character — coerced to + my_bp$ab <- "MEM" # plain character — coerced to my_bp$breakpoint_S <- 8 my_bp$breakpoint_R <- 32 @@ -556,26 +575,30 @@ test_that("custom reference_data: non-EUCAST/CLSI guideline produces R", { # guideline explicitly set: same result when it matches the data expect_equal(as.character(suppressMessages( - as.sir(as.mic(64), mo = "B_ACHRMB_XYLS", ab = "MEM", - guideline = "MyLab 2025", reference_data = my_bp) + as.sir(as.mic(64), + mo = "B_ACHRMB_XYLS", ab = "MEM", + guideline = "MyLab 2025", reference_data = my_bp + ) )), "R") }) test_that("custom reference_data: host = NA acts as host-agnostic fallback", { my_bp <- clinical_breakpoints[clinical_breakpoints$method == "MIC" & clinical_breakpoints$type == "human", ][1, ] - my_bp$guideline <- "MyLab 2025" - my_bp$mo <- "B_ACHRMB_XYLS" - my_bp$ab <- "MEM" - my_bp$type <- "animal" - my_bp$host <- NA # logical NA — coerced to character by coerce_reference_data_columns() + my_bp$guideline <- "MyLab 2025" + my_bp$mo <- "B_ACHRMB_XYLS" + my_bp$ab <- "MEM" + my_bp$type <- "animal" + my_bp$host <- NA # logical NA — coerced to character by coerce_reference_data_columns() my_bp$breakpoint_S <- 8 my_bp$breakpoint_R <- 32 # NA host should match when no species-specific row exists result <- suppressMessages( - as.sir(as.mic(64), mo = "B_ACHRMB_XYLS", ab = "MEM", - host = "dogs", breakpoint_type = "animal", reference_data = my_bp) + as.sir(as.mic(64), + mo = "B_ACHRMB_XYLS", ab = "MEM", + host = "dogs", breakpoint_type = "animal", reference_data = my_bp + ) ) expect_equal(as.character(result), "R") }) diff --git a/tests/testthat/test-zzz.R b/tests/testthat/test-zzz.R index 153fc97fe..c2429c02c 100644 --- a/tests/testthat/test-zzz.R +++ b/tests/testthat/test-zzz.R @@ -89,6 +89,11 @@ test_that("test-zzz.R", { "symbol" = "cli", # curl "has_internet" = "curl", + # future + "plan" = "future", + "nbrOfWorkers" = "future", + # future.apply + "future_lapply" = "future.apply", # ggplot2 "aes" = "ggplot2", "arrow" = "ggplot2", @@ -127,8 +132,6 @@ test_that("test-zzz.R", { "kable" = "knitr", "knit_print" = "knitr", "opts_chunk" = "knitr", - # parallelly - "availableCores" = "parallelly", # pillar "pillar_shaft" = "pillar", "style_na" = "pillar", diff --git a/tools/benchmark_parallel.png b/tools/benchmark_parallel.png new file mode 100644 index 000000000..a73c821d0 Binary files /dev/null and b/tools/benchmark_parallel.png differ