fix parallel

2026-07-17 16:31:06 +02:00 · 2026-04-30 00:41:17 +02:00
parent 20c9447096
commit 49e440d037
14 changed files with 155 additions and 174 deletions
--- a/1
+++ b/1
@@ -49,7 +49,6 @@ Suggests:
  ggplot2,
  knitr,
  openxlsx,
  parallelly,
  pillar,
  progress,
  readxl,
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,8 +1,12 @@
 # AMR 3.0.1.9053
 This will become release v3.1.0, intended for launch end of May.
 ### New
 * Support for clinical breakpoints of 2026 of both CLSI and EUCAST, by adding all of their over 5,700 new clinical breakpoints to the `clinical_breakpoints` data set for usage in `as.sir()`. EUCAST 2026 is now the new default guideline for all MIC and disk diffusion interpretations.
-* Integration with the **tidymodels** framework to allow seamless use of SIR, MIC and disk data in modelling pipelines via `recipes`
+* Support for the [`future`](https://future.futureverse.org) package and its framework, as the previous implementation of parallel computing was slow
  - **Breaking change**: `as.sir()` with `parallel = TRUE` now requires a non-sequential `future::plan()` to be active before the call — e.g., `future::plan(future::multisession)` — and throws an informative error if none is set.
 * Integration with the *tidymodels* framework to allow seamless use of SIR, MIC and disk data in modelling pipelines via `recipes`
  - `step_mic_log2()` to transform `<mic>` columns with log2, and `step_sir_numeric()` to convert `<sir>` columns to numeric
  - New `tidyselect` helpers:
    - `all_sir()`, `all_sir_predictors()`
@@ -21,7 +25,6 @@
 * Two new `NA` objects, `NA_ab_` and `NA_mo_`, analogous to base R's `NA_character_` and `NA_integer_`, for use in pipelines that require typed missing values
 ### Fixes
 * Fixed multiple bugs in the `parallel = TRUE` mode of `as.sir()` for data frames
 * Fixed a bug in `as.sir()` where values that were purely numeric (e.g., `"1"`) and matched the broad SIR-matching regex would be incorrectly stripped of all content by the Unicode letter filter
 * Fixed a bug in `as.mic()` where MIC values in scientific notation (e.g., `"1e-3"`) were incorrectly handled because the letter `e` was removed along with other Unicode letters; scientific notation `e` is now preserved
 * Fixed a bug in `as.ab()` where certain AB codes containing "PH" or "TH" (such as `ETH`, `MTH`, `PHE`, `PHN`, `STH`, `THA`, `THI1`) would incorrectly return `NA` when combined in a vector with any untranslatable value (#245)
@@ -39,8 +42,7 @@
 * Fixed `as.sir()` for data frames incorrectly treating metadata columns (e.g. `patient`, `ward`) as antibiotic columns when their names coincidentally matched an antibiotic code; column content is now validated against AMR data patterns before inclusion
 * Improved parallel computing in `as.sir()`: when the number of AB columns is smaller than the number of available cores, rows are now split into batches so all cores stay active (row-batch mode). Previously, a 6-column dataset on a 16-core machine would only use 6 cores; now all 16 are used, with each worker processing a smaller row slice (lower per-worker memory pressure)
 * Fixed false-positive `"as_wt_nwt is no longer used"` warnings that appeared during parallel `as.sir()` runs; `as_wt_nwt` is now excluded from the unused-argument check in `as_sir_method()`
-* **Breaking change**: `as.sir()` with `parallel = TRUE` now requires a non-sequential `future::plan()` to be active before the call — e.g., `future::plan(future::multisession)` — and throws an informative error if none is set; previously `as.sir()` would silently set up and tear down a `multisession` plan itself, which was slow and caused version-mismatch issues with `load_all()` workflows
+* Fixed `as.sir()` ignoring `info = FALSE` for columns with no breakpoints (e.g. cefoxitin against *E. coli*)
 * Fixed `as.sir()` ignoring `info = FALSE` for columns with no breakpoints (e.g. cefoxitin against *E. coli*): an operator-precedence bug (`&&`/`||`) caused the "Interpreting MIC values" intro message to fire unconditionally when `nrow(breakpoints) == 0`, regardless of `info`; the progress bar title was also not gated by `info`
 ### Updates
 * `as.sir()` with `parallel = TRUE` now uses `future.apply::future_lapply()` instead of `parallel::mclapply()`/`parallel::parLapply()`, enabling transparent support for any `future` backend (including `mirai_multisession`) on all platforms; `future` and `future.apply` are now listed under `Suggests`
@@ -59,7 +61,6 @@
  * This results in more reliable behaviour compared to previous versions for capped MIC values
  * Removed the `"inverse"` option, which has now become redundant
 * `ab_group()` now returns values consist with the AMR selectors (#246)
 * Added two new `NA` objects, `NA_ab_` and `NA_mo_`, analogous to base R's `NA_character_` and `NA_integer_`, for use in pipelines that require typed missing values
 # AMR 3.0.1
--- a/R/aa_helper_functions.R
+++ b/R/aa_helper_functions.R
@@ -1681,28 +1681,6 @@ readRDS_AMR <- function(file, refhook = NULL) {
  readRDS(con, refhook = refhook)
 }
 get_n_cores <- function(max_cores = Inf) {
  if (pkg_is_available("parallelly", min_version = "0.8.0", also_load = FALSE)) {
    available_cores <- import_fn("availableCores", "parallelly")
    n_cores <- min(available_cores(), na.rm = TRUE)
  } else {
    # `parallel` is part of base R since 2.14.0, but detectCores() is not very precise on exotic systems like Docker and quota-set Linux environments
    n_cores <- parallel::detectCores()[1]
    if (is.na(n_cores)) {
      n_cores <- 1
    }
  }
  max_cores <- floor(max_cores)
  if (max_cores == 0) {
    n_cores <- 1
  } else if (max_cores < 0) {
    n_cores <- max(1, n_cores - abs(max_cores))
  } else if (max_cores > 0) {
    n_cores <- min(n_cores, max_cores)
  }
  n_cores
 }
 # Support `where()` if tidyselect not installed ----
 if (!is.null(import_fn("where", "tidyselect", error_on_fail = FALSE))) {
  # tidyselect::where() exists, retrieve from their namespace to make `where()`s work across the package in default arguments
--- a/R/sir.R
+++ b/R/sir.R
@@ -718,7 +718,6 @@ as.sir.disk <- function(x,
 #' @rdname as.sir
 #' @param parallel A [logical] to indicate if parallel computing must be used, defaults to `FALSE`. Requires the [`future.apply`][future.apply::future_lapply()] package. **A non-sequential [future::plan()] must already be active before setting `parallel = TRUE`** — for example, `future::plan(future::multisession)`. An error is thrown if `parallel = TRUE` is used without a plan set by the user. Parallelism distributes columns (and optionally row batches) across workers; it is most beneficial when there are many antibiotic columns and a large number of rows.
 #' @param max_cores Maximum number of workers to use when `parallel = TRUE`. Use a negative value to subtract that number from the available workers, e.g. a value of `-2` means at most `nbrOfWorkers() - 2` workers will be used. Defaults to `-1` (all but one worker). There will never be more workers used than there are antibiotic columns to analyse.
 #' @export
 as.sir.data.frame <- function(x,
                              ...,
@@ -738,7 +737,6 @@ as.sir.data.frame <- function(x,
                              verbose = FALSE,
                              info = interactive(),
                              parallel = FALSE,
                              max_cores = -1,
                              conserve_capped_values = NULL) {
  meet_criteria(x, allow_class = "data.frame") # will also check for dimensions > 0
  meet_criteria(col_mo, allow_class = "character", is_in = colnames(x), allow_NULL = TRUE)
@@ -757,7 +755,6 @@ as.sir.data.frame <- function(x,
  meet_criteria(verbose, allow_class = "logical", has_length = 1)
  meet_criteria(info, allow_class = "logical", has_length = 1)
  meet_criteria(parallel, allow_class = "logical", has_length = 1)
  meet_criteria(max_cores, allow_class = c("numeric", "integer"), has_length = 1)
  x.bak <- x
  if (isTRUE(info) && message_not_thrown_before("as.sir", "sir_interpretation_history")) {
@@ -912,41 +909,36 @@ as.sir.data.frame <- function(x,
  }
  # set up parallel computing
  if (requireNamespace("future.apply", quietly = TRUE) && !inherits(future::plan(), "sequential")) {
    if (isFALSE(parallel)) {
      message_("Assuming {.code parallel = TRUE} since parallel computing has been set up using the {.pkg future} package before. Set {.help [{.fun plan}](future::plan)} to sequential to prevent this.")
    }
    parallel <- TRUE
  }
  if (isTRUE(parallel)) {
-    if (!requireNamespace("future.apply", quietly = TRUE)) {
+    stop_ifnot(
-      stop_(
+      requireNamespace("future.apply", quietly = TRUE),
-        "Setting {.arg parallel} to {.code TRUE} requires the {.pkg future.apply} package.\n",
+      "Setting {.code parallel = TRUE} requires the {.pkg future.apply} package.\n",
-        "Install it with: ", highlight_code('install.packages("future.apply")'), "."
+      "Install it with {.code install.packages(\"future.apply\")}."
-      )
+    )
-    }
+    stop_if(inherits(future::plan(), "sequential"),
-    if (inherits(future::plan(), "sequential")) {
+      "Setting {.code parallel = TRUE} requires a non-sequential {.help [{.fun future::plan}](future::plan)} to be active.\n",
-      stop_(
+      "For your system, you could first run: {.code library(future); ",
-        "Setting {.arg parallel} to {.code TRUE} requires a non-sequential {.help [future::plan](future::plan)} to be active.\n",
+      ifelse(.Platform$OS.type == "windows" || in_rstudio(),
-        "Set a parallel plan before calling {.help [{.fun as.sir}](AMR::as.sir)}, for example:\n",
+        "plan(multisession)",
-        highlight_code("future::plan(future::multisession)"), "\n",
+        "plan(multicore)"
-        "Or on Linux/macOS for fork-based workers:\n",
+      ),
-        highlight_code("future::plan(future::multicore)"), "\n",
+      "}",
-        "See {.help [future::plan](future::plan)} for all available strategies.",
+      call = FALSE
-        call = FALSE
+    )
-      )
+
    }
    n_workers <- future::nbrOfWorkers()
-    n_cores <- if (max_cores < 0L) {
+    n_cores <- min(n_workers, length(ab_cols))
      max(1L, n_workers + max_cores)
    } else {
      min(max_cores, n_workers)
    }
    n_cores <- min(n_cores, length(ab_cols))
  } else {
    n_workers <- 1L
    n_cores <- 1L
  }
  if (isTRUE(info)) {
    message_(as_note = FALSE) # empty line
    message_("Processing columns:", as_note = FALSE)
  }
  # In parallel mode suppress per-column messages: workers print simultaneously and
  # their output would be interleaved on the console.
  is_parallel_run <- isTRUE(parallel) && n_cores > 1 && length(ab_cols) > 1
@@ -957,10 +949,10 @@ as.sir.data.frame <- function(x,
  # gets work.  pieces_per_col = ceil(n_workers / n_cols) gives ~n_workers jobs
  # total; each job processes one column on one row slice, which also reduces
  # per-worker memory pressure (smaller breakpoints search space).
-  pieces_per_col <- if (is_parallel_run && length(ab_cols) < n_workers) {
+  if (is_parallel_run && length(ab_cols) < n_workers) {
-    ceiling(n_workers / length(ab_cols))
+    pieces_per_col <- ceiling(n_workers / length(ab_cols))
  } else {
-    1L
+    pieces_per_col <- 1L
  }
  run_as_sir_column <- function(i, rows = NULL) {
@@ -1049,7 +1041,7 @@ as.sir.data.frame <- function(x,
      ab <- ab_col
      ab_coerced <- suppressWarnings(as.ab(ab, info = FALSE))
      show_message <- FALSE
-      if (!all(x[row_idx, ab, drop = TRUE] %in% c("S", "SDD", "I", "R", "NI", NA), na.rm = TRUE)) {
+      if (!all(x[row_idx, ab, drop = TRUE] %in% c(VALID_SIR_LEVELS, NA), na.rm = TRUE)) {
        show_message <- TRUE
        if (isTRUE(effective_info)) {
          message_("\u00a0\u00a0", .amr_env$bullet_icon, " Cleaning values in column ", paste0("{.field ", font_bold(ab), "}"), " (",
@@ -1086,9 +1078,9 @@ as.sir.data.frame <- function(x,
    if (isTRUE(info)) {
      message_(as_note = FALSE)
      if (pieces_per_col > 1L) {
-        message_("Running in parallel mode using ", n_cores, " out of ", get_n_cores(Inf), " workers, on columns ", vector_and(paste0("{.field ", font_bold(ab_cols, collapse = NULL), "}"), quotes = FALSE, sort = FALSE), " (", pieces_per_col, " row slices per column)...", as_note = FALSE, appendLF = FALSE)
+        message_("Running in parallel mode using ", n_cores, " workers, on columns ", vector_and(paste0("{.field ", font_bold(ab_cols, collapse = NULL), "}"), quotes = FALSE, sort = FALSE), " (", pieces_per_col, " row slices per column)...", as_note = FALSE, appendLF = FALSE)
      } else {
-        message_("Running in parallel mode using ", n_cores, " out of ", get_n_cores(Inf), " workers, on columns ", vector_and(paste0("{.field ", font_bold(ab_cols, collapse = NULL), "}"), quotes = FALSE, sort = FALSE), "...", as_note = FALSE, appendLF = FALSE)
+        message_("Running in parallel mode using ", n_cores, " workers, on columns ", vector_and(paste0("{.field ", font_bold(ab_cols, collapse = NULL), "}"), quotes = FALSE, sort = FALSE), "...", as_note = FALSE, appendLF = FALSE)
      }
    }
    if (pieces_per_col > 1L) {
@@ -1108,7 +1100,7 @@ as.sir.data.frame <- function(x,
        pieces <- flat[vapply(jobs, function(j) j$col == ci, logical(1L))]
        list(
          result = as.sir(do.call(c, lapply(pieces, function(p) as.character(p$result)))),
-          log    = {
+          log = {
            logs <- Filter(Negate(is.null), lapply(pieces, function(p) p$log))
            if (length(logs) > 0L) do.call(rbind_AMR, logs) else NULL
          }
@@ -1125,12 +1117,16 @@ as.sir.data.frame <- function(x,
    }
  } else {
    # sequential mode (non-parallel)
-    if (isTRUE(info) && get_n_cores(Inf) > 1 && NROW(x) * NCOL(x) > 10000) {
+    if (isTRUE(info) && n_cores > 1 && NROW(x) * NCOL(x) > 10000) {
      suggest <- ifelse(.Platform$OS.type == "windows" || in_rstudio(),
        "plan(multisession)",
        "plan(multicore)"
      )
      message_(as_note = FALSE)
      if (requireNamespace("future.apply", quietly = TRUE)) {
-        message_("Running in sequential mode. To speed up processing, set a parallel plan first (e.g., ", highlight_code("future::plan(future::multisession)"), ") and then set {.arg parallel} to {.code TRUE}.\n")
+        message_("Running in sequential mode. To speed up processing, set a parallel {.help [{.fun future::plan}](future::plan)} such as {.code ", suggest, "}.")
      } else {
-        message_("Running in sequential mode. To speed up processing, install the {.pkg future.apply} package, set a parallel plan first (e.g., ", highlight_code("future::plan(future::multisession)"), ") and then set {.arg parallel} to {.code TRUE}.\n")
+        message_("Running in sequential mode. To speed up processing, install the {.pkg future.apply} package and then set {.code parallel = TRUE}.\n")
      }
    }
    # this will contain a progress bar already
@@ -2102,7 +2098,7 @@ sir_interpretation_history <- function(clean = FALSE) {
 #' @noRd
 print.sir_log <- function(x, ...) {
  if (NROW(x) == 0) {
-    message_("No results to print. First run {.help [{.fun as.sir}](AMR::as.sir)} on MIC values or disk diffusion zones (or on a {.cls data.frame} containing any of these) to print a {.val logbook} data set here.")
+    message_("No results to print. First run {.help [{.fun as.sir}](AMR::as.sir)} on MIC values or disk diffusion zones (or on a {.cls data.frame} containing any of these) to print a 'logbook' data set here.")
    return(invisible(NULL))
  }
  class(x) <- class(x)[class(x) != "sir_log"]
@@ -2344,7 +2340,7 @@ coerce_reference_data_columns <- function(x) {
  ref <- AMR::clinical_breakpoints
  for (col in names(ref)) {
    col_ref <- ref[[col]]
-    col_x   <- x[[col]]
+    col_x <- x[[col]]
    if (identical(class(col_ref), class(col_x))) next
    if (col == "mo") {
      x[[col]] <- suppressMessages(as.mo(col_x))
--- a/index.md
+++ b/index.md
@@ -26,12 +26,9 @@
 <div style="display: flex; font-size: 0.8em;">
 <p style="text-align:left; width: 50%;">
 <small><a href="https://amr-for-r.org/">amr-for-r.org</a></small>
 </p>
 <p style="text-align:right; width: 50%;">
 <small><a href="https://doi.org/10.18637/jss.v104.i03" target="_blank">doi.org/10.18637/jss.v104.i03</a></small>
 </p>
@@ -174,24 +171,26 @@ example_isolates %>%
 #> ℹ Using column mo as input for `mo_fullname()`
 #> ℹ Using column mo as input for `mo_is_gram_negative()`
 #> ℹ Using column mo as input for `mo_is_intrinsic_resistant()`
-#> ℹ Determining intrinsic resistance based on 'EUCAST Expected Resistant
+#> ℹ Determining intrinsic resistance based on 'EUCAST Expected
-#>   Phenotypes' v1.2 (2023). This note will be shown once per session.
+#>   Resistant Phenotypes' v1.2 (2023). This note will be shown
-#> ℹ For `aminoglycosides()` using columns GEN (gentamicin), TOB (tobramycin), AMK
+#>   once per session.
-#>   (amikacin), and KAN (kanamycin)
+#> ℹ For `aminoglycosides()` using columns GEN (gentamicin), TOB
-#> ℹ For `carbapenems()` using columns IPM (imipenem) and MEM (meropenem)
+#>   (tobramycin), AMK (amikacin), and KAN (kanamycin)
 #> ℹ For `carbapenems()` using columns IPM (imipenem) and MEM
 #>   (meropenem)
 #> # A tibble: 35 × 7
-#>    bacteria                     GEN   TOB   AMK   KAN   IPM   MEM  
+#>    bacteria         GEN   TOB   AMK   KAN   IPM   MEM  
-#>    <chr>                        <sir> <sir> <sir> <sir> <sir> <sir>
+#>    <chr>            <sir> <sir> <sir> <sir> <sir> <sir>
-#>  1 Pseudomonas aeruginosa       I     S     NA    R     S     NA   
+#>  1 Pseudomonas aer… I     S     NA    R     S     NA   
-#>  2 Pseudomonas aeruginosa       I     S     NA    R     S     NA   
+#>  2 Pseudomonas aer… I     S     NA    R     S     NA   
-#>  3 Pseudomonas aeruginosa       I     S     NA    R     S     NA   
+#>  3 Pseudomonas aer… I     S     NA    R     S     NA   
-#>  4 Pseudomonas aeruginosa       S     S     S     R     NA    S    
+#>  4 Pseudomonas aer… S     S     S     R     NA    S    
-#>  5 Pseudomonas aeruginosa       S     S     S     R     S     S    
+#>  5 Pseudomonas aer… S     S     S     R     S     S    
-#>  6 Pseudomonas aeruginosa       S     S     S     R     S     S    
+#>  6 Pseudomonas aer… S     S     S     R     S     S    
-#>  7 Stenotrophomonas maltophilia R     R     R     R     R     R    
+#>  7 Stenotrophomona… R     R     R     R     R     R    
-#>  8 Pseudomonas aeruginosa       S     S     S     R     NA    S    
+#>  8 Pseudomonas aer… S     S     S     R     NA    S    
-#>  9 Pseudomonas aeruginosa       S     S     S     R     NA    S    
+#>  9 Pseudomonas aer… S     S     S     R     NA    S    
-#> 10 Pseudomonas aeruginosa       S     S     S     R     S     S    
+#> 10 Pseudomonas aer… S     S     S     R     S     S    
 #> # ℹ 25 more rows
 ```
@@ -215,23 +214,24 @@ output format automatically (such as markdown, LaTeX, HTML, etc.).
 ``` r
 antibiogram(example_isolates,
            antimicrobials = c(aminoglycosides(), carbapenems()))
-#> ℹ For `aminoglycosides()` using columns GEN (gentamicin), TOB (tobramycin), AMK
+#> ℹ For `aminoglycosides()` using columns GEN (gentamicin), TOB
-#>   (amikacin), and KAN (kanamycin)
+#>   (tobramycin), AMK (amikacin), and KAN (kanamycin)
-#> ℹ For `carbapenems()` using columns IPM (imipenem) and MEM (meropenem)
+#> ℹ For `carbapenems()` using columns IPM (imipenem) and MEM
 #>   (meropenem)
 ```
-| Pathogen | Amikacin | Gentamicin | Imipenem | Kanamycin | Meropenem | Tobramycin |
+| Pathogen         | Amikacin             | Gentamicin          | Imipenem             | Kanamycin       | Meropenem            | Tobramycin          |
-|:---|:---|:---|:---|:---|:---|:---|
+|:-----------------|:---------------------|:--------------------|:---------------------|:----------------|:---------------------|:--------------------|
-| CoNS | 0% (0-8%,N=43) | 86% (82-90%,N=309) | 52% (37-67%,N=48) | 0% (0-8%,N=43) | 52% (37-67%,N=48) | 22% (12-35%,N=55) |
+| CoNS             | 0% (0-8%,N=43)       | 86% (82-90%,N=309)  | 52% (37-67%,N=48)    | 0% (0-8%,N=43)  | 52% (37-67%,N=48)    | 22% (12-35%,N=55)   |
-| *E. coli* | 100% (98-100%,N=171) | 98% (96-99%,N=460) | 100% (99-100%,N=422) | NA | 100% (99-100%,N=418) | 97% (96-99%,N=462) |
+| *E. coli*        | 100% (98-100%,N=171) | 98% (96-99%,N=460)  | 100% (99-100%,N=422) | NA              | 100% (99-100%,N=418) | 97% (96-99%,N=462)  |
-| *E. faecalis* | 0% (0-9%,N=39) | 0% (0-9%,N=39) | 100% (91-100%,N=38) | 0% (0-9%,N=39) | NA | 0% (0-9%,N=39) |
+| *E. faecalis*    | 0% (0-9%,N=39)       | 0% (0-9%,N=39)      | 100% (91-100%,N=38)  | 0% (0-9%,N=39)  | NA                   | 0% (0-9%,N=39)      |
-| *K. pneumoniae* | NA | 90% (79-96%,N=58) | 100% (93-100%,N=51) | NA | 100% (93-100%,N=53) | 90% (79-96%,N=58) |
+| *K. pneumoniae*  | NA                   | 90% (79-96%,N=58)   | 100% (93-100%,N=51)  | NA              | 100% (93-100%,N=53)  | 90% (79-96%,N=58)   |
-| *P. aeruginosa* | NA | 100% (88-100%,N=30) | NA | 0% (0-12%,N=30) | NA | 100% (88-100%,N=30) |
+| *P. aeruginosa*  | NA                   | 100% (88-100%,N=30) | NA                   | 0% (0-12%,N=30) | NA                   | 100% (88-100%,N=30) |
-| *P. mirabilis* | NA | 94% (80-99%,N=34) | 94% (79-99%,N=32) | NA | NA | 94% (80-99%,N=34) |
+| *P. mirabilis*   | NA                   | 94% (80-99%,N=34)   | 94% (79-99%,N=32)    | NA              | NA                   | 94% (80-99%,N=34)   |
-| *S. aureus* | NA | 99% (97-100%,N=233) | NA | NA | NA | 98% (92-100%,N=86) |
+| *S. aureus*      | NA                   | 99% (97-100%,N=233) | NA                   | NA              | NA                   | 98% (92-100%,N=86)  |
-| *S. epidermidis* | 0% (0-8%,N=44) | 79% (71-85%,N=163) | NA | 0% (0-8%,N=44) | NA | 51% (40-61%,N=89) |
+| *S. epidermidis* | 0% (0-8%,N=44)       | 79% (71-85%,N=163)  | NA                   | 0% (0-8%,N=44)  | NA                   | 51% (40-61%,N=89)   |
-| *S. hominis* | NA | 92% (84-97%,N=80) | NA | NA | NA | 85% (74-93%,N=62) |
+| *S. hominis*     | NA                   | 92% (84-97%,N=80)   | NA                   | NA              | NA                   | 85% (74-93%,N=62)   |
-| *S. pneumoniae* | 0% (0-3%,N=117) | 0% (0-3%,N=117) | NA | 0% (0-3%,N=117) | NA | 0% (0-3%,N=117) |
+| *S. pneumoniae*  | 0% (0-3%,N=117)      | 0% (0-3%,N=117)     | NA                   | 0% (0-3%,N=117) | NA                   | 0% (0-3%,N=117)     |
 In combination antibiograms, it is clear that combined antimicrobials
 yield higher empiric coverage:
@@ -242,10 +242,10 @@ antibiogram(example_isolates,
            mo_transform = "gramstain")
 ```
-| Pathogen | Piperacillin/tazobactam | Piperacillin/tazobactam + Gentamicin | Piperacillin/tazobactam + Tobramycin |
+| Pathogen      | Piperacillin/tazobactam | Piperacillin/tazobactam + Gentamicin | Piperacillin/tazobactam + Tobramycin |
-|:---|:---|:---|:---|
+|:--------------|:------------------------|:-------------------------------------|:-------------------------------------|
-| Gram-negative | 88% (85-91%,N=641) | 99% (97-99%,N=691) | 98% (97-99%,N=693) |
+| Gram-negative | 88% (85-91%,N=641)      | 99% (97-99%,N=691)                   | 98% (97-99%,N=693)                   |
-| Gram-positive | 86% (82-89%,N=345) | 98% (96-98%,N=1044) | 95% (93-97%,N=550) |
+| Gram-positive | 86% (82-89%,N=345)      | 98% (96-98%,N=1044)                  | 95% (93-97%,N=550)                   |
 Like many other functions in this package, `antibiogram()` comes with
 support for 28 languages that are often detected automatically based on
@@ -318,16 +318,18 @@ example_isolates %>%
  summarise(across(c(GEN, TOB),
                   list(total_R = resistance,
                        conf_int = function(x) sir_confidence_interval(x, collapse = "-"))))
-#> ℹ `resistance()` assumes the EUCAST guideline and thus considers the 'I'
+#> ℹ `resistance()` assumes the EUCAST guideline and thus
-#>   category susceptible. Set the `guideline` argument or the `AMR_guideline`
+#>   considers the 'I' category susceptible. Set the `guideline`
-#>   option to either "CLSI" or "EUCAST", see `?AMR-options`.
+#>   argument or the `AMR_guideline` option to either "CLSI" or
 #>   "EUCAST", see `?AMR-options`.
 #> ℹ This message will be shown once per session.
 #> # A tibble: 3 × 5
-#>   ward       GEN_total_R GEN_conf_int TOB_total_R TOB_conf_int
+#>   ward       GEN_total_R GEN_conf_int TOB_total_R
-#>   <chr>            <dbl> <chr>              <dbl> <chr>       
+#>   <chr>            <dbl> <chr>              <dbl>
-#> 1 Clinical         0.229 0.205-0.254        0.315 0.284-0.347 
+#> 1 Clinical         0.229 0.205-0.254        0.315
-#> 2 ICU              0.290 0.253-0.33         0.400 0.353-0.449 
+#> 2 ICU              0.290 0.253-0.33         0.400
-#> 3 Outpatient       0.2   0.131-0.285        0.368 0.254-0.493
+#> 3 Outpatient       0.2   0.131-0.285        0.368
 #> # ℹ 1 more variable: TOB_conf_int <chr>
 ```
 Or use [antimicrobial
@@ -344,15 +346,16 @@ out <- example_isolates %>%
  # calculate AMR using resistance(), over all aminoglycosides and polymyxins:
  summarise(across(c(aminoglycosides(), polymyxins()),
            resistance))
-#> ℹ For `aminoglycosides()` using columns GEN (gentamicin), TOB (tobramycin), AMK
+#> ℹ For `aminoglycosides()` using columns GEN (gentamicin), TOB
-#>   (amikacin), and KAN (kanamycin)
+#>   (tobramycin), AMK (amikacin), and KAN (kanamycin)
 #> ℹ For `polymyxins()` using column COL (colistin)
 #> Warning: There was 1 warning in `summarise()`.
-#> ℹ In argument: `across(c(aminoglycosides(), polymyxins()), resistance)`.
+#> ℹ In argument: `across(c(aminoglycosides(), polymyxins()),
 #>   resistance)`.
 #> ℹ In group 3: `ward = "Outpatient"`.
 #> Caused by warning:
-#> ! Introducing NA: only 23 results available for KAN in group: ward = "Outpatient"
+#> ! Introducing NA: only 23 results available for KAN in group:
-#> (whilst `minimum = 30`).
+#> ward = "Outpatient" (whilst `minimum = 30`).
 out
 #> # A tibble: 3 × 6
 #>   ward         GEN   TOB   AMK   KAN   COL
@@ -366,11 +369,12 @@ out
 # transform the antibiotic columns to names:
 out %>% set_ab_names()
 #> # A tibble: 3 × 6
-#>   ward       gentamicin tobramycin amikacin kanamycin colistin
+#>   ward       gentamicin tobramycin amikacin kanamycin
-#>   <chr>           <dbl>      <dbl>    <dbl>     <dbl>    <dbl>
+#>   <chr>           <dbl>      <dbl>    <dbl>     <dbl>
-#> 1 Clinical        0.229      0.315    0.626         1    0.780
+#> 1 Clinical        0.229      0.315    0.626         1
-#> 2 ICU             0.290      0.400    0.662         1    0.857
+#> 2 ICU             0.290      0.400    0.662         1
-#> 3 Outpatient      0.2        0.368    0.605        NA    0.889
+#> 3 Outpatient      0.2        0.368    0.605        NA
 #> # ℹ 1 more variable: colistin <dbl>
 ```
 ``` r
--- a/man/amr-tidymodels.Rd
+++ b/man/amr-tidymodels.Rd
@@ -31,24 +31,22 @@ step_sir_numeric(recipe, ..., role = NA, trained = FALSE, columns = NULL,
  skip = FALSE, id = recipes::rand_id("sir_numeric"))
 }
 \arguments{
-\item{recipe}{A recipe object. The step will be added to the
+\item{recipe}{A recipe object. The step will be added to the sequence of
-sequence of operations for this recipe.}
+operations for this recipe.}
-\item{...}{One or more selector functions to choose variables
+\item{...}{One or more selector functions to choose variables for this step.
-for this step. See \code{\link[recipes:selections]{selections()}} for more details.}
+See \code{\link[recipes:selections]{selections()}} for more details.}
-\item{role}{Not used by this step since no new variables are
+\item{role}{Not used by this step since no new variables are created.}
 created.}
-\item{trained}{A logical to indicate if the quantities for
+\item{trained}{A logical to indicate if the quantities for preprocessing have
-preprocessing have been estimated.}
+been estimated.}
-\item{skip}{A logical. Should the step be skipped when the
+\item{skip}{A logical. Should the step be skipped when the recipe is baked by
-recipe is baked by \code{\link[recipes:bake]{bake()}}? While all operations are baked
+\code{\link[recipes:bake]{bake()}}? While all operations are baked when \code{\link[recipes:prep]{prep()}} is run, some
-when \code{\link[recipes:prep]{prep()}} is run, some operations may not be able to be
+operations may not be able to be conducted on new data (e.g. processing the
-conducted on new data (e.g. processing the outcome variable(s)).
+outcome variable(s)). Care should be taken when using \code{skip = TRUE} as it
-Care should be taken when using \code{skip = TRUE} as it may affect
+may affect the computations for subsequent operations.}
 the computations for subsequent operations.}
 \item{id}{A character string that is unique to this step to identify it.}
 }
--- a/man/antibiogram.Rd
+++ b/man/antibiogram.Rd
@@ -72,7 +72,7 @@ retrieve_wisca_parameters(wisca_model, ...)
 \item{ab_transform}{A character to transform antimicrobial input - must be one of the column names of the \link{antimicrobials} data set (defaults to \code{"name"}): \code{"ab"}, \code{"cid"}, \code{"name"}, \code{"group"}, \code{"atc"}, \code{"atc_group1"}, \code{"atc_group2"}, \code{"abbreviations"}, \code{"synonyms"}, \code{"oral_ddd"}, \code{"oral_units"}, \code{"iv_ddd"}, \code{"iv_units"}, or \code{"loinc"}. Can also be \code{NULL} to not transform the input.}
-\item{syndromic_group}{A column name of \code{x}, or values calculated to split rows of \code{x}, e.g. by using \code{\link[=ifelse]{ifelse()}} or \code{\link[dplyr:case_when]{case_when()}}. See \emph{Examples}.}
+\item{syndromic_group}{A column name of \code{x}, or values calculated to split rows of \code{x}, e.g. by using \code{\link[=ifelse]{ifelse()}} or \code{\link[dplyr:case-and-replace-when]{case_when()}}. See \emph{Examples}.}
 \item{add_total_n}{\emph{(deprecated in favour of \code{formatting_type})} A \link{logical} to indicate whether \code{n_tested} available numbers per pathogen should be added to the table (default is \code{TRUE}). This will add the lowest and highest number of available isolates per antimicrobial (e.g, if for \emph{E. coli} 200 isolates are available for ciprofloxacin and 150 for amoxicillin, the returned number will be "150-200"). This option is unavailable when \code{wisca = TRUE}; in that case, use \code{\link[=retrieve_wisca_parameters]{retrieve_wisca_parameters()}} to get the parameters used for WISCA.}
--- a/man/as.sir.Rd
+++ b/man/as.sir.Rd
@@ -73,7 +73,7 @@ is_sir_eligible(x, threshold = 0.05)
  include_PKPD = getOption("AMR_include_PKPD", TRUE),
  breakpoint_type = getOption("AMR_breakpoint_type", "human"), host = NULL,
  language = get_AMR_locale(), verbose = FALSE, info = interactive(),
-  parallel = FALSE, max_cores = -1, conserve_capped_values = NULL)
+  parallel = FALSE, conserve_capped_values = NULL)
 sir_interpretation_history(clean = FALSE)
 }
@@ -152,8 +152,6 @@ The default \code{"conservative"} setting ensures cautious handling of uncertain
 \item{parallel}{A \link{logical} to indicate if parallel computing must be used, defaults to \code{FALSE}. Requires the \code{\link[future.apply:future_lapply]{future.apply}} package. \strong{A non-sequential \code{\link[future:plan]{future::plan()}} must already be active before setting \code{parallel = TRUE}} — for example, \code{future::plan(future::multisession)}. An error is thrown if \code{parallel = TRUE} is used without a plan set by the user. Parallelism distributes columns (and optionally row batches) across workers; it is most beneficial when there are many antibiotic columns and a large number of rows.}
 \item{max_cores}{Maximum number of workers to use when \code{parallel = TRUE}. Use a negative value to subtract that number from the available workers, e.g. a value of \code{-2} means at most \code{nbrOfWorkers() - 2} workers will be used. Defaults to \code{-1} (all but one worker). There will never be more workers used than there are antibiotic columns to analyse.}
 \item{clean}{A \link{logical} to indicate whether previously stored results should be forgotten after returning the 'logbook' with results.}
 }
 \value{
--- a/man/custom_eucast_rules.Rd
+++ b/man/custom_eucast_rules.Rd
@@ -19,7 +19,7 @@ Define custom EUCAST rules for your organisation or specific analysis and use th
 Some organisations have their own adoption of EUCAST rules. This function can be used to define custom EUCAST rules to be used in the \code{\link[=eucast_rules]{eucast_rules()}} function.
 \subsection{Basics}{
-If you are familiar with the \code{\link[dplyr:case_when]{case_when()}} function of the \code{dplyr} package, you will recognise the input method to set your own rules. Rules must be set using what \R considers to be the 'formula notation'. The rule itself is written \emph{before} the tilde (\code{~}) and the consequence of the rule is written \emph{after} the tilde:
+If you are familiar with the \code{\link[dplyr:case-and-replace-when]{case_when()}} function of the \code{dplyr} package, you will recognise the input method to set your own rules. Rules must be set using what \R considers to be the 'formula notation'. The rule itself is written \emph{before} the tilde (\code{~}) and the consequence of the rule is written \emph{after} the tilde:
 \if{html}{\out{<div class="sourceCode r">}}\preformatted{x <- custom_eucast_rules(TZP == "S" ~ aminopenicillins == "S",
                         TZP == "R" ~ aminopenicillins == "R")
--- a/man/custom_mdro_guideline.Rd
+++ b/man/custom_mdro_guideline.Rd
@@ -26,7 +26,7 @@ Define custom a MDRO guideline for your organisation or specific analysis and us
 Using a custom MDRO guideline is of importance if you have custom rules to determine MDROs in your hospital, e.g., rules that are dependent on ward, state of contact isolation or other variables in your data.
 \subsection{Basics}{
-If you are familiar with the \code{\link[dplyr:case_when]{case_when()}} function of the \code{dplyr} package, you will recognise the input method to set your own rules. Rules must be set using what \R considers to be the 'formula notation'. The rule itself is written \emph{before} the tilde (\code{~}) and the consequence of the rule is written \emph{after} the tilde:
+If you are familiar with the \code{\link[dplyr:case-and-replace-when]{case_when()}} function of the \code{dplyr} package, you will recognise the input method to set your own rules. Rules must be set using what \R considers to be the 'formula notation'. The rule itself is written \emph{before} the tilde (\code{~}) and the consequence of the rule is written \emph{after} the tilde:
 \if{html}{\out{<div class="sourceCode r">}}\preformatted{custom <- custom_mdro_guideline(CIP == "R" & age > 60 ~ "Elderly Type A",
                                ERY == "R" & age > 60 ~ "Elderly Type B")
--- a/man/g.test.Rd
+++ b/man/g.test.Rd
@@ -45,8 +45,9 @@ A list with class \code{"htest"} containing the following
  \item{residuals}{the Pearson residuals,
    \code{(observed - expected) / sqrt(expected)}.}
  \item{stdres}{standardized residuals,
-    \code{(observed - expected) / sqrt(V)}, where \code{V} is the residual cell variance (Agresti, 2007,
+    \code{(observed - expected) / sqrt(V)}, where \code{V} is the
-    section 2.4.5 for the case where \code{x} is a matrix, \code{n * p * (1 - p)} otherwise).}
+    residual cell variance (Agresti, 2007, section 2.4.5
    for the case where \code{x} is a matrix, \code{n * p * (1 - p)} otherwise).}
 }
 \description{
 \code{\link[=g.test]{g.test()}} performs chi-squared contingency table tests and goodness-of-fit tests, just like \code{\link[=chisq.test]{chisq.test()}} but is more reliable (1). A \emph{G}-test can be used to see whether the number of observations in each category fits a theoretical expectation (called a \strong{\emph{G}-test of goodness-of-fit}), or to see whether the proportions of one variable are different for different values of the other variable (called a \strong{\emph{G}-test of independence}).
--- a/man/pca.Rd
+++ b/man/pca.Rd
@@ -32,7 +32,7 @@ pca(x, ..., retx = TRUE, center = TRUE, scale. = TRUE, tol = NULL,
    standard deviations are less than or equal to \code{tol} times the
    standard deviation of the first component.)  With the default null
    setting, no components are omitted (unless \code{rank.} is specified
-    less than \code{min(dim(x))}.).  Other settings for tol could be
+    less than \code{min(dim(x))}.).  Other settings for \code{tol} could be
    \code{tol = 0} or \code{tol = sqrt(.Machine$double.eps)}, which
    would omit essentially constant components.}
--- a/tests/testthat/test-sir.R
+++ b/tests/testthat/test-sir.R
@@ -408,13 +408,13 @@ test_that("test-sir.R", {
  # Issue #278: re-running as.sir() on already-<sir> data must preserve columns
  df_already_sir <- data.frame(
-    mo  = "B_ESCHR_COLI",
+    mo = "B_ESCHR_COLI",
    AMC = as.mic(c("1", "2", "4")),
    GEN = sample(c("S", "I", "R"), 3, replace = TRUE),
    stringsAsFactors = FALSE
  )
-  first_pass  <- suppressMessages(as.sir(df_already_sir, col_mo = "mo", info = FALSE))
+  first_pass <- suppressMessages(as.sir(df_already_sir, col_mo = "mo", info = FALSE))
-  second_pass <- suppressMessages(as.sir(first_pass,    col_mo = "mo", info = FALSE))
+  second_pass <- suppressMessages(as.sir(first_pass, col_mo = "mo", info = FALSE))
  expect_equal(ncol(first_pass), ncol(second_pass))
  expect_true(is.sir(second_pass[["AMC"]]))
  expect_true(is.sir(second_pass[["GEN"]]))
@@ -424,15 +424,15 @@ test_that("test-sir.R", {
  # Issue #278: metadata columns whose names coincidentally match antibiotic
  # codes (e.g. 'patient' -> OXY, 'ward' -> PRU) must not be processed
  df_meta <- data.frame(
-    mo      = "B_ESCHR_COLI",
+    mo = "B_ESCHR_COLI",
    patient = paste0("Pt_", 1:20),
-    ward    = rep(c("ICU", "Surgery", "Outpatient", "ED"), 5),
+    ward = rep(c("ICU", "Surgery", "Outpatient", "ED"), 5),
-    AMC     = as.mic(rep(c("1", "2", "4", "8"), 5)),
+    AMC = as.mic(rep(c("1", "2", "4", "8"), 5)),
    stringsAsFactors = FALSE
  )
  df_meta_sir <- suppressMessages(as.sir(df_meta, col_mo = "mo", info = FALSE))
  expect_true("patient" %in% colnames(df_meta_sir))
-  expect_true("ward"    %in% colnames(df_meta_sir))
+  expect_true("ward" %in% colnames(df_meta_sir))
  expect_false(is.sir(df_meta_sir[["patient"]]))
  expect_false(is.sir(df_meta_sir[["ward"]]))
  expect_true(is.sir(df_meta_sir[["AMC"]]))
@@ -444,7 +444,7 @@ test_that("test-sir.R", {
  set.seed(42)
  n_par <- 200
  df_par <- data.frame(
-    mo  = "B_ESCHR_COLI",
+    mo = "B_ESCHR_COLI",
    AMC = as.mic(sample(c("0.25", "0.5", "1", "2", "4", "8", "16", "32"), n_par, TRUE)),
    GEN = as.mic(sample(c("0.5", "1", "2", "4", "8", "16", "32", "64"), n_par, TRUE)),
    CIP = as.mic(sample(c("0.001", "0.002", "0.004", "0.008", "0.016", "0.032"), n_par, TRUE)),
@@ -506,14 +506,16 @@ test_that("test-sir.R", {
  #    verify identical output to sequential for a dataset with 2 AB columns so
  #    pieces_per_col = ceiling(max_cores / 2) >= 2 and row batching activates
  df_wide <- data.frame(
-    mo  = "B_ESCHR_COLI",
+    mo = "B_ESCHR_COLI",
    AMC = as.mic(sample(c("1", "2", "4", "8"), n_par, TRUE)),
    GEN = as.mic(sample(c("1", "2", "4", "8"), n_par, TRUE)),
    stringsAsFactors = FALSE
  )
  sir_wide_seq <- suppressMessages(as.sir(df_wide, col_mo = "mo", info = FALSE))
-  sir_wide_par <- suppressMessages(as.sir(df_wide, col_mo = "mo", info = FALSE,
+  sir_wide_par <- suppressMessages(as.sir(df_wide,
-                                          parallel = TRUE, max_cores = 8L))
+    col_mo = "mo", info = FALSE,
    parallel = TRUE, max_cores = 8L
  ))
  expect_identical(sir_wide_seq[["AMC"]], sir_wide_par[["AMC"]])
  expect_identical(sir_wide_seq[["GEN"]], sir_wide_par[["GEN"]])
@@ -536,9 +538,9 @@ test_that("custom reference_data: non-EUCAST/CLSI guideline produces R", {
  # coerce_reference_data_columns() will coerce mo/ab to the right class.
  my_bp <- clinical_breakpoints[clinical_breakpoints$method == "MIC" &
    clinical_breakpoints$type == "human", ][1, ]
-  my_bp$guideline    <- "MyLab 2025"
+  my_bp$guideline <- "MyLab 2025"
-  my_bp$mo           <- "B_ACHRMB_XYLS"  # plain character — coerced to <mo>
+  my_bp$mo <- "B_ACHRMB_XYLS" # plain character — coerced to <mo>
-  my_bp$ab           <- "MEM"             # plain character — coerced to <ab>
+  my_bp$ab <- "MEM" # plain character — coerced to <ab>
  my_bp$breakpoint_S <- 8
  my_bp$breakpoint_R <- 32
@@ -556,26 +558,30 @@ test_that("custom reference_data: non-EUCAST/CLSI guideline produces R", {
  # guideline explicitly set: same result when it matches the data
  expect_equal(as.character(suppressMessages(
-    as.sir(as.mic(64), mo = "B_ACHRMB_XYLS", ab = "MEM",
+    as.sir(as.mic(64),
-      guideline = "MyLab 2025", reference_data = my_bp)
+      mo = "B_ACHRMB_XYLS", ab = "MEM",
      guideline = "MyLab 2025", reference_data = my_bp
    )
  )), "R")
 })
 test_that("custom reference_data: host = NA acts as host-agnostic fallback", {
  my_bp <- clinical_breakpoints[clinical_breakpoints$method == "MIC" &
    clinical_breakpoints$type == "human", ][1, ]
-  my_bp$guideline    <- "MyLab 2025"
+  my_bp$guideline <- "MyLab 2025"
-  my_bp$mo           <- "B_ACHRMB_XYLS"
+  my_bp$mo <- "B_ACHRMB_XYLS"
-  my_bp$ab           <- "MEM"
+  my_bp$ab <- "MEM"
-  my_bp$type         <- "animal"
+  my_bp$type <- "animal"
-  my_bp$host         <- NA  # logical NA — coerced to character by coerce_reference_data_columns()
+  my_bp$host <- NA # logical NA — coerced to character by coerce_reference_data_columns()
  my_bp$breakpoint_S <- 8
  my_bp$breakpoint_R <- 32
  # NA host should match when no species-specific row exists
  result <- suppressMessages(
-    as.sir(as.mic(64), mo = "B_ACHRMB_XYLS", ab = "MEM",
+    as.sir(as.mic(64),
-      host = "dogs", breakpoint_type = "animal", reference_data = my_bp)
+      mo = "B_ACHRMB_XYLS", ab = "MEM",
      host = "dogs", breakpoint_type = "animal", reference_data = my_bp
    )
  )
  expect_equal(as.character(result), "R")
 })
--- a/tools/benchmark_parallel.png
+++ b/tools/benchmark_parallel.png