1
0
mirror of https://github.com/msberends/AMR.git synced 2026-05-14 03:10:50 +02:00

Migrate parallel computing in as.sir() from parallel:: to future/future.apply (#280)

* Migrate parallel computing in as.sir() from parallel:: to future/future.apply

Replace parallel::mclapply() and parallel::parLapply() with
future.apply::future_lapply(), enabling transparent support for any
future backend (multisession, multicore, mirai_multisession, cluster)
on all platforms including Windows.

When parallel = TRUE the function now: (1) respects an active
future::plan() set by the user without overriding it on exit, or
(2) sets a temporary multisession plan with parallelly::availableCores()
and tears it down on exit. The max_cores argument controls worker count
only when no user plan is active.

future and future.apply are added to Suggests in DESCRIPTION.

https://claude.ai/code/session_01M1Jvf2Miu6JL4TQrEh1wS8

* Require user plan() for parallel=TRUE; fix as_wt_nwt false-positive warnings

- parallel = TRUE now errors with a cli-styled message if no non-sequential
  future::plan() is active; users must call e.g. future::plan(future::multisession)
  before using parallel = TRUE (breaking change)
- Removed auto-setup/teardown of multisession plan inside as.sir(), which was
  slow and caused version-mismatch issues with load_all() workflows
- Added as_wt_nwt to the exclusion list in as_sir_method() to suppress
  false-positive "no longer used" warnings during parallel runs
- Fixed pieces_per_col row-batch calculation to use n_workers (total available
  workers from the active plan) instead of n_cores (workers clipped to n_cols),
  so row-batch mode activates correctly when n_cols < n_workers
- Updated @param parallel and @param max_cores roxygen docs; regenerated man/as.sir.Rd
- Updated sequential-mode hint to instruct users to set plan() first

https://claude.ai/code/session_01M1Jvf2Miu6JL4TQrEh1wS8

* fix parallel

* fix parallel

* unit tests

* unit tedts

---------

Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
Matthijs Berends
2026-04-30 08:57:19 +01:00
committed by GitHub
parent 3f1b20c304
commit 23beebc6c3
11 changed files with 277 additions and 294 deletions

View File

@@ -408,13 +408,13 @@ test_that("test-sir.R", {
# Issue #278: re-running as.sir() on already-<sir> data must preserve columns
df_already_sir <- data.frame(
mo = "B_ESCHR_COLI",
mo = "B_ESCHR_COLI",
AMC = as.mic(c("1", "2", "4")),
GEN = sample(c("S", "I", "R"), 3, replace = TRUE),
stringsAsFactors = FALSE
)
first_pass <- suppressMessages(as.sir(df_already_sir, col_mo = "mo", info = FALSE))
second_pass <- suppressMessages(as.sir(first_pass, col_mo = "mo", info = FALSE))
first_pass <- suppressMessages(as.sir(df_already_sir, col_mo = "mo", info = FALSE))
second_pass <- suppressMessages(as.sir(first_pass, col_mo = "mo", info = FALSE))
expect_equal(ncol(first_pass), ncol(second_pass))
expect_true(is.sir(second_pass[["AMC"]]))
expect_true(is.sir(second_pass[["GEN"]]))
@@ -424,15 +424,15 @@ test_that("test-sir.R", {
# Issue #278: metadata columns whose names coincidentally match antibiotic
# codes (e.g. 'patient' -> OXY, 'ward' -> PRU) must not be processed
df_meta <- data.frame(
mo = "B_ESCHR_COLI",
mo = "B_ESCHR_COLI",
patient = paste0("Pt_", 1:20),
ward = rep(c("ICU", "Surgery", "Outpatient", "ED"), 5),
AMC = as.mic(rep(c("1", "2", "4", "8"), 5)),
ward = rep(c("ICU", "Surgery", "Outpatient", "ED"), 5),
AMC = as.mic(rep(c("1", "2", "4", "8"), 5)),
stringsAsFactors = FALSE
)
df_meta_sir <- suppressMessages(as.sir(df_meta, col_mo = "mo", info = FALSE))
expect_true("patient" %in% colnames(df_meta_sir))
expect_true("ward" %in% colnames(df_meta_sir))
expect_true("ward" %in% colnames(df_meta_sir))
expect_false(is.sir(df_meta_sir[["patient"]]))
expect_false(is.sir(df_meta_sir[["ward"]]))
expect_true(is.sir(df_meta_sir[["AMC"]]))
@@ -441,92 +441,111 @@ test_that("test-sir.R", {
# Tests must pass even when only 1 core is available; parallel = TRUE then
# silently falls back to sequential, but results must still be identical.
set.seed(42)
n_par <- 200
df_par <- data.frame(
mo = "B_ESCHR_COLI",
AMC = as.mic(sample(c("0.25", "0.5", "1", "2", "4", "8", "16", "32"), n_par, TRUE)),
GEN = as.mic(sample(c("0.5", "1", "2", "4", "8", "16", "32", "64"), n_par, TRUE)),
CIP = as.mic(sample(c("0.001", "0.002", "0.004", "0.008", "0.016", "0.032"), n_par, TRUE)),
PEN = sample(c("S", "I", "R", NA_character_), n_par, TRUE),
stringsAsFactors = FALSE
)
if (AMR:::pkg_is_available("future.apply")) {
set.seed(42)
n_par <- 200
df_par <- data.frame(
mo = "B_ESCHR_COLI",
AMC = as.mic(sample(c("0.25", "0.5", "1", "2", "4", "8", "16", "32"), n_par, TRUE)),
GEN = as.mic(sample(c("0.5", "1", "2", "4", "8", "16", "32", "64"), n_par, TRUE)),
CIP = as.mic(sample(c("0.001", "0.002", "0.004", "0.008", "0.016", "0.032"), n_par, TRUE)),
PEN = sample(c("S", "I", "R", NA_character_), n_par, TRUE),
stringsAsFactors = FALSE
)
# clear any existing history before comparing
sir_interpretation_history(clean = TRUE)
sir_seq <- suppressMessages(as.sir(df_par, col_mo = "mo", info = FALSE))
log_seq <- sir_interpretation_history(clean = TRUE)
# clear any existing history before comparing
sir_interpretation_history(clean = TRUE)
sir_seq <- suppressMessages(as.sir(df_par, col_mo = "mo", info = FALSE))
log_seq <- sir_interpretation_history(clean = TRUE)
sir_par <- suppressMessages(as.sir(df_par, col_mo = "mo", info = FALSE, parallel = TRUE))
log_par <- sir_interpretation_history(clean = TRUE)
future::plan(future::multicore)
n_max_workers <- future::nbrOfWorkers()
# 1. parallel = TRUE gives identical SIR results to sequential
expect_identical(sir_seq[["AMC"]], sir_par[["AMC"]])
expect_identical(sir_seq[["GEN"]], sir_par[["GEN"]])
expect_identical(sir_seq[["CIP"]], sir_par[["CIP"]])
expect_identical(sir_seq[["PEN"]], sir_par[["PEN"]])
sir_par <- suppressMessages(as.sir(df_par, col_mo = "mo", info = FALSE, parallel = TRUE))
log_par <- sir_interpretation_history(clean = TRUE)
# 2. same number of log rows as sequential
expect_equal(nrow(log_seq), nrow(log_par))
# 1. parallel = TRUE gives identical SIR results to sequential
expect_identical(sir_seq[["AMC"]], sir_par[["AMC"]])
expect_identical(sir_seq[["GEN"]], sir_par[["GEN"]])
expect_identical(sir_seq[["CIP"]], sir_par[["CIP"]])
expect_identical(sir_seq[["PEN"]], sir_par[["PEN"]])
# 3. pre-existing log entries must not be duplicated
# run sequential once to populate the history, then run parallel and
# verify the new parallel run adds exactly as many rows as sequential
sir_interpretation_history(clean = TRUE)
suppressMessages(as.sir(df_par, col_mo = "mo", info = FALSE)) # populate history
pre_n <- nrow(sir_interpretation_history())
suppressMessages(as.sir(df_par, col_mo = "mo", info = FALSE, parallel = TRUE))
post_n <- nrow(sir_interpretation_history())
expect_equal(post_n - pre_n, nrow(log_seq)) # exactly one run's worth of new rows
sir_interpretation_history(clean = TRUE)
# 2. same number of log rows as sequential
expect_equal(nrow(log_seq), nrow(log_par))
# 4. two sequential runs and two parallel runs yield identical results
sir_par2 <- suppressMessages(as.sir(df_par, col_mo = "mo", info = FALSE, parallel = TRUE))
expect_identical(sir_par[["AMC"]], sir_par2[["AMC"]])
expect_identical(sir_par[["GEN"]], sir_par2[["GEN"]])
# 3. pre-existing log entries must not be duplicated
# run sequential once to populate the history, then run parallel and
# verify the new parallel run adds exactly as many rows as sequential
sir_interpretation_history(clean = TRUE)
future::plan(future::sequential)
suppressMessages(as.sir(df_par, col_mo = "mo", info = FALSE)) # populate history
pre_n <- nrow(sir_interpretation_history())
future::plan(future::multicore)
suppressMessages(as.sir(df_par, col_mo = "mo", info = FALSE, parallel = TRUE))
post_n <- nrow(sir_interpretation_history())
expect_equal(post_n - pre_n, nrow(log_seq)) # exactly one run's worth of new rows
sir_interpretation_history(clean = TRUE)
# 5. max_cores = 1 gives same results as default sequential
sir_mc1 <- suppressMessages(as.sir(df_par, col_mo = "mo", info = FALSE, parallel = TRUE, max_cores = 1L))
expect_identical(sir_seq[["AMC"]], sir_mc1[["AMC"]])
expect_identical(sir_seq[["GEN"]], sir_mc1[["GEN"]])
# 4. two sequential runs and two parallel runs yield identical results
sir_par2 <- suppressMessages(as.sir(df_par, col_mo = "mo", info = FALSE, parallel = TRUE))
expect_identical(sir_par[["AMC"]], sir_par2[["AMC"]])
expect_identical(sir_par[["GEN"]], sir_par2[["GEN"]])
# 6. max_cores = 2 and max_cores = 3 give same results as sequential
sir_mc2 <- suppressMessages(as.sir(df_par, col_mo = "mo", info = FALSE, parallel = TRUE, max_cores = 2L))
sir_mc3 <- suppressMessages(as.sir(df_par, col_mo = "mo", info = FALSE, parallel = TRUE, max_cores = 3L))
expect_identical(sir_seq[["AMC"]], sir_mc2[["AMC"]])
expect_identical(sir_seq[["GEN"]], sir_mc3[["GEN"]])
# 5. used cores = 1 gives same results as default sequential
future::plan(future::multicore, workers = 1)
sir_mc1 <- suppressMessages(as.sir(df_par, col_mo = "mo", info = FALSE, parallel = TRUE))
expect_identical(sir_seq[["AMC"]], sir_mc1[["AMC"]])
expect_identical(sir_seq[["GEN"]], sir_mc1[["GEN"]])
# 7. single-column data frame falls back silently to sequential
df_single <- df_par[, c("mo", "AMC")]
sir_single_seq <- suppressMessages(as.sir(df_single, col_mo = "mo", info = FALSE))
sir_single_par <- suppressMessages(as.sir(df_single, col_mo = "mo", info = FALSE, parallel = TRUE))
expect_identical(sir_single_seq[["AMC"]], sir_single_par[["AMC"]])
# 6. used cores = 2 and used cores = 3 give same results as sequential
if (n_max_workers >= 3) {
future::plan(future::multicore, workers = 2)
sir_mc2 <- suppressMessages(as.sir(df_par, col_mo = "mo", info = FALSE, parallel = TRUE))
future::plan(future::multicore, workers = 3)
sir_mc3 <- suppressMessages(as.sir(df_par, col_mo = "mo", info = FALSE, parallel = TRUE))
expect_identical(sir_seq[["AMC"]], sir_mc2[["AMC"]])
expect_identical(sir_seq[["GEN"]], sir_mc3[["GEN"]])
}
# 9. row-batch mode (n_cols < n_cores): force row splitting via max_cores and
# verify identical output to sequential for a dataset with 2 AB columns so
# pieces_per_col = ceiling(max_cores / 2) >= 2 and row batching activates
df_wide <- data.frame(
mo = "B_ESCHR_COLI",
AMC = as.mic(sample(c("1", "2", "4", "8"), n_par, TRUE)),
GEN = as.mic(sample(c("1", "2", "4", "8"), n_par, TRUE)),
stringsAsFactors = FALSE
)
sir_wide_seq <- suppressMessages(as.sir(df_wide, col_mo = "mo", info = FALSE))
sir_wide_par <- suppressMessages(as.sir(df_wide, col_mo = "mo", info = FALSE,
parallel = TRUE, max_cores = 8L))
expect_identical(sir_wide_seq[["AMC"]], sir_wide_par[["AMC"]])
expect_identical(sir_wide_seq[["GEN"]], sir_wide_par[["GEN"]])
# 7. single-column data frame falls back silently to sequential
df_single <- df_par[, c("mo", "AMC")]
future::plan(future::sequential)
sir_single_seq <- suppressMessages(as.sir(df_single, col_mo = "mo", info = FALSE))
future::plan(future::multicore)
sir_single_par <- suppressMessages(as.sir(df_single, col_mo = "mo", info = FALSE, parallel = TRUE))
expect_identical(sir_single_seq[["AMC"]], sir_single_par[["AMC"]])
# 8. info = TRUE with parallel does not produce per-column worker messages
# (messages should only appear in the main process, not duplicated from workers)
msgs <- capture.output(
suppressWarnings(as.sir(df_par, col_mo = "mo", info = TRUE, parallel = TRUE)),
type = "message"
)
# each AB column name should appear at most once in all messages combined
for (ab_nm in c("AMC", "GEN", "CIP", "PEN")) {
n_mentions <- sum(grepl(ab_nm, msgs, fixed = TRUE))
expect_lte(n_mentions, 1L)
# 8. row-batch mode (n_cols < n_cores): force row splitting via used cores and
# verify identical output to sequential for a dataset with 2 AB columns so
# pieces_per_col = ceiling(used cores / 2) >= 2 and row batching activates
df_wide <- data.frame(
mo = "B_ESCHR_COLI",
AMC = as.mic(sample(c("1", "2", "4", "8"), n_par, TRUE)),
GEN = as.mic(sample(c("1", "2", "4", "8"), n_par, TRUE)),
stringsAsFactors = FALSE
)
future::plan(future::sequential)
sir_wide_seq <- suppressMessages(as.sir(df_wide, col_mo = "mo", info = FALSE))
future::plan(future::multicore)
sir_wide_par <- suppressMessages(as.sir(df_wide,
col_mo = "mo", info = FALSE,
parallel = TRUE
))
expect_identical(sir_wide_seq[["AMC"]], sir_wide_par[["AMC"]])
expect_identical(sir_wide_seq[["GEN"]], sir_wide_par[["GEN"]])
# 8. info = TRUE with parallel does not produce per-column worker messages
# (messages should only appear in the main process, not duplicated from workers)
msgs <- capture.output(
suppressWarnings(as.sir(df_par, col_mo = "mo", info = TRUE, parallel = TRUE)),
type = "message"
)
# each AB column name should appear at most once in all messages combined
for (ab_nm in c("AMC", "GEN", "CIP", "PEN")) {
n_mentions <- sum(grepl(ab_nm, msgs, fixed = TRUE))
expect_lte(n_mentions, 1L)
}
future::plan(future::sequential)
}
})
@@ -536,9 +555,9 @@ test_that("custom reference_data: non-EUCAST/CLSI guideline produces R", {
# coerce_reference_data_columns() will coerce mo/ab to the right class.
my_bp <- clinical_breakpoints[clinical_breakpoints$method == "MIC" &
clinical_breakpoints$type == "human", ][1, ]
my_bp$guideline <- "MyLab 2025"
my_bp$mo <- "B_ACHRMB_XYLS" # plain character — coerced to <mo>
my_bp$ab <- "MEM" # plain character — coerced to <ab>
my_bp$guideline <- "MyLab 2025"
my_bp$mo <- "B_ACHRMB_XYLS" # plain character — coerced to <mo>
my_bp$ab <- "MEM" # plain character — coerced to <ab>
my_bp$breakpoint_S <- 8
my_bp$breakpoint_R <- 32
@@ -556,26 +575,30 @@ test_that("custom reference_data: non-EUCAST/CLSI guideline produces R", {
# guideline explicitly set: same result when it matches the data
expect_equal(as.character(suppressMessages(
as.sir(as.mic(64), mo = "B_ACHRMB_XYLS", ab = "MEM",
guideline = "MyLab 2025", reference_data = my_bp)
as.sir(as.mic(64),
mo = "B_ACHRMB_XYLS", ab = "MEM",
guideline = "MyLab 2025", reference_data = my_bp
)
)), "R")
})
test_that("custom reference_data: host = NA acts as host-agnostic fallback", {
my_bp <- clinical_breakpoints[clinical_breakpoints$method == "MIC" &
clinical_breakpoints$type == "human", ][1, ]
my_bp$guideline <- "MyLab 2025"
my_bp$mo <- "B_ACHRMB_XYLS"
my_bp$ab <- "MEM"
my_bp$type <- "animal"
my_bp$host <- NA # logical NA — coerced to character by coerce_reference_data_columns()
my_bp$guideline <- "MyLab 2025"
my_bp$mo <- "B_ACHRMB_XYLS"
my_bp$ab <- "MEM"
my_bp$type <- "animal"
my_bp$host <- NA # logical NA — coerced to character by coerce_reference_data_columns()
my_bp$breakpoint_S <- 8
my_bp$breakpoint_R <- 32
# NA host should match when no species-specific row exists
result <- suppressMessages(
as.sir(as.mic(64), mo = "B_ACHRMB_XYLS", ab = "MEM",
host = "dogs", breakpoint_type = "animal", reference_data = my_bp)
as.sir(as.mic(64),
mo = "B_ACHRMB_XYLS", ab = "MEM",
host = "dogs", breakpoint_type = "animal", reference_data = my_bp
)
)
expect_equal(as.character(result), "R")
})

View File

@@ -89,6 +89,11 @@ test_that("test-zzz.R", {
"symbol" = "cli",
# curl
"has_internet" = "curl",
# future
"plan" = "future",
"nbrOfWorkers" = "future",
# future.apply
"future_lapply" = "future.apply",
# ggplot2
"aes" = "ggplot2",
"arrow" = "ggplot2",
@@ -127,8 +132,6 @@ test_that("test-zzz.R", {
"kable" = "knitr",
"knit_print" = "knitr",
"opts_chunk" = "knitr",
# parallelly
"availableCores" = "parallelly",
# pillar
"pillar_shaft" = "pillar",
"style_na" = "pillar",