mirror of
https://github.com/msberends/AMR.git
synced 2026-05-31 23:01:44 +02:00
Fix as.sir() data.frame: preserve already-<sir> columns, exclude metadata
Issue #278: two related bugs in the column-detection / type-assignment pipeline. Bug 1 – already-<sir> columns deleted on re-run: line 886 excluded already-<sir> columns from the type assignment (they stayed type ""), causing the result loop to do x[,col] <- NULL, deleting them. Fix: drop the !is.sir() guard so all untyped columns fall through to type "sir" and are re-processed correctly. Bug 2 – metadata columns treated as antibiotics: as.ab("patient") -> OXY, as.ab("ward") -> PRU. The column detector accepted any column whose name matched an antibiotic code, regardless of content. Fix: for name-matched columns that do not already carry an AMR class, also verify that the content looks like AMR data (all_valid_mics, all-numeric, or any SIR-like string). all_valid_disks() is intentionally avoided here because it strips letters from strings (as.disk("Pt_1")==1). Also adds tools/benchmark_parallel.R: a standalone script that times sequential vs. parallel as.sir() across n=20/200/2000/20000 rows and saves a ggplot2 PNG to tools/benchmark_parallel.png. https://claude.ai/code/session_012DXCXbZUC54Zij1z9bFiHR
This commit is contained in:
@@ -406,6 +406,37 @@ test_that("test-sir.R", {
|
||||
expect_equal(out3, as.sir(c("NWT", "WT", "NWT")))
|
||||
expect_equal(out4, as.sir(c("NWT", "WT", "NWT")))
|
||||
|
||||
# Issue #278: re-running as.sir() on already-<sir> data must preserve columns
|
||||
df_already_sir <- data.frame(
|
||||
mo = "B_ESCHR_COLI",
|
||||
AMC = as.mic(c("1", "2", "4")),
|
||||
GEN = sample(c("S", "I", "R"), 3, replace = TRUE),
|
||||
stringsAsFactors = FALSE
|
||||
)
|
||||
first_pass <- suppressMessages(as.sir(df_already_sir, col_mo = "mo", info = FALSE))
|
||||
second_pass <- suppressMessages(as.sir(first_pass, col_mo = "mo", info = FALSE))
|
||||
expect_equal(ncol(first_pass), ncol(second_pass))
|
||||
expect_true(is.sir(second_pass[["AMC"]]))
|
||||
expect_true(is.sir(second_pass[["GEN"]]))
|
||||
expect_identical(first_pass[["AMC"]], second_pass[["AMC"]])
|
||||
expect_identical(first_pass[["GEN"]], second_pass[["GEN"]])
|
||||
|
||||
# Issue #278: metadata columns whose names coincidentally match antibiotic
|
||||
# codes (e.g. 'patient' -> OXY, 'ward' -> PRU) must not be processed
|
||||
df_meta <- data.frame(
|
||||
mo = "B_ESCHR_COLI",
|
||||
patient = paste0("Pt_", 1:20),
|
||||
ward = rep(c("ICU", "Surgery", "Outpatient", "ED"), 5),
|
||||
AMC = as.mic(rep(c("1", "2", "4", "8"), 5)),
|
||||
stringsAsFactors = FALSE
|
||||
)
|
||||
df_meta_sir <- suppressMessages(as.sir(df_meta, col_mo = "mo", info = FALSE))
|
||||
expect_true("patient" %in% colnames(df_meta_sir))
|
||||
expect_true("ward" %in% colnames(df_meta_sir))
|
||||
expect_false(is.sir(df_meta_sir[["patient"]]))
|
||||
expect_false(is.sir(df_meta_sir[["ward"]]))
|
||||
expect_true(is.sir(df_meta_sir[["AMC"]]))
|
||||
|
||||
# Parallel computing ----------------------------------------------------
|
||||
# Tests must pass even when only 1 core is available; parallel = TRUE then
|
||||
# silently falls back to sequential, but results must still be identical.
|
||||
|
||||
Reference in New Issue
Block a user