From c500fdb645e724bf2b003c9bff83798896110d3e Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 7 Mar 2026 13:43:21 +0000 Subject: [PATCH] Add sir.R/mic.R fixes and mdro() unit tests; bump to 3.0.1.9030 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit R/sir.R (line 571): Guard purely numeric strings (e.g. "1", "8") from the Unicode letter filter. Values matching the broad SIR regex but consisting only of digits must not be stripped; add `x %unlike% "^[0-9+]$"` predicate. R/mic.R (lines 220-222): Preserve the letter 'e' during Unicode-letter removal so that MIC values in scientific notation (e.g. "1e-3", "2.5e-2") survive the cleaning step. - Line 220: [\\p{L}] → [^e\\P{L}] (remove all letters except 'e') - Line 222: [^0-9.><= -]+ → [^0-9e.><= -]+ (allow 'e' in whitelist) tests/testthat/test-mdro.R: New tests for the drug+inhibitor inference added in the previous commit (issue #209): - TZP=R with no PIP column → PIP inferred R → MDRO class elevated - TZP=S with no PIP column → proxy col is NA (not S) → class lower - verbose mode emits "Inferring resistance" message - AMC=R with no AMX column runs without error (Enterococcus faecium) https://claude.ai/code/session_01Cp154UtssHg84bw38xiiTG --- DESCRIPTION | 2 +- NEWS.md | 4 +++- R/mic.R | 4 ++-- R/sir.R | 2 +- tests/testthat/test-mdro.R | 49 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 56 insertions(+), 5 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 49aa54457..57548f235 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: AMR -Version: 3.0.1.9029 +Version: 3.0.1.9030 Date: 2026-03-06 Title: Antimicrobial Resistance Data Analysis Description: Functions to simplify and standardise antimicrobial resistance (AMR) diff --git a/NEWS.md b/NEWS.md index 5b2941c9c..71738ef11 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# AMR 3.0.1.9029 +# AMR 3.0.1.9030 ### New * Integration with the **tidymodels** framework to allow seamless use of SIR, MIC and disk data in modelling pipelines via `recipes` @@ -19,6 +19,8 @@ ### Fixes * `mdro()`: when a base beta-lactam drug column is missing but a corresponding drug+inhibitor combination is present in the data and resistant (e.g., piperacillin/tazobactam = R while piperacillin is absent), the base drug is now correctly inferred as resistant. This ensures MDRO classification is not missed due to test-ordering differences in the laboratory. The reverse direction is also valid: susceptibility in a combination does not imply susceptibility in the base drug (the inhibitor may be responsible), so only resistance is propagated. Closes #209 +* Fixed a bug in `as.sir()` where values that were purely numeric (e.g., `"1"`) and matched the broad SIR-matching regex would be incorrectly stripped of all content by the Unicode letter filter +* Fixed a bug in `as.mic()` where MIC values in scientific notation (e.g., `"1e-3"`) were incorrectly handled because the letter `e` was removed along with other Unicode letters; scientific notation `e` is now preserved * Fixed a bug in `as.ab()` where certain AB codes containing "PH" or "TH" (such as `ETH`, `MTH`, `PHE`, `PHN`, `STH`, `THA`, `THI1`) would incorrectly return `NA` when combined in a vector with any untranslatable value (#245) * Fixed a bug in `antibiogram()` for when no antimicrobials are set * Fixed a bug in `as.sir()` where for numeric input the arguments `S`, `I`, and `R` would not be considered (#244) diff --git a/R/mic.R b/R/mic.R index 895c0f2c0..6b2eaad0b 100644 --- a/R/mic.R +++ b/R/mic.R @@ -217,9 +217,9 @@ as.mic <- function(x, na.rm = FALSE, keep_operators = "all", round_to_next_log2 warning_("Some MICs were combined values, only the first values are kept") x[x %like% "[0-9]/.*[0-9]"] <- gsub("/.*", "", x[x %like% "[0-9]/.*[0-9]"]) } - x <- trimws2(gsub("[\\p{L}]", "", x, perl = TRUE)) # \p{L} is the Unicode category for all letters, including those with diacritics + x <- trimws2(gsub("[^e\\P{L}]", "", x, perl = TRUE)) # \p{L} is the Unicode category for all letters, including those with diacritics # remove other invalid characters - x <- gsub("[^0-9.><= -]+", "", x, perl = TRUE) + x <- gsub("[^0-9e.><= -]+", "", x, perl = TRUE) # transform => to >= and =< to <= x <- gsub("=<", "<=", x, fixed = TRUE) x <- gsub("=>", ">=", x, fixed = TRUE) diff --git a/R/sir.R b/R/sir.R index 58e733d4c..c9f5e7533 100755 --- a/R/sir.R +++ b/R/sir.R @@ -568,7 +568,7 @@ as.sir.default <- function(x, x[x %like% "dose"] <- "SDD" mtch <- grepl(paste0("(", S, "|", I, "|", R, "|", NI, "|", SDD, "|", WT, "|", NWT, "|", NS, "|[A-Z]+)"), x, perl = TRUE) x[!mtch] <- "" - x[mtch] <- trimws2(gsub("[^\\p{L}]", "", x[mtch], perl = TRUE)) # \p{L} is the Unicode category for all letters, including those with diacritics + x[mtch & x %unlike% "^[0-9+]$"] <- trimws2(gsub("[^\\p{L}]", "", x[mtch & x %unlike% "^[0-9+]$"], perl = TRUE)) # \p{L} is the Unicode category for all letters, including those with diacritics # apply regexes set by user x[x %like% S] <- "S" x[x %like% I] <- "I" diff --git a/tests/testthat/test-mdro.R b/tests/testthat/test-mdro.R index b7385b69f..d5ae49207 100755 --- a/tests/testthat/test-mdro.R +++ b/tests/testthat/test-mdro.R @@ -296,4 +296,53 @@ test_that("test-mdro.R", { expect_output(x <- mdro(example_isolates %>% group_by(ward), info = TRUE, pct_required_classes = 0)) expect_output(x <- mdro(example_isolates %>% group_by(ward), guideline = custom, info = TRUE)) } + + # drug+inhibitor inference for missing base drug columns (issue #209) ------- + # Resistance in drug+inhibitor always implies resistance in the base drug. + # If PIP (piperacillin) is absent but TZP (piperacillin/tazobactam) is R, + # the base drug must be R -> MDRO classification should not be missed. + pseud_no_pip <- data.frame( + mo = as.mo("Pseudomonas aeruginosa"), + TZP = as.sir("R"), # piperacillin/tazobactam present; no PIP column + IPM = as.sir("R"), + MEM = as.sir("R"), + CAZ = as.sir("R"), + FEP = as.sir("R"), + CIP = as.sir("R"), + GEN = as.sir("R"), + TOB = as.sir("R"), + AMK = as.sir("R"), + COL = as.sir("S"), + stringsAsFactors = FALSE + ) + # With TZP=R, PIP should be inferred R; result should be XDR or PDR (integer > 2) + result_no_pip <- suppressMessages(suppressWarnings(mdro(pseud_no_pip, guideline = "EUCAST", info = FALSE))) + expect_true(as.integer(result_no_pip$MDRO) > 1L) + + # Susceptibility in combination must NOT be propagated to base drug + # (the inhibitor may be responsible; we cannot conclude PIP=S from TZP=S) + pseud_tzp_s <- pseud_no_pip + pseud_tzp_s$TZP <- as.sir("S") + result_tzp_s <- suppressMessages(suppressWarnings(mdro(pseud_tzp_s, guideline = "EUCAST", info = FALSE))) + # Proxy column is NA (not S), so the classification should be lower than when TZP=R + expect_true(as.integer(result_tzp_s$MDRO) < as.integer(result_no_pip$MDRO)) + + # verbose mode should emit an inference message when a proxy column is created + expect_output( + suppressMessages(suppressWarnings(mdro(pseud_no_pip, guideline = "EUCAST", info = FALSE, verbose = TRUE))), + regexp = "Inferring resistance" + ) + + # Multiple combos for the same base drug: AMX can come from AMC (amoxicillin/clavulanic acid) + ente_no_amx <- data.frame( + mo = as.mo("Enterococcus faecium"), + AMC = as.sir("R"), # amoxicillin/clavulanic acid; no AMX column + VAN = as.sir("R"), + TEC = as.sir("R"), + LNZ = as.sir("R"), + DAP = as.sir("R"), + stringsAsFactors = FALSE + ) + # Should run without error and return a data.frame; AMX inferred R from AMC + expect_inherits(suppressMessages(suppressWarnings(mdro(ente_no_amx, guideline = "EUCAST", info = FALSE))), "data.frame") })