diff --git a/CLAUDE.md b/CLAUDE.md
index eec8c2f86..3c8878bf5 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -148,24 +148,34 @@ Version format: `major.minor.patch.dev` (e.g., `3.0.1.9021`)
 
 ### Version and date bump required for every PR
 
-Before opening a pull request, always increment the four-digit dev counter by 1 in **both** of these files:
+All PRs are **squash-merged**, so each PR lands as exactly **one commit** on the default branch. Version numbers are kept in sync with the cumulative commit count since the last released tag. Therefore **exactly one version bump is allowed per PR**, regardless of how many intermediate commits are made on the branch.
 
-1. **`DESCRIPTION`** — the `Version:` field:
-   ```
-   Version: 3.0.1.9021 → Version: 3.0.1.9022
-   ```
+#### Computing the correct version number
 
-2. **`NEWS.md`** — the top-level heading:
-   ```
-   # AMR 3.0.1.9021 → # AMR 3.0.1.9022
-   ```
+Run the following from the repo root to determine the version string to use:
 
-Read the current version from `DESCRIPTION`, add 1 to the last numeric component, and write the new version to both files in the same commit as the rest of the PR changes.
+```bash
+currenttag=$(git describe --tags --abbrev=0 | sed 's/v//')
+currenttagfull=$(git describe --tags --abbrev=0)
+defaultbranch=$(git branch | cut -c 3- | grep -E '^master$|^main$')
+currentcommit=$(git rev-list --count ${currenttagfull}..${defaultbranch})
+currentversion="${currenttag}.$((currentcommit + 9001 + 1))"
+echo "$currentversion"
+```
 
-Also bump the date to the current date in **`DESCRIPTION`**, where it's in the `Date:` field in ISO format:
+The `+ 1` accounts for the fact that this PR's squash commit is not yet on the default branch. Write the resulting version string to **both** of these files (only once per PR, even if the branch has multiple commits):
+
+1. **`DESCRIPTION`** — the `Version:` field
+2. **`NEWS.md`** — the top-level heading `# AMR <version>`
+
+If `git describe` fails (e.g. no tags exist in the environment), fall back to reading the current version from `DESCRIPTION` and adding 1 to the last numeric component — but only if no bump has already been made in this PR.
+
+#### Date field
+
+The `Date:` field in `DESCRIPTION` must reflect the date of the **last commit to the PR** (not the first), in ISO format. Update it with every commit so it is always current:
 
 ```
-Date: 2025-12-31
+Date: 2026-03-07
 ```
 
 ## Internal State
diff --git a/DESCRIPTION b/DESCRIPTION
index 8757581ea..776521b68 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: AMR
-Version: 3.0.1.9028
-Date: 2026-03-06
+Version: 3.0.1.9029
+Date: 2026-03-07
 Title: Antimicrobial Resistance Data Analysis
 Description: Functions to simplify and standardise antimicrobial resistance (AMR)
     data analysis and to work with microbial and antimicrobial properties by
diff --git a/NEWS.md b/NEWS.md
index 4dff7d59f..d35c2cdf8 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,4 +1,4 @@
-# AMR 3.0.1.9028
+# AMR 3.0.1.9029
 
 ### New
 * Integration with the **tidymodels** framework to allow seamless use of SIR, MIC and disk data in modelling pipelines via `recipes`
@@ -18,6 +18,9 @@
 * Two new `NA` objects, `NA_ab_` and `NA_mo_`, analogous to base R's `NA_character_` and `NA_integer_`, for use in pipelines that require typed missing values
 
 ### Fixes
+* `mdro()`: when a base beta-lactam drug column is missing but a corresponding drug+inhibitor combination is present in the data and resistant (e.g., piperacillin/tazobactam = R while piperacillin is absent), the base drug is now correctly inferred as resistant. This ensures MDRO classification is not missed due to test-ordering differences in the laboratory. The reverse does not hold: susceptibility in a combination does not imply susceptibility in the base drug (the inhibitor may be responsible), so only resistance is propagated. Closes #209
+* Fixed a bug in `as.sir()` where values that were purely numeric (e.g., `"1"`) and matched the broad SIR-matching regex would be incorrectly stripped of all content by the Unicode letter filter
+* Fixed a bug in `as.mic()` where MIC values in scientific notation (e.g., `"1e-3"`) were incorrectly handled because the letter `e` was removed along with other Unicode letters; scientific notation `e` is now preserved
 * Fixed a bug in `as.ab()` where certain AB codes containing "PH" or "TH" (such as `ETH`, `MTH`, `PHE`, `PHN`, `STH`, `THA`, `THI1`) would incorrectly return `NA` when combined in a vector with any untranslatable value (#245)
 * Fixed a bug in `antibiogram()` for when no antimicrobials are set
 * Fixed a bug in `as.sir()` where for numeric input the arguments `S`, `I`, and `R` would not be considered (#244)
diff --git a/R/mdro.R b/R/mdro.R
index e583ddea4..c1e302719 100755
--- a/R/mdro.R
+++ b/R/mdro.R
@@ -480,6 +480,50 @@ mdro <- function(x = NULL,
   }
   cols_ab <- cols_ab[!duplicated(cols_ab)]
 
+  # Infer resistance for missing base drugs from available drug+inhibitor combination columns.
+  # Clinical principle: resistance in drug+inhibitor (e.g., piperacillin/tazobactam = R)
+  # always implies resistance in the base drug (e.g., piperacillin = R), because the
+  # combination is designed to be at least as active as the base drug on its own.
+  # NOTE: susceptibility in a combination does NOT imply susceptibility in the base drug
+  # (the inhibitor may be responsible), so synthetic proxy columns only propagate R, not S/I.
+  .combos_in_data <- AB_BETALACTAMS_WITH_INHIBITOR[AB_BETALACTAMS_WITH_INHIBITOR %in% names(cols_ab)]
+  if (length(.combos_in_data) > 0) {
+    .base_drugs <- suppressMessages(
+      as.ab(gsub("/.*", "", ab_name(as.character(.combos_in_data), language = NULL)))
+    )
+    .unique_bases <- unique(.base_drugs[!is.na(.base_drugs)])
+    for (.base in .unique_bases) {
+      .base_code <- as.character(.base)
+      if (!.base_code %in% names(cols_ab)) {
+        # Base drug column is absent: collect the data columns of every available combination of this base drug
+        .combos <- .combos_in_data[!is.na(.base_drugs) & as.character(.base_drugs) == .base_code]
+        .combo_cols <- unname(cols_ab[as.character(.combos)])
+        .combo_cols <- .combo_cols[!is.na(.combo_cols)]
+        if (length(.combo_cols) > 0) {
+          # Row-wise: the proxy is "R" when ANY combination column is R, otherwise NA (S and I are never propagated)
+          .sir_chars <- as.data.frame(
+            lapply(x[, .combo_cols, drop = FALSE], function(col) as.character(as.sir(col))),
+            stringsAsFactors = FALSE
+          )
+          .new_col <- paste0(".sir_proxy_", .base_code)
+          x[[.new_col]] <- ifelse(rowSums(.sir_chars == "R", na.rm = TRUE) > 0L, "R", NA_character_)
+          cols_ab <- c(cols_ab, stats::setNames(.new_col, .base_code))
+          if (isTRUE(verbose)) {
+            message_(
+              "Inferring resistance for ", ab_name(.base_code, language = NULL),
+              " from available drug+inhibitor combination(s): ",
+              paste(ab_name(as.character(.combos), language = NULL), collapse = ", "),
+              " (resistance in a combination always implies resistance in the base drug)",
+              add_fn = font_blue
+            )
+          }
+        }
+      }
+    }
+    cols_ab <- cols_ab[!duplicated(names(cols_ab))]
+  }
+  rm(list = intersect(ls(all.names = TRUE), c(".combos_in_data", ".base_drugs", ".unique_bases", ".base", ".base_code", ".combos", ".combo_cols", ".sir_chars", ".new_col"))) # all.names = TRUE is required: ls() omits dot-prefixed names by default, which made this cleanup a no-op
+
   # nolint start
   AMC <- cols_ab["AMC"]
   AMK <- cols_ab["AMK"]
@@ -674,6 +718,16 @@ mdro <- function(x = NULL,
     x
   }
 
+  ab_without_inhibitor <- function(ab_codes) {
+    # Get the base drug AB code from a drug+inhibitor combination.
+    # e.g., AMC (amoxicillin/clavulanic acid) -> AMX (amoxicillin)
+    #       TZP (piperacillin/tazobactam) -> PIP (piperacillin)
+    #       SAM (ampicillin/sulbactam) -> AMP (ampicillin)
+    combo_names <- ab_name(ab_codes, language = NULL)
+    base_names <- gsub("/.*", "", combo_names)
+    suppressMessages(as.ab(base_names))
+  }
+
   # antimicrobial classes
   # nolint start
   aminoglycosides <- c(TOB, GEN)
diff --git a/R/mic.R b/R/mic.R
index 895c0f2c0..6b2eaad0b 100644
--- a/R/mic.R
+++ b/R/mic.R
@@ -217,9 +217,9 @@ as.mic <- function(x, na.rm = FALSE, keep_operators = "all", round_to_next_log2
       warning_("Some MICs were combined values, only the first values are kept")
       x[x %like% "[0-9]/.*[0-9]"] <- gsub("/.*", "", x[x %like% "[0-9]/.*[0-9]"])
     }
-    x <- trimws2(gsub("[\\p{L}]", "", x, perl = TRUE)) # \p{L} is the Unicode category for all letters, including those with diacritics
+    x <- trimws2(gsub("(?<![0-9])e|e(?![-+]?[0-9])|[^e\\P{L}]", "", x, perl = TRUE)) # remove all letters (\p{L} is the Unicode category for all letters, including those with diacritics), except an 'e' in scientific notation, i.e. directly between a digit and an optionally signed digit
     # remove other invalid characters
-    x <- gsub("[^0-9.><= -]+", "", x, perl = TRUE)
+    x <- gsub("[^0-9e.><= -]+", "", x, perl = TRUE)
     # transform => to >= and =< to <=
     x <- gsub("=<", "<=", x, fixed = TRUE)
     x <- gsub("=>", ">=", x, fixed = TRUE)
diff --git a/R/sir.R b/R/sir.R
index 58e733d4c..c9f5e7533 100755
--- a/R/sir.R
+++ b/R/sir.R
@@ -568,7 +568,7 @@ as.sir.default <- function(x,
     x[x %like% "dose"] <- "SDD"
     mtch <- grepl(paste0("(", S, "|", I, "|", R, "|", NI, "|", SDD, "|", WT, "|", NWT, "|", NS, "|[A-Z]+)"), x, perl = TRUE)
     x[!mtch] <- ""
-    x[mtch] <- trimws2(gsub("[^\\p{L}]", "", x[mtch], perl = TRUE)) # \p{L} is the Unicode category for all letters, including those with diacritics
+    x[mtch & x %unlike% "^[0-9]+$"] <- trimws2(gsub("[^\\p{L}]", "", x[mtch & x %unlike% "^[0-9]+$"], perl = TRUE)) # keep only letters (\p{L} is the Unicode category for all letters, including those with diacritics); purely numeric values are skipped so they are not emptied
     # apply regexes set by user
     x[x %like% S] <- "S"
     x[x %like% I] <- "I"
diff --git a/tests/testthat/test-mdro.R b/tests/testthat/test-mdro.R
index b7385b69f..ea81eea29 100755
--- a/tests/testthat/test-mdro.R
+++ b/tests/testthat/test-mdro.R
@@ -296,4 +296,50 @@ test_that("test-mdro.R", {
     expect_output(x <- mdro(example_isolates %>% group_by(ward), info = TRUE, pct_required_classes = 0))
     expect_output(x <- mdro(example_isolates %>% group_by(ward), guideline = custom, info = TRUE))
   }
+
+  # drug+inhibitor inference for missing base drug columns (issue #209) -------
+  # Resistance in drug+inhibitor implies resistance in the base drug.
+  # MRGN guideline is used because it explicitly requires PIP=R (not PIP OR TZP)
+  # for Pseudomonas aeruginosa 4MRGN, making the proxy effect directly testable.
+  pseud_no_pip <- data.frame(
+    mo = as.mo("Pseudomonas aeruginosa"),
+    TZP = as.sir("R"), # piperacillin/tazobactam; no PIP column
+    CAZ = as.sir("R"),
+    IPM = as.sir("R"),
+    MEM = as.sir("R"),
+    CIP = as.sir("R"),
+    stringsAsFactors = FALSE
+  )
+  # Inference message goes to message() / stderr, not stdout
+  # -> must use expect_message(), NOT expect_output()
+  expect_message(
+    suppressWarnings(mdro(pseud_no_pip, guideline = "mrgn", info = FALSE, verbose = TRUE)),
+    "Inferring resistance"
+  )
+  # With TZP=R, PIP is inferred R -> 4MRGN criteria met -> level 3 (> 1)
+  result_no_pip <- suppressMessages(suppressWarnings(
+    mdro(pseud_no_pip, guideline = "mrgn", info = FALSE)
+  ))
+  expect_true(as.integer(result_no_pip) > 1L)
+  # Susceptibility in combo does NOT propagate: proxy = NA, not S
+  # -> 4MRGN criteria no longer met -> lower level than when TZP=R
+  pseud_tzp_s <- pseud_no_pip
+  pseud_tzp_s$TZP <- as.sir("S")
+  result_tzp_s <- suppressMessages(suppressWarnings(
+    mdro(pseud_tzp_s, guideline = "mrgn", info = FALSE)
+  ))
+  expect_true(as.integer(result_tzp_s) < as.integer(result_no_pip))
+
+  # A different base drug and guideline: AMX is absent and only AMC (amoxicillin/clavulanic acid) is available
+  ente_no_amx <- data.frame(
+    mo = as.mo("Enterococcus faecium"),
+    AMC = as.sir("R"), # amoxicillin/clavulanic acid; no AMX column
+    VAN = as.sir("R"),
+    TEC = as.sir("R"),
+    LNZ = as.sir("R"),
+    DAP = as.sir("R"),
+    stringsAsFactors = FALSE
+  )
+  # Should run without error and return an ordered factor; AMX inferred R from AMC
+  expect_inherits(suppressMessages(suppressWarnings(mdro(ente_no_amx, guideline = "EUCAST", info = FALSE))), c("factor", "ordered"))
 })