1
0
mirror of https://github.com/msberends/AMR.git synced 2026-06-01 05:41:46 +02:00

Add sir.R/mic.R fixes and mdro() unit tests; bump to 3.0.1.9030

R/sir.R (line 571):
  Guard purely numeric strings (e.g. "1", "8") from the Unicode letter
  filter. Values matching the broad SIR regex but consisting only of digits
  must not be stripped; add `x %unlike% "^[0-9+]$"` predicate.

R/mic.R (lines 220-222):
  Preserve the letter 'e' during Unicode-letter removal so that MIC values
  in scientific notation (e.g. "1e-3", "2.5e-2") survive the cleaning step.
  - Line 220: [\\p{L}] → [^e\\P{L}]  (remove all letters except 'e')
  - Line 222: [^0-9.><= -]+ → [^0-9e.><= -]+  (allow 'e' in whitelist)

tests/testthat/test-mdro.R:
  New tests for the drug+inhibitor inference added in the previous commit
  (issue #209):
  - TZP=R with no PIP column → PIP inferred R → MDRO class elevated
  - TZP=S with no PIP column → proxy col is NA (not S) → class lower
  - verbose mode emits "Inferring resistance" message
  - AMC=R with no AMX column runs without error (Enterococcus faecium)

https://claude.ai/code/session_01Cp154UtssHg84bw38xiiTG
This commit is contained in:
Claude
2026-03-07 13:43:21 +00:00
parent 485ae25e09
commit c500fdb645
5 changed files with 56 additions and 5 deletions

View File

@@ -217,9 +217,9 @@ as.mic <- function(x, na.rm = FALSE, keep_operators = "all", round_to_next_log2
warning_("Some MICs were combined values, only the first values are kept")
x[x %like% "[0-9]/.*[0-9]"] <- gsub("/.*", "", x[x %like% "[0-9]/.*[0-9]"])
}
x <- trimws2(gsub("[\\p{L}]", "", x, perl = TRUE)) # \p{L} is the Unicode category for all letters, including those with diacritics
x <- trimws2(gsub("[^e\\P{L}]", "", x, perl = TRUE)) # \p{L} is the Unicode category for all letters, including those with diacritics
# remove other invalid characters
x <- gsub("[^0-9.><= -]+", "", x, perl = TRUE)
x <- gsub("[^0-9e.><= -]+", "", x, perl = TRUE)
# transform => to >= and =< to <=
x <- gsub("=<", "<=", x, fixed = TRUE)
x <- gsub("=>", ">=", x, fixed = TRUE)

View File

@@ -568,7 +568,7 @@ as.sir.default <- function(x,
x[x %like% "dose"] <- "SDD"
mtch <- grepl(paste0("(", S, "|", I, "|", R, "|", NI, "|", SDD, "|", WT, "|", NWT, "|", NS, "|[A-Z]+)"), x, perl = TRUE)
x[!mtch] <- ""
x[mtch] <- trimws2(gsub("[^\\p{L}]", "", x[mtch], perl = TRUE)) # \p{L} is the Unicode category for all letters, including those with diacritics
x[mtch & x %unlike% "^[0-9+]$"] <- trimws2(gsub("[^\\p{L}]", "", x[mtch & x %unlike% "^[0-9+]$"], perl = TRUE)) # \p{L} is the Unicode category for all letters, including those with diacritics
# apply regexes set by user
x[x %like% S] <- "S"
x[x %like% I] <- "I"