mirror of
https://github.com/msberends/AMR.git
synced 2026-05-31 13:41:42 +02:00
Add coerce_reference_data_columns() for lenient reference_data validation
check_reference_data() now returns the (possibly coerced) reference_data and the call site captures the result so downstream code sees the fixed columns. A new coerce_reference_data_columns() helper is called before the strict class check inside check_reference_data(). It coerces columns to the expected types: - mo -> as.mo() if not already <mo> class - ab -> as.ab() if not already <ab> class - character columns -> as.character() (e.g. host = NA becomes NA_character_) - numeric columns -> as.double() - logical columns -> as.logical() This allows users to build a custom reference_data from a plain data.frame without having to pre-apply as.mo()/as.ab() or worry about NA column types. Updated the reference_data roxygen argument to document the auto-coercion and restored the tests to the simpler form that uses plain character assignments, relying on the new coercion instead of workarounds. https://claude.ai/code/session_01Q8KtFFGG9qrjAgLJBbxG2U
This commit is contained in:
34
R/sir.R
34
R/sir.R
@@ -69,7 +69,7 @@ VALID_SIR_LEVELS <- c("S", "SDD", "I", "R", "NI", "WT", "NWT", "NS")
|
|||||||
#' @param host A vector (or column name) with [character]s to indicate the host. Only useful for veterinary breakpoints, as it requires `breakpoint_type = "animal"`. The values can be any text resembling the animal species, even in any of the `r length(LANGUAGES_SUPPORTED)` supported languages of this package. For foreign languages, be sure to set the language with [set_AMR_locale()] (though it will be automatically guessed based on the system language).
|
#' @param host A vector (or column name) with [character]s to indicate the host. Only useful for veterinary breakpoints, as it requires `breakpoint_type = "animal"`. The values can be any text resembling the animal species, even in any of the `r length(LANGUAGES_SUPPORTED)` supported languages of this package. For foreign languages, be sure to set the language with [set_AMR_locale()] (though it will be automatically guessed based on the system language).
|
||||||
#' @param language Language to convert values set in `host` when using animal breakpoints. Use one of these supported language names or [ISO 639-1 codes](https://en.wikipedia.org/wiki/ISO_639-1): `r vector_or(paste0(sapply(LANGUAGES_SUPPORTED_NAMES, function(x) x[[1]]), " (" , LANGUAGES_SUPPORTED, ")"), quotes = FALSE, sort = FALSE)`.
|
#' @param language Language to convert values set in `host` when using animal breakpoints. Use one of these supported language names or [ISO 639-1 codes](https://en.wikipedia.org/wiki/ISO_639-1): `r vector_or(paste0(sapply(LANGUAGES_SUPPORTED_NAMES, function(x) x[[1]]), " (" , LANGUAGES_SUPPORTED, ")"), quotes = FALSE, sort = FALSE)`.
|
||||||
#' @param verbose A [logical] to indicate that all notes should be printed during interpretation of MIC values or disk diffusion values.
|
#' @param verbose A [logical] to indicate that all notes should be printed during interpretation of MIC values or disk diffusion values.
|
||||||
#' @param reference_data A [data.frame] to be used for interpretation, which defaults to the [clinical_breakpoints] data set. Changing this argument allows for using own interpretation guidelines. This argument must contain a data set that is equal in structure to the [clinical_breakpoints] data set (same column names and column types). When `reference_data` is manually set, the `guideline` argument is optional: if omitted (or if its value does not match any row in the custom data), all rows in `reference_data` are considered. If `guideline` is set to a value that exists in the `guideline` column of the custom data, only matching rows are used — useful when a single custom table contains multiple guidelines. For the R classification, the EUCAST convention is used by default: MIC values `> breakpoint_R` and disk diffusion values `< breakpoint_R` are classified as R, with values between `breakpoint_S` and `breakpoint_R` classified as I (or SDD). Only when `guideline` contains `"CLSI"` are the closed-interval rules (`>= breakpoint_R` for MIC, `<= breakpoint_R` for disk) applied instead.
|
#' @param reference_data A [data.frame] to be used for interpretation, which defaults to the [clinical_breakpoints] data set. Changing this argument allows for using own interpretation guidelines. This argument must have the same column names as the [clinical_breakpoints] data set. Column types are coerced automatically where possible: the `mo` column is passed through [as.mo()], the `ab` column through [as.ab()], and plain character, numeric, or logical columns are cast to the expected type. When `reference_data` is manually set, the `guideline` argument is optional: if omitted (or if its value does not match any row in the custom data), all rows in `reference_data` are considered. If `guideline` is set to a value that exists in the `guideline` column of the custom data, only matching rows are used — useful when a single custom table contains multiple guidelines. For the R classification, the EUCAST convention is used by default: MIC values `> breakpoint_R` and disk diffusion values `< breakpoint_R` are classified as R, with values between `breakpoint_S` and `breakpoint_R` classified as I (or SDD). Only when using the standard [clinical_breakpoints] with a CLSI guideline are the closed-interval rules (`>= breakpoint_R` for MIC, `<= breakpoint_R` for disk) applied; custom `reference_data` always uses the open-interval (EUCAST) convention regardless of the guideline name.
|
||||||
#' @param threshold Maximum fraction of invalid antimicrobial interpretations of `x`, see *Examples*.
|
#' @param threshold Maximum fraction of invalid antimicrobial interpretations of `x`, see *Examples*.
|
||||||
#' @param conserve_capped_values Deprecated, use `capped_mic_handling` instead.
|
#' @param conserve_capped_values Deprecated, use `capped_mic_handling` instead.
|
||||||
#' @param ... For using on a [data.frame]: selection of columns to apply `as.sir()` to. Supports [tidyselect language][tidyselect::starts_with()] such as `where(is.mic)`, `starts_with(...)`, or `column1:column4`, and can thus also be [antimicrobial selectors][amr_selector()], e.g. `as.sir(df, penicillins())`.
|
#' @param ... For using on a [data.frame]: selection of columns to apply `as.sir()` to. Supports [tidyselect language][tidyselect::starts_with()] such as `where(is.mic)`, `starts_with(...)`, or `column1:column4`, and can thus also be [antimicrobial selectors][amr_selector()], e.g. `as.sir(df, penicillins())`.
|
||||||
@@ -1271,7 +1271,7 @@ as_sir_method <- function(method_short,
|
|||||||
meet_criteria(substitute_missing_r_breakpoint, allow_class = "logical", has_length = 1, .call_depth = -2)
|
meet_criteria(substitute_missing_r_breakpoint, allow_class = "logical", has_length = 1, .call_depth = -2)
|
||||||
meet_criteria(include_screening, allow_class = "logical", has_length = 1, .call_depth = -2)
|
meet_criteria(include_screening, allow_class = "logical", has_length = 1, .call_depth = -2)
|
||||||
meet_criteria(include_PKPD, allow_class = "logical", has_length = 1, .call_depth = -2)
|
meet_criteria(include_PKPD, allow_class = "logical", has_length = 1, .call_depth = -2)
|
||||||
check_reference_data(reference_data, .call_depth = -2)
|
reference_data <- check_reference_data(reference_data, .call_depth = -2)
|
||||||
meet_criteria(breakpoint_type, allow_class = "character", is_in = reference_data$type, has_length = 1, .call_depth = -2)
|
meet_criteria(breakpoint_type, allow_class = "character", is_in = reference_data$type, has_length = 1, .call_depth = -2)
|
||||||
meet_criteria(host, allow_class = c("character", "factor"), allow_NULL = TRUE, allow_NA = TRUE, .call_depth = -2)
|
meet_criteria(host, allow_class = c("character", "factor"), allow_NULL = TRUE, allow_NA = TRUE, .call_depth = -2)
|
||||||
language <- validate_language(language)
|
language <- validate_language(language)
|
||||||
@@ -2359,13 +2359,36 @@ rep.sir <- function(x, ...) {
|
|||||||
y
|
y
|
||||||
}
|
}
|
||||||
|
|
||||||
|
coerce_reference_data_columns <- function(x) {
|
||||||
|
ref <- AMR::clinical_breakpoints
|
||||||
|
for (col in names(ref)) {
|
||||||
|
col_ref <- ref[[col]]
|
||||||
|
col_x <- x[[col]]
|
||||||
|
if (identical(class(col_ref), class(col_x))) next
|
||||||
|
if (col == "mo") {
|
||||||
|
x[[col]] <- suppressMessages(as.mo(col_x))
|
||||||
|
} else if (col == "ab") {
|
||||||
|
x[[col]] <- suppressMessages(as.ab(col_x))
|
||||||
|
} else if (is.character(col_ref)) {
|
||||||
|
x[[col]] <- as.character(col_x)
|
||||||
|
} else if (is.numeric(col_ref)) {
|
||||||
|
x[[col]] <- as.double(col_x)
|
||||||
|
} else if (is.logical(col_ref)) {
|
||||||
|
x[[col]] <- as.logical(col_x)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
x
|
||||||
|
}
|
||||||
|
|
||||||
check_reference_data <- function(reference_data, .call_depth) {
|
check_reference_data <- function(reference_data, .call_depth) {
|
||||||
if (!identical(reference_data, AMR::clinical_breakpoints)) {
|
if (!identical(reference_data, AMR::clinical_breakpoints)) {
|
||||||
class_sir <- vapply(FUN.VALUE = character(1), AMR::clinical_breakpoints, function(x) paste0("<", class(x), ">", collapse = " and "))
|
if (!all(names(AMR::clinical_breakpoints) == names(reference_data))) {
|
||||||
class_ref <- vapply(FUN.VALUE = character(1), reference_data, function(x) paste0("<", class(x), ">", collapse = " and "))
|
|
||||||
if (!all(names(class_sir) == names(class_ref))) {
|
|
||||||
stop_("{.arg reference_data} must have the same column names as the {.help [clinical_breakpoints](AMR::clinical_breakpoints)} data set.", call = .call_depth)
|
stop_("{.arg reference_data} must have the same column names as the {.help [clinical_breakpoints](AMR::clinical_breakpoints)} data set.", call = .call_depth)
|
||||||
}
|
}
|
||||||
|
# coerce mo, ab, and other columns to the expected types where possible
|
||||||
|
reference_data <- coerce_reference_data_columns(reference_data)
|
||||||
|
class_sir <- vapply(FUN.VALUE = character(1), AMR::clinical_breakpoints, function(x) paste0("<", class(x), ">", collapse = " and "))
|
||||||
|
class_ref <- vapply(FUN.VALUE = character(1), reference_data, function(x) paste0("<", class(x), ">", collapse = " and "))
|
||||||
if (!all(class_sir == class_ref)) {
|
if (!all(class_sir == class_ref)) {
|
||||||
bad_col <- names(class_ref[class_sir != class_ref][1])
|
bad_col <- names(class_ref[class_sir != class_ref][1])
|
||||||
bad_cls <- gsub("<|>", "", class_ref[class_sir != class_ref][1])
|
bad_cls <- gsub("<|>", "", class_ref[class_sir != class_ref][1])
|
||||||
@@ -2373,4 +2396,5 @@ check_reference_data <- function(reference_data, .call_depth) {
|
|||||||
stop_("{.arg reference_data} must be the same structure as the {.help [clinical_breakpoints](AMR::clinical_breakpoints)} data set. Column ", paste0("{.field ", font_bold(bad_col, collapse = NULL), "}"), " is of class ", paste0("{.cls ", bad_cls, "}"), ", but should be of class ", paste0("{.cls ", exp_cls, "}"), call = .call_depth)
|
stop_("{.arg reference_data} must be the same structure as the {.help [clinical_breakpoints](AMR::clinical_breakpoints)} data set. Column ", paste0("{.field ", font_bold(bad_col, collapse = NULL), "}"), " is of class ", paste0("{.cls ", bad_cls, "}"), ", but should be of class ", paste0("{.cls ", exp_cls, "}"), call = .call_depth)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
reference_data
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -532,32 +532,31 @@ test_that("test-sir.R", {
|
|||||||
|
|
||||||
# issue #239 — custom reference_data support
|
# issue #239 — custom reference_data support
|
||||||
test_that("custom reference_data: non-EUCAST/CLSI guideline produces R", {
|
test_that("custom reference_data: non-EUCAST/CLSI guideline produces R", {
|
||||||
# Take the first MIC/human row (B_ACHRMB_XYLS / MEM) as a template.
|
# Build a minimal one-row custom breakpoint table from a plain data.frame.
|
||||||
# Only override guideline and breakpoints; keep mo/ab as <mo>/<ab> class objects.
|
# coerce_reference_data_columns() will coerce mo/ab to the right class.
|
||||||
my_bp <- clinical_breakpoints[clinical_breakpoints$method == "MIC" &
|
my_bp <- clinical_breakpoints[clinical_breakpoints$method == "MIC" &
|
||||||
clinical_breakpoints$type == "human", ][1, ]
|
clinical_breakpoints$type == "human", ][1, ]
|
||||||
my_bp$guideline <- "MyLab 2025"
|
my_bp$guideline <- "MyLab 2025"
|
||||||
|
my_bp$mo <- "B_ACHRMB_XYLS" # plain character — coerced to <mo>
|
||||||
|
my_bp$ab <- "MEM" # plain character — coerced to <ab>
|
||||||
my_bp$breakpoint_S <- 8
|
my_bp$breakpoint_S <- 8
|
||||||
my_bp$breakpoint_R <- 32
|
my_bp$breakpoint_R <- 32
|
||||||
|
|
||||||
mo_val <- as.character(my_bp$mo) # "B_ACHRMB_XYLS"
|
|
||||||
ab_val <- as.character(my_bp$ab) # "MEM"
|
|
||||||
|
|
||||||
# guideline omitted: all rows in reference_data are used; R via open interval (>)
|
# guideline omitted: all rows in reference_data are used; R via open interval (>)
|
||||||
expect_equal(as.character(suppressMessages(
|
expect_equal(as.character(suppressMessages(
|
||||||
as.sir(as.mic(64), mo = mo_val, ab = ab_val, reference_data = my_bp)
|
as.sir(as.mic(64), mo = "B_ACHRMB_XYLS", ab = "MEM", reference_data = my_bp)
|
||||||
)), "R")
|
)), "R")
|
||||||
expect_equal(as.character(suppressMessages(
|
expect_equal(as.character(suppressMessages(
|
||||||
as.sir(as.mic(16), mo = mo_val, ab = ab_val, reference_data = my_bp)
|
as.sir(as.mic(16), mo = "B_ACHRMB_XYLS", ab = "MEM", reference_data = my_bp)
|
||||||
)), "I")
|
)), "I")
|
||||||
# at R breakpoint value must be I (open interval: > not >=)
|
# at R breakpoint value must be I (open interval: > not >=)
|
||||||
expect_equal(as.character(suppressMessages(
|
expect_equal(as.character(suppressMessages(
|
||||||
as.sir(as.mic(32), mo = mo_val, ab = ab_val, reference_data = my_bp)
|
as.sir(as.mic(32), mo = "B_ACHRMB_XYLS", ab = "MEM", reference_data = my_bp)
|
||||||
)), "I")
|
)), "I")
|
||||||
|
|
||||||
# guideline explicitly set: same result when it matches the data
|
# guideline explicitly set: same result when it matches the data
|
||||||
expect_equal(as.character(suppressMessages(
|
expect_equal(as.character(suppressMessages(
|
||||||
as.sir(as.mic(64), mo = mo_val, ab = ab_val,
|
as.sir(as.mic(64), mo = "B_ACHRMB_XYLS", ab = "MEM",
|
||||||
guideline = "MyLab 2025", reference_data = my_bp)
|
guideline = "MyLab 2025", reference_data = my_bp)
|
||||||
)), "R")
|
)), "R")
|
||||||
})
|
})
|
||||||
@@ -566,17 +565,16 @@ test_that("custom reference_data: host = NA acts as host-agnostic fallback", {
|
|||||||
my_bp <- clinical_breakpoints[clinical_breakpoints$method == "MIC" &
|
my_bp <- clinical_breakpoints[clinical_breakpoints$method == "MIC" &
|
||||||
clinical_breakpoints$type == "human", ][1, ]
|
clinical_breakpoints$type == "human", ][1, ]
|
||||||
my_bp$guideline <- "MyLab 2025"
|
my_bp$guideline <- "MyLab 2025"
|
||||||
|
my_bp$mo <- "B_ACHRMB_XYLS"
|
||||||
|
my_bp$ab <- "MEM"
|
||||||
my_bp$type <- "animal"
|
my_bp$type <- "animal"
|
||||||
my_bp$host <- NA_character_ # must stay character class, not logical NA
|
my_bp$host <- NA # logical NA — coerced to character by coerce_reference_data_columns()
|
||||||
my_bp$breakpoint_S <- 8
|
my_bp$breakpoint_S <- 8
|
||||||
my_bp$breakpoint_R <- 32
|
my_bp$breakpoint_R <- 32
|
||||||
|
|
||||||
mo_val <- as.character(my_bp$mo)
|
# NA host should match when no species-specific row exists
|
||||||
ab_val <- as.character(my_bp$ab)
|
|
||||||
|
|
||||||
# NA host should match when no species-specific row exists; guideline omitted
|
|
||||||
result <- suppressMessages(
|
result <- suppressMessages(
|
||||||
as.sir(as.mic(64), mo = mo_val, ab = ab_val,
|
as.sir(as.mic(64), mo = "B_ACHRMB_XYLS", ab = "MEM",
|
||||||
host = "dogs", breakpoint_type = "animal", reference_data = my_bp)
|
host = "dogs", breakpoint_type = "animal", reference_data = my_bp)
|
||||||
)
|
)
|
||||||
expect_equal(as.character(result), "R")
|
expect_equal(as.character(result), "R")
|
||||||
|
|||||||
Reference in New Issue
Block a user