1
0
mirror of https://github.com/msberends/AMR.git synced 2025-07-20 00:33:26 +02:00

(v3.0.0.9007) allow any tidyselect language in as.sir()

This commit is contained in:
2025-07-17 14:29:35 +02:00
parent 0138e33ce9
commit 8dab0a3730
16 changed files with 123 additions and 113 deletions

View File

@ -1,6 +1,6 @@
Package: AMR
Version: 3.0.0.9004
Date: 2025-06-13
Version: 3.0.0.9007
Date: 2025-07-17
Title: Antimicrobial Resistance Data Analysis
Description: Functions to simplify and standardise antimicrobial resistance (AMR)
data analysis and to work with microbial and antimicrobial properties by

View File

@ -1,15 +1,18 @@
# AMR 3.0.0.9004
# AMR 3.0.0.9007
This is primarily a bugfix release, though we added one nice feature too.
### New
* Integration with the **tidymodels** framework to allow seamless use of MIC and SIR data in modelling pipelines via `recipes`
- `step_mic_log2()` to transform `<mic>` columns with log2, and `step_sir_numeric()` to convert `<sir>` columns to numeric
- `tidyselect` helpers: `all_mic()`, `all_mic_predictors()`, `all_sir()`, `all_sir_predictors()`
- Enables seamless use of MIC and SIR data in modelling pipelines via `recipes`
- New `tidyselect` helpers: `all_mic()`, `all_mic_predictors()`, `all_sir()`, `all_sir_predictors()`
### Changed
* Fixed a bug in `antibiogram()` for when no antimicrobials are set
* Fixed a bug in `antibiogram()` to allow column names containing the `+` character (#222)
* Fixed a bug in `as.ab()` for antimicrobial codes with a number in it if they are preceded by a space
* Fixed a bug in `eucast_rules()` for using specific custom rules
* Fixed a bug in `as.sir()` to allow any tidyselect language (#220)
* Fixed some specific Dutch translations for antimicrobials
* Updated `random_mic()` and `random_disk()` to set skewedness of the distribution and allow multiple microorganisms

View File

@ -63,31 +63,6 @@ pm_left_join <- function(x, y, by = NULL, suffix = c(".x", ".y")) {
merged
}
# support where() like tidyverse (this function will also be used when running `antibiogram()`):
where <- function(fn) {
# based on https://github.com/nathaneastwood/poorman/blob/52eb6947e0b4430cd588976ed8820013eddf955f/R/where.R#L17-L32
if (!is.function(fn)) {
stop_("`", deparse(substitute(fn)), "()` is not a valid predicate function.")
}
df <- pm_select_env$.data
cols <- pm_select_env$get_colnames()
if (is.null(df)) {
df <- get_current_data("where", call = FALSE)
cols <- colnames(df)
}
preds <- unlist(lapply(
df,
function(x, fn) {
do.call("fn", list(x))
},
fn
))
if (!is.logical(preds)) stop_("`where()` must be used with functions that return `TRUE` or `FALSE`.")
data_cols <- cols
cols <- data_cols[preds]
which(data_cols %in% cols)
}
# copied and slightly rewritten from {poorman} under permissive license (2021-10-15)
# https://github.com/nathaneastwood/poorman, MIT licensed, Nathan Eastwood, 2020
case_when_AMR <- function(...) {
@ -1636,6 +1611,36 @@ get_n_cores <- function(max_cores = Inf) {
n_cores
}
# Support `where()` if tidyselect not installed ----
if (!is.null(import_fn("where", "tidyselect", error_on_fail = FALSE))) {
# tidyselect::where() exists, load the namespace to make `where()`s work across the package in default arguments
loadNamespace("tidyselect")
} else {
where <- function(fn) {
# based on https://github.com/nathaneastwood/poorman/blob/52eb6947e0b4430cd588976ed8820013eddf955f/R/where.R#L17-L32
if (!is.function(fn)) {
stop_("`", deparse(substitute(fn)), "()` is not a valid predicate function.")
}
df <- pm_select_env$.data
cols <- pm_select_env$get_colnames()
if (is.null(df)) {
df <- get_current_data("where", call = FALSE)
cols <- colnames(df)
}
preds <- unlist(lapply(
df,
function(x, fn) {
do.call("fn", list(x))
},
fn
))
if (!is.logical(preds)) stop_("`where()` must be used with functions that return `TRUE` or `FALSE`.")
data_cols <- cols
cols <- data_cols[preds]
which(data_cols %in% cols)
}
}
# Faster data.table implementations ----
match <- function(x, table, ...) {
@ -1655,52 +1660,6 @@ match <- function(x, table, ...) {
}
}
# nolint start
# Register S3 methods ----
# copied from vctrs::s3_register by their permission:
# https://github.com/r-lib/vctrs/blob/05968ce8e669f73213e3e894b5f4424af4f46316/R/register-s3.R
s3_register <- function(generic, class, method = NULL) {
stopifnot(is.character(generic), length(generic) == 1)
stopifnot(is.character(class), length(class) == 1)
pieces <- strsplit(generic, "::")[[1]]
stopifnot(length(pieces) == 2)
package <- pieces[[1]]
generic <- pieces[[2]]
caller <- parent.frame()
get_method_env <- function() {
top <- topenv(caller)
if (isNamespace(top)) {
asNamespace(environmentName(top))
} else {
caller
}
}
get_method <- function(method, env) {
if (is.null(method)) {
get(paste0(generic, ".", class), envir = get_method_env())
} else {
method
}
}
method_fn <- get_method(method)
stopifnot(is.function(method_fn))
setHook(packageEvent(package, "onLoad"), function(...) {
ns <- asNamespace(package)
method_fn <- get_method(method)
registerS3method(generic, class, method_fn, envir = ns)
})
if (!isNamespaceLoaded(package)) {
return(invisible())
}
envir <- asNamespace(package)
if (exists(generic, envir)) {
registerS3method(generic, class, method_fn, envir = envir)
}
invisible()
}
# Support old R versions ----
# these functions were not available in previous versions of R
# see here for the full list: https://github.com/r-lib/backports

View File

@ -952,7 +952,19 @@ pm_select_env$get_nrow <- function() nrow(pm_select_env$.data)
pm_select_env$get_ncol <- function() ncol(pm_select_env$.data)
pm_select <- function(.data, ...) {
col_pos <- pm_select_positions(.data, ..., .group_pos = TRUE)
# col_pos <- pm_select_positions(.data, ..., .group_pos = TRUE),
col_pos <- tryCatch(pm_select_positions(.data, ..., .group_pos = TRUE), error = function(e) NULL)
if (is.null(col_pos)) {
# try with tidyverse
select_dplyr <- import_fn("select", "dplyr", error_on_fail = FALSE)
if (!is.null(select_dplyr)) {
col_pos <- which(colnames(.data) %in% colnames(select_dplyr(.data, ...)))
} else {
# this will throw an error as it did, but dplyr is not available, so no other option
col_pos <- pm_select_positions(.data, ..., .group_pos = TRUE)
}
}
map_names <- names(col_pos)
map_names_length <- nchar(map_names)
if (any(map_names_length == 0L)) {

3
R/ab.R
View File

@ -184,7 +184,8 @@ as.ab <- function(x, flag_multiple_results = TRUE, language = get_AMR_locale(),
x_new[known_codes_cid] <- AMR_env$AB_lookup$ab[match(x[known_codes_cid], AMR_env$AB_lookup$cid)]
previously_coerced <- x %in% AMR_env$ab_previously_coerced$x
x_new[previously_coerced & is.na(x_new)] <- AMR_env$ab_previously_coerced$ab[match(x[is.na(x_new) & x %in% AMR_env$ab_previously_coerced$x], AMR_env$ab_previously_coerced$x)]
if (any(previously_coerced) && isTRUE(info) && message_not_thrown_before("as.ab", entire_session = TRUE)) {
previously_coerced_mention <- x %in% AMR_env$ab_previously_coerced$x & !x %in% AMR_env$AB_lookup$ab & !x %in% AMR_env$AB_lookup$generalised_name
if (any(previously_coerced_mention) && isTRUE(info) && message_not_thrown_before("as.ab", entire_session = TRUE)) {
message_(
"Returning previously coerced ",
ifelse(length(unique(which(x[which(previously_coerced)] %in% x_bak_clean))) > 1, "value for an antimicrobial", "values for various antimicrobials"),

View File

@ -576,6 +576,15 @@ antibiogram.default <- function(x,
}
antimicrobials <- unlist(antimicrobials)
} else {
existing_ab_combined_cols <- ab_trycatch[ab_trycatch %like% "[+]" & ab_trycatch %in% colnames(x)]
if (length(existing_ab_combined_cols) > 0 && !is.null(ab_transform)) {
ab_transform <- NULL
warning_(
"Detected column name(s) containing the '+' character, which conflicts with the expected syntax in `antibiogram()`: the '+' is used to combine separate antimicrobial agent columns (e.g., \"AMP+GEN\").\n\n",
"To avoid incorrectly guessing which antimicrobials this represents, `ab_transform` was automatically set to `NULL`.\n\n",
"If this is unintended, please rename the column(s) to avoid using '+' in the name, or set `ab_transform = NULL` explicitly to suppress this message."
)
}
antimicrobials <- ab_trycatch
}

View File

@ -31,7 +31,7 @@
#'
#' Calculates a normalised mean for antimicrobial resistance between multiple observations, to help to identify similar isolates without comparing antibiograms by hand.
#' @param x A vector of class [sir][as.sir()], [mic][as.mic()] or [disk][as.disk()], or a [data.frame] containing columns of any of these classes.
#' @param ... Variables to select. Supports [tidyselect language][tidyselect::language] (such as `column1:column4` and `where(is.mic)`), and can thus also be [antimicrobial selectors][amr_selector()].
#' @param ... Variables to select. Supports [tidyselect language][tidyselect::starts_with()] such as `where(is.mic)`, `starts_with(...)`, or `column1:column4`, and can thus also be [antimicrobial selectors][amr_selector()].
#' @param combine_SI A [logical] to indicate whether all values of S, SDD, and I must be merged into one, so the input only consists of S+I vs. R (susceptible vs. resistant) - the default is `TRUE`.
#' @details The mean AMR distance is effectively [the Z-score](https://en.wikipedia.org/wiki/Standard_score); a normalised numeric value to compare AMR test results which can help to identify similar isolates, without comparing antibiograms by hand.
#'

27
R/sir.R
View File

@ -69,7 +69,9 @@
#' @param reference_data A [data.frame] to be used for interpretation, which defaults to the [clinical_breakpoints] data set. Changing this argument allows for using own interpretation guidelines. This argument must contain a data set that is equal in structure to the [clinical_breakpoints] data set (same column names and column types). Please note that the `guideline` argument will be ignored when `reference_data` is manually set.
#' @param threshold Maximum fraction of invalid antimicrobial interpretations of `x`, see *Examples*.
#' @param conserve_capped_values Deprecated, use `capped_mic_handling` instead.
#' @param ... For using on a [data.frame]: names of columns to apply [as.sir()] on (supports tidy selection such as `column1:column4`). Otherwise: arguments passed on to methods.
#' @param ... For using on a [data.frame]: selection of columns to apply `as.sir()` to. Supports [tidyselect language][tidyselect::starts_with()] such as `where(is.mic)`, `starts_with(...)`, or `column1:column4`, and can thus also be [antimicrobial selectors][amr_selector()] such as `as.sir(df, penicillins())`.
#'
#' Otherwise: arguments passed on to methods.
#' @details
#' *Note: The clinical breakpoints in this package were validated through, and imported from, [WHONET](https://whonet.org). The public use of this `AMR` package has been endorsed by both CLSI and EUCAST. See [clinical_breakpoints] for more information.*
#'
@ -225,9 +227,12 @@
#' df_wide %>% mutate_if(is.mic, as.sir)
#' df_wide %>% mutate_if(function(x) is.mic(x) | is.disk(x), as.sir)
#' df_wide %>% mutate(across(where(is.mic), as.sir))
#'
#' df_wide %>% mutate_at(vars(amoxicillin:tobra), as.sir)
#' df_wide %>% mutate(across(amoxicillin:tobra, as.sir))
#'
#' df_wide %>% mutate(across(aminopenicillins(), as.sir))
#'
#' # approaches that all work with additional arguments:
#' df_long %>%
#' # given a certain data type, e.g. MIC values
@ -722,8 +727,17 @@ as.sir.data.frame <- function(x,
meet_criteria(info, allow_class = "logical", has_length = 1)
meet_criteria(parallel, allow_class = "logical", has_length = 1)
meet_criteria(max_cores, allow_class = c("numeric", "integer"), has_length = 1)
x.bak <- x
if (tryCatch(length(list(...)) > 0, error = function(e) TRUE)) {
sel <- colnames(pm_select(x, ...))
} else {
sel <- colnames(x)
}
if (!is.null(col_mo)) {
sel <- sel[sel != col_mo]
}
for (i in seq_len(ncol(x))) {
# don't keep factors, overwriting them is hard
if (is.factor(x[, i, drop = TRUE])) {
@ -803,15 +817,6 @@ as.sir.data.frame <- function(x,
}
i <- 0
if (tryCatch(length(list(...)) > 0, error = function(e) TRUE)) {
sel <- colnames(pm_select(x, ...))
} else {
sel <- colnames(x)
}
if (!is.null(col_mo)) {
sel <- sel[sel != col_mo]
}
ab_cols <- colnames(x)[vapply(FUN.VALUE = logical(1), x, function(y) {
i <<- i + 1
check <- is.mic(y) | is.disk(y)

Binary file not shown.

View File

@ -30,7 +30,6 @@
# These are all S3 implementations for the vctrs package,
# that is used internally by tidyverse packages such as dplyr.
# They are to convert AMR-specific classes to bare characters and integers.
# All of them will be exported using s3_register() in R/zzz.R when loading the package.
# see https://github.com/tidyverse/dplyr/issues/5955 why this is required

View File

@ -108,3 +108,18 @@ writeLines(contents, "R/aa_helper_pm_functions.R")
# note: pm_left_join() will be overwritten by aaa_helper_functions.R, which contains a faster implementation
# replace `res <- as.data.frame(res)` with `res <- as.data.frame(res, stringsAsFactors = FALSE)`
# after running, pm_select must be altered. The line:
# col_pos <- pm_select_positions(.data, ..., .group_pos = TRUE)
# ... must be replaced with this to support tidyselect functionality such as `starts_with()`:
# col_pos <- tryCatch(pm_select_positions(.data, ..., .group_pos = TRUE), error = function(e) NULL)
# if (is.null(col_pos)) {
# # try with tidyverse
# select_dplyr <- import_fn("select", "dplyr", error_on_fail = FALSE)
# if (!is.null(select_dplyr)) {
# col_pos <- which(colnames(.data) %in% colnames(select_dplyr(.data, ...)))
# } else {
# # this will throw an error as it did, but dplyr is not available, so no other option
# col_pos <- pm_select_positions(.data, ..., .group_pos = TRUE)
# }
# }

Binary file not shown.

View File

@ -27,12 +27,12 @@
<p style="text-align:left; width: 50%;">
<small><a href="https://amr-for-r.org/">https://amr-for-r.org</a></small>
<small><a href="https://amr-for-r.org/">amr-for-r.org</a></small>
</p>
<p style="text-align:right; width: 50%;">
<small><a href="https://doi.org/10.18637/jss.v104.i03" target="_blank">https://doi.org/10.18637/jss.v104.i03</a></small>
<small><a href="https://doi.org/10.18637/jss.v104.i03" target="_blank">doi.org/10.18637/jss.v104.i03</a></small>
</p>
</div>
@ -321,9 +321,9 @@ example_isolates %>%
#> # A tibble: 3 × 5
#> ward GEN_total_R GEN_conf_int TOB_total_R TOB_conf_int
#> <chr> <dbl> <chr> <dbl> <chr>
#> 1 Clinical 0.2289362 0.205-0.254 0.3147503 0.284-0.347
#> 2 ICU 0.2902655 0.253-0.33 0.4004739 0.353-0.449
#> 3 Outpatient 0.2 0.131-0.285 0.3676471 0.254-0.493
#> 1 Clinical 0.229 0.205-0.254 0.315 0.284-0.347
#> 2 ICU 0.290 0.253-0.33 0.400 0.353-0.449
#> 3 Outpatient 0.2 0.131-0.285 0.368 0.254-0.493
```
Or use [antimicrobial
@ -351,33 +351,33 @@ out <- example_isolates %>%
#> "Outpatient" (minimum = 30).
out
#> # A tibble: 3 × 6
#> ward GEN TOB AMK KAN COL
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 Clinical 0.2289362 0.3147503 0.6258993 1 0.7802956
#> 2 ICU 0.2902655 0.4004739 0.6624473 1 0.8574144
#> 3 Outpatient 0.2 0.3676471 0.6052632 NA 0.8888889
#> ward GEN TOB AMK KAN COL
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 Clinical 0.229 0.315 0.626 1 0.780
#> 2 ICU 0.290 0.400 0.662 1 0.857
#> 3 Outpatient 0.2 0.368 0.605 NA 0.889
```
``` r
# transform the antibiotic columns to names:
out %>% set_ab_names()
#> # A tibble: 3 × 6
#> ward gentamicin tobramycin amikacin kanamycin colistin
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 Clinical 0.2289362 0.3147503 0.6258993 1 0.7802956
#> 2 ICU 0.2902655 0.4004739 0.6624473 1 0.8574144
#> 3 Outpatient 0.2 0.3676471 0.6052632 NA 0.8888889
#> ward gentamicin tobramycin amikacin kanamycin colistin
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 Clinical 0.229 0.315 0.626 1 0.780
#> 2 ICU 0.290 0.400 0.662 1 0.857
#> 3 Outpatient 0.2 0.368 0.605 NA 0.889
```
``` r
# transform the antibiotic column to ATC codes:
out %>% set_ab_names(property = "atc")
#> # A tibble: 3 × 6
#> ward J01GB03 J01GB01 J01GB06 J01GB04 J01XB01
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 Clinical 0.2289362 0.3147503 0.6258993 1 0.7802956
#> 2 ICU 0.2902655 0.4004739 0.6624473 1 0.8574144
#> 3 Outpatient 0.2 0.3676471 0.6052632 NA 0.8888889
#> ward J01GB03 J01GB01 J01GB06 J01GB04 J01XB01
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 Clinical 0.229 0.315 0.626 1 0.780
#> 2 ICU 0.290 0.400 0.662 1 0.857
#> 3 Outpatient 0.2 0.368 0.605 NA 0.889
```
## What else can you do with this package?

View File

@ -75,7 +75,9 @@ sir_interpretation_history(clean = FALSE)
\arguments{
\item{x}{Vector of values (for class \code{\link{mic}}: MIC values in mg/L, for class \code{\link{disk}}: a disk diffusion radius in millimetres).}
\item{...}{For using on a \link{data.frame}: names of columns to apply \code{\link[=as.sir]{as.sir()}} on (supports tidy selection such as \code{column1:column4}). Otherwise: arguments passed on to methods.}
\item{...}{For using on a \link{data.frame}: selection of columns to apply \code{as.sir()} to. Supports \link[tidyselect:starts_with]{tidyselect language} such as \code{where(is.mic)}, \code{starts_with(...)}, or \code{column1:column4}, and can thus also be \link[=amr_selector]{antimicrobial selectors} such as \code{as.sir(df, penicillins())}.
Otherwise: arguments passed on to methods.}
\item{threshold}{Maximum fraction of invalid antimicrobial interpretations of \code{x}, see \emph{Examples}.}
@ -314,9 +316,12 @@ if (require("dplyr")) {
df_wide \%>\% mutate_if(is.mic, as.sir)
df_wide \%>\% mutate_if(function(x) is.mic(x) | is.disk(x), as.sir)
df_wide \%>\% mutate(across(where(is.mic), as.sir))
df_wide \%>\% mutate_at(vars(amoxicillin:tobra), as.sir)
df_wide \%>\% mutate(across(amoxicillin:tobra, as.sir))
df_wide \%>\% mutate(across(aminopenicillins(), as.sir))
# approaches that all work with additional arguments:
df_long \%>\%
# given a certain data type, e.g. MIC values

View File

@ -18,7 +18,7 @@ amr_distance_from_row(amr_distance, row)
\arguments{
\item{x}{A vector of class \link[=as.sir]{sir}, \link[=as.mic]{mic} or \link[=as.disk]{disk}, or a \link{data.frame} containing columns of any of these classes.}
\item{...}{Variables to select. Supports \link[tidyselect:language]{tidyselect language} (such as \code{column1:column4} and \code{where(is.mic)}), and can thus also be \link[=amr_selector]{antimicrobial selectors}.}
\item{...}{Variables to select. Supports \link[tidyselect:starts_with]{tidyselect language} such as \code{where(is.mic)}, \code{starts_with(...)}, or \code{column1:column4}, and can thus also be \link[=amr_selector]{antimicrobial selectors}.}
\item{combine_SI}{A \link{logical} to indicate whether all values of S, SDD, and I must be merged into one, so the input only consists of S+I vs. R (susceptible vs. resistant) - the default is \code{TRUE}.}

View File

@ -63,10 +63,12 @@ test_that("test-zzz.R", {
"progress_bar" = "progress",
"read_html" = "xml2",
"right_join" = "dplyr",
"select" = "dplyr",
"semi_join" = "dplyr",
"showQuestion" = "rstudioapi",
"symbol" = "cli",
"tibble" = "tibble",
"where" = "tidyselect",
"write.xlsx" = "openxlsx"
)