diff --git a/DESCRIPTION b/DESCRIPTION index 621ef0f22..127806a41 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR -Version: 3.0.0.9004 -Date: 2025-06-13 +Version: 3.0.0.9007 +Date: 2025-07-17 Title: Antimicrobial Resistance Data Analysis Description: Functions to simplify and standardise antimicrobial resistance (AMR) data analysis and to work with microbial and antimicrobial properties by diff --git a/NEWS.md b/NEWS.md index 3b3c72638..be8df5fd3 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,15 +1,18 @@ -# AMR 3.0.0.9004 +# AMR 3.0.0.9007 + +This is primarily a bugfix release, though we added one nice feature too. ### New * Integration with the **tidymodels** framework to allow seamless use of MIC and SIR data in modelling pipelines via `recipes` - `step_mic_log2()` to transform `` columns with log2, and `step_sir_numeric()` to convert `` columns to numeric - - `tidyselect` helpers: `all_mic()`, `all_mic_predictors()`, `all_sir()`, `all_sir_predictors()` - - Enables seamless use of MIC and SIR data in modelling pipelines via `recipes` + - New `tidyselect` helpers: `all_mic()`, `all_mic_predictors()`, `all_sir()`, `all_sir_predictors()` ### Changed * Fixed a bug in `antibiogram()` for when no antimicrobials are set +* Fixed a bug in `antibiogram()` to allow column names containing the `+` character (#222) * Fixed a bug in `as.ab()` for antimicrobial codes with a number in it if they are preceded by a space * Fixed a bug in `eucast_rules()` for using specific custom rules +* Fixed a bug in `as.sir()` to allow any tidyselect language (#220) * Fixed some specific Dutch translations for antimicrobials * Updated `random_mic()` and `random_disk()` to set skewedness of the distribution and allow multiple microorganisms diff --git a/R/aa_helper_functions.R b/R/aa_helper_functions.R index 2aa98cb43..a9c816638 100644 --- a/R/aa_helper_functions.R +++ b/R/aa_helper_functions.R @@ -63,31 +63,6 @@ pm_left_join <- function(x, y, by = NULL, suffix = c(".x", ".y")) { merged } -# support where() like tidyverse (this function will also be used when running `antibiogram()`): -where <- function(fn) { - # based on https://github.com/nathaneastwood/poorman/blob/52eb6947e0b4430cd588976ed8820013eddf955f/R/where.R#L17-L32 - if (!is.function(fn)) { - stop_("`", deparse(substitute(fn)), "()` is not a valid predicate function.") - } - df <- pm_select_env$.data - cols <- pm_select_env$get_colnames() - if (is.null(df)) { - df <- get_current_data("where", call = FALSE) - cols <- colnames(df) - } - preds <- unlist(lapply( - df, - function(x, fn) { - do.call("fn", list(x)) - }, - fn - )) - if (!is.logical(preds)) stop_("`where()` must be used with functions that return `TRUE` or `FALSE`.") - data_cols <- cols - cols <- data_cols[preds] - which(data_cols %in% cols) -} - # copied and slightly rewritten from {poorman} under permissive license (2021-10-15) # https://github.com/nathaneastwood/poorman, MIT licensed, Nathan Eastwood, 2020 case_when_AMR <- function(...) { @@ -1636,6 +1611,36 @@ get_n_cores <- function(max_cores = Inf) { n_cores } +# Support `where()` if tidyselect not installed ---- +if (!is.null(import_fn("where", "tidyselect", error_on_fail = FALSE))) { + # tidyselect::where() exists, load the namespace to make `where()`s work across the package in default arguments + loadNamespace("tidyselect") +} else { + where <- function(fn) { + # based on https://github.com/nathaneastwood/poorman/blob/52eb6947e0b4430cd588976ed8820013eddf955f/R/where.R#L17-L32 + if (!is.function(fn)) { + stop_("`", deparse(substitute(fn)), "()` is not a valid predicate function.") + } + df <- pm_select_env$.data + cols <- pm_select_env$get_colnames() + if (is.null(df)) { + df <- get_current_data("where", call = FALSE) + cols <- colnames(df) + } + preds <- unlist(lapply( + df, + function(x, fn) { + do.call("fn", list(x)) + }, + fn + )) + if (!is.logical(preds)) stop_("`where()` must be used with functions that return `TRUE` or `FALSE`.") + data_cols <- cols + cols <- data_cols[preds] + which(data_cols %in% cols) + } +} + # Faster data.table implementations ---- match <- function(x, table, ...) { @@ -1655,52 +1660,6 @@ match <- function(x, table, ...) { } } -# nolint start - -# Register S3 methods ---- -# copied from vctrs::s3_register by their permission: -# https://github.com/r-lib/vctrs/blob/05968ce8e669f73213e3e894b5f4424af4f46316/R/register-s3.R -s3_register <- function(generic, class, method = NULL) { - stopifnot(is.character(generic), length(generic) == 1) - stopifnot(is.character(class), length(class) == 1) - pieces <- strsplit(generic, "::")[[1]] - stopifnot(length(pieces) == 2) - package <- pieces[[1]] - generic <- pieces[[2]] - caller <- parent.frame() - get_method_env <- function() { - top <- topenv(caller) - if (isNamespace(top)) { - asNamespace(environmentName(top)) - } else { - caller - } - } - get_method <- function(method, env) { - if (is.null(method)) { - get(paste0(generic, ".", class), envir = get_method_env()) - } else { - method - } - } - method_fn <- get_method(method) - stopifnot(is.function(method_fn)) - setHook(packageEvent(package, "onLoad"), function(...) { - ns <- asNamespace(package) - method_fn <- get_method(method) - registerS3method(generic, class, method_fn, envir = ns) - }) - if (!isNamespaceLoaded(package)) { - return(invisible()) - } - envir <- asNamespace(package) - if (exists(generic, envir)) { - registerS3method(generic, class, method_fn, envir = envir) - } - invisible() -} - - # Support old R versions ---- # these functions were not available in previous versions of R # see here for the full list: https://github.com/r-lib/backports diff --git a/R/aa_helper_pm_functions.R b/R/aa_helper_pm_functions.R index 6d822b18d..93a9a2236 100755 --- a/R/aa_helper_pm_functions.R +++ b/R/aa_helper_pm_functions.R @@ -952,7 +952,19 @@ pm_select_env$get_nrow <- function() nrow(pm_select_env$.data) pm_select_env$get_ncol <- function() ncol(pm_select_env$.data) pm_select <- function(.data, ...) { - col_pos <- pm_select_positions(.data, ..., .group_pos = TRUE) + # col_pos <- pm_select_positions(.data, ..., .group_pos = TRUE), + col_pos <- tryCatch(pm_select_positions(.data, ..., .group_pos = TRUE), error = function(e) NULL) + if (is.null(col_pos)) { + # try with tidyverse + select_dplyr <- import_fn("select", "dplyr", error_on_fail = FALSE) + if (!is.null(select_dplyr)) { + col_pos <- which(colnames(.data) %in% colnames(select_dplyr(.data, ...))) + } else { + # this will throw an error as it did, but dplyr is not available, so no other option + col_pos <- pm_select_positions(.data, ..., .group_pos = TRUE) + } + } + map_names <- names(col_pos) map_names_length <- nchar(map_names) if (any(map_names_length == 0L)) { diff --git a/R/ab.R b/R/ab.R index 07102f12f..0660533f2 100755 --- a/R/ab.R +++ b/R/ab.R @@ -184,7 +184,8 @@ as.ab <- function(x, flag_multiple_results = TRUE, language = get_AMR_locale(), x_new[known_codes_cid] <- AMR_env$AB_lookup$ab[match(x[known_codes_cid], AMR_env$AB_lookup$cid)] previously_coerced <- x %in% AMR_env$ab_previously_coerced$x x_new[previously_coerced & is.na(x_new)] <- AMR_env$ab_previously_coerced$ab[match(x[is.na(x_new) & x %in% AMR_env$ab_previously_coerced$x], AMR_env$ab_previously_coerced$x)] - if (any(previously_coerced) && isTRUE(info) && message_not_thrown_before("as.ab", entire_session = TRUE)) { + previously_coerced_mention <- x %in% AMR_env$ab_previously_coerced$x & !x %in% AMR_env$AB_lookup$ab & !x %in% AMR_env$AB_lookup$generalised_name + if (any(previously_coerced_mention) && isTRUE(info) && message_not_thrown_before("as.ab", entire_session = TRUE)) { message_( "Returning previously coerced ", ifelse(length(unique(which(x[which(previously_coerced)] %in% x_bak_clean))) > 1, "value for an antimicrobial", "values for various antimicrobials"), diff --git a/R/antibiogram.R b/R/antibiogram.R index 42eef9f4b..fa099bb00 100755 --- a/R/antibiogram.R +++ b/R/antibiogram.R @@ -576,6 +576,15 @@ antibiogram.default <- function(x, } antimicrobials <- unlist(antimicrobials) } else { + existing_ab_combined_cols <- ab_trycatch[ab_trycatch %like% "[+]" & ab_trycatch %in% colnames(x)] + if (length(existing_ab_combined_cols) > 0 && !is.null(ab_transform)) { + ab_transform <- NULL + warning_( + "Detected column name(s) containing the '+' character, which conflicts with the expected syntax in `antibiogram()`: the '+' is used to combine separate antimicrobial agent columns (e.g., \"AMP+GEN\").\n\n", + "To avoid incorrectly guessing which antimicrobials this represents, `ab_transform` was automatically set to `NULL`.\n\n", + "If this is unintended, please rename the column(s) to avoid using '+' in the name, or set `ab_transform = NULL` explicitly to suppress this message." + ) + } antimicrobials <- ab_trycatch } diff --git a/R/mean_amr_distance.R b/R/mean_amr_distance.R index 96b9c99cd..08611a63a 100755 --- a/R/mean_amr_distance.R +++ b/R/mean_amr_distance.R @@ -31,7 +31,7 @@ #' #' Calculates a normalised mean for antimicrobial resistance between multiple observations, to help to identify similar isolates without comparing antibiograms by hand. #' @param x A vector of class [sir][as.sir()], [mic][as.mic()] or [disk][as.disk()], or a [data.frame] containing columns of any of these classes. -#' @param ... Variables to select. Supports [tidyselect language][tidyselect::language] (such as `column1:column4` and `where(is.mic)`), and can thus also be [antimicrobial selectors][amr_selector()]. +#' @param ... Variables to select. Supports [tidyselect language][tidyselect::starts_with()] such as `where(is.mic)`, `starts_with(...)`, or `column1:column4`, and can thus also be [antimicrobial selectors][amr_selector()]. #' @param combine_SI A [logical] to indicate whether all values of S, SDD, and I must be merged into one, so the input only consists of S+I vs. R (susceptible vs. resistant) - the default is `TRUE`. #' @details The mean AMR distance is effectively [the Z-score](https://en.wikipedia.org/wiki/Standard_score); a normalised numeric value to compare AMR test results which can help to identify similar isolates, without comparing antibiograms by hand. #' diff --git a/R/sir.R b/R/sir.R index a0647a4b4..a0ad525bd 100755 --- a/R/sir.R +++ b/R/sir.R @@ -69,7 +69,9 @@ #' @param reference_data A [data.frame] to be used for interpretation, which defaults to the [clinical_breakpoints] data set. Changing this argument allows for using own interpretation guidelines. This argument must contain a data set that is equal in structure to the [clinical_breakpoints] data set (same column names and column types). Please note that the `guideline` argument will be ignored when `reference_data` is manually set. #' @param threshold Maximum fraction of invalid antimicrobial interpretations of `x`, see *Examples*. #' @param conserve_capped_values Deprecated, use `capped_mic_handling` instead. -#' @param ... For using on a [data.frame]: names of columns to apply [as.sir()] on (supports tidy selection such as `column1:column4`). Otherwise: arguments passed on to methods. +#' @param ... For using on a [data.frame]: selection of columns to apply `as.sir()` to. Supports [tidyselect language][tidyselect::starts_with()] such as `where(is.mic)`, `starts_with(...)`, or `column1:column4`, and can thus also be [antimicrobial selectors][amr_selector()] such as `as.sir(df, penicillins())`. +#' +#' Otherwise: arguments passed on to methods. #' @details #' *Note: The clinical breakpoints in this package were validated through, and imported from, [WHONET](https://whonet.org). The public use of this `AMR` package has been endorsed by both CLSI and EUCAST. See [clinical_breakpoints] for more information.* #' @@ -225,9 +227,12 @@ #' df_wide %>% mutate_if(is.mic, as.sir) #' df_wide %>% mutate_if(function(x) is.mic(x) | is.disk(x), as.sir) #' df_wide %>% mutate(across(where(is.mic), as.sir)) +#' #' df_wide %>% mutate_at(vars(amoxicillin:tobra), as.sir) #' df_wide %>% mutate(across(amoxicillin:tobra, as.sir)) #' +#' df_wide %>% mutate(across(aminopenicillins(), as.sir)) +#' #' # approaches that all work with additional arguments: #' df_long %>% #' # given a certain data type, e.g. MIC values @@ -722,8 +727,17 @@ as.sir.data.frame <- function(x, meet_criteria(info, allow_class = "logical", has_length = 1) meet_criteria(parallel, allow_class = "logical", has_length = 1) meet_criteria(max_cores, allow_class = c("numeric", "integer"), has_length = 1) - x.bak <- x + + if (tryCatch(length(list(...)) > 0, error = function(e) TRUE)) { + sel <- colnames(pm_select(x, ...)) + } else { + sel <- colnames(x) + } + if (!is.null(col_mo)) { + sel <- sel[sel != col_mo] + } + for (i in seq_len(ncol(x))) { # don't keep factors, overwriting them is hard if (is.factor(x[, i, drop = TRUE])) { @@ -803,15 +817,6 @@ as.sir.data.frame <- function(x, } i <- 0 - if (tryCatch(length(list(...)) > 0, error = function(e) TRUE)) { - sel <- colnames(pm_select(x, ...)) - } else { - sel <- colnames(x) - } - if (!is.null(col_mo)) { - sel <- sel[sel != col_mo] - } - ab_cols <- colnames(x)[vapply(FUN.VALUE = logical(1), x, function(y) { i <<- i + 1 check <- is.mic(y) | is.disk(y) diff --git a/R/sysdata.rda b/R/sysdata.rda index 0b9dce541..2ab938899 100755 Binary files a/R/sysdata.rda and b/R/sysdata.rda differ diff --git a/R/vctrs.R b/R/vctrs.R index 35a940621..904f1e0db 100755 --- a/R/vctrs.R +++ b/R/vctrs.R @@ -30,7 +30,6 @@ # These are all S3 implementations for the vctrs package, # that is used internally by tidyverse packages such as dplyr. # They are to convert AMR-specific classes to bare characters and integers. -# All of them will be exported using s3_register() in R/zzz.R when loading the package. # see https://github.com/tidyverse/dplyr/issues/5955 why this is required diff --git a/data-raw/_reproduction_scripts/reproduction_of_poorman.R b/data-raw/_reproduction_scripts/reproduction_of_poorman.R index 96660fadc..ec644ab7c 100644 --- a/data-raw/_reproduction_scripts/reproduction_of_poorman.R +++ b/data-raw/_reproduction_scripts/reproduction_of_poorman.R @@ -108,3 +108,18 @@ writeLines(contents, "R/aa_helper_pm_functions.R") # note: pm_left_join() will be overwritten by aaa_helper_functions.R, which contains a faster implementation # replace `res <- as.data.frame(res)` with `res <- as.data.frame(res, stringsAsFactors = FALSE)` + +# after running, pm_select must be altered. The line: +# col_pos <- pm_select_positions(.data, ..., .group_pos = TRUE) +# ... must be replaced with this to support tidyselect functionality such as `starts_with()`: +# col_pos <- tryCatch(pm_select_positions(.data, ..., .group_pos = TRUE), error = function(e) NULL) +# if (is.null(col_pos)) { +# # try with tidyverse +# select_dplyr <- import_fn("select", "dplyr", error_on_fail = FALSE) +# if (!is.null(select_dplyr)) { +# col_pos <- which(colnames(.data) %in% colnames(select_dplyr(.data, ...))) +# } else { +# # this will throw an error as it did, but dplyr is not available, so no other option +# col_pos <- pm_select_positions(.data, ..., .group_pos = TRUE) +# } +# } diff --git a/data/antibiotics.rda b/data/antibiotics.rda index 93ffa1f8c..4ecc20316 100644 Binary files a/data/antibiotics.rda and b/data/antibiotics.rda differ diff --git a/index.md b/index.md index ca584ebc0..707ff18f7 100644 --- a/index.md +++ b/index.md @@ -27,12 +27,12 @@

-https://amr-for-r.org +amr-for-r.org

-https://doi.org/10.18637/jss.v104.i03 +doi.org/10.18637/jss.v104.i03

@@ -321,9 +321,9 @@ example_isolates %>% #> # A tibble: 3 × 5 #> ward GEN_total_R GEN_conf_int TOB_total_R TOB_conf_int #> -#> 1 Clinical 0.2289362 0.205-0.254 0.3147503 0.284-0.347 -#> 2 ICU 0.2902655 0.253-0.33 0.4004739 0.353-0.449 -#> 3 Outpatient 0.2 0.131-0.285 0.3676471 0.254-0.493 +#> 1 Clinical 0.229 0.205-0.254 0.315 0.284-0.347 +#> 2 ICU 0.290 0.253-0.33 0.400 0.353-0.449 +#> 3 Outpatient 0.2 0.131-0.285 0.368 0.254-0.493 ``` Or use [antimicrobial @@ -351,33 +351,33 @@ out <- example_isolates %>% #> "Outpatient" (minimum = 30). out #> # A tibble: 3 × 6 -#> ward GEN TOB AMK KAN COL -#> -#> 1 Clinical 0.2289362 0.3147503 0.6258993 1 0.7802956 -#> 2 ICU 0.2902655 0.4004739 0.6624473 1 0.8574144 -#> 3 Outpatient 0.2 0.3676471 0.6052632 NA 0.8888889 +#> ward GEN TOB AMK KAN COL +#> +#> 1 Clinical 0.229 0.315 0.626 1 0.780 +#> 2 ICU 0.290 0.400 0.662 1 0.857 +#> 3 Outpatient 0.2 0.368 0.605 NA 0.889 ``` ``` r # transform the antibiotic columns to names: out %>% set_ab_names() #> # A tibble: 3 × 6 -#> ward gentamicin tobramycin amikacin kanamycin colistin -#> -#> 1 Clinical 0.2289362 0.3147503 0.6258993 1 0.7802956 -#> 2 ICU 0.2902655 0.4004739 0.6624473 1 0.8574144 -#> 3 Outpatient 0.2 0.3676471 0.6052632 NA 0.8888889 +#> ward gentamicin tobramycin amikacin kanamycin colistin +#> +#> 1 Clinical 0.229 0.315 0.626 1 0.780 +#> 2 ICU 0.290 0.400 0.662 1 0.857 +#> 3 Outpatient 0.2 0.368 0.605 NA 0.889 ``` ``` r # transform the antibiotic column to ATC codes: out %>% set_ab_names(property = "atc") #> # A tibble: 3 × 6 -#> ward J01GB03 J01GB01 J01GB06 J01GB04 J01XB01 -#> -#> 1 Clinical 0.2289362 0.3147503 0.6258993 1 0.7802956 -#> 2 ICU 0.2902655 0.4004739 0.6624473 1 0.8574144 -#> 3 Outpatient 0.2 0.3676471 0.6052632 NA 0.8888889 +#> ward J01GB03 J01GB01 J01GB06 J01GB04 J01XB01 +#> +#> 1 Clinical 0.229 0.315 0.626 1 0.780 +#> 2 ICU 0.290 0.400 0.662 1 0.857 +#> 3 Outpatient 0.2 0.368 0.605 NA 0.889 ``` ## What else can you do with this package? diff --git a/man/as.sir.Rd b/man/as.sir.Rd index 9629e5b87..5a20480b9 100644 --- a/man/as.sir.Rd +++ b/man/as.sir.Rd @@ -75,7 +75,9 @@ sir_interpretation_history(clean = FALSE) \arguments{ \item{x}{Vector of values (for class \code{\link{mic}}: MIC values in mg/L, for class \code{\link{disk}}: a disk diffusion radius in millimetres).} -\item{...}{For using on a \link{data.frame}: names of columns to apply \code{\link[=as.sir]{as.sir()}} on (supports tidy selection such as \code{column1:column4}). Otherwise: arguments passed on to methods.} +\item{...}{For using on a \link{data.frame}: selection of columns to apply \code{as.sir()} to. Supports \link[tidyselect:starts_with]{tidyselect language} such as \code{where(is.mic)}, \code{starts_with(...)}, or \code{column1:column4}, and can thus also be \link[=amr_selector]{antimicrobial selectors} such as \code{as.sir(df, penicillins())}. + +Otherwise: arguments passed on to methods.} \item{threshold}{Maximum fraction of invalid antimicrobial interpretations of \code{x}, see \emph{Examples}.} @@ -314,9 +316,12 @@ if (require("dplyr")) { df_wide \%>\% mutate_if(is.mic, as.sir) df_wide \%>\% mutate_if(function(x) is.mic(x) | is.disk(x), as.sir) df_wide \%>\% mutate(across(where(is.mic), as.sir)) + df_wide \%>\% mutate_at(vars(amoxicillin:tobra), as.sir) df_wide \%>\% mutate(across(amoxicillin:tobra, as.sir)) + df_wide \%>\% mutate(across(aminopenicillins(), as.sir)) + # approaches that all work with additional arguments: df_long \%>\% # given a certain data type, e.g. MIC values diff --git a/man/mean_amr_distance.Rd b/man/mean_amr_distance.Rd index 7e2665b1b..4d86f34a8 100644 --- a/man/mean_amr_distance.Rd +++ b/man/mean_amr_distance.Rd @@ -18,7 +18,7 @@ amr_distance_from_row(amr_distance, row) \arguments{ \item{x}{A vector of class \link[=as.sir]{sir}, \link[=as.mic]{mic} or \link[=as.disk]{disk}, or a \link{data.frame} containing columns of any of these classes.} -\item{...}{Variables to select. Supports \link[tidyselect:language]{tidyselect language} (such as \code{column1:column4} and \code{where(is.mic)}), and can thus also be \link[=amr_selector]{antimicrobial selectors}.} +\item{...}{Variables to select. Supports \link[tidyselect:starts_with]{tidyselect language} such as \code{where(is.mic)}, \code{starts_with(...)}, or \code{column1:column4}, and can thus also be \link[=amr_selector]{antimicrobial selectors}.} \item{combine_SI}{A \link{logical} to indicate whether all values of S, SDD, and I must be merged into one, so the input only consists of S+I vs. R (susceptible vs. resistant) - the default is \code{TRUE}.} diff --git a/tests/testthat/test-zzz.R b/tests/testthat/test-zzz.R index ec6834cdd..a26ae3a43 100644 --- a/tests/testthat/test-zzz.R +++ b/tests/testthat/test-zzz.R @@ -63,10 +63,12 @@ test_that("test-zzz.R", { "progress_bar" = "progress", "read_html" = "xml2", "right_join" = "dplyr", + "select" = "dplyr", "semi_join" = "dplyr", "showQuestion" = "rstudioapi", "symbol" = "cli", "tibble" = "tibble", + "where" = "tidyselect", "write.xlsx" = "openxlsx" )