(v3.0.0.9007) allow any tidyselect language in as.sir()

2025-12-04 20:01:58 +01:00 · 2025-07-17 14:29:35 +02:00
parent 0138e33ce9
commit 8dab0a3730
16 changed files with 123 additions and 113 deletions
--- a/4
+++ b/4
@@ -1,6 +1,6 @@
 Package: AMR
-Version: 3.0.0.9004
-Date: 2025-06-13
+Version: 3.0.0.9007
+Date: 2025-07-17
 Title: Antimicrobial Resistance Data Analysis
 Description: Functions to simplify and standardise antimicrobial resistance (AMR)
  data analysis and to work with microbial and antimicrobial properties by
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,15 +1,18 @@
-# AMR 3.0.0.9004
+# AMR 3.0.0.9007
+
+This is primarily a bugfix release, though we added one nice feature too.

 ### New
 * Integration with the **tidymodels** framework to allow seamless use of MIC and SIR data in modelling pipelines via `recipes`
  - `step_mic_log2()` to transform `<mic>` columns with log2, and `step_sir_numeric()` to convert `<sir>` columns to numeric
-  - `tidyselect` helpers: `all_mic()`, `all_mic_predictors()`, `all_sir()`, `all_sir_predictors()`
-  - Enables seamless use of MIC and SIR data in modelling pipelines via `recipes`
+  - New `tidyselect` helpers: `all_mic()`, `all_mic_predictors()`, `all_sir()`, `all_sir_predictors()`

 ### Changed
 * Fixed a bug in `antibiogram()` for when no antimicrobials are set
+* Fixed a bug in `antibiogram()` to allow column names containing the `+` character (#222)
 * Fixed a bug in `as.ab()` for antimicrobial codes with a number in it if they are preceded by a space
 * Fixed a bug in `eucast_rules()` for using specific custom rules
+* Fixed a bug in `as.sir()` to allow any tidyselect language (#220)
 * Fixed some specific Dutch translations for antimicrobials
 * Updated `random_mic()` and `random_disk()` to set skewedness of the distribution and allow multiple microorganisms

--- a/R/aa_helper_functions.R
+++ b/R/aa_helper_functions.R
@@ -63,31 +63,6 @@ pm_left_join <- function(x, y, by = NULL, suffix = c(".x", ".y")) {
  merged
 }

-# support where() like tidyverse (this function will also be used when running `antibiogram()`):
-where <- function(fn) {
-  # based on https://github.com/nathaneastwood/poorman/blob/52eb6947e0b4430cd588976ed8820013eddf955f/R/where.R#L17-L32
-  if (!is.function(fn)) {
-    stop_("`", deparse(substitute(fn)), "()` is not a valid predicate function.")
-  }
-  df <- pm_select_env$.data
-  cols <- pm_select_env$get_colnames()
-  if (is.null(df)) {
-    df <- get_current_data("where", call = FALSE)
-    cols <- colnames(df)
-  }
-  preds <- unlist(lapply(
-    df,
-    function(x, fn) {
-      do.call("fn", list(x))
-    },
-    fn
-  ))
-  if (!is.logical(preds)) stop_("`where()` must be used with functions that return `TRUE` or `FALSE`.")
-  data_cols <- cols
-  cols <- data_cols[preds]
-  which(data_cols %in% cols)
-}
-
 # copied and slightly rewritten from {poorman} under permissive license (2021-10-15)
 # https://github.com/nathaneastwood/poorman, MIT licensed, Nathan Eastwood, 2020
 case_when_AMR <- function(...) {
@@ -1636,6 +1611,36 @@ get_n_cores <- function(max_cores = Inf) {
  n_cores
 }

+# Support `where()` if tidyselect not installed ----
+if (!is.null(import_fn("where", "tidyselect", error_on_fail = FALSE))) {
+  # tidyselect::where() exists, load the namespace to make `where()`s work across the package in default arguments
+  loadNamespace("tidyselect")
+} else {
+  where <- function(fn) {
+    # based on https://github.com/nathaneastwood/poorman/blob/52eb6947e0b4430cd588976ed8820013eddf955f/R/where.R#L17-L32
+    if (!is.function(fn)) {
+      stop_("`", deparse(substitute(fn)), "()` is not a valid predicate function.")
+    }
+    df <- pm_select_env$.data
+    cols <- pm_select_env$get_colnames()
+    if (is.null(df)) {
+      df <- get_current_data("where", call = FALSE)
+      cols <- colnames(df)
+    }
+    preds <- unlist(lapply(
+      df,
+      function(x, fn) {
+        do.call("fn", list(x))
+      },
+      fn
+    ))
+    if (!is.logical(preds)) stop_("`where()` must be used with functions that return `TRUE` or `FALSE`.")
+    data_cols <- cols
+    cols <- data_cols[preds]
+    which(data_cols %in% cols)
+  }
+}
+
 # Faster data.table implementations ----

 match <- function(x, table, ...) {
@@ -1655,52 +1660,6 @@ match <- function(x, table, ...) {
  }
 }

-# nolint start
-
-# Register S3 methods ----
-# copied from vctrs::s3_register by their permission:
-# https://github.com/r-lib/vctrs/blob/05968ce8e669f73213e3e894b5f4424af4f46316/R/register-s3.R
-s3_register <- function(generic, class, method = NULL) {
-  stopifnot(is.character(generic), length(generic) == 1)
-  stopifnot(is.character(class), length(class) == 1)
-  pieces <- strsplit(generic, "::")[[1]]
-  stopifnot(length(pieces) == 2)
-  package <- pieces[[1]]
-  generic <- pieces[[2]]
-  caller <- parent.frame()
-  get_method_env <- function() {
-    top <- topenv(caller)
-    if (isNamespace(top)) {
-      asNamespace(environmentName(top))
-    } else {
-      caller
-    }
-  }
-  get_method <- function(method, env) {
-    if (is.null(method)) {
-      get(paste0(generic, ".", class), envir = get_method_env())
-    } else {
-      method
-    }
-  }
-  method_fn <- get_method(method)
-  stopifnot(is.function(method_fn))
-  setHook(packageEvent(package, "onLoad"), function(...) {
-    ns <- asNamespace(package)
-    method_fn <- get_method(method)
-    registerS3method(generic, class, method_fn, envir = ns)
-  })
-  if (!isNamespaceLoaded(package)) {
-    return(invisible())
-  }
-  envir <- asNamespace(package)
-  if (exists(generic, envir)) {
-    registerS3method(generic, class, method_fn, envir = envir)
-  }
-  invisible()
-}
-
-
 # Support old R versions ----
 # these functions were not available in previous versions of R
 # see here for the full list: https://github.com/r-lib/backports
--- a/R/aa_helper_pm_functions.R
+++ b/R/aa_helper_pm_functions.R
@@ -952,7 +952,19 @@ pm_select_env$get_nrow <- function() nrow(pm_select_env$.data)
 pm_select_env$get_ncol <- function() ncol(pm_select_env$.data)

 pm_select <- function(.data, ...) {
-  col_pos <- pm_select_positions(.data, ..., .group_pos = TRUE)
+  # col_pos <- pm_select_positions(.data, ..., .group_pos = TRUE),
+  col_pos <- tryCatch(pm_select_positions(.data, ..., .group_pos = TRUE), error = function(e) NULL)
+  if (is.null(col_pos)) {
+    # try with tidyverse
+    select_dplyr <- import_fn("select", "dplyr", error_on_fail = FALSE)
+    if (!is.null(select_dplyr)) {
+      col_pos <- which(colnames(.data) %in% colnames(select_dplyr(.data, ...)))
+    } else {
+      # this will throw an error as it did, but dplyr is not available, so no other option
+      col_pos <- pm_select_positions(.data, ..., .group_pos = TRUE)
+    }
+  }
+
  map_names <- names(col_pos)
  map_names_length <- nchar(map_names)
  if (any(map_names_length == 0L)) {
--- a/R/ab.R
+++ b/R/ab.R
@@ -184,7 +184,8 @@ as.ab <- function(x, flag_multiple_results = TRUE, language = get_AMR_locale(),
  x_new[known_codes_cid] <- AMR_env$AB_lookup$ab[match(x[known_codes_cid], AMR_env$AB_lookup$cid)]
  previously_coerced <- x %in% AMR_env$ab_previously_coerced$x
  x_new[previously_coerced & is.na(x_new)] <- AMR_env$ab_previously_coerced$ab[match(x[is.na(x_new) & x %in% AMR_env$ab_previously_coerced$x], AMR_env$ab_previously_coerced$x)]
-  if (any(previously_coerced) && isTRUE(info) && message_not_thrown_before("as.ab", entire_session = TRUE)) {
+  previously_coerced_mention <- x %in% AMR_env$ab_previously_coerced$x & !x %in% AMR_env$AB_lookup$ab & !x %in% AMR_env$AB_lookup$generalised_name
+  if (any(previously_coerced_mention) && isTRUE(info) && message_not_thrown_before("as.ab", entire_session = TRUE)) {
    message_(
      "Returning previously coerced ",
      ifelse(length(unique(which(x[which(previously_coerced)] %in% x_bak_clean))) > 1, "value for an antimicrobial", "values for various antimicrobials"),
--- a/R/antibiogram.R
+++ b/R/antibiogram.R
@@ -576,6 +576,15 @@ antibiogram.default <- function(x,
    }
    antimicrobials <- unlist(antimicrobials)
  } else {
+    existing_ab_combined_cols <- ab_trycatch[ab_trycatch %like% "[+]" & ab_trycatch %in% colnames(x)]
+    if (length(existing_ab_combined_cols) > 0 && !is.null(ab_transform)) {
+      ab_transform <- NULL
+      warning_(
+        "Detected column name(s) containing the '+' character, which conflicts with the expected syntax in `antibiogram()`: the '+' is used to combine separate antimicrobial agent columns (e.g., \"AMP+GEN\").\n\n",
+        "To avoid incorrectly guessing which antimicrobials this represents, `ab_transform` was automatically set to `NULL`.\n\n",
+        "If this is unintended, please rename the column(s) to avoid using '+' in the name, or set `ab_transform = NULL` explicitly to suppress this message."
+      )
+    }
    antimicrobials <- ab_trycatch
  }

--- a/R/mean_amr_distance.R
+++ b/R/mean_amr_distance.R
@@ -31,7 +31,7 @@
 #'
 #' Calculates a normalised mean for antimicrobial resistance between multiple observations, to help to identify similar isolates without comparing antibiograms by hand.
 #' @param x A vector of class [sir][as.sir()], [mic][as.mic()] or [disk][as.disk()], or a [data.frame] containing columns of any of these classes.
-#' @param ... Variables to select. Supports [tidyselect language][tidyselect::language] (such as `column1:column4` and `where(is.mic)`), and can thus also be [antimicrobial selectors][amr_selector()].
+#' @param ... Variables to select. Supports [tidyselect language][tidyselect::starts_with()] such as `where(is.mic)`, `starts_with(...)`, or `column1:column4`, and can thus also be [antimicrobial selectors][amr_selector()].
 #' @param combine_SI A [logical] to indicate whether all values of S, SDD, and I must be merged into one, so the input only consists of S+I vs. R (susceptible vs. resistant) - the default is `TRUE`.
 #' @details The mean AMR distance is effectively [the Z-score](https://en.wikipedia.org/wiki/Standard_score); a normalised numeric value to compare AMR test results which can help to identify similar isolates, without comparing antibiograms by hand.
 #'
--- a/R/sir.R
+++ b/R/sir.R
@@ -69,7 +69,9 @@
 #' @param reference_data A [data.frame] to be used for interpretation, which defaults to the [clinical_breakpoints] data set. Changing this argument allows for using own interpretation guidelines. This argument must contain a data set that is equal in structure to the [clinical_breakpoints] data set (same column names and column types). Please note that the `guideline` argument will be ignored when `reference_data` is manually set.
 #' @param threshold Maximum fraction of invalid antimicrobial interpretations of `x`, see *Examples*.
 #' @param conserve_capped_values Deprecated, use `capped_mic_handling` instead.
-#' @param ... For using on a [data.frame]: names of columns to apply [as.sir()] on (supports tidy selection such as `column1:column4`). Otherwise: arguments passed on to methods.
+#' @param ... For using on a [data.frame]: selection of columns to apply `as.sir()` to. Supports [tidyselect language][tidyselect::starts_with()] such as `where(is.mic)`, `starts_with(...)`, or `column1:column4`, and can thus also be [antimicrobial selectors][amr_selector()] such as `as.sir(df, penicillins())`.
+#'
+#' Otherwise: arguments passed on to methods.
 #' @details
 #' *Note: The clinical breakpoints in this package were validated through, and imported from, [WHONET](https://whonet.org). The public use of this `AMR` package has been endorsed by both CLSI and EUCAST. See [clinical_breakpoints] for more information.*
 #'
@@ -225,9 +227,12 @@
 #'   df_wide %>% mutate_if(is.mic, as.sir)
 #'   df_wide %>% mutate_if(function(x) is.mic(x) | is.disk(x), as.sir)
 #'   df_wide %>% mutate(across(where(is.mic), as.sir))
+#'
 #'   df_wide %>% mutate_at(vars(amoxicillin:tobra), as.sir)
 #'   df_wide %>% mutate(across(amoxicillin:tobra, as.sir))
 #'
+#'   df_wide %>% mutate(across(aminopenicillins(), as.sir))
+#'
 #'   # approaches that all work with additional arguments:
 #'   df_long %>%
 #'     # given a certain data type, e.g. MIC values
@@ -722,8 +727,17 @@ as.sir.data.frame <- function(x,
  meet_criteria(info, allow_class = "logical", has_length = 1)
  meet_criteria(parallel, allow_class = "logical", has_length = 1)
  meet_criteria(max_cores, allow_class = c("numeric", "integer"), has_length = 1)
-
  x.bak <- x
+
+  if (tryCatch(length(list(...)) > 0, error = function(e) TRUE)) {
+    sel <- colnames(pm_select(x, ...))
+  } else {
+    sel <- colnames(x)
+  }
+  if (!is.null(col_mo)) {
+    sel <- sel[sel != col_mo]
+  }
+
  for (i in seq_len(ncol(x))) {
    # don't keep factors, overwriting them is hard
    if (is.factor(x[, i, drop = TRUE])) {
@@ -803,15 +817,6 @@ as.sir.data.frame <- function(x,
  }

  i <- 0
-  if (tryCatch(length(list(...)) > 0, error = function(e) TRUE)) {
-    sel <- colnames(pm_select(x, ...))
-  } else {
-    sel <- colnames(x)
-  }
-  if (!is.null(col_mo)) {
-    sel <- sel[sel != col_mo]
-  }
-
  ab_cols <- colnames(x)[vapply(FUN.VALUE = logical(1), x, function(y) {
    i <<- i + 1
    check <- is.mic(y) | is.disk(y)
--- a/R/sysdata.rda
+++ b/R/sysdata.rda
--- a/R/vctrs.R
+++ b/R/vctrs.R
@@ -30,7 +30,6 @@
 # These are all S3 implementations for the vctrs package,
 # that is used internally by tidyverse packages such as dplyr.
 # They are to convert AMR-specific classes to bare characters and integers.
-# All of them will be exported using s3_register() in R/zzz.R when loading the package.

 # see https://github.com/tidyverse/dplyr/issues/5955 why this is required

--- a/data-raw/_reproduction_scripts/reproduction_of_poorman.R
+++ b/data-raw/_reproduction_scripts/reproduction_of_poorman.R
@@ -108,3 +108,18 @@ writeLines(contents, "R/aa_helper_pm_functions.R")

 # note: pm_left_join() will be overwritten by aaa_helper_functions.R, which contains a faster implementation
 # replace `res <- as.data.frame(res)` with  `res <- as.data.frame(res, stringsAsFactors = FALSE)`
+
+# after running, pm_select must be altered. The line:
+# col_pos <- pm_select_positions(.data, ..., .group_pos = TRUE)
+# ... must be replaced with this to support tidyselect functionality such as `starts_with()`:
+# col_pos <- tryCatch(pm_select_positions(.data, ..., .group_pos = TRUE), error = function(e) NULL)
+# if (is.null(col_pos)) {
+#   # try with tidyverse
+#   select_dplyr <- import_fn("select", "dplyr", error_on_fail = FALSE)
+#   if (!is.null(select_dplyr)) {
+#     col_pos <- which(colnames(.data) %in% colnames(select_dplyr(.data, ...)))
+#   } else {
+#     # this will throw an error as it did, but dplyr is not available, so no other option
+#     col_pos <- pm_select_positions(.data, ..., .group_pos = TRUE)
+#   }
+# }
--- a/data/antibiotics.rda
+++ b/data/antibiotics.rda
--- a/index.md
+++ b/index.md
@@ -27,12 +27,12 @@

 <p style="text-align:left; width: 50%;">

-<small><a href="https://amr-for-r.org/">https://amr-for-r.org</a></small>
+<small><a href="https://amr-for-r.org/">amr-for-r.org</a></small>
 </p>

 <p style="text-align:right; width: 50%;">

-<small><a href="https://doi.org/10.18637/jss.v104.i03" target="_blank">https://doi.org/10.18637/jss.v104.i03</a></small>
+<small><a href="https://doi.org/10.18637/jss.v104.i03" target="_blank">doi.org/10.18637/jss.v104.i03</a></small>
 </p>

 </div>
@@ -321,9 +321,9 @@ example_isolates %>%
 #> # A tibble: 3 × 5
 #>   ward       GEN_total_R GEN_conf_int TOB_total_R TOB_conf_int
 #>   <chr>            <dbl> <chr>              <dbl> <chr>       
-#> 1 Clinical     0.2289362 0.205-0.254    0.3147503 0.284-0.347 
-#> 2 ICU          0.2902655 0.253-0.33     0.4004739 0.353-0.449 
-#> 3 Outpatient   0.2       0.131-0.285    0.3676471 0.254-0.493
+#> 1 Clinical         0.229 0.205-0.254        0.315 0.284-0.347 
+#> 2 ICU              0.290 0.253-0.33         0.400 0.353-0.449 
+#> 3 Outpatient       0.2   0.131-0.285        0.368 0.254-0.493
 ```

 Or use [antimicrobial
@@ -351,33 +351,33 @@ out <- example_isolates %>%
 #> "Outpatient" (minimum = 30).
 out
 #> # A tibble: 3 × 6
-#>   ward             GEN       TOB       AMK   KAN       COL
-#>   <chr>          <dbl>     <dbl>     <dbl> <dbl>     <dbl>
-#> 1 Clinical   0.2289362 0.3147503 0.6258993     1 0.7802956
-#> 2 ICU        0.2902655 0.4004739 0.6624473     1 0.8574144
-#> 3 Outpatient 0.2       0.3676471 0.6052632    NA 0.8888889
+#>   ward         GEN   TOB   AMK   KAN   COL
+#>   <chr>      <dbl> <dbl> <dbl> <dbl> <dbl>
+#> 1 Clinical   0.229 0.315 0.626     1 0.780
+#> 2 ICU        0.290 0.400 0.662     1 0.857
+#> 3 Outpatient 0.2   0.368 0.605    NA 0.889
 ```

 ``` r
 # transform the antibiotic columns to names:
 out %>% set_ab_names()
 #> # A tibble: 3 × 6
-#>   ward       gentamicin tobramycin  amikacin kanamycin  colistin
-#>   <chr>           <dbl>      <dbl>     <dbl>     <dbl>     <dbl>
-#> 1 Clinical    0.2289362  0.3147503 0.6258993         1 0.7802956
-#> 2 ICU         0.2902655  0.4004739 0.6624473         1 0.8574144
-#> 3 Outpatient  0.2        0.3676471 0.6052632        NA 0.8888889
+#>   ward       gentamicin tobramycin amikacin kanamycin colistin
+#>   <chr>           <dbl>      <dbl>    <dbl>     <dbl>    <dbl>
+#> 1 Clinical        0.229      0.315    0.626         1    0.780
+#> 2 ICU             0.290      0.400    0.662         1    0.857
+#> 3 Outpatient      0.2        0.368    0.605        NA    0.889
 ```

 ``` r
 # transform the antibiotic column to ATC codes:
 out %>% set_ab_names(property = "atc")
 #> # A tibble: 3 × 6
-#>   ward         J01GB03   J01GB01   J01GB06 J01GB04   J01XB01
-#>   <chr>          <dbl>     <dbl>     <dbl>   <dbl>     <dbl>
-#> 1 Clinical   0.2289362 0.3147503 0.6258993       1 0.7802956
-#> 2 ICU        0.2902655 0.4004739 0.6624473       1 0.8574144
-#> 3 Outpatient 0.2       0.3676471 0.6052632      NA 0.8888889
+#>   ward       J01GB03 J01GB01 J01GB06 J01GB04 J01XB01
+#>   <chr>        <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
+#> 1 Clinical     0.229   0.315   0.626       1   0.780
+#> 2 ICU          0.290   0.400   0.662       1   0.857
+#> 3 Outpatient   0.2     0.368   0.605      NA   0.889
 ```

 ## What else can you do with this package?
--- a/man/as.sir.Rd
+++ b/man/as.sir.Rd
@@ -75,7 +75,9 @@ sir_interpretation_history(clean = FALSE)
 \arguments{
 \item{x}{Vector of values (for class \code{\link{mic}}: MIC values in mg/L, for class \code{\link{disk}}: a disk diffusion radius in millimetres).}

-\item{...}{For using on a \link{data.frame}: names of columns to apply \code{\link[=as.sir]{as.sir()}} on (supports tidy selection such as \code{column1:column4}). Otherwise: arguments passed on to methods.}
+\item{...}{For using on a \link{data.frame}: selection of columns to apply \code{as.sir()} to. Supports \link[tidyselect:starts_with]{tidyselect language} such as \code{where(is.mic)}, \code{starts_with(...)}, or \code{column1:column4}, and can thus also be \link[=amr_selector]{antimicrobial selectors} such as \code{as.sir(df, penicillins())}.
+
+Otherwise: arguments passed on to methods.}

 \item{threshold}{Maximum fraction of invalid antimicrobial interpretations of \code{x}, see \emph{Examples}.}

@@ -314,9 +316,12 @@ if (require("dplyr")) {
  df_wide \%>\% mutate_if(is.mic, as.sir)
  df_wide \%>\% mutate_if(function(x) is.mic(x) | is.disk(x), as.sir)
  df_wide \%>\% mutate(across(where(is.mic), as.sir))
+
  df_wide \%>\% mutate_at(vars(amoxicillin:tobra), as.sir)
  df_wide \%>\% mutate(across(amoxicillin:tobra, as.sir))

+  df_wide \%>\% mutate(across(aminopenicillins(), as.sir))
+
  # approaches that all work with additional arguments:
  df_long \%>\%
    # given a certain data type, e.g. MIC values
--- a/man/mean_amr_distance.Rd
+++ b/man/mean_amr_distance.Rd
@@ -18,7 +18,7 @@ amr_distance_from_row(amr_distance, row)
 \arguments{
 \item{x}{A vector of class \link[=as.sir]{sir}, \link[=as.mic]{mic} or \link[=as.disk]{disk}, or a \link{data.frame} containing columns of any of these classes.}

-\item{...}{Variables to select. Supports \link[tidyselect:language]{tidyselect language} (such as \code{column1:column4} and \code{where(is.mic)}), and can thus also be \link[=amr_selector]{antimicrobial selectors}.}
+\item{...}{Variables to select. Supports \link[tidyselect:starts_with]{tidyselect language} such as \code{where(is.mic)}, \code{starts_with(...)}, or \code{column1:column4}, and can thus also be \link[=amr_selector]{antimicrobial selectors}.}

 \item{combine_SI}{A \link{logical} to indicate whether all values of S, SDD, and I must be merged into one, so the input only consists of S+I vs. R (susceptible vs. resistant) - the default is \code{TRUE}.}

--- a/tests/testthat/test-zzz.R
+++ b/tests/testthat/test-zzz.R
@@ -63,10 +63,12 @@ test_that("test-zzz.R", {
    "progress_bar" = "progress",
    "read_html" = "xml2",
    "right_join" = "dplyr",
+    "select" = "dplyr",
    "semi_join" = "dplyr",
    "showQuestion" = "rstudioapi",
    "symbol" = "cli",
    "tibble" = "tibble",
+    "where" = "tidyselect",
    "write.xlsx" = "openxlsx"
  )