documentation for 'data.table' AB selectors

2025-08-19 02:13:06 +02:00 · 2023-03-11 16:54:02 +01:00
parent 45e840c02f
commit 7ad8635994
9 changed files with 174 additions and 40 deletions
--- a/2
+++ b/2
@@ -1,5 +1,5 @@
 Package: AMR
-Version: 1.8.2.9149
+Version: 1.8.2.9150
 Date: 2023-03-11
 Title: Antimicrobial Resistance Data Analysis
 Description: Functions to simplify and standardise antimicrobial resistance (AMR)
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,4 +1,4 @@
-# AMR 1.8.2.9149
+# AMR 1.8.2.9150

 *(this beta version will eventually become v2.0! We're happy to reach a new major milestone soon!)*

--- a/R/aa_helper_functions.R
+++ b/R/aa_helper_functions.R
@@ -934,7 +934,7 @@ get_current_data <- function(arg_name, call) {
    }
  }

-  # now go over all underlying environments looking for other dplyr and base R selection environments
+  # now go over all underlying environments looking for other dplyr, data.table and base R selection environments
  with_generic <- vapply(FUN.VALUE = logical(1), frms, function(e) !is.null(e$`.Generic`))
  for (env in frms[which(with_generic)]) {
    if (valid_df(env$`.data`)) {
@@ -945,6 +945,7 @@ get_current_data <- function(arg_name, call) {
      return(env$xx)
    } else if (valid_df(env$x)) {
      # an element `x` will be in the environment for only cols in base R, e.g. `example_isolates[, carbapenems()]`
+      # this element will also be present in data.table environments where there's a .Generic available
      return(env$x)
    }
  }
--- a/R/ab_selectors.R
+++ b/R/ab_selectors.R
@@ -29,14 +29,16 @@

 #' Antibiotic Selectors
 #'
-#' These functions allow for filtering rows and selecting columns based on antibiotic test results that are of a specific antibiotic class or group, without the need to define the columns or antibiotic abbreviations. In short, if you have a column name that resembles an antimicrobial drug, it will be picked up by any of these functions that matches its pharmaceutical class: "cefazolin", "CZO" and "J01DB04" will all be picked up by [cephalosporins()].
+#' @description These functions allow for filtering rows and selecting columns based on antibiotic test results that are of a specific antibiotic class or group (according to the [antibiotics] data set), without the need to define the columns or antibiotic abbreviations.
+#' 
+#' In short, if you have a column name that resembles an antimicrobial drug, it will be picked up by any of these functions that matches its pharmaceutical class: "cefazolin", "kefzol", "CZO" and "J01DB04" will all be picked up by [cephalosporins()].
 #' @param ab_class an antimicrobial class or a part of it, such as `"carba"` and `"carbapenems"`. The columns `group`, `atc_group1` and `atc_group2` of the [antibiotics] data set will be searched (case-insensitive) for this value.
 #' @param filter an [expression] to be evaluated in the [antibiotics] data set, such as `name %like% "trim"`
 #' @param only_sir_columns a [logical] to indicate whether only columns of class `sir` must be selected (default is `FALSE`), see [as.sir()]
 #' @param only_treatable a [logical] to indicate whether antimicrobial drugs should be excluded that are only for laboratory tests (default is `TRUE`), such as gentamicin-high (`GEH`) and imipenem/EDTA (`IPE`)
 #' @param ... ignored, only in place to allow future extensions
 #' @details
-#' These functions can be used in data set calls for selecting columns and filtering rows. They are heavily inspired by the [Tidyverse selection helpers][tidyselect::language] such as [`everything()`][tidyselect::everything()], but also work in base \R and not only in `dplyr` verbs. Nonetheless, they are very convenient to use with `dplyr` functions such as [`select()`][dplyr::select()], [`filter()`][dplyr::filter()] and [`summarise()`][dplyr::summarise()], see *Examples*.
+#' These functions can be used in data set calls for selecting columns and filtering rows. They work with base \R, the Tidyverse, and `data.table`. They are heavily inspired by the [Tidyverse selection helpers][tidyselect::language] such as [`everything()`][tidyselect::everything()], but are not limited to `dplyr` verbs. Nonetheless, they are very convenient to use with `dplyr` functions such as [`select()`][dplyr::select()], [`filter()`][dplyr::filter()] and [`summarise()`][dplyr::summarise()], see *Examples*.
 #'
 #' All columns in the data in which these functions are called will be searched for known antibiotic names, abbreviations, brand names, and codes (ATC, EARS-Net, WHO, etc.) according to the [antibiotics] data set. This means that a selector such as [aminoglycosides()] will pick up column names like 'gen', 'genta', 'J01GB03', 'tobra', 'Tobracin', etc.
 #'
@@ -53,6 +55,10 @@
 #' # `example_isolates` is a data set available in the AMR package.
 #' # See ?example_isolates.
 #' example_isolates
+#' 
+#' 
+#' # Examples sections below are split into 'base R', 'dplyr', and 'data.table':
+#'
 #'
 #' # base R ------------------------------------------------------------------
 #'
@@ -76,7 +82,7 @@
 #' # filter with multiple antibiotic selectors using c()
 #' example_isolates[all(c(carbapenems(), aminoglycosides()) == "R"), ]
 #'
-#' # filter + select in one go: get penicillins in carbapenems-resistant strains
+#' # filter + select in one go: get penicillins in carbapenem-resistant strains
 #' example_isolates[any(carbapenems() == "R"), penicillins()]
 #'
 #' # You can combine selectors with '&' to be more specific. For example,
@@ -86,13 +92,19 @@
 #' # and erythromycin is not a penicillin:
 #' example_isolates[, penicillins() & administrable_per_os()]
 #'
-#' # ab_selector() applies a filter in the `antibiotics` data set and is thus very
-#' # flexible. For instance, to select antibiotic columns with an oral DDD of at
-#' # least 1 gram:
+#' # ab_selector() applies a filter in the `antibiotics` data set and is thus
+#' # very flexible. For instance, to select antibiotic columns with an oral DDD
+#' # of at least 1 gram:
 #' example_isolates[, ab_selector(oral_ddd > 1 & oral_units == "g")]
-#'
-#' # dplyr -------------------------------------------------------------------
+#' 
 #' \donttest{
+#' # dplyr -------------------------------------------------------------------
+#' 
+#' if (require("dplyr")) {
+#'   tibble(kefzol = random_sir(5)) %>%
+#'     select(cephalosporins())
+#' }
+#' 
 #' if (require("dplyr")) {
 #'   # get AMR for all aminoglycosides e.g., per ward:
 #'   example_isolates %>%
@@ -173,6 +185,35 @@
 #'   z <- example_isolates %>% filter(if_all(carbapenems(), ~ .x == "R"))
 #'   identical(x, y) && identical(y, z)
 #' }
+#' 
+#' 
+#' # data.table --------------------------------------------------------------
+#' 
+#' # data.table is supported as well, just use it in the same way as with
+#' # base R, but add `with = FALSE` if using a single AB selector:
+#' 
+#' if (require("data.table")) {
+#'   dt <- as.data.table(example_isolates)
+#' 
+#'   print(
+#'     dt[, carbapenems()] # incorrect, returns column *names*
+#'   )
+#'   print(
+#'     dt[, carbapenems(), with = FALSE] # so `with = FALSE` is required
+#'   )
+#'   
+#'   # for multiple selections or AB selectors, `with = FALSE` is not needed:
+#'   print(
+#'     dt[, c("mo", aminoglycosides())]
+#'   )
+#'   print(
+#'     dt[, c(carbapenems(), aminoglycosides())]
+#'   )
+#'   
+#'   # row filters are also supported:
+#'   print(dt[any(carbapenems() == "S"), ])
+#'   print(dt[any(carbapenems() == "S"), penicillins(), with = FALSE])
+#' }
 #' }
 ab_class <- function(ab_class,
                     only_sir_columns = FALSE,
--- a/R/get_episode.R
+++ b/R/get_episode.R
@@ -214,7 +214,7 @@ is_new_episode <- function(x, episode_days = NULL, case_free_days = NULL, ...) {
 }

 exec_episode <- function(x, episode_days, case_free_days, ...) {
-  stop_if_not(is.null(episode_days) || is.null(case_free_days),
+  stop_ifnot(is.null(episode_days) || is.null(case_free_days),
    "either argument `episode_days` or argument `case_free_days` must be set.",
    call = -2
  )
--- a/R/proportion.R
+++ b/R/proportion.R
@@ -43,6 +43,7 @@
 #' @param ab_result antibiotic results to test against, must be one or more values of "S", "I", or "R"
 #' @param confidence_level the confidence level for the returned confidence interval. For the calculation, the number of S or SI isolates, and R isolates are compared with the total number of available isolates with R, S, or I by using [binom.test()], i.e., the Clopper-Pearson method.
 #' @param side the side of the confidence interval to return. The default is `"both"` for a length 2 vector, but can also be (abbreviated as) `"min"`/`"left"`/`"lower"`/`"less"` or `"max"`/`"right"`/`"higher"`/`"greater"`.
+#' @param collapse a [logical] to indicate whether the output values should be 'collapsed', i.e. be merged together into one value, or a character value to use for collapsing
 #' @inheritSection as.sir Interpretation of SIR
 #' @details
 #' The function [resistance()] is equal to the function [proportion_R()]. The function [susceptibility()] is equal to the function [proportion_SI()].
@@ -112,6 +113,10 @@
 #' sir_confidence_interval(example_isolates$AMX,
 #'   confidence_level = 0.975
 #' )
+#' sir_confidence_interval(example_isolates$AMX,
+#'   confidence_level = 0.975,
+#'   collapse = ", "
+#' )
 #'
 #' # determines %S+I:
 #' susceptibility(example_isolates$AMX)
@@ -260,10 +265,16 @@ sir_confidence_interval <- function(...,
                                    as_percent = FALSE,
                                    only_all_tested = FALSE,
                                    confidence_level = 0.95,
-                                    side = "both") {
+                                    side = "both",
+                                    collapse = FALSE) {
  meet_criteria(ab_result, allow_class = c("character", "sir"), has_length = c(1, 2, 3), is_in = c("S", "I", "R"))
+  meet_criteria(minimum, allow_class = c("numeric", "integer"), has_length = 1, is_positive_or_zero = TRUE, is_finite = TRUE)
+  meet_criteria(as_percent, allow_class = "logical", has_length = 1)
+  meet_criteria(only_all_tested, allow_class = "logical", has_length = 1)
  meet_criteria(confidence_level, allow_class = "numeric", is_positive = TRUE, has_length = 1)
  meet_criteria(side, allow_class = "character", has_length = 1, is_in = c("both", "b", "left", "l", "lower", "lowest", "less", "min", "right", "r", "higher", "highest", "greater", "g", "max"))
+  meet_criteria(collapse, allow_class = c("logical", "character"), has_length = 1)
+  
  x <- tryCatch(
    sir_calc(...,
      ab_result = ab_result,
@@ -281,19 +292,7 @@ sir_confidence_interval <- function(...,
    error = function(e) stop_(gsub("in sir_calc(): ", "", e$message, fixed = TRUE), call = -5)
  )

-  if (n < minimum) {
-    warning_("Introducing NA: ",
-      ifelse(n == 0, "no", paste("only", n)),
-      " results available for `sir_confidence_interval()` (`minimum` = ", minimum, ").",
-      call = FALSE
-    )
-    if (as_percent == TRUE) {
-      return(NA_character_)
-    } else {
-      return(NA_real_)
-    }
-  }
-
+  # this applies the Clopper-Pearson method
  out <- stats::binom.test(x = x, n = n, conf.level = confidence_level)$conf.int
  out <- set_clean_class(out, "double")

@@ -302,11 +301,29 @@ sir_confidence_interval <- function(...,
  } else if (side %in% c("right", "r", "higher", "highest", "greater", "g", "max")) {
    out <- out[2]
  }
-  if (as_percent == TRUE) {
-    percentage(out, digits = 1)
+  if (isTRUE(as_percent)) {
+    out <- percentage(out, digits = 1)
  } else {
-    out
+    out <- round(out, digits = 3)
  }
+  if (!isFALSE(collapse) && length(out) > 1) {
+    out <- paste(out, collapse = ifelse(isTRUE(collapse), "-", collapse))
+  }
+  
+  if (n < minimum) {
+    warning_("Introducing NA: ",
+             ifelse(n == 0, "no", paste("only", n)),
+             " results available for `sir_confidence_interval()` (`minimum` = ", minimum, ").",
+             call = FALSE
+    )
+    if (is.character(out)) {
+      return(NA_character_)
+    } else {
+      return(NA_real_)
+    }
+  }
+  
+  out
 }

 #' @rdname proportion
--- a/index.md
+++ b/index.md
@@ -34,6 +34,8 @@ With the help of contributors from all corners of the world, the `AMR` package i

 #### Filtering and selecting data

+One of the most powerful functions of this package, aside from calculating and plotting AMR, is selecting and filtering based on antibiotic columns. This can be done using the so-called [antibiotic class selectors](https://msberends.github.io/AMR/reference/antibiotic_class_selectors.html) that work in base R, `dplyr` and `data.table`:
+
 ```r
 # AMR works great with dplyr, but it's not required or neccesary
 library(AMR)
@@ -41,8 +43,10 @@ library(dplyr)

 example_isolates %>%
  mutate(bacteria = mo_fullname()) %>%
+  # filtering functions for microorganisms:
  filter(mo_is_gram_negative(),
         mo_is_intrinsic_resistant(ab = "cefotax")) %>%
+  # antibiotic selectors:
  select(bacteria,
         aminoglycosides(),
         carbapenems())
@@ -66,13 +70,18 @@ With only having defined a row filter on Gram-negative bacteria with intrinsic r
 A base R equivalent would be:

 ```r
+library(AMR)
 example_isolates$bacteria <- mo_fullname(example_isolates$mo)
 example_isolates[which(mo_is_gram_negative() &
                         mo_is_intrinsic_resistant(ab = "cefotax")),
                 c("bacteria", aminoglycosides(), carbapenems())]
 ```

-This base R snippet will work in any version of R since April 2013 (R-3.0).
+This base R code will work in any version of R since April 2013 (R-3.0). Moreover, this code works identically with the `data.table` package, only by starting with:
+
+```r
+example_isolates <- data.table::as.data.table(example_isolates)
+```

 #### Generating antibiograms

@@ -131,6 +140,25 @@ antibiogram(example_isolates,

 For a manual approach, you can use the `resistance` or `susceptibility()` function:

+```r
+example_isolates %>%
+  # group by ward:
+  group_by(ward) %>%
+  # calculate AMR using resistance() for gentamicin and tobramycin
+  # and get their 95% confidence intervals using sir_confidence_interval():
+  summarise(across(c(GEN, TOB),
+                   list(total_R = resistance,
+                        conf_int = function(x) sir_confidence_interval(x, collapse = "-"))))
+```
+
+|ward       | GEN_total_R|GEN_conf_int | TOB_total_R|TOB_conf_int |
+|:---------:|:----------:|:-----------:|:----------:|:-----------:|
+|Clinical   |   0.229    |0.205-0.254  |   0.315    |0.284-0.347  |
+|ICU        |   0.290    |0.253-0.330  |   0.400    |0.353-0.449  |
+|Outpatient |   0.200    |0.131-0.285  |   0.368    |0.254-0.493  |
+
+Or use [antibiotic class selectors](https://msberends.github.io/AMR/reference/antibiotic_class_selectors.html) to select a series of antibiotic columns:
+
 ```r
 library(AMR)
 library(dplyr)
@@ -138,8 +166,7 @@ library(dplyr)
 out <- example_isolates %>%
  # group by ward:
  group_by(ward) %>%
-  # calculate AMR using resistance(), over all aminoglycosides
-  # and polymyxins:
+  # calculate AMR using resistance(), over all aminoglycosides and polymyxins:
  summarise(across(c(aminoglycosides(), polymyxins()),
            resistance))
 out
--- a/man/antibiotic_class_selectors.Rd
+++ b/man/antibiotic_class_selectors.Rd
@@ -118,10 +118,12 @@ not_intrinsic_resistant(
 (internally) a \link{character} vector of column names, with additional class \code{"ab_selector"}
 }
 \description{
-These functions allow for filtering rows and selecting columns based on antibiotic test results that are of a specific antibiotic class or group, without the need to define the columns or antibiotic abbreviations. In short, if you have a column name that resembles an antimicrobial drug, it will be picked up by any of these functions that matches its pharmaceutical class: "cefazolin", "CZO" and "J01DB04" will all be picked up by \code{\link[=cephalosporins]{cephalosporins()}}.
+These functions allow for filtering rows and selecting columns based on antibiotic test results that are of a specific antibiotic class or group (according to the \link{antibiotics} data set), without the need to define the columns or antibiotic abbreviations.
+
+In short, if you have a column name that resembles an antimicrobial drug, it will be picked up by any of these functions that matches its pharmaceutical class: "cefazolin", "kefzol", "CZO" and "J01DB04" will all be picked up by \code{\link[=cephalosporins]{cephalosporins()}}.
 }
 \details{
-These functions can be used in data set calls for selecting columns and filtering rows. They are heavily inspired by the \link[tidyselect:language]{Tidyverse selection helpers} such as \code{\link[tidyselect:everything]{everything()}}, but also work in base \R and not only in \code{dplyr} verbs. Nonetheless, they are very convenient to use with \code{dplyr} functions such as \code{\link[dplyr:select]{select()}}, \code{\link[dplyr:filter]{filter()}} and \code{\link[dplyr:summarise]{summarise()}}, see \emph{Examples}.
+These functions can be used in data set calls for selecting columns and filtering rows. They work with base \R, the Tidyverse, and \code{data.table}. They are heavily inspired by the \link[tidyselect:language]{Tidyverse selection helpers} such as \code{\link[tidyselect:everything]{everything()}}, but are not limited to \code{dplyr} verbs. Nonetheless, they are very convenient to use with \code{dplyr} functions such as \code{\link[dplyr:select]{select()}}, \code{\link[dplyr:filter]{filter()}} and \code{\link[dplyr:summarise]{summarise()}}, see \emph{Examples}.

 All columns in the data in which these functions are called will be searched for known antibiotic names, abbreviations, brand names, and codes (ATC, EARS-Net, WHO, etc.) according to the \link{antibiotics} data set. This means that a selector such as \code{\link[=aminoglycosides]{aminoglycosides()}} will pick up column names like 'gen', 'genta', 'J01GB03', 'tobra', 'Tobracin', etc.

@@ -174,6 +176,10 @@ All data sets in this \code{AMR} package (about microorganisms, antibiotics, SIR
 # See ?example_isolates.
 example_isolates

+
+# Examples sections below are split into 'base R', 'dplyr', and 'data.table':
+
+
 # base R ------------------------------------------------------------------

 # select columns 'IPM' (imipenem) and 'MEM' (meropenem)
@@ -196,7 +202,7 @@ example_isolates[all(carbapenems()), ]
 # filter with multiple antibiotic selectors using c()
 example_isolates[all(c(carbapenems(), aminoglycosides()) == "R"), ]

-# filter + select in one go: get penicillins in carbapenems-resistant strains
+# filter + select in one go: get penicillins in carbapenem-resistant strains
 example_isolates[any(carbapenems() == "R"), penicillins()]

 # You can combine selectors with '&' to be more specific. For example,
@@ -206,13 +212,19 @@ example_isolates[any(carbapenems() == "R"), penicillins()]
 # and erythromycin is not a penicillin:
 example_isolates[, penicillins() & administrable_per_os()]

-# ab_selector() applies a filter in the `antibiotics` data set and is thus very
-# flexible. For instance, to select antibiotic columns with an oral DDD of at
-# least 1 gram:
+# ab_selector() applies a filter in the `antibiotics` data set and is thus
+# very flexible. For instance, to select antibiotic columns with an oral DDD
+# of at least 1 gram:
 example_isolates[, ab_selector(oral_ddd > 1 & oral_units == "g")]

-# dplyr -------------------------------------------------------------------
 \donttest{
+# dplyr -------------------------------------------------------------------
+
+if (require("dplyr")) {
+  tibble(kefzol = random_sir(5)) \%>\%
+    select(cephalosporins())
+}
+
 if (require("dplyr")) {
  # get AMR for all aminoglycosides e.g., per ward:
  example_isolates \%>\%
@@ -293,5 +305,34 @@ if (require("dplyr")) {
  z <- example_isolates \%>\% filter(if_all(carbapenems(), ~ .x == "R"))
  identical(x, y) && identical(y, z)
 }
+
+
+# data.table --------------------------------------------------------------
+
+# data.table is supported as well, just use it in the same way as with
+# base R, but add `with = FALSE` if using a single AB selector:
+
+if (require("data.table")) {
+  dt <- as.data.table(example_isolates)
+
+  print(
+    dt[, carbapenems()] # incorrect, returns column *names*
+  )
+  print(
+    dt[, carbapenems(), with = FALSE] # so `with = FALSE` is required
+  )
+  
+  # for multiple selections or AB selectors, `with = FALSE` is not needed:
+  print(
+    dt[, c("mo", aminoglycosides())]
+  )
+  print(
+    dt[, c(carbapenems(), aminoglycosides())]
+  )
+  
+  # row filters are also supported:
+  print(dt[any(carbapenems() == "S"), ])
+  print(dt[any(carbapenems() == "S"), penicillins(), with = FALSE])
+}
 }
 }
--- a/man/proportion.Rd
+++ b/man/proportion.Rd
@@ -29,7 +29,8 @@ sir_confidence_interval(
  as_percent = FALSE,
  only_all_tested = FALSE,
  confidence_level = 0.95,
-  side = "both"
+  side = "both",
+  collapse = FALSE
 )

 proportion_R(..., minimum = 30, as_percent = FALSE, only_all_tested = FALSE)
@@ -77,6 +78,8 @@ sir_df(

 \item{side}{the side of the confidence interval to return. The default is \code{"both"} for a length 2 vector, but can also be (abbreviated as) \code{"min"}/\code{"left"}/\code{"lower"}/\code{"less"} or \code{"max"}/\code{"right"}/\code{"higher"}/\code{"greater"}.}

+\item{collapse}{a \link{logical} to indicate whether the output values should be 'collapsed', i.e. be merged together into one value, or a character value to use for collapsing}
+
 \item{data}{a \link{data.frame} containing columns with class \code{\link{sir}} (see \code{\link[=as.sir]{as.sir()}})}

 \item{translate_ab}{a column name of the \link{antibiotics} data set to translate the antibiotic abbreviations to, using \code{\link[=ab_property]{ab_property()}}}
@@ -172,6 +175,10 @@ sir_confidence_interval(example_isolates$AMX)
 sir_confidence_interval(example_isolates$AMX,
  confidence_level = 0.975
 )
+sir_confidence_interval(example_isolates$AMX,
+  confidence_level = 0.975,
+  collapse = ", "
+)

 # determines \%S+I:
 susceptibility(example_isolates$AMX)