documentation for 'data.table' AB selectors

2026-02-26 16:38:33 +01:00 · 2023-03-11 16:54:02 +01:00
parent 45e840c02f
commit 7ad8635994
9 changed files with 174 additions and 40 deletions
--- a/R/aa_helper_functions.R
+++ b/R/aa_helper_functions.R
@@ -934,7 +934,7 @@ get_current_data <- function(arg_name, call) {
    }
  }

-  # now go over all underlying environments looking for other dplyr and base R selection environments
+  # now go over all underlying environments looking for other dplyr, data.table and base R selection environments
  with_generic <- vapply(FUN.VALUE = logical(1), frms, function(e) !is.null(e$`.Generic`))
  for (env in frms[which(with_generic)]) {
    if (valid_df(env$`.data`)) {
@@ -945,6 +945,7 @@ get_current_data <- function(arg_name, call) {
      return(env$xx)
    } else if (valid_df(env$x)) {
      # an element `x` will be in the environment for only cols in base R, e.g. `example_isolates[, carbapenems()]`
+      # this element will also be present in data.table environments where there's a .Generic available
      return(env$x)
    }
  }
--- a/R/ab_selectors.R
+++ b/R/ab_selectors.R
@@ -29,14 +29,16 @@

 #' Antibiotic Selectors
 #'
-#' These functions allow for filtering rows and selecting columns based on antibiotic test results that are of a specific antibiotic class or group, without the need to define the columns or antibiotic abbreviations. In short, if you have a column name that resembles an antimicrobial drug, it will be picked up by any of these functions that matches its pharmaceutical class: "cefazolin", "CZO" and "J01DB04" will all be picked up by [cephalosporins()].
+#' @description These functions allow for filtering rows and selecting columns based on antibiotic test results that are of a specific antibiotic class or group (according to the [antibiotics] data set), without the need to define the columns or antibiotic abbreviations.
+#' 
+#' In short, if you have a column name that resembles an antimicrobial drug, it will be picked up by any of these functions that matches its pharmaceutical class: "cefazolin", "kefzol", "CZO" and "J01DB04" will all be picked up by [cephalosporins()].
 #' @param ab_class an antimicrobial class or a part of it, such as `"carba"` and `"carbapenems"`. The columns `group`, `atc_group1` and `atc_group2` of the [antibiotics] data set will be searched (case-insensitive) for this value.
 #' @param filter an [expression] to be evaluated in the [antibiotics] data set, such as `name %like% "trim"`
 #' @param only_sir_columns a [logical] to indicate whether only columns of class `sir` must be selected (default is `FALSE`), see [as.sir()]
 #' @param only_treatable a [logical] to indicate whether antimicrobial drugs should be excluded that are only for laboratory tests (default is `TRUE`), such as gentamicin-high (`GEH`) and imipenem/EDTA (`IPE`)
 #' @param ... ignored, only in place to allow future extensions
 #' @details
-#' These functions can be used in data set calls for selecting columns and filtering rows. They are heavily inspired by the [Tidyverse selection helpers][tidyselect::language] such as [`everything()`][tidyselect::everything()], but also work in base \R and not only in `dplyr` verbs. Nonetheless, they are very convenient to use with `dplyr` functions such as [`select()`][dplyr::select()], [`filter()`][dplyr::filter()] and [`summarise()`][dplyr::summarise()], see *Examples*.
+#' These functions can be used in data set calls for selecting columns and filtering rows. They work with base \R, the Tidyverse, and `data.table`. They are heavily inspired by the [Tidyverse selection helpers][tidyselect::language] such as [`everything()`][tidyselect::everything()], but are not limited to `dplyr` verbs. Nonetheless, they are very convenient to use with `dplyr` functions such as [`select()`][dplyr::select()], [`filter()`][dplyr::filter()] and [`summarise()`][dplyr::summarise()], see *Examples*.
 #'
 #' All columns in the data in which these functions are called will be searched for known antibiotic names, abbreviations, brand names, and codes (ATC, EARS-Net, WHO, etc.) according to the [antibiotics] data set. This means that a selector such as [aminoglycosides()] will pick up column names like 'gen', 'genta', 'J01GB03', 'tobra', 'Tobracin', etc.
 #'
@@ -53,6 +55,10 @@
 #' # `example_isolates` is a data set available in the AMR package.
 #' # See ?example_isolates.
 #' example_isolates
+#' 
+#' 
+#' # Examples sections below are split into 'base R', 'dplyr', and 'data.table':
+#'
 #'
 #' # base R ------------------------------------------------------------------
 #'
@@ -76,7 +82,7 @@
 #' # filter with multiple antibiotic selectors using c()
 #' example_isolates[all(c(carbapenems(), aminoglycosides()) == "R"), ]
 #'
-#' # filter + select in one go: get penicillins in carbapenems-resistant strains
+#' # filter + select in one go: get penicillins in carbapenem-resistant strains
 #' example_isolates[any(carbapenems() == "R"), penicillins()]
 #'
 #' # You can combine selectors with '&' to be more specific. For example,
@@ -86,13 +92,19 @@
 #' # and erythromycin is not a penicillin:
 #' example_isolates[, penicillins() & administrable_per_os()]
 #'
-#' # ab_selector() applies a filter in the `antibiotics` data set and is thus very
-#' # flexible. For instance, to select antibiotic columns with an oral DDD of at
-#' # least 1 gram:
+#' # ab_selector() applies a filter in the `antibiotics` data set and is thus
+#' # very flexible. For instance, to select antibiotic columns with an oral DDD
+#' # of at least 1 gram:
 #' example_isolates[, ab_selector(oral_ddd > 1 & oral_units == "g")]
-#'
-#' # dplyr -------------------------------------------------------------------
+#' 
 #' \donttest{
+#' # dplyr -------------------------------------------------------------------
+#' 
+#' if (require("dplyr")) {
+#'   tibble(kefzol = random_sir(5)) %>%
+#'     select(cephalosporins())
+#' }
+#' 
 #' if (require("dplyr")) {
 #'   # get AMR for all aminoglycosides e.g., per ward:
 #'   example_isolates %>%
@@ -173,6 +185,35 @@
 #'   z <- example_isolates %>% filter(if_all(carbapenems(), ~ .x == "R"))
 #'   identical(x, y) && identical(y, z)
 #' }
+#' 
+#' 
+#' # data.table --------------------------------------------------------------
+#' 
+#' # data.table is supported as well, just use it in the same way as with
+#' # base R, but add `with = FALSE` if using a single AB selector:
+#' 
+#' if (require("data.table")) {
+#'   dt <- as.data.table(example_isolates)
+#' 
+#'   print(
+#'     dt[, carbapenems()] # incorrect, returns column *names*
+#'   )
+#'   print(
+#'     dt[, carbapenems(), with = FALSE] # so `with = FALSE` is required
+#'   )
+#'   
+#'   # for multiple selections or AB selectors, `with = FALSE` is not needed:
+#'   print(
+#'     dt[, c("mo", aminoglycosides())]
+#'   )
+#'   print(
+#'     dt[, c(carbapenems(), aminoglycosides())]
+#'   )
+#'   
+#'   # row filters are also supported:
+#'   print(dt[any(carbapenems() == "S"), ])
+#'   print(dt[any(carbapenems() == "S"), penicillins(), with = FALSE])
+#' }
 #' }
 ab_class <- function(ab_class,
                     only_sir_columns = FALSE,
--- a/R/get_episode.R
+++ b/R/get_episode.R
@@ -214,7 +214,7 @@ is_new_episode <- function(x, episode_days = NULL, case_free_days = NULL, ...) {
 }

 exec_episode <- function(x, episode_days, case_free_days, ...) {
-  stop_if_not(is.null(episode_days) || is.null(case_free_days),
+  stop_ifnot(is.null(episode_days) || is.null(case_free_days),
    "either argument `episode_days` or argument `case_free_days` must be set.",
    call = -2
  )
--- a/R/proportion.R
+++ b/R/proportion.R
@@ -43,6 +43,7 @@
 #' @param ab_result antibiotic results to test against, must be one or more values of "S", "I", or "R"
 #' @param confidence_level the confidence level for the returned confidence interval. For the calculation, the number of S or SI isolates, and R isolates are compared with the total number of available isolates with R, S, or I by using [binom.test()], i.e., the Clopper-Pearson method.
 #' @param side the side of the confidence interval to return. The default is `"both"` for a length 2 vector, but can also be (abbreviated as) `"min"`/`"left"`/`"lower"`/`"less"` or `"max"`/`"right"`/`"higher"`/`"greater"`.
+#' @param collapse a [logical] to indicate whether the output values should be 'collapsed', i.e. be merged together into one value, or a character value to use for collapsing
 #' @inheritSection as.sir Interpretation of SIR
 #' @details
 #' The function [resistance()] is equal to the function [proportion_R()]. The function [susceptibility()] is equal to the function [proportion_SI()].
@@ -112,6 +113,10 @@
 #' sir_confidence_interval(example_isolates$AMX,
 #'   confidence_level = 0.975
 #' )
+#' sir_confidence_interval(example_isolates$AMX,
+#'   confidence_level = 0.975,
+#'   collapse = ", "
+#' )
 #'
 #' # determines %S+I:
 #' susceptibility(example_isolates$AMX)
@@ -260,10 +265,16 @@ sir_confidence_interval <- function(...,
                                    as_percent = FALSE,
                                    only_all_tested = FALSE,
                                    confidence_level = 0.95,
-                                    side = "both") {
+                                    side = "both",
+                                    collapse = FALSE) {
  meet_criteria(ab_result, allow_class = c("character", "sir"), has_length = c(1, 2, 3), is_in = c("S", "I", "R"))
+  meet_criteria(minimum, allow_class = c("numeric", "integer"), has_length = 1, is_positive_or_zero = TRUE, is_finite = TRUE)
+  meet_criteria(as_percent, allow_class = "logical", has_length = 1)
+  meet_criteria(only_all_tested, allow_class = "logical", has_length = 1)
  meet_criteria(confidence_level, allow_class = "numeric", is_positive = TRUE, has_length = 1)
  meet_criteria(side, allow_class = "character", has_length = 1, is_in = c("both", "b", "left", "l", "lower", "lowest", "less", "min", "right", "r", "higher", "highest", "greater", "g", "max"))
+  meet_criteria(collapse, allow_class = c("logical", "character"), has_length = 1)
+  
  x <- tryCatch(
    sir_calc(...,
      ab_result = ab_result,
@@ -281,19 +292,7 @@ sir_confidence_interval <- function(...,
    error = function(e) stop_(gsub("in sir_calc(): ", "", e$message, fixed = TRUE), call = -5)
  )

-  if (n < minimum) {
-    warning_("Introducing NA: ",
-      ifelse(n == 0, "no", paste("only", n)),
-      " results available for `sir_confidence_interval()` (`minimum` = ", minimum, ").",
-      call = FALSE
-    )
-    if (as_percent == TRUE) {
-      return(NA_character_)
-    } else {
-      return(NA_real_)
-    }
-  }
-
+  # this applies the Clopper-Pearson method
  out <- stats::binom.test(x = x, n = n, conf.level = confidence_level)$conf.int
  out <- set_clean_class(out, "double")

@@ -302,11 +301,29 @@ sir_confidence_interval <- function(...,
  } else if (side %in% c("right", "r", "higher", "highest", "greater", "g", "max")) {
    out <- out[2]
  }
-  if (as_percent == TRUE) {
-    percentage(out, digits = 1)
+  if (isTRUE(as_percent)) {
+    out <- percentage(out, digits = 1)
  } else {
-    out
+    out <- round(out, digits = 3)
  }
+  if (!isFALSE(collapse) && length(out) > 1) {
+    out <- paste(out, collapse = ifelse(isTRUE(collapse), "-", collapse))
+  }
+  
+  if (n < minimum) {
+    warning_("Introducing NA: ",
+             ifelse(n == 0, "no", paste("only", n)),
+             " results available for `sir_confidence_interval()` (`minimum` = ", minimum, ").",
+             call = FALSE
+    )
+    if (is.character(out)) {
+      return(NA_character_)
+    } else {
+      return(NA_real_)
+    }
+  }
+  
+  out
 }

 #' @rdname proportion