documentation for 'data.table' AB selectors

This commit is contained in:
dr. M.S. (Matthijs) Berends 2023-03-11 16:54:02 +01:00
parent 45e840c02f
commit 7ad8635994
9 changed files with 174 additions and 40 deletions

View File

@ -1,5 +1,5 @@
Package: AMR
Version: 1.8.2.9149
Version: 1.8.2.9150
Date: 2023-03-11
Title: Antimicrobial Resistance Data Analysis
Description: Functions to simplify and standardise antimicrobial resistance (AMR)

View File

@ -1,4 +1,4 @@
# AMR 1.8.2.9149
# AMR 1.8.2.9150
*(this beta version will eventually become v2.0! We're happy to reach a new major milestone soon!)*

View File

@ -934,7 +934,7 @@ get_current_data <- function(arg_name, call) {
}
}
# now go over all underlying environments looking for other dplyr and base R selection environments
# now go over all underlying environments looking for other dplyr, data.table and base R selection environments
with_generic <- vapply(FUN.VALUE = logical(1), frms, function(e) !is.null(e$`.Generic`))
for (env in frms[which(with_generic)]) {
if (valid_df(env$`.data`)) {
@ -945,6 +945,7 @@ get_current_data <- function(arg_name, call) {
return(env$xx)
} else if (valid_df(env$x)) {
# an element `x` will be in the environment for only cols in base R, e.g. `example_isolates[, carbapenems()]`
# this element will also be present in data.table environments where there's a .Generic available
return(env$x)
}
}

View File

@ -29,14 +29,16 @@
#' Antibiotic Selectors
#'
#' These functions allow for filtering rows and selecting columns based on antibiotic test results that are of a specific antibiotic class or group, without the need to define the columns or antibiotic abbreviations. In short, if you have a column name that resembles an antimicrobial drug, it will be picked up by any of these functions that matches its pharmaceutical class: "cefazolin", "CZO" and "J01DB04" will all be picked up by [cephalosporins()].
#' @description These functions allow for filtering rows and selecting columns based on antibiotic test results that are of a specific antibiotic class or group (according to the [antibiotics] data set), without the need to define the columns or antibiotic abbreviations.
#'
#' In short, if you have a column name that resembles an antimicrobial drug, it will be picked up by any of these functions that matches its pharmaceutical class: "cefazolin", "kefzol", "CZO" and "J01DB04" will all be picked up by [cephalosporins()].
#' @param ab_class an antimicrobial class or a part of it, such as `"carba"` and `"carbapenems"`. The columns `group`, `atc_group1` and `atc_group2` of the [antibiotics] data set will be searched (case-insensitive) for this value.
#' @param filter an [expression] to be evaluated in the [antibiotics] data set, such as `name %like% "trim"`
#' @param only_sir_columns a [logical] to indicate whether only columns of class `sir` must be selected (default is `FALSE`), see [as.sir()]
#' @param only_treatable a [logical] to indicate whether antimicrobial drugs should be excluded that are only for laboratory tests (default is `TRUE`), such as gentamicin-high (`GEH`) and imipenem/EDTA (`IPE`)
#' @param ... ignored, only in place to allow future extensions
#' @details
#' These functions can be used in data set calls for selecting columns and filtering rows. They are heavily inspired by the [Tidyverse selection helpers][tidyselect::language] such as [`everything()`][tidyselect::everything()], but also work in base \R and not only in `dplyr` verbs. Nonetheless, they are very convenient to use with `dplyr` functions such as [`select()`][dplyr::select()], [`filter()`][dplyr::filter()] and [`summarise()`][dplyr::summarise()], see *Examples*.
#' These functions can be used in data set calls for selecting columns and filtering rows. They work with base \R, the Tidyverse, and `data.table`. They are heavily inspired by the [Tidyverse selection helpers][tidyselect::language] such as [`everything()`][tidyselect::everything()], but are not limited to `dplyr` verbs. Nonetheless, they are very convenient to use with `dplyr` functions such as [`select()`][dplyr::select()], [`filter()`][dplyr::filter()] and [`summarise()`][dplyr::summarise()], see *Examples*.
#'
#' All columns in the data in which these functions are called will be searched for known antibiotic names, abbreviations, brand names, and codes (ATC, EARS-Net, WHO, etc.) according to the [antibiotics] data set. This means that a selector such as [aminoglycosides()] will pick up column names like 'gen', 'genta', 'J01GB03', 'tobra', 'Tobracin', etc.
#'
@ -53,6 +55,10 @@
#' # `example_isolates` is a data set available in the AMR package.
#' # See ?example_isolates.
#' example_isolates
#'
#'
#' # Examples sections below are split into 'base R', 'dplyr', and 'data.table':
#'
#'
#' # base R ------------------------------------------------------------------
#'
@ -76,7 +82,7 @@
#' # filter with multiple antibiotic selectors using c()
#' example_isolates[all(c(carbapenems(), aminoglycosides()) == "R"), ]
#'
#' # filter + select in one go: get penicillins in carbapenems-resistant strains
#' # filter + select in one go: get penicillins in carbapenem-resistant strains
#' example_isolates[any(carbapenems() == "R"), penicillins()]
#'
#' # You can combine selectors with '&' to be more specific. For example,
@ -86,13 +92,19 @@
#' # and erythromycin is not a penicillin:
#' example_isolates[, penicillins() & administrable_per_os()]
#'
#' # ab_selector() applies a filter in the `antibiotics` data set and is thus very
#' # flexible. For instance, to select antibiotic columns with an oral DDD of at
#' # least 1 gram:
#' # ab_selector() applies a filter in the `antibiotics` data set and is thus
#' # very flexible. For instance, to select antibiotic columns with an oral DDD
#' # of at least 1 gram:
#' example_isolates[, ab_selector(oral_ddd > 1 & oral_units == "g")]
#'
#' # dplyr -------------------------------------------------------------------
#'
#' \donttest{
#' # dplyr -------------------------------------------------------------------
#'
#' if (require("dplyr")) {
#' tibble(kefzol = random_sir(5)) %>%
#' select(cephalosporins())
#' }
#'
#' if (require("dplyr")) {
#' # get AMR for all aminoglycosides e.g., per ward:
#' example_isolates %>%
@ -173,6 +185,35 @@
#' z <- example_isolates %>% filter(if_all(carbapenems(), ~ .x == "R"))
#' identical(x, y) && identical(y, z)
#' }
#'
#'
#' # data.table --------------------------------------------------------------
#'
#' # data.table is supported as well, just use it in the same way as with
#' # base R, but add `with = FALSE` if using a single AB selector:
#'
#' if (require("data.table")) {
#' dt <- as.data.table(example_isolates)
#'
#' print(
#' dt[, carbapenems()] # incorrect, returns column *names*
#' )
#' print(
#' dt[, carbapenems(), with = FALSE] # so `with = FALSE` is required
#' )
#'
#' # for multiple selections or AB selectors, `with = FALSE` is not needed:
#' print(
#' dt[, c("mo", aminoglycosides())]
#' )
#' print(
#' dt[, c(carbapenems(), aminoglycosides())]
#' )
#'
#' # row filters are also supported:
#' print(dt[any(carbapenems() == "S"), ])
#' print(dt[any(carbapenems() == "S"), penicillins(), with = FALSE])
#' }
#' }
ab_class <- function(ab_class,
only_sir_columns = FALSE,

View File

@ -214,7 +214,7 @@ is_new_episode <- function(x, episode_days = NULL, case_free_days = NULL, ...) {
}
exec_episode <- function(x, episode_days, case_free_days, ...) {
stop_if_not(is.null(episode_days) || is.null(case_free_days),
stop_ifnot(is.null(episode_days) || is.null(case_free_days),
"either argument `episode_days` or argument `case_free_days` must be set.",
call = -2
)

View File

@ -43,6 +43,7 @@
#' @param ab_result antibiotic results to test against, must be one or more values of "S", "I", or "R"
#' @param confidence_level the confidence level for the returned confidence interval. For the calculation, the number of S or SI isolates, and R isolates are compared with the total number of available isolates with R, S, or I by using [binom.test()], i.e., the Clopper-Pearson method.
#' @param side the side of the confidence interval to return. The default is `"both"` for a length 2 vector, but can also be (abbreviated as) `"min"`/`"left"`/`"lower"`/`"less"` or `"max"`/`"right"`/`"higher"`/`"greater"`.
#' @param collapse a [logical] to indicate whether the output values should be 'collapsed', i.e. be merged together into one value, or a character value to use for collapsing
#' @inheritSection as.sir Interpretation of SIR
#' @details
#' The function [resistance()] is equal to the function [proportion_R()]. The function [susceptibility()] is equal to the function [proportion_SI()].
@ -112,6 +113,10 @@
#' sir_confidence_interval(example_isolates$AMX,
#' confidence_level = 0.975
#' )
#' sir_confidence_interval(example_isolates$AMX,
#' confidence_level = 0.975,
#' collapse = ", "
#' )
#'
#' # determines %S+I:
#' susceptibility(example_isolates$AMX)
@ -260,10 +265,16 @@ sir_confidence_interval <- function(...,
as_percent = FALSE,
only_all_tested = FALSE,
confidence_level = 0.95,
side = "both") {
side = "both",
collapse = FALSE) {
meet_criteria(ab_result, allow_class = c("character", "sir"), has_length = c(1, 2, 3), is_in = c("S", "I", "R"))
meet_criteria(minimum, allow_class = c("numeric", "integer"), has_length = 1, is_positive_or_zero = TRUE, is_finite = TRUE)
meet_criteria(as_percent, allow_class = "logical", has_length = 1)
meet_criteria(only_all_tested, allow_class = "logical", has_length = 1)
meet_criteria(confidence_level, allow_class = "numeric", is_positive = TRUE, has_length = 1)
meet_criteria(side, allow_class = "character", has_length = 1, is_in = c("both", "b", "left", "l", "lower", "lowest", "less", "min", "right", "r", "higher", "highest", "greater", "g", "max"))
meet_criteria(collapse, allow_class = c("logical", "character"), has_length = 1)
x <- tryCatch(
sir_calc(...,
ab_result = ab_result,
@ -281,19 +292,7 @@ sir_confidence_interval <- function(...,
error = function(e) stop_(gsub("in sir_calc(): ", "", e$message, fixed = TRUE), call = -5)
)
if (n < minimum) {
warning_("Introducing NA: ",
ifelse(n == 0, "no", paste("only", n)),
" results available for `sir_confidence_interval()` (`minimum` = ", minimum, ").",
call = FALSE
)
if (as_percent == TRUE) {
return(NA_character_)
} else {
return(NA_real_)
}
}
# this applies the Clopper-Pearson method
out <- stats::binom.test(x = x, n = n, conf.level = confidence_level)$conf.int
out <- set_clean_class(out, "double")
@ -302,11 +301,29 @@ sir_confidence_interval <- function(...,
} else if (side %in% c("right", "r", "higher", "highest", "greater", "g", "max")) {
out <- out[2]
}
if (as_percent == TRUE) {
percentage(out, digits = 1)
if (isTRUE(as_percent)) {
out <- percentage(out, digits = 1)
} else {
out
out <- round(out, digits = 3)
}
if (!isFALSE(collapse) && length(out) > 1) {
out <- paste(out, collapse = ifelse(isTRUE(collapse), "-", collapse))
}
if (n < minimum) {
warning_("Introducing NA: ",
ifelse(n == 0, "no", paste("only", n)),
" results available for `sir_confidence_interval()` (`minimum` = ", minimum, ").",
call = FALSE
)
if (is.character(out)) {
return(NA_character_)
} else {
return(NA_real_)
}
}
out
}
#' @rdname proportion

View File

@ -34,6 +34,8 @@ With the help of contributors from all corners of the world, the `AMR` package i
#### Filtering and selecting data
One of the most powerful functions of this package, aside from calculating and plotting AMR, is selecting and filtering based on antibiotic columns. This can be done using the so-called [antibiotic class selectors](https://msberends.github.io/AMR/reference/antibiotic_class_selectors.html) that work in base R, `dplyr` and `data.table`:
```r
# AMR works great with dplyr, but it's not required or neccesary
library(AMR)
@ -41,8 +43,10 @@ library(dplyr)
example_isolates %>%
mutate(bacteria = mo_fullname()) %>%
# filtering functions for microorganisms:
filter(mo_is_gram_negative(),
mo_is_intrinsic_resistant(ab = "cefotax")) %>%
# antibiotic selectors:
select(bacteria,
aminoglycosides(),
carbapenems())
@ -66,13 +70,18 @@ With only having defined a row filter on Gram-negative bacteria with intrinsic r
A base R equivalent would be:
```r
library(AMR)
example_isolates$bacteria <- mo_fullname(example_isolates$mo)
example_isolates[which(mo_is_gram_negative() &
mo_is_intrinsic_resistant(ab = "cefotax")),
c("bacteria", aminoglycosides(), carbapenems())]
```
This base R snippet will work in any version of R since April 2013 (R-3.0).
This base R code will work in any version of R since April 2013 (R-3.0). Moreover, this code works identically with the `data.table` package, only by starting with:
```r
example_isolates <- data.table::as.data.table(example_isolates)
```
#### Generating antibiograms
@ -131,6 +140,25 @@ antibiogram(example_isolates,
For a manual approach, you can use the `resistance` or `susceptibility()` function:
```r
example_isolates %>%
# group by ward:
group_by(ward) %>%
# calculate AMR using resistance() for gentamicin and tobramycin
# and get their 95% confidence intervals using sir_confidence_interval():
summarise(across(c(GEN, TOB),
list(total_R = resistance,
conf_int = function(x) sir_confidence_interval(x, collapse = "-"))))
```
|ward | GEN_total_R|GEN_conf_int | TOB_total_R|TOB_conf_int |
|:---------:|:----------:|:-----------:|:----------:|:-----------:|
|Clinical | 0.229 |0.205-0.254 | 0.315 |0.284-0.347 |
|ICU | 0.290 |0.253-0.330 | 0.400 |0.353-0.449 |
|Outpatient | 0.200 |0.131-0.285 | 0.368 |0.254-0.493 |
Or use [antibiotic class selectors](https://msberends.github.io/AMR/reference/antibiotic_class_selectors.html) to select a series of antibiotic columns:
```r
library(AMR)
library(dplyr)
@ -138,8 +166,7 @@ library(dplyr)
out <- example_isolates %>%
# group by ward:
group_by(ward) %>%
# calculate AMR using resistance(), over all aminoglycosides
# and polymyxins:
# calculate AMR using resistance(), over all aminoglycosides and polymyxins:
summarise(across(c(aminoglycosides(), polymyxins()),
resistance))
out

View File

@ -118,10 +118,12 @@ not_intrinsic_resistant(
(internally) a \link{character} vector of column names, with additional class \code{"ab_selector"}
}
\description{
These functions allow for filtering rows and selecting columns based on antibiotic test results that are of a specific antibiotic class or group, without the need to define the columns or antibiotic abbreviations. In short, if you have a column name that resembles an antimicrobial drug, it will be picked up by any of these functions that matches its pharmaceutical class: "cefazolin", "CZO" and "J01DB04" will all be picked up by \code{\link[=cephalosporins]{cephalosporins()}}.
These functions allow for filtering rows and selecting columns based on antibiotic test results that are of a specific antibiotic class or group (according to the \link{antibiotics} data set), without the need to define the columns or antibiotic abbreviations.
In short, if you have a column name that resembles an antimicrobial drug, it will be picked up by any of these functions that matches its pharmaceutical class: "cefazolin", "kefzol", "CZO" and "J01DB04" will all be picked up by \code{\link[=cephalosporins]{cephalosporins()}}.
}
\details{
These functions can be used in data set calls for selecting columns and filtering rows. They are heavily inspired by the \link[tidyselect:language]{Tidyverse selection helpers} such as \code{\link[tidyselect:everything]{everything()}}, but also work in base \R and not only in \code{dplyr} verbs. Nonetheless, they are very convenient to use with \code{dplyr} functions such as \code{\link[dplyr:select]{select()}}, \code{\link[dplyr:filter]{filter()}} and \code{\link[dplyr:summarise]{summarise()}}, see \emph{Examples}.
These functions can be used in data set calls for selecting columns and filtering rows. They work with base \R, the Tidyverse, and \code{data.table}. They are heavily inspired by the \link[tidyselect:language]{Tidyverse selection helpers} such as \code{\link[tidyselect:everything]{everything()}}, but are not limited to \code{dplyr} verbs. Nonetheless, they are very convenient to use with \code{dplyr} functions such as \code{\link[dplyr:select]{select()}}, \code{\link[dplyr:filter]{filter()}} and \code{\link[dplyr:summarise]{summarise()}}, see \emph{Examples}.
All columns in the data in which these functions are called will be searched for known antibiotic names, abbreviations, brand names, and codes (ATC, EARS-Net, WHO, etc.) according to the \link{antibiotics} data set. This means that a selector such as \code{\link[=aminoglycosides]{aminoglycosides()}} will pick up column names like 'gen', 'genta', 'J01GB03', 'tobra', 'Tobracin', etc.
@ -174,6 +176,10 @@ All data sets in this \code{AMR} package (about microorganisms, antibiotics, SIR
# See ?example_isolates.
example_isolates
# Examples sections below are split into 'base R', 'dplyr', and 'data.table':
# base R ------------------------------------------------------------------
# select columns 'IPM' (imipenem) and 'MEM' (meropenem)
@ -196,7 +202,7 @@ example_isolates[all(carbapenems()), ]
# filter with multiple antibiotic selectors using c()
example_isolates[all(c(carbapenems(), aminoglycosides()) == "R"), ]
# filter + select in one go: get penicillins in carbapenems-resistant strains
# filter + select in one go: get penicillins in carbapenem-resistant strains
example_isolates[any(carbapenems() == "R"), penicillins()]
# You can combine selectors with '&' to be more specific. For example,
@ -206,13 +212,19 @@ example_isolates[any(carbapenems() == "R"), penicillins()]
# and erythromycin is not a penicillin:
example_isolates[, penicillins() & administrable_per_os()]
# ab_selector() applies a filter in the `antibiotics` data set and is thus very
# flexible. For instance, to select antibiotic columns with an oral DDD of at
# least 1 gram:
# ab_selector() applies a filter in the `antibiotics` data set and is thus
# very flexible. For instance, to select antibiotic columns with an oral DDD
# of at least 1 gram:
example_isolates[, ab_selector(oral_ddd > 1 & oral_units == "g")]
# dplyr -------------------------------------------------------------------
\donttest{
# dplyr -------------------------------------------------------------------
if (require("dplyr")) {
tibble(kefzol = random_sir(5)) \%>\%
select(cephalosporins())
}
if (require("dplyr")) {
# get AMR for all aminoglycosides e.g., per ward:
example_isolates \%>\%
@ -293,5 +305,34 @@ if (require("dplyr")) {
z <- example_isolates \%>\% filter(if_all(carbapenems(), ~ .x == "R"))
identical(x, y) && identical(y, z)
}
# data.table --------------------------------------------------------------
# data.table is supported as well, just use it in the same way as with
# base R, but add `with = FALSE` if using a single AB selector:
if (require("data.table")) {
dt <- as.data.table(example_isolates)
print(
dt[, carbapenems()] # incorrect, returns column *names*
)
print(
dt[, carbapenems(), with = FALSE] # so `with = FALSE` is required
)
# for multiple selections or AB selectors, `with = FALSE` is not needed:
print(
dt[, c("mo", aminoglycosides())]
)
print(
dt[, c(carbapenems(), aminoglycosides())]
)
# row filters are also supported:
print(dt[any(carbapenems() == "S"), ])
print(dt[any(carbapenems() == "S"), penicillins(), with = FALSE])
}
}
}

View File

@ -29,7 +29,8 @@ sir_confidence_interval(
as_percent = FALSE,
only_all_tested = FALSE,
confidence_level = 0.95,
side = "both"
side = "both",
collapse = FALSE
)
proportion_R(..., minimum = 30, as_percent = FALSE, only_all_tested = FALSE)
@ -77,6 +78,8 @@ sir_df(
\item{side}{the side of the confidence interval to return. The default is \code{"both"} for a length 2 vector, but can also be (abbreviated as) \code{"min"}/\code{"left"}/\code{"lower"}/\code{"less"} or \code{"max"}/\code{"right"}/\code{"higher"}/\code{"greater"}.}
\item{collapse}{a \link{logical} to indicate whether the output values should be 'collapsed', i.e. be merged together into one value, or a character value to use for collapsing}
\item{data}{a \link{data.frame} containing columns with class \code{\link{sir}} (see \code{\link[=as.sir]{as.sir()}})}
\item{translate_ab}{a column name of the \link{antibiotics} data set to translate the antibiotic abbreviations to, using \code{\link[=ab_property]{ab_property()}}}
@ -172,6 +175,10 @@ sir_confidence_interval(example_isolates$AMX)
sir_confidence_interval(example_isolates$AMX,
confidence_level = 0.975
)
sir_confidence_interval(example_isolates$AMX,
confidence_level = 0.975,
collapse = ", "
)
# determines \%S+I:
susceptibility(example_isolates$AMX)