From 7ad8635994d67eb6057bb4db178c27a400c84592 Mon Sep 17 00:00:00 2001 From: "Matthijs S. Berends" Date: Sat, 11 Mar 2023 16:54:02 +0100 Subject: [PATCH] documentation for 'data.table' AB selectors --- DESCRIPTION | 2 +- NEWS.md | 2 +- R/aa_helper_functions.R | 3 +- R/ab_selectors.R | 57 ++++++++++++++++++++++++++----- R/get_episode.R | 2 +- R/proportion.R | 51 ++++++++++++++++++--------- index.md | 33 ++++++++++++++++-- man/antibiotic_class_selectors.Rd | 55 +++++++++++++++++++++++++---- man/proportion.Rd | 9 ++++- 9 files changed, 174 insertions(+), 40 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index ee5db9dc..38b76c80 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: AMR -Version: 1.8.2.9149 +Version: 1.8.2.9150 Date: 2023-03-11 Title: Antimicrobial Resistance Data Analysis Description: Functions to simplify and standardise antimicrobial resistance (AMR) diff --git a/NEWS.md b/NEWS.md index 86a0ac23..bc4cbd68 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# AMR 1.8.2.9149 +# AMR 1.8.2.9150 *(this beta version will eventually become v2.0! We're happy to reach a new major milestone soon!)* diff --git a/R/aa_helper_functions.R b/R/aa_helper_functions.R index 00aa0f4f..be75542b 100755 --- a/R/aa_helper_functions.R +++ b/R/aa_helper_functions.R @@ -934,7 +934,7 @@ get_current_data <- function(arg_name, call) { } } - # now go over all underlying environments looking for other dplyr and base R selection environments + # now go over all underlying environments looking for other dplyr, data.table and base R selection environments with_generic <- vapply(FUN.VALUE = logical(1), frms, function(e) !is.null(e$`.Generic`)) for (env in frms[which(with_generic)]) { if (valid_df(env$`.data`)) { @@ -945,6 +945,7 @@ get_current_data <- function(arg_name, call) { return(env$xx) } else if (valid_df(env$x)) { # an element `x` will be in the environment for only cols in base R, e.g. 
`example_isolates[, carbapenems()]` + # this element will also be present in data.table environments where there's a .Generic available return(env$x) } } diff --git a/R/ab_selectors.R b/R/ab_selectors.R index bc61e7fe..7c042b26 100755 --- a/R/ab_selectors.R +++ b/R/ab_selectors.R @@ -29,14 +29,16 @@ #' Antibiotic Selectors #' -#' These functions allow for filtering rows and selecting columns based on antibiotic test results that are of a specific antibiotic class or group, without the need to define the columns or antibiotic abbreviations. In short, if you have a column name that resembles an antimicrobial drug, it will be picked up by any of these functions that matches its pharmaceutical class: "cefazolin", "CZO" and "J01DB04" will all be picked up by [cephalosporins()]. +#' @description These functions allow for filtering rows and selecting columns based on antibiotic test results that are of a specific antibiotic class or group (according to the [antibiotics] data set), without the need to define the columns or antibiotic abbreviations. +#' +#' In short, if you have a column name that resembles an antimicrobial drug, it will be picked up by any of these functions that matches its pharmaceutical class: "cefazolin", "kefzol", "CZO" and "J01DB04" will all be picked up by [cephalosporins()]. #' @param ab_class an antimicrobial class or a part of it, such as `"carba"` and `"carbapenems"`. The columns `group`, `atc_group1` and `atc_group2` of the [antibiotics] data set will be searched (case-insensitive) for this value. 
#' @param filter an [expression] to be evaluated in the [antibiotics] data set, such as `name %like% "trim"` #' @param only_sir_columns a [logical] to indicate whether only columns of class `sir` must be selected (default is `FALSE`), see [as.sir()] #' @param only_treatable a [logical] to indicate whether antimicrobial drugs should be excluded that are only for laboratory tests (default is `TRUE`), such as gentamicin-high (`GEH`) and imipenem/EDTA (`IPE`) #' @param ... ignored, only in place to allow future extensions #' @details -#' These functions can be used in data set calls for selecting columns and filtering rows. They are heavily inspired by the [Tidyverse selection helpers][tidyselect::language] such as [`everything()`][tidyselect::everything()], but also work in base \R and not only in `dplyr` verbs. Nonetheless, they are very convenient to use with `dplyr` functions such as [`select()`][dplyr::select()], [`filter()`][dplyr::filter()] and [`summarise()`][dplyr::summarise()], see *Examples*. +#' These functions can be used in data set calls for selecting columns and filtering rows. They work with base \R, the Tidyverse, and `data.table`. They are heavily inspired by the [Tidyverse selection helpers][tidyselect::language] such as [`everything()`][tidyselect::everything()], but are not limited to `dplyr` verbs. Nonetheless, they are very convenient to use with `dplyr` functions such as [`select()`][dplyr::select()], [`filter()`][dplyr::filter()] and [`summarise()`][dplyr::summarise()], see *Examples*. #' #' All columns in the data in which these functions are called will be searched for known antibiotic names, abbreviations, brand names, and codes (ATC, EARS-Net, WHO, etc.) according to the [antibiotics] data set. This means that a selector such as [aminoglycosides()] will pick up column names like 'gen', 'genta', 'J01GB03', 'tobra', 'Tobracin', etc. #' @@ -53,6 +55,10 @@ #' # `example_isolates` is a data set available in the AMR package. 
#' # See ?example_isolates. #' example_isolates +#' +#' +#' # Examples sections below are split into 'base R', 'dplyr', and 'data.table': +#' #' #' # base R ------------------------------------------------------------------ #' @@ -76,7 +82,7 @@ #' # filter with multiple antibiotic selectors using c() #' example_isolates[all(c(carbapenems(), aminoglycosides()) == "R"), ] #' -#' # filter + select in one go: get penicillins in carbapenems-resistant strains +#' # filter + select in one go: get penicillins in carbapenem-resistant strains #' example_isolates[any(carbapenems() == "R"), penicillins()] #' #' # You can combine selectors with '&' to be more specific. For example, @@ -86,13 +92,19 @@ #' # and erythromycin is not a penicillin: #' example_isolates[, penicillins() & administrable_per_os()] #' -#' # ab_selector() applies a filter in the `antibiotics` data set and is thus very -#' # flexible. For instance, to select antibiotic columns with an oral DDD of at -#' # least 1 gram: +#' # ab_selector() applies a filter in the `antibiotics` data set and is thus +#' # very flexible. 
For instance, to select antibiotic columns with an oral DDD +#' # of more than 1 gram: #' example_isolates[, ab_selector(oral_ddd > 1 & oral_units == "g")] -#' -#' # dplyr ------------------------------------------------------------------- +#' #' \donttest{ +#' # dplyr ------------------------------------------------------------------- +#' +#' if (require("dplyr")) { +#' tibble(kefzol = random_sir(5)) %>% +#' select(cephalosporins()) +#' } +#' #' if (require("dplyr")) { #' # get AMR for all aminoglycosides e.g., per ward: #' example_isolates %>% @@ -173,6 +185,35 @@ #' z <- example_isolates %>% filter(if_all(carbapenems(), ~ .x == "R")) #' identical(x, y) && identical(y, z) #' } +#' +#' +#' # data.table -------------------------------------------------------------- +#' +#' # data.table is supported as well, just use it in the same way as with +#' # base R, but add `with = FALSE` if using a single AB selector: +#' +#' if (require("data.table")) { +#' dt <- as.data.table(example_isolates) +#' +#' print( +#' dt[, carbapenems()] # incorrect, returns column *names* +#' ) +#' print( +#' dt[, carbapenems(), with = FALSE] # so `with = FALSE` is required +#' ) +#' +#' # for multiple selections or AB selectors, `with = FALSE` is not needed: +#' print( +#' dt[, c("mo", aminoglycosides())] +#' ) +#' print( +#' dt[, c(carbapenems(), aminoglycosides())] +#' ) +#' +#' # row filters are also supported: +#' print(dt[any(carbapenems() == "S"), ]) +#' print(dt[any(carbapenems() == "S"), penicillins(), with = FALSE]) +#' } #' } ab_class <- function(ab_class, only_sir_columns = FALSE, diff --git a/R/get_episode.R b/R/get_episode.R index 584e7c78..a0c807b8 100755 --- a/R/get_episode.R +++ b/R/get_episode.R @@ -214,7 +214,7 @@ is_new_episode <- function(x, episode_days = NULL, case_free_days = NULL, ...) { } exec_episode <- function(x, episode_days, case_free_days, ...) 
{ - stop_if_not(is.null(episode_days) || is.null(case_free_days), + stop_ifnot(is.null(episode_days) || is.null(case_free_days), "either argument `episode_days` or argument `case_free_days` must be set.", call = -2 ) diff --git a/R/proportion.R b/R/proportion.R index 6958a3eb..179bb859 100755 --- a/R/proportion.R +++ b/R/proportion.R @@ -43,6 +43,7 @@ #' @param ab_result antibiotic results to test against, must be one or more values of "S", "I", or "R" #' @param confidence_level the confidence level for the returned confidence interval. For the calculation, the number of S or SI isolates, and R isolates are compared with the total number of available isolates with R, S, or I by using [binom.test()], i.e., the Clopper-Pearson method. #' @param side the side of the confidence interval to return. The default is `"both"` for a length 2 vector, but can also be (abbreviated as) `"min"`/`"left"`/`"lower"`/`"less"` or `"max"`/`"right"`/`"higher"`/`"greater"`. +#' @param collapse a [logical] to indicate whether the output values should be 'collapsed', i.e. be merged together into one value, or a character value to use for collapsing #' @inheritSection as.sir Interpretation of SIR #' @details #' The function [resistance()] is equal to the function [proportion_R()]. The function [susceptibility()] is equal to the function [proportion_SI()]. 
@@ -112,6 +113,10 @@ #' sir_confidence_interval(example_isolates$AMX, #' confidence_level = 0.975 #' ) +#' sir_confidence_interval(example_isolates$AMX, +#' confidence_level = 0.975, +#' collapse = ", " +#' ) #' #' # determines %S+I: #' susceptibility(example_isolates$AMX) @@ -260,10 +265,16 @@ sir_confidence_interval <- function(..., as_percent = FALSE, only_all_tested = FALSE, confidence_level = 0.95, - side = "both") { + side = "both", + collapse = FALSE) { meet_criteria(ab_result, allow_class = c("character", "sir"), has_length = c(1, 2, 3), is_in = c("S", "I", "R")) + meet_criteria(minimum, allow_class = c("numeric", "integer"), has_length = 1, is_positive_or_zero = TRUE, is_finite = TRUE) + meet_criteria(as_percent, allow_class = "logical", has_length = 1) + meet_criteria(only_all_tested, allow_class = "logical", has_length = 1) meet_criteria(confidence_level, allow_class = "numeric", is_positive = TRUE, has_length = 1) meet_criteria(side, allow_class = "character", has_length = 1, is_in = c("both", "b", "left", "l", "lower", "lowest", "less", "min", "right", "r", "higher", "highest", "greater", "g", "max")) + meet_criteria(collapse, allow_class = c("logical", "character"), has_length = 1) + x <- tryCatch( sir_calc(..., ab_result = ab_result, @@ -281,19 +292,7 @@ sir_confidence_interval <- function(..., error = function(e) stop_(gsub("in sir_calc(): ", "", e$message, fixed = TRUE), call = -5) ) - if (n < minimum) { - warning_("Introducing NA: ", - ifelse(n == 0, "no", paste("only", n)), - " results available for `sir_confidence_interval()` (`minimum` = ", minimum, ").", - call = FALSE - ) - if (as_percent == TRUE) { - return(NA_character_) - } else { - return(NA_real_) - } - } - + # this applies the Clopper-Pearson method out <- stats::binom.test(x = x, n = n, conf.level = confidence_level)$conf.int out <- set_clean_class(out, "double") @@ -302,11 +301,29 @@ sir_confidence_interval <- function(..., } else if (side %in% c("right", "r", "higher", "highest", 
"greater", "g", "max")) { out <- out[2] } - if (as_percent == TRUE) { - percentage(out, digits = 1) + if (isTRUE(as_percent)) { + out <- percentage(out, digits = 1) } else { - out + out <- round(out, digits = 3) } + if (!isFALSE(collapse) && length(out) > 1) { + out <- paste(out, collapse = ifelse(isTRUE(collapse), "-", collapse)) + } + + if (n < minimum) { + warning_("Introducing NA: ", + ifelse(n == 0, "no", paste("only", n)), + " results available for `sir_confidence_interval()` (`minimum` = ", minimum, ").", + call = FALSE + ) + if (is.character(out)) { + return(NA_character_) + } else { + return(NA_real_) + } + } + + out } #' @rdname proportion diff --git a/index.md b/index.md index 794d4777..f3bf14bd 100644 --- a/index.md +++ b/index.md @@ -34,6 +34,8 @@ With the help of contributors from all corners of the world, the `AMR` package i #### Filtering and selecting data +One of the most powerful functions of this package, aside from calculating and plotting AMR, is selecting and filtering based on antibiotic columns. 
This can be done using the so-called [antibiotic class selectors](https://msberends.github.io/AMR/reference/antibiotic_class_selectors.html) that work in base R, `dplyr` and `data.table`: + ```r # AMR works great with dplyr, but it's not required or neccesary library(AMR) @@ -41,8 +43,10 @@ library(dplyr) example_isolates %>% mutate(bacteria = mo_fullname()) %>% + # filtering functions for microorganisms: filter(mo_is_gram_negative(), mo_is_intrinsic_resistant(ab = "cefotax")) %>% + # antibiotic selectors: select(bacteria, aminoglycosides(), carbapenems()) @@ -66,13 +70,18 @@ With only having defined a row filter on Gram-negative bacteria with intrinsic r A base R equivalent would be: ```r +library(AMR) example_isolates$bacteria <- mo_fullname(example_isolates$mo) example_isolates[which(mo_is_gram_negative() & mo_is_intrinsic_resistant(ab = "cefotax")), c("bacteria", aminoglycosides(), carbapenems())] ``` -This base R snippet will work in any version of R since April 2013 (R-3.0). +This base R code will work in any version of R since April 2013 (R-3.0). 
Moreover, this code works identically with the `data.table` package, only by starting with: + +```r +example_isolates <- data.table::as.data.table(example_isolates) +``` #### Generating antibiograms @@ -131,6 +140,25 @@ antibiogram(example_isolates, For a manual approach, you can use the `resistance` or `susceptibility()` function: +```r +example_isolates %>% + # group by ward: + group_by(ward) %>% + # calculate AMR using resistance() for gentamicin and tobramycin + # and get their 95% confidence intervals using sir_confidence_interval(): + summarise(across(c(GEN, TOB), + list(total_R = resistance, + conf_int = function(x) sir_confidence_interval(x, collapse = "-")))) +``` + +|ward | GEN_total_R|GEN_conf_int | TOB_total_R|TOB_conf_int | +|:---------:|:----------:|:-----------:|:----------:|:-----------:| +|Clinical | 0.229 |0.205-0.254 | 0.315 |0.284-0.347 | +|ICU | 0.290 |0.253-0.330 | 0.400 |0.353-0.449 | +|Outpatient | 0.200 |0.131-0.285 | 0.368 |0.254-0.493 | + +Or use [antibiotic class selectors](https://msberends.github.io/AMR/reference/antibiotic_class_selectors.html) to select a series of antibiotic columns: + ```r library(AMR) library(dplyr) @@ -138,8 +166,7 @@ library(dplyr) out <- example_isolates %>% # group by ward: group_by(ward) %>% - # calculate AMR using resistance(), over all aminoglycosides - # and polymyxins: + # calculate AMR using resistance(), over all aminoglycosides and polymyxins: summarise(across(c(aminoglycosides(), polymyxins()), resistance)) out diff --git a/man/antibiotic_class_selectors.Rd b/man/antibiotic_class_selectors.Rd index e80177c4..b34605d2 100644 --- a/man/antibiotic_class_selectors.Rd +++ b/man/antibiotic_class_selectors.Rd @@ -118,10 +118,12 @@ not_intrinsic_resistant( (internally) a \link{character} vector of column names, with additional class \code{"ab_selector"} } \description{ -These functions allow for filtering rows and selecting columns based on antibiotic test results that are of a specific antibiotic class or 
group, without the need to define the columns or antibiotic abbreviations. In short, if you have a column name that resembles an antimicrobial drug, it will be picked up by any of these functions that matches its pharmaceutical class: "cefazolin", "CZO" and "J01DB04" will all be picked up by \code{\link[=cephalosporins]{cephalosporins()}}. +These functions allow for filtering rows and selecting columns based on antibiotic test results that are of a specific antibiotic class or group (according to the \link{antibiotics} data set), without the need to define the columns or antibiotic abbreviations. + +In short, if you have a column name that resembles an antimicrobial drug, it will be picked up by any of these functions that matches its pharmaceutical class: "cefazolin", "kefzol", "CZO" and "J01DB04" will all be picked up by \code{\link[=cephalosporins]{cephalosporins()}}. } \details{ -These functions can be used in data set calls for selecting columns and filtering rows. They are heavily inspired by the \link[tidyselect:language]{Tidyverse selection helpers} such as \code{\link[tidyselect:everything]{everything()}}, but also work in base \R and not only in \code{dplyr} verbs. Nonetheless, they are very convenient to use with \code{dplyr} functions such as \code{\link[dplyr:select]{select()}}, \code{\link[dplyr:filter]{filter()}} and \code{\link[dplyr:summarise]{summarise()}}, see \emph{Examples}. +These functions can be used in data set calls for selecting columns and filtering rows. They work with base \R, the Tidyverse, and \code{data.table}. They are heavily inspired by the \link[tidyselect:language]{Tidyverse selection helpers} such as \code{\link[tidyselect:everything]{everything()}}, but are not limited to \code{dplyr} verbs. Nonetheless, they are very convenient to use with \code{dplyr} functions such as \code{\link[dplyr:select]{select()}}, \code{\link[dplyr:filter]{filter()}} and \code{\link[dplyr:summarise]{summarise()}}, see \emph{Examples}. 
All columns in the data in which these functions are called will be searched for known antibiotic names, abbreviations, brand names, and codes (ATC, EARS-Net, WHO, etc.) according to the \link{antibiotics} data set. This means that a selector such as \code{\link[=aminoglycosides]{aminoglycosides()}} will pick up column names like 'gen', 'genta', 'J01GB03', 'tobra', 'Tobracin', etc. @@ -174,6 +176,10 @@ All data sets in this \code{AMR} package (about microorganisms, antibiotics, SIR # See ?example_isolates. example_isolates + +# Examples sections below are split into 'base R', 'dplyr', and 'data.table': + + # base R ------------------------------------------------------------------ # select columns 'IPM' (imipenem) and 'MEM' (meropenem) @@ -196,7 +202,7 @@ example_isolates[all(carbapenems()), ] # filter with multiple antibiotic selectors using c() example_isolates[all(c(carbapenems(), aminoglycosides()) == "R"), ] -# filter + select in one go: get penicillins in carbapenems-resistant strains +# filter + select in one go: get penicillins in carbapenem-resistant strains example_isolates[any(carbapenems() == "R"), penicillins()] # You can combine selectors with '&' to be more specific. For example, @@ -206,13 +212,19 @@ example_isolates[any(carbapenems() == "R"), penicillins()] # and erythromycin is not a penicillin: example_isolates[, penicillins() & administrable_per_os()] -# ab_selector() applies a filter in the `antibiotics` data set and is thus very -# flexible. For instance, to select antibiotic columns with an oral DDD of at -# least 1 gram: +# ab_selector() applies a filter in the `antibiotics` data set and is thus +# very flexible. 
For instance, to select antibiotic columns with an oral DDD +# of more than 1 gram: example_isolates[, ab_selector(oral_ddd > 1 & oral_units == "g")] -# dplyr ------------------------------------------------------------------- \donttest{ +# dplyr ------------------------------------------------------------------- + +if (require("dplyr")) { + tibble(kefzol = random_sir(5)) \%>\% + select(cephalosporins()) +} + if (require("dplyr")) { # get AMR for all aminoglycosides e.g., per ward: example_isolates \%>\% @@ -293,5 +305,34 @@ if (require("dplyr")) { z <- example_isolates \%>\% filter(if_all(carbapenems(), ~ .x == "R")) identical(x, y) && identical(y, z) } + + +# data.table -------------------------------------------------------------- + +# data.table is supported as well, just use it in the same way as with +# base R, but add `with = FALSE` if using a single AB selector: + +if (require("data.table")) { + dt <- as.data.table(example_isolates) + + print( + dt[, carbapenems()] # incorrect, returns column *names* + ) + print( + dt[, carbapenems(), with = FALSE] # so `with = FALSE` is required + ) + + # for multiple selections or AB selectors, `with = FALSE` is not needed: + print( + dt[, c("mo", aminoglycosides())] + ) + print( + dt[, c(carbapenems(), aminoglycosides())] + ) + + # row filters are also supported: + print(dt[any(carbapenems() == "S"), ]) + print(dt[any(carbapenems() == "S"), penicillins(), with = FALSE]) +} } } diff --git a/man/proportion.Rd b/man/proportion.Rd index fedd45fb..da59553f 100644 --- a/man/proportion.Rd +++ b/man/proportion.Rd @@ -29,7 +29,8 @@ sir_confidence_interval( as_percent = FALSE, only_all_tested = FALSE, confidence_level = 0.95, - side = "both" + side = "both", + collapse = FALSE ) proportion_R(..., minimum = 30, as_percent = FALSE, only_all_tested = FALSE) @@ -77,6 +78,8 @@ sir_df( \item{side}{the side of the confidence interval to return. 
The default is \code{"both"} for a length 2 vector, but can also be (abbreviated as) \code{"min"}/\code{"left"}/\code{"lower"}/\code{"less"} or \code{"max"}/\code{"right"}/\code{"higher"}/\code{"greater"}.} +\item{collapse}{a \link{logical} to indicate whether the output values should be 'collapsed', i.e. be merged together into one value, or a character value to use for collapsing} + \item{data}{a \link{data.frame} containing columns with class \code{\link{sir}} (see \code{\link[=as.sir]{as.sir()}})} \item{translate_ab}{a column name of the \link{antibiotics} data set to translate the antibiotic abbreviations to, using \code{\link[=ab_property]{ab_property()}}} @@ -172,6 +175,10 @@ sir_confidence_interval(example_isolates$AMX) sir_confidence_interval(example_isolates$AMX, confidence_level = 0.975 ) +sir_confidence_interval(example_isolates$AMX, + confidence_level = 0.975, + collapse = ", " +) # determines \%S+I: susceptibility(example_isolates$AMX)