mirror of
https://github.com/msberends/AMR.git
synced 2025-07-09 21:01:58 +02:00
(v2.1.1.9126) implemented WISCA! Also added top_n_microorganisms()
and fixed Python wrapper
This commit is contained in:
606
R/antibiogram.R
606
R/antibiogram.R
@ -29,21 +29,28 @@
|
||||
|
||||
#' Generate Traditional, Combination, Syndromic, or WISCA Antibiograms
|
||||
#'
|
||||
#' Create detailed antibiograms with options for traditional, combination, syndromic, and Bayesian WISCA methods. Based on the approaches of Klinker *et al.*, Barbieri *et al.*, and the Bayesian WISCA model (Weighted-Incidence Syndromic Combination Antibiogram) by Bielicki *et al.*, this function provides flexible output formats including plots and tables, ideal for integration with R Markdown and Quarto reports.
|
||||
#' @param x a [data.frame] containing at least a column with microorganisms and columns with antibiotic results (class 'sir', see [as.sir()])
|
||||
#' @param antibiotics vector of any antibiotic name or code (will be evaluated with [as.ab()], column name of `x`, or (any combinations of) [antimicrobial selectors][antimicrobial_class_selectors] such as [aminoglycosides()] or [carbapenems()]. For combination antibiograms, this can also be set to values separated with `"+"`, such as "TZP+TOB" or "cipro + genta", given that columns resembling such antibiotics exist in `x`. See *Examples*.
|
||||
#' @description
|
||||
#' Create detailed antibiograms with options for traditional, combination, syndromic, and Bayesian WISCA methods.
|
||||
#'
|
||||
#' Adhering to previously described approaches (see *Source*) and especially the Bayesian WISCA model (Weighted-Incidence Syndromic Combination Antibiogram) by Bielicki *et al.*, these functions provides flexible output formats including plots and tables, ideal for integration with R Markdown and Quarto reports.
|
||||
#' @param x a [data.frame] containing at least a column with microorganisms and columns with antimicrobial results (class 'sir', see [as.sir()])
|
||||
#' @param antibiotics vector of any antimicrobial name or code (will be evaluated with [as.ab()], column name of `x`, or (any combinations of) [antimicrobial selectors][antimicrobial_class_selectors] such as [aminoglycosides()] or [carbapenems()]. For combination antibiograms, this can also be set to values separated with `"+"`, such as "TZP+TOB" or "cipro + genta", given that columns resembling such antimicrobials exist in `x`. See *Examples*.
|
||||
#' @param mo_transform a character to transform microorganism input - must be `"name"`, `"shortname"` (default), `"gramstain"`, or one of the column names of the [microorganisms] data set: `r vector_or(colnames(microorganisms), sort = FALSE, quotes = TRUE)`. Can also be `NULL` to not transform the input.
|
||||
#' @param ab_transform a character to transform antibiotic input - must be one of the column names of the [antibiotics] data set (defaults to `"name"`): `r vector_or(colnames(antibiotics), sort = FALSE, quotes = TRUE)`. Can also be `NULL` to not transform the input.
|
||||
#' @param ab_transform a character to transform antimicrobial input - must be one of the column names of the [antibiotics] data set (defaults to `"name"`): `r vector_or(colnames(antibiotics), sort = FALSE, quotes = TRUE)`. Can also be `NULL` to not transform the input.
|
||||
#' @param syndromic_group a column name of `x`, or values calculated to split rows of `x`, e.g. by using [ifelse()] or [`case_when()`][dplyr::case_when()]. See *Examples*.
|
||||
#' @param add_total_n a [logical] to indicate whether total available numbers per pathogen should be added to the table (default is `TRUE`). This will add the lowest and highest number of available isolate per antibiotic (e.g, if for *E. coli* 200 isolates are available for ciprofloxacin and 150 for amoxicillin, the returned number will be "150-200").
|
||||
#' @param only_all_tested (for combination antibiograms): a [logical] to indicate that isolates must be tested for all antibiotics, see *Details*
|
||||
#' @param add_total_n a [logical] to indicate whether total available numbers per pathogen should be added to the table (default is `TRUE`). This will add the lowest and highest number of available isolates per antimicrobial (e.g, if for *E. coli* 200 isolates are available for ciprofloxacin and 150 for amoxicillin, the returned number will be "150-200").
|
||||
#' @param only_all_tested (for combination antibiograms): a [logical] to indicate that isolates must be tested for all antimicrobials, see *Details*
|
||||
#' @param digits number of digits to use for rounding the susceptibility percentage
|
||||
#' @param formatting_type numeric value (1–12) indicating how the 'cells' of the antibiogram table should be formatted. See *Details* > *Formatting Type* for a list of options.
|
||||
#' @param formatting_type numeric value (1–22 for WISCA, 1-12 for non-WISCA) indicating how the 'cells' of the antibiogram table should be formatted. See *Details* > *Formatting Type* for a list of options.
|
||||
#' @param col_mo column name of the names or codes of the microorganisms (see [as.mo()]) - the default is the first column of class [`mo`]. Values will be coerced using [as.mo()].
|
||||
#' @param language language to translate text, which defaults to the system language (see [get_AMR_locale()])
|
||||
#' @param minimum the minimum allowed number of available (tested) isolates. Any isolate count lower than `minimum` will return `NA` with a warning. The default number of `30` isolates is advised by the Clinical and Laboratory Standards Institute (CLSI) as best practice, see *Source*.
|
||||
#' @param combine_SI a [logical] to indicate whether all susceptibility should be determined by results of either S, SDD, or I, instead of only S (default is `TRUE`)
|
||||
#' @param sep a separating character for antibiotic columns in combination antibiograms
|
||||
#' @param sep a separating character for antimicrobial columns in combination antibiograms
|
||||
#' @param wisca a [logical] to indicate whether a Weighted-Incidence Syndromic Combination Antibiogram (WISCA) must be generated (default is `FALSE`). This will use a Bayesian hierarchical model to estimate regimen coverage probabilities using Montecarlo simulations. Set `simulations` to adjust.
|
||||
#' @param simulations (for WISCA) a numerical value to set the number of Montecarlo simulations
|
||||
#' @param conf_interval (for WISCA) a numerical value to set confidence interval (default is `0.95`)
|
||||
#' @param interval_side (for WISCA) the side of the confidence interval, either `"two-tailed"` (default), `"left"` or `"right"`
|
||||
#' @param info a [logical] to indicate info should be printed - the default is `TRUE` only in interactive mode
|
||||
#' @param object an [antibiogram()] object
|
||||
#' @param ... when used in [R Markdown or Quarto][knitr::kable()]: arguments passed on to [knitr::kable()] (otherwise, has no use)
|
||||
@ -51,9 +58,13 @@
|
||||
#'
|
||||
#' **Remember that you should filter your data to let it contain only first isolates!** This is needed to exclude duplicates and to reduce selection bias. Use [first_isolate()] to determine them in your data set with one of the four available algorithms.
|
||||
#'
|
||||
#' For estimating antimicrobial coverage, especially when creating a WISCA, the outcome might become more reliable by only including the top *n* species encountered in the data. You can filter on this top *n* using [top_n_microorganisms()]. For example, use `top_n_microorganisms(your_data, n = 10)` as a pre-processing step to only include the top 10 species in the data.
|
||||
#'
|
||||
#' Using [get_long_numeric_format()], the antibiogram is converted to a long format containing numeric values. This is ideal for e.g. advanced plotting.
|
||||
#'
|
||||
#' ### Formatting Type
|
||||
#'
|
||||
#' The formatting of the 'cells' of the table can be set with the argument `formatting_type`. In these examples, `5` is the susceptibility percentage, `15` the numerator, and `300` the denominator:
|
||||
#' The formatting of the 'cells' of the table can be set with the argument `formatting_type`. In these examples, `5` is the susceptibility percentage (for WISCA: `4-6` indicates the confidence level), `15` the numerator, and `300` the denominator:
|
||||
#'
|
||||
#' 1. 5
|
||||
#' 2. 15
|
||||
@ -64,19 +75,31 @@
|
||||
#' 7. 5 (N=300)
|
||||
#' 8. 5% (N=300)
|
||||
#' 9. 5 (15/300)
|
||||
#' 10. 5% (15/300)
|
||||
#' 10. 5% (15/300) - **default for non-WISCA**
|
||||
#' 11. 5 (N=15/300)
|
||||
#' 12. 5% (N=15/300)
|
||||
#'
|
||||
#' Additional options for WISCA (using `antibiogram(..., wisca = TRUE)` or `wisca()`):
|
||||
#' 13. 5 (4-6)
|
||||
#' 14. 5% (4-6%)
|
||||
#' 15. 5 (4-6,300)
|
||||
#' 16. 5% (4-6%,300)
|
||||
#' 17. 5 (4-6,N=300)
|
||||
#' 18. 5% (4-6%,N=300) - **default for WISCA**
|
||||
#' 19. 5 (4-6,15/300)
|
||||
#' 20. 5% (4-6%,15/300)
|
||||
#' 21. 5 (4-6,N=15/300)
|
||||
#' 22. 5% (4-6%,N=15/300)
|
||||
#'
|
||||
#' The default is `10`, which can be set globally with the package option [`AMR_antibiogram_formatting_type`][AMR-options], e.g. `options(AMR_antibiogram_formatting_type = 5)`.
|
||||
#' The default is `18` for WISCA and `10` for non-WISCA, which can be set globally with the package option [`AMR_antibiogram_formatting_type`][AMR-options], e.g. `options(AMR_antibiogram_formatting_type = 5)`.
|
||||
#'
|
||||
#' Set `digits` (defaults to `0`) to alter the rounding of the susceptibility percentage.
|
||||
#' Set `digits` (defaults to `0`) to alter the rounding of the susceptibility percentages.
|
||||
#'
|
||||
#' ### Antibiogram Types
|
||||
#'
|
||||
#' There are four antibiogram types, as summarised by Klinker *et al.* (2021, \doi{10.1177/20499361211011373}), and they are all supported by [antibiogram()]. Use WISCA whenever possible, since it provides precise coverage estimates by accounting for pathogen incidence and antimicrobial susceptibility. See the section *Why Use WISCA?* on this page.
|
||||
#' There are various antibiogram types, as summarised by Klinker *et al.* (2021, \doi{10.1177/20499361211011373}), and they are all supported by [antibiogram()].
|
||||
#'
|
||||
#' The four antibiogram types:
|
||||
#' **Use WISCA whenever possible**, since it provides more precise coverage estimates by accounting for pathogen incidence and antimicrobial susceptibility, as has been shown by Bielicki *et al.* (2020, \doi{10.1001.jamanetworkopen.2019.21124}). See the section *Why Use WISCA?* on this page.
|
||||
#'
|
||||
#' 1. **Traditional Antibiogram**
|
||||
#'
|
||||
@ -114,28 +137,35 @@
|
||||
#'
|
||||
#' 4. **Weighted-Incidence Syndromic Combination Antibiogram (WISCA)**
|
||||
#'
|
||||
#' WISCA enhances empirical antibiotic selection by weighting the incidence of pathogens in specific clinical syndromes and combining them with their susceptibility data. It provides an estimation of regimen coverage by aggregating pathogen incidences and susceptibilities across potential causative organisms. See also the section *Why Use WISCA?* on this page.
|
||||
#'
|
||||
#' Case example: Susceptibility of *Pseudomonas aeruginosa* to TZP among respiratory specimens (obtained among ICU patients only) for male patients age >=65 years with heart failure
|
||||
#' WISCA can be applied to any antibiogram, see the section *Why Use WISCA?* on this page for more information.
|
||||
#'
|
||||
#' Code example:
|
||||
#'
|
||||
#' ```r
|
||||
#' library(dplyr)
|
||||
#' your_data %>%
|
||||
#' filter(ward == "ICU" & specimen_type == "Respiratory") %>%
|
||||
#' antibiogram(antibiotics = c("TZP", "TZP+TOB", "TZP+GEN"),
|
||||
#' syndromic_group = ifelse(.$age >= 65 &
|
||||
#' .$gender == "Male" &
|
||||
#' .$condition == "Heart Disease",
|
||||
#' "Study Group", "Control Group"))
|
||||
#' antibiogram(your_data,
|
||||
#' antibiotics = c("TZP", "TZP+TOB", "TZP+GEN"),
|
||||
#' wisca = TRUE)
|
||||
#'
|
||||
#' # this is equal to:
|
||||
#' wisca(your_data,
|
||||
#' antibiotics = c("TZP", "TZP+TOB", "TZP+GEN"))
|
||||
#' ```
|
||||
#'
|
||||
#' WISCA uses a sophisticated Bayesian decision model to combine both local and pooled antimicrobial resistance data. This approach not only evaluates local patterns but can also draw on multi-centre datasets to improve regimen accuracy, even in low-incidence infections like paediatric bloodstream infections (BSIs).
|
||||
#'
|
||||
#' Grouped [tibbles][tibble::tibble] can also be used to calculate susceptibilities over various groups.
|
||||
#'
|
||||
#' Code example:
|
||||
#'
|
||||
#' ```r
|
||||
#' your_data %>%
|
||||
#' group_by(has_sepsis, is_neonate, sex) %>%
|
||||
#' wisca(antibiotics = c("TZP", "TZP+TOB", "TZP+GEN"))
|
||||
#' ```
|
||||
#'
|
||||
#' ### Inclusion in Combination Antibiogram and Syndromic Antibiogram
|
||||
#'
|
||||
#' Note that for types 2 and 3 (Combination Antibiogram and Syndromic Antibiogram), it is important to realise that susceptibility can be calculated in two ways, which can be set with the `only_all_tested` argument (default is `FALSE`). See this example for two antibiotics, Drug A and Drug B, about how [antibiogram()] works to calculate the %SI:
|
||||
#' Note that for types 2 and 3 (Combination Antibiogram and Syndromic Antibiogram), it is important to realise that susceptibility can be calculated in two ways, which can be set with the `only_all_tested` argument (default is `FALSE`). See this example for two antimicrobials, Drug A and Drug B, about how [antibiogram()] works to calculate the %SI:
|
||||
#'
|
||||
#' ```
|
||||
#' --------------------------------------------------------------------
|
||||
@ -165,18 +195,57 @@
|
||||
#' You can also use functions from specific 'table reporting' packages to transform the output of [antibiogram()] to your needs, e.g. with `flextable::as_flextable()` or `gt::gt()`.
|
||||
#'
|
||||
#' @section Why Use WISCA?:
|
||||
#' WISCA is a powerful tool for guiding empirical antibiotic therapy because it provides precise coverage estimates by accounting for pathogen incidence and antimicrobial susceptibility. This is particularly important in empirical treatment, where the causative pathogen is often unknown at the outset. Traditional antibiograms do not reflect the weighted likelihood of specific pathogens based on clinical syndromes, which can lead to suboptimal treatment choices.
|
||||
#' WISCA, as outlined by Barbieri *et al.* (\doi{10.1186/s13756-021-00939-2}), stands for
|
||||
#' Weighted-Incidence Syndromic Combination Antibiogram, which estimates the probability
|
||||
#' of adequate empirical antimicrobial regimen coverage for specific infection syndromes.
|
||||
#' This method leverages a Bayesian hierarchical logistic regression framework with random
|
||||
#' effects for pathogens and regimens, enabling robust estimates in the presence of sparse
|
||||
#' data.
|
||||
#'
|
||||
#' The Bayesian model assumes conjugate priors for parameter estimation. For example, the
|
||||
#' coverage probability \ifelse{latex}{\deqn{$theta$}}{$theta$} for a given antimicrobial regimen
|
||||
#' is modeled using a Beta distribution as a prior:
|
||||
#'
|
||||
#' \ifelse{latex}{\deqn{$theta$ \sim \text{Beta}($alpha$_0, $beta$_0)}}{
|
||||
#' \ifelse{html}{\figure{beta_prior.png}{options: width="300" alt="Beta prior"}}{$theta$ ~ Beta($alpha$_0, $beta$_0)}}
|
||||
#'
|
||||
#' where \eqn{$alpha$_0} and \eqn{$beta$_0} represent prior successes and failures, respectively,
|
||||
#' informed by expert knowledge or weakly informative priors (e.g., \eqn{$alpha$_0 = 1, $beta$_0 = 1}).
|
||||
#'
|
||||
#' The Bayesian WISCA, as described by Bielicki *et al.* (2016), improves on earlier methods by handling uncertainties common in smaller datasets, such as low-incidence infections. This method offers a significant advantage by:
|
||||
#' The likelihood function is constructed based on observed data, where the number of covered
|
||||
#' cases for a regimen follows a binomial distribution:
|
||||
#'
|
||||
#' \ifelse{latex}{\deqn{y \sim \text{Binomial}(n, $theta$)}}{
|
||||
#' \ifelse{html}{\figure{binomial_likelihood.png}{options: width="300" alt="Binomial likelihood"}}{y ~ Binomial(n, $theta$)}}
|
||||
#'
|
||||
#' Posterior parameter estimates are obtained by combining the prior and likelihood using
|
||||
#' Bayes' theorem. The posterior distribution of \eqn{$theta$} is also a Beta distribution:
|
||||
#'
|
||||
#' \ifelse{latex}{\deqn{$theta$ | y \sim \text{Beta}($alpha$_0 + y, $beta$_0 + n - y)}}{
|
||||
#' \ifelse{html}{\figure{posterior_beta.png}{options: width="300" alt="Beta posterior"}}{$theta$ | y ~ Beta($alpha$_0 + y, $beta$_0 + n - y)}}
|
||||
#'
|
||||
#' For hierarchical modeling, pathogen-level effects (e.g., differences in resistance
|
||||
#' patterns) and regimen-level effects are modelled using Gaussian priors on log-odds.
|
||||
#' This hierarchical structure ensures partial pooling of estimates across groups,
|
||||
#' improving stability in strata with small sample sizes. The model is implemented using
|
||||
#' Hamiltonian Monte Carlo (HMC) sampling.
|
||||
#'
|
||||
#' Stratified results are provided based on covariates such as age, sex, and clinical
|
||||
#' complexity (e.g., prior antimicrobial treatments or renal/urological comorbidities).
|
||||
#' For example, posterior odds ratios (ORs) are derived to quantify the effect of these
|
||||
#' covariates on coverage probabilities:
|
||||
#'
|
||||
#' \ifelse{latex}{\deqn{\text{OR}_{\text{covariate}} = \frac{\exp($beta$_{\text{covariate}})}{\exp($beta$_0)}}}{
|
||||
#' \ifelse{html}{\figure{odds_ratio.png}{options: width="300" alt="Odds ratio formula"}}{OR_covariate = exp(beta_covariate) / exp(beta_0)}}
|
||||
#'
|
||||
#' By combining empirical data with prior knowledge, WISCA overcomes the limitations
|
||||
#' of traditional combination antibiograms, offering disease-specific, patient-stratified
|
||||
#' estimates with robust uncertainty quantification. This tool is invaluable for antimicrobial
|
||||
#' stewardship programs and empirical treatment guideline refinement.
|
||||
#'
|
||||
#' 1. Pooling Data from Multiple Sources:\cr WISCA uses pooled data from multiple hospitals or surveillance sources to overcome limitations of small sample sizes at individual institutions, allowing for more confident selection of narrow-spectrum antibiotics or combinations.
|
||||
#' 2. Bayesian Framework:\cr The Bayesian decision tree model accounts for both local data and prior knowledge (such as inherent resistance patterns) to estimate regimen coverage. It allows for a more precise estimation of coverage, even in cases where susceptibility data is missing or incomplete.
|
||||
#' 3. Incorporating Pathogen and Regimen Uncertainty:\cr WISCA allows clinicians to see the likelihood that an empirical regimen will be effective against all relevant pathogens, taking into account uncertainties related to both pathogen prevalence and antimicrobial resistance. This leads to better-informed, data-driven clinical decisions.
|
||||
#' 4. Scenarios for Optimising Treatment:\cr For hospitals or settings with low-incidence infections, WISCA helps determine whether local data is sufficient or if pooling with external data is necessary. It also identifies statistically significant differences or similarities between antibiotic regimens, enabling clinicians to choose optimal therapies with greater confidence.
|
||||
#'
|
||||
#' WISCA is essential in optimising empirical treatment by shifting away from broad-spectrum antibiotics, which are often overused in empirical settings. By offering precise estimates based on syndromic patterns and pooled data, WISCA supports antimicrobial stewardship by guiding more targeted therapy, reducing unnecessary broad-spectrum use, and combating the rise of antimicrobial resistance.
|
||||
#' @source
|
||||
#' * Bielicki JA *et al.* (2016). **Selecting appropriate empirical antibiotic regimens for paediatric bloodstream infections: application of a Bayesian decision model to local and pooled antimicrobial resistance surveillance data** *Journal of Antimicrobial Chemotherapy* 71(3); \doi{10.1093/jac/dkv397}
|
||||
#' * Bielicki JA *et al.* (2020). **Evaluation of the coverage of 3 antibiotic regimens for neonatal sepsis in the hospital setting across Asian countries** *JAMA Netw Open.* 3(2):e1921124; \doi{10.1001.jamanetworkopen.2019.21124}
|
||||
#' * Klinker KP *et al.* (2021). **Antimicrobial stewardship and antibiograms: importance of moving beyond traditional antibiograms**. *Therapeutic Advances in Infectious Disease*, May 5;8:20499361211011373; \doi{10.1177/20499361211011373}
|
||||
#' * Barbieri E *et al.* (2021). **Development of a Weighted-Incidence Syndromic Combination Antibiogram (WISCA) to guide the choice of the empiric antibiotic treatment for urinary tract infection in paediatric patients: a Bayesian approach** *Antimicrobial Resistance & Infection Control* May 1;10(1):74; \doi{10.1186/s13756-021-00939-2}
|
||||
#' * **M39 Analysis and Presentation of Cumulative Antimicrobial Susceptibility Test Data, 5th Edition**, 2022, *Clinical and Laboratory Standards Institute (CLSI)*. <https://clsi.org/standards/products/microbiology/documents/m39/>.
|
||||
@ -246,19 +315,15 @@
|
||||
#' ),
|
||||
#' language = "es"
|
||||
#' )
|
||||
#'
|
||||
#'
|
||||
#' # WISCA antibiogram ----------------------------------------------------
|
||||
#'
|
||||
#'
|
||||
#' # Weighted-incidence syndromic combination antibiogram (WISCA) ---------
|
||||
#'
|
||||
#' # the data set could contain a filter for e.g. respiratory specimens/ICU
|
||||
#' # can be used for any of the above types - just add `wisca = TRUE`
|
||||
#' antibiogram(example_isolates,
|
||||
#' antibiotics = c("AMC", "AMC+CIP", "TZP", "TZP+TOB"),
|
||||
#' antibiotics = c("TZP", "TZP+TOB", "TZP+GEN"),
|
||||
#' mo_transform = "gramstain",
|
||||
#' minimum = 10, # this should be >=30, but now just as example
|
||||
#' syndromic_group = ifelse(example_isolates$age >= 65 &
|
||||
#' example_isolates$gender == "M",
|
||||
#' "WISCA Group 1", "WISCA Group 2"
|
||||
#' )
|
||||
#' wisca = TRUE
|
||||
#' )
|
||||
#'
|
||||
#'
|
||||
@ -306,29 +371,65 @@ antibiogram <- function(x,
|
||||
add_total_n = FALSE,
|
||||
only_all_tested = FALSE,
|
||||
digits = 0,
|
||||
formatting_type = getOption("AMR_antibiogram_formatting_type", 10),
|
||||
formatting_type = getOption("AMR_antibiogram_formatting_type", ifelse(wisca, 18, 10)),
|
||||
col_mo = NULL,
|
||||
language = get_AMR_locale(),
|
||||
minimum = 30,
|
||||
combine_SI = TRUE,
|
||||
sep = " + ",
|
||||
wisca = FALSE,
|
||||
simulations = 1000,
|
||||
conf_interval = 0.95,
|
||||
interval_side = "two-tailed",
|
||||
info = interactive()) {
|
||||
UseMethod("antibiogram")
|
||||
}
|
||||
|
||||
#' @method antibiogram default
|
||||
#' @export
|
||||
antibiogram.default <- function(x,
|
||||
antibiotics = where(is.sir),
|
||||
mo_transform = "shortname",
|
||||
ab_transform = "name",
|
||||
syndromic_group = NULL,
|
||||
add_total_n = FALSE,
|
||||
only_all_tested = FALSE,
|
||||
digits = 0,
|
||||
formatting_type = getOption("AMR_antibiogram_formatting_type", ifelse(wisca, 18, 10)),
|
||||
col_mo = NULL,
|
||||
language = get_AMR_locale(),
|
||||
minimum = 30,
|
||||
combine_SI = TRUE,
|
||||
sep = " + ",
|
||||
wisca = FALSE,
|
||||
simulations = 1000,
|
||||
conf_interval = 0.95,
|
||||
interval_side = "two-tailed",
|
||||
info = interactive()) {
|
||||
meet_criteria(x, allow_class = "data.frame")
|
||||
x <- ascertain_sir_classes(x, "x")
|
||||
meet_criteria(mo_transform, allow_class = "character", has_length = 1, is_in = c("name", "shortname", "gramstain", colnames(AMR::microorganisms)), allow_NULL = TRUE)
|
||||
meet_criteria(ab_transform, allow_class = "character", has_length = 1, is_in = colnames(AMR::antibiotics), allow_NULL = TRUE)
|
||||
if (!is.function(mo_transform)) {
|
||||
meet_criteria(mo_transform, allow_class = "character", has_length = 1, is_in = c("name", "shortname", "gramstain", colnames(AMR::microorganisms)), allow_NULL = TRUE)
|
||||
}
|
||||
if (!is.function(ab_transform)) {
|
||||
meet_criteria(ab_transform, allow_class = "character", has_length = 1, is_in = colnames(AMR::antibiotics), allow_NULL = TRUE)
|
||||
}
|
||||
meet_criteria(syndromic_group, allow_class = "character", allow_NULL = TRUE, allow_NA = TRUE)
|
||||
meet_criteria(add_total_n, allow_class = "logical", has_length = 1)
|
||||
meet_criteria(only_all_tested, allow_class = "logical", has_length = 1)
|
||||
meet_criteria(digits, allow_class = c("numeric", "integer"), has_length = 1, is_finite = TRUE)
|
||||
meet_criteria(formatting_type, allow_class = c("numeric", "integer"), has_length = 1, is_in = c(1:12))
|
||||
meet_criteria(wisca, allow_class = "logical", has_length = 1)
|
||||
meet_criteria(formatting_type, allow_class = c("numeric", "integer"), has_length = 1, is_in = if (wisca == TRUE) c(1:22) else c(1:12))
|
||||
meet_criteria(col_mo, allow_class = "character", has_length = 1, allow_NULL = TRUE, is_in = colnames(x))
|
||||
language <- validate_language(language)
|
||||
meet_criteria(minimum, allow_class = c("numeric", "integer"), has_length = 1, is_positive_or_zero = TRUE, is_finite = TRUE)
|
||||
meet_criteria(combine_SI, allow_class = "logical", has_length = 1)
|
||||
meet_criteria(sep, allow_class = "character", has_length = 1)
|
||||
meet_criteria(simulations, allow_class = c("numeric", "integer"), has_length = 1, is_finite = TRUE, is_positive = TRUE)
|
||||
meet_criteria(conf_interval, allow_class = c("numeric", "integer"), has_length = 1, is_finite = TRUE, is_positive = TRUE)
|
||||
meet_criteria(interval_side, allow_class = "character", has_length = 1, is_in = c("two-tailed", "left", "right"))
|
||||
meet_criteria(info, allow_class = "logical", has_length = 1)
|
||||
|
||||
|
||||
# try to find columns based on type
|
||||
if (is.null(col_mo)) {
|
||||
col_mo <- search_type_in_df(x = x, type = "mo", info = interactive())
|
||||
@ -338,6 +439,8 @@ antibiogram <- function(x,
|
||||
x$`.mo` <- x[, col_mo, drop = TRUE]
|
||||
if (is.null(mo_transform)) {
|
||||
# leave as is
|
||||
} else if (is.function(mo_transform)) {
|
||||
x$`.mo` <- mo_transform(x$`.mo`)
|
||||
} else if (mo_transform == "gramstain") {
|
||||
x$`.mo` <- mo_gramstain(x$`.mo`, language = language)
|
||||
} else if (mo_transform == "shortname") {
|
||||
@ -348,7 +451,7 @@ antibiogram <- function(x,
|
||||
x$`.mo` <- mo_property(x$`.mo`, property = mo_transform, language = language)
|
||||
}
|
||||
x$`.mo`[is.na(x$`.mo`)] <- "(??)"
|
||||
|
||||
|
||||
# get syndromic groups
|
||||
if (!is.null(syndromic_group)) {
|
||||
if (length(syndromic_group) == 1 && syndromic_group %in% colnames(x)) {
|
||||
@ -361,9 +464,11 @@ antibiogram <- function(x,
|
||||
} else {
|
||||
has_syndromic_group <- FALSE
|
||||
}
|
||||
|
||||
|
||||
# get antibiotics
|
||||
if (tryCatch(is.character(antibiotics), error = function(e) FALSE)) {
|
||||
ab_trycatch <- tryCatch(colnames(suppressWarnings(x[, antibiotics, drop = FALSE])), error = function(e) NULL)
|
||||
if (is.null(ab_trycatch)) {
|
||||
stop_ifnot(is.character(suppressMessages(antibiotics)), "`antibiotics` must be an antimicrobial selector, or a character vector.")
|
||||
antibiotics.bak <- antibiotics
|
||||
# split antibiotics on separator and make it a list
|
||||
antibiotics <- strsplit(gsub(" ", "", antibiotics), "+", fixed = TRUE)
|
||||
@ -379,11 +484,11 @@ antibiogram <- function(x,
|
||||
out[!is.na(out)]
|
||||
})
|
||||
user_ab <- user_ab[unlist(lapply(user_ab, length)) > 0]
|
||||
|
||||
|
||||
if (length(non_existing) > 0) {
|
||||
warning_("The following antibiotics were not available and ignored: ", vector_and(ab_name(non_existing, language = NULL, tolower = TRUE), quotes = FALSE))
|
||||
}
|
||||
|
||||
|
||||
# make list unique
|
||||
antibiotics <- unique(user_ab)
|
||||
# go through list to set AMR in combinations
|
||||
@ -418,9 +523,9 @@ antibiogram <- function(x,
|
||||
}
|
||||
antibiotics <- unlist(antibiotics)
|
||||
} else {
|
||||
antibiotics <- colnames(suppressWarnings(x[, antibiotics, drop = FALSE]))
|
||||
antibiotics <- ab_trycatch
|
||||
}
|
||||
|
||||
|
||||
if (isTRUE(has_syndromic_group)) {
|
||||
out <- x %pm>%
|
||||
pm_select(.syndromic_group, .mo, antibiotics) %pm>%
|
||||
@ -429,29 +534,107 @@ antibiogram <- function(x,
|
||||
out <- x %pm>%
|
||||
pm_select(.mo, antibiotics)
|
||||
}
|
||||
|
||||
|
||||
|
||||
# get numbers of S, I, R (per group)
|
||||
out <- out %pm>%
|
||||
bug_drug_combinations(
|
||||
col_mo = ".mo",
|
||||
FUN = function(x) x
|
||||
FUN = function(x) x,
|
||||
include_n_rows = TRUE
|
||||
)
|
||||
counts <- out
|
||||
|
||||
|
||||
|
||||
if (wisca == TRUE) {
|
||||
# WISCA ----
|
||||
|
||||
# set up progress bar
|
||||
progress <- progress_ticker(n = NROW(out[which(out$total > 0), , drop = FALSE]),
|
||||
n_min = 10,
|
||||
print = info,
|
||||
title = "Calculating beta/gamma parameters for WISCA")
|
||||
on.exit(close(progress))
|
||||
|
||||
out$percentage = NA_real_
|
||||
out$lower = NA_real_
|
||||
out$upper = NA_real_
|
||||
|
||||
for (i in seq_len(NROW(out))) {
|
||||
if (out$total[i] == 0) {
|
||||
next
|
||||
}
|
||||
progress$tick()
|
||||
out_current <- out[i, , drop = FALSE]
|
||||
priors <- calculate_priors(out_current, combine_SI = combine_SI)
|
||||
|
||||
# Monte Carlo simulation
|
||||
coverage_simulations <- replicate(simulations, {
|
||||
# simulate pathogen incidence
|
||||
# = Dirichlet (Gamma) parameters
|
||||
simulated_incidence <- stats::rgamma(
|
||||
n = length(priors$gamma_posterior),
|
||||
shape = priors$gamma_posterior,
|
||||
rate = 1 # Scale = 1 for gamma
|
||||
)
|
||||
# normalise
|
||||
simulated_incidence <- simulated_incidence / sum(simulated_incidence)
|
||||
|
||||
# simulate susceptibility
|
||||
# = Beta parameters
|
||||
simulated_susceptibility <- stats::rbeta(
|
||||
n = length(priors$beta_posterior_1),
|
||||
shape1 = priors$beta_posterior_1,
|
||||
shape2 = priors$beta_posterior_2
|
||||
)
|
||||
sum(simulated_incidence * simulated_susceptibility)
|
||||
})
|
||||
|
||||
# calculate coverage statistics
|
||||
coverage_mean <- mean(coverage_simulations)
|
||||
if (interval_side == "two-tailed") {
|
||||
probs <- c((1 - conf_interval) / 2, 1 - (1 - conf_interval) / 2)
|
||||
} else if (interval_side == "left") {
|
||||
probs <- c(0, conf_interval)
|
||||
} else if (interval_side == "right") {
|
||||
probs <- c(1 - conf_interval, 1)
|
||||
}
|
||||
coverage_ci <- unname(stats::quantile(coverage_simulations, probs = probs))
|
||||
|
||||
out$percentage[i] <- coverage_mean
|
||||
out$lower[i] <- coverage_ci[1]
|
||||
out$upper[i] <- coverage_ci[2]
|
||||
}
|
||||
# remove progress bar from console
|
||||
close(progress)
|
||||
}
|
||||
|
||||
if (isTRUE(combine_SI)) {
|
||||
out$numerator <- out$S + out$I + out$SDD
|
||||
} else {
|
||||
out$numerator <- out$S
|
||||
}
|
||||
if (all(out$total < minimum, na.rm = TRUE)) {
|
||||
if (all(out$total < minimum, na.rm = TRUE) && wisca == FALSE) {
|
||||
warning_("All combinations had less than `minimum = ", minimum, "` results, returning an empty antibiogram")
|
||||
return(as_original_data_class(data.frame(), class(out), extra_class = "antibiogram"))
|
||||
} else if (any(out$total < minimum, na.rm = TRUE)) {
|
||||
if (isTRUE(info)) {
|
||||
message_("NOTE: ", sum(out$total < minimum, na.rm = TRUE), " combinations had less than `minimum = ", minimum, "` results and were ignored", add_fn = font_red)
|
||||
}
|
||||
out <- out %pm>%
|
||||
subset(total >= minimum)
|
||||
# also for WISCA, refrain from anything below 15 isolates:
|
||||
subset(total > 15)
|
||||
mins <- sum(out$total < minimum, na.rm = TRUE)
|
||||
if (wisca == FALSE) {
|
||||
out <- out %pm>%
|
||||
subset(total >= minimum)
|
||||
if (isTRUE(info) && mins > 0) {
|
||||
message_("NOTE: ", mins, " combinations had less than `minimum = ", minimum, "` results and were ignored", add_fn = font_red)
|
||||
}
|
||||
} else if (isTRUE(info)) {
|
||||
warning_("Number of tested isolates per regimen should exceed ", minimum, ". Coverage estimates will be inaccurate for ", mins, " regimen", ifelse(mins == 1, "", "s"), ".", call = FALSE)
|
||||
}
|
||||
}
|
||||
|
||||
if (NROW(out) == 0) {
|
||||
return(as_original_data_class(data.frame(), class(out), extra_class = "antibiogram"))
|
||||
}
|
||||
|
||||
# regroup for summarising
|
||||
@ -464,6 +647,21 @@ antibiogram <- function(x,
|
||||
pm_group_by(mo, ab)
|
||||
}
|
||||
|
||||
if (wisca == TRUE) {
|
||||
out_numeric <- out %pm>%
|
||||
pm_summarise(percentage = percentage,
|
||||
lower = lower,
|
||||
upper = upper,
|
||||
numerator = numerator,
|
||||
total = total)
|
||||
} else {
|
||||
out_numeric <- out %pm>%
|
||||
pm_summarise(percentage = numerator / total,
|
||||
numerator = numerator,
|
||||
total = total)
|
||||
}
|
||||
|
||||
out$digits <- digits # since pm_sumarise() cannot work with an object outside the current frame
|
||||
# formatting type:
|
||||
# 1. 5
|
||||
# 2. 15
|
||||
@ -477,11 +675,16 @@ antibiogram <- function(x,
|
||||
# 10. 5% (15/300)
|
||||
# 11. 5 (N=15/300)
|
||||
# 12. 5% (N=15/300)
|
||||
out_numeric <- out %pm>%
|
||||
pm_summarise(percentage = numerator / total,
|
||||
numerator = numerator,
|
||||
total = total)
|
||||
out$digits <- digits # since pm_sumarise() cannot work with an object outside the current frame
|
||||
# 13. 5 (4-6)
|
||||
# 14. 5% (4-6%)
|
||||
# 15. 5 (4-6,300)
|
||||
# 16. 5% (4-6%,300)
|
||||
# 17. 5 (4-6,N=300)
|
||||
# 18. 5% (4-6%,N=300)
|
||||
# 19. 5 (4-6,15/300)
|
||||
# 20. 5% (4-6%,15/300)
|
||||
# 21. 5 (4-6,N=15/300)
|
||||
# 22. 5% (4-6%,N=15/300)
|
||||
if (formatting_type == 1) out <- out %pm>% pm_summarise(out_value = round((numerator / total) * 100, digits = digits))
|
||||
if (formatting_type == 2) out <- out %pm>% pm_summarise(out_value = numerator)
|
||||
if (formatting_type == 3) out <- out %pm>% pm_summarise(out_value = total)
|
||||
@ -494,6 +697,12 @@ antibiogram <- function(x,
|
||||
if (formatting_type == 10) out <- out %pm>% pm_summarise(out_value = paste0(round((numerator / total) * 100, digits = digits), "% (", numerator, "/", total, ")"))
|
||||
if (formatting_type == 11) out <- out %pm>% pm_summarise(out_value = paste0(round((numerator / total) * 100, digits = digits), " (N=", numerator, "/", total, ")"))
|
||||
if (formatting_type == 12) out <- out %pm>% pm_summarise(out_value = paste0(round((numerator / total) * 100, digits = digits), "% (N=", numerator, "/", total, ")"))
|
||||
if (formatting_type == 13) out <- out %pm>% pm_summarise(out_value = paste0(round(percentage * 100, digits = digits), " (", round(lower * 100, digits = digits), "-", round(upper * 100, digits = digits), ")"))
|
||||
if (formatting_type == 14) out <- out %pm>% pm_summarise(out_value = paste0(round(percentage * 100, digits = digits), "% (", round(lower * 100, digits = digits), "-", round(upper * 100, digits = digits), "%)"))
|
||||
if (formatting_type == 15) out <- out %pm>% pm_summarise(out_value = paste0(round(percentage * 100, digits = digits), " (", round(lower * 100, digits = digits), "-", round(upper * 100, digits = digits), ",", total, ")"))
|
||||
if (formatting_type == 16) out <- out %pm>% pm_summarise(out_value = paste0(round(percentage * 100, digits = digits), "% (", round(lower * 100, digits = digits), "-", round(upper * 100, digits = digits), "%,", total, ")"))
|
||||
if (formatting_type == 17) out <- out %pm>% pm_summarise(out_value = paste0(round(percentage * 100, digits = digits), " (", round(lower * 100, digits = digits), "-", round(upper * 100, digits = digits), ",N=", total, ")"))
|
||||
if (formatting_type == 18) out <- out %pm>% pm_summarise(out_value = paste0(round(percentage * 100, digits = digits), "% (", round(lower * 100, digits = digits), "-", round(upper * 100, digits = digits), "%,N=", total, ")"))
|
||||
|
||||
# transform names of antibiotics
|
||||
ab_naming_function <- function(x, t, l, s) {
|
||||
@ -503,6 +712,8 @@ antibiogram <- function(x,
|
||||
a <- x[[i]]
|
||||
if (is.null(t)) {
|
||||
# leave as is
|
||||
} else if (is.function(t)) {
|
||||
a <- t(a)
|
||||
} else if (t == "atc") {
|
||||
a <- ab_atc(a, only_first = TRUE, language = l)
|
||||
} else {
|
||||
@ -527,12 +738,12 @@ antibiogram <- function(x,
|
||||
colnames(object) <- gsub("^out_value?[.]", "", colnames(object))
|
||||
return(object)
|
||||
}
|
||||
|
||||
|
||||
# ungroup for long -> wide transformation
|
||||
attr(out, "pm_groups") <- NULL
|
||||
attr(out, "groups") <- NULL
|
||||
class(out) <- class(out)[!class(out) %in% c("grouped_df", "grouped_data")]
|
||||
|
||||
|
||||
if (isTRUE(has_syndromic_group)) {
|
||||
grps <- unique(out$syndromic_group)
|
||||
for (i in seq_len(length(grps))) {
|
||||
@ -559,7 +770,7 @@ antibiogram <- function(x,
|
||||
new_df <- new_df[, c("mo", sort(colnames(new_df)[colnames(new_df) != "mo"])), drop = FALSE]
|
||||
colnames(new_df)[1] <- translate_AMR("Pathogen", language = language)
|
||||
}
|
||||
|
||||
|
||||
# add total N if indicated
|
||||
if (isTRUE(add_total_n)) {
|
||||
if (isTRUE(has_syndromic_group)) {
|
||||
@ -587,20 +798,200 @@ antibiogram <- function(x,
|
||||
colnames(new_df)[edit_col] <- paste(colnames(new_df)[edit_col], "(N min-max)")
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
out <- as_original_data_class(new_df, class(x), extra_class = "antibiogram")
|
||||
rownames(out) <- NULL
|
||||
rownames(out_numeric) <- NULL
|
||||
|
||||
structure(out,
|
||||
has_syndromic_group = has_syndromic_group,
|
||||
out_numeric = out_numeric,
|
||||
combine_SI = combine_SI
|
||||
has_syndromic_group = has_syndromic_group,
|
||||
combine_SI = combine_SI,
|
||||
wisca = wisca,
|
||||
conf_interval = conf_interval,
|
||||
out_numeric = as_original_data_class(out_numeric, class(out))
|
||||
)
|
||||
}
|
||||
|
||||
#' @method antibiogram grouped_df
|
||||
#' @export
|
||||
antibiogram.grouped_df <- function(x,
|
||||
antibiotics = where(is.sir),
|
||||
mo_transform = function (...) "no_mo",
|
||||
ab_transform = "name",
|
||||
syndromic_group = NULL,
|
||||
add_total_n = FALSE,
|
||||
only_all_tested = FALSE,
|
||||
digits = 0,
|
||||
formatting_type = getOption("AMR_antibiogram_formatting_type", ifelse(wisca, 18, 10)),
|
||||
col_mo = NULL,
|
||||
language = get_AMR_locale(),
|
||||
minimum = 30,
|
||||
combine_SI = TRUE,
|
||||
sep = " + ",
|
||||
wisca = FALSE,
|
||||
simulations = 1000,
|
||||
conf_interval = 0.95,
|
||||
interval_side = "two-tailed",
|
||||
info = interactive()) {
|
||||
stop_ifnot(is.null(syndromic_group), "`syndromic_group` must not be set if creating an antibiogram using a grouped tibble. The groups will become the variables over which the antimicrobials are calculated, making `syndromic_groups` redundant.", call = FALSE)
|
||||
groups <- attributes(x)$groups
|
||||
n_groups <- NROW(groups)
|
||||
progress <- progress_ticker(n = n_groups,
|
||||
n_min = 5,
|
||||
print = info,
|
||||
title = paste("Calculating AMR for", n_groups, "groups"))
|
||||
on.exit(close(progress))
|
||||
|
||||
for (i in seq_len(n_groups)) {
|
||||
if (i > 1) progress$tick()
|
||||
rows <- unlist(groups[i, ]$.rows)
|
||||
if (length(rows) == 0) {
|
||||
next
|
||||
}
|
||||
|
||||
new_out <- antibiogram(as.data.frame(x)[rows, , drop = FALSE],
|
||||
antibiotics = antibiotics,
|
||||
mo_transform = function(x) "no_mo",
|
||||
ab_transform = ab_transform,
|
||||
syndromic_group = NULL,
|
||||
add_total_n = add_total_n,
|
||||
only_all_tested = only_all_tested,
|
||||
digits = digits,
|
||||
formatting_type = formatting_type,
|
||||
col_mo = col_mo,
|
||||
language = language,
|
||||
minimum = minimum,
|
||||
combine_SI = combine_SI,
|
||||
sep = sep,
|
||||
wisca = wisca,
|
||||
simulations = simulations,
|
||||
conf_interval = conf_interval,
|
||||
interval_side = interval_side,
|
||||
info = i == 1 && info == TRUE)
|
||||
new_out_numeric <- attributes(new_out)$out_numeric
|
||||
|
||||
if (i == 1) progress$tick()
|
||||
|
||||
if (NROW(new_out) == 0) {
|
||||
next
|
||||
}
|
||||
|
||||
# remove first column 'Pathogen' (in whatever language)
|
||||
new_out <- new_out[, -1, drop = FALSE]
|
||||
new_out_numeric <- new_out_numeric[, -1, drop = FALSE]
|
||||
|
||||
# add group names to data set
|
||||
for (col in rev(seq_len(NCOL(groups) - 1))) {
|
||||
col_name <- colnames(groups)[col]
|
||||
col_value <- groups[i, col, drop = TRUE]
|
||||
new_out[, col_name] <- col_value
|
||||
new_out <- new_out[, c(col_name, setdiff(names(new_out), col_name))] # set place to 1st col
|
||||
new_out_numeric[, col_name] <- col_value
|
||||
new_out_numeric <- new_out_numeric[, c(col_name, setdiff(names(new_out_numeric), col_name))] # set place to 1st col
|
||||
}
|
||||
|
||||
if (i == 1) {
|
||||
# the first go
|
||||
out <- new_out
|
||||
out_numeric <- new_out_numeric
|
||||
} else {
|
||||
out <- rbind_AMR(out, new_out)
|
||||
out_numeric <- rbind_AMR(out_numeric, new_out_numeric)
|
||||
}
|
||||
}
|
||||
|
||||
close(progress)
|
||||
|
||||
out <- structure(as_original_data_class(out, class(x), extra_class = "antibiogram"),
|
||||
has_syndromic_group = FALSE,
|
||||
combine_SI = isTRUE(combine_SI),
|
||||
wisca = isTRUE(wisca),
|
||||
conf_interval = conf_interval,
|
||||
out_numeric = as_original_data_class(out_numeric, class(x)))
|
||||
}
|
||||
|
||||
#' @export
|
||||
#' @rdname antibiogram
|
||||
wisca <- function(x,
|
||||
antibiotics = where(is.sir),
|
||||
mo_transform = "shortname",
|
||||
ab_transform = "name",
|
||||
syndromic_group = NULL,
|
||||
add_total_n = FALSE,
|
||||
only_all_tested = FALSE,
|
||||
digits = 0,
|
||||
formatting_type = getOption("AMR_antibiogram_formatting_type", 18),
|
||||
col_mo = NULL,
|
||||
language = get_AMR_locale(),
|
||||
minimum = 30,
|
||||
combine_SI = TRUE,
|
||||
sep = " + ",
|
||||
simulations = 1000,
|
||||
info = interactive()) {
|
||||
antibiogram(x = x,
|
||||
antibiotics = antibiotics,
|
||||
mo_transform = mo_transform,
|
||||
ab_transform = ab_transform,
|
||||
syndromic_group = syndromic_group,
|
||||
add_total_n = add_total_n,
|
||||
only_all_tested = only_all_tested,
|
||||
digits = digits,
|
||||
formatting_type = formatting_type,
|
||||
col_mo = col_mo,
|
||||
language = language,
|
||||
minimum = minimum,
|
||||
combine_SI = combine_SI,
|
||||
sep = sep,
|
||||
wisca = TRUE,
|
||||
simulations = simulations,
|
||||
info = info)
|
||||
}
|
||||
|
||||
#' @export
|
||||
#' @param antibiogram the outcome of [antibiogram()] or [wisca()]
|
||||
#' @rdname antibiogram
|
||||
get_long_numeric_format <- function(antibiogram) {
|
||||
stop_ifnot(inherits(antibiogram, "antibiogram"), "This function only works for the output of `antibiogram()` and `wisca()`.", call = FALSE)
|
||||
attributes(antibiogram)$out_numeric
|
||||
}
|
||||
|
||||
calculate_priors <- function(data, combine_SI = TRUE) {
|
||||
# Ensure data has required columns
|
||||
stopifnot(all(c("mo", "total_rows", "total", "S") %in% colnames(data)))
|
||||
if (combine_SI == TRUE && "I" %in% colnames(data)) {
|
||||
data$S <- data$S + data$I
|
||||
}
|
||||
if (combine_SI == TRUE && "SDD" %in% colnames(data)) {
|
||||
data$S <- data$S + data$SDD
|
||||
}
|
||||
|
||||
# Pathogen incidence (Dirichlet distribution)
|
||||
gamma_prior <- rep(1, length(unique(data$mo))) # Dirichlet prior
|
||||
gamma_posterior <- gamma_prior + data$total_rows # Posterior parameters
|
||||
|
||||
# Regimen susceptibility (Beta distribution)
|
||||
beta_prior <- rep(1, length(unique(data$mo))) # Beta prior
|
||||
r <- data$S # Number of pathogens tested susceptible
|
||||
n <- data$total # Total tested
|
||||
beta_posterior_1 <- beta_prior + r # Posterior alpha
|
||||
beta_posterior_2 <- beta_prior + (n - r) # Posterior beta
|
||||
|
||||
# Return parameters as a list
|
||||
list(
|
||||
gamma_posterior = gamma_posterior,
|
||||
beta_posterior_1 = beta_posterior_1,
|
||||
beta_posterior_2 = beta_posterior_2
|
||||
)
|
||||
}
|
||||
|
||||
# will be exported in R/zzz.R
|
||||
tbl_sum.antibiogram <- function(x, ...) {
|
||||
dims <- paste(format(NROW(x), big.mark = ","), AMR_env$cross_icon, format(NCOL(x), big.mark = ","))
|
||||
names(dims) <- "An Antibiogram"
|
||||
if (isTRUE(attributes(x)$wisca)) {
|
||||
names(dims) <- paste0("An Antibiogram (WISCA / ", attributes(x)$conf_interval * 100, "% CI)")
|
||||
} else {
|
||||
names(dims) <- "An Antibiogram (non-WISCA)"
|
||||
}
|
||||
dims
|
||||
}
|
||||
|
||||
@ -619,6 +1010,9 @@ tbl_format_footer.antibiogram <- function(x, ...) {
|
||||
#' @rdname antibiogram
|
||||
plot.antibiogram <- function(x, ...) {
|
||||
df <- attributes(x)$out_numeric
|
||||
if (!"mo" %in% colnames(df)) {
|
||||
stop_("Plotting antibiograms using plot() is only possible if they were not created using dplyr groups. Consider using `get_long_numeric_format()` to retrieve raw antibiogram values.")
|
||||
}
|
||||
if ("syndromic_group" %in% colnames(df)) {
|
||||
# barplot in base R does not support facets - paste columns together
|
||||
df$mo <- paste(df$mo, "-", df$syndromic_group)
|
||||
@ -629,11 +1023,12 @@ plot.antibiogram <- function(x, ...) {
|
||||
mfrow_old <- graphics::par()$mfrow
|
||||
sqrt_levels <- sqrt(length(mo_levels))
|
||||
graphics::par(mfrow = c(ceiling(sqrt_levels), floor(sqrt_levels)))
|
||||
|
||||
for (i in seq_along(mo_levels)) {
|
||||
mo <- mo_levels[i]
|
||||
df_sub <- df[df$mo == mo, , drop = FALSE]
|
||||
|
||||
barplot(
|
||||
|
||||
bp <- barplot(
|
||||
height = df_sub$percentage * 100,
|
||||
xlab = NULL,
|
||||
ylab = ifelse(isTRUE(attributes(x)$combine_SI), "%SI", "%S"),
|
||||
@ -643,7 +1038,18 @@ plot.antibiogram <- function(x, ...) {
|
||||
main = mo,
|
||||
legend = NULL
|
||||
)
|
||||
|
||||
if (isTRUE(attributes(x)$wisca)) {
|
||||
lower <- df_sub$lower * 100
|
||||
upper <- df_sub$upper * 100
|
||||
arrows(
|
||||
x0 = bp, y0 = lower, # Start of error bar (lower bound)
|
||||
x1 = bp, y1 = upper, # End of error bar (upper bound)
|
||||
angle = 90, code = 3, length = 0.05, col = "black"
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
graphics::par(mfrow = mfrow_old)
|
||||
}
|
||||
|
||||
@ -658,19 +1064,20 @@ barplot.antibiogram <- function(height, ...) {
|
||||
# will be exported using s3_register() in R/zzz.R
|
||||
autoplot.antibiogram <- function(object, ...) {
|
||||
df <- attributes(object)$out_numeric
|
||||
ggplot2::ggplot(df) +
|
||||
ggplot2::geom_col(
|
||||
ggplot2::aes(
|
||||
x = ab,
|
||||
y = percentage * 100,
|
||||
fill = if ("syndromic_group" %in% colnames(df)) {
|
||||
syndromic_group
|
||||
} else {
|
||||
NULL
|
||||
}
|
||||
),
|
||||
position = ggplot2::position_dodge2(preserve = "single")
|
||||
) +
|
||||
if (!"mo" %in% colnames(df)) {
|
||||
stop_("Plotting antibiograms using plot() is only possible if they were not created using dplyr groups. Consider using `get_long_numeric_format()` to retrieve raw antibiogram values.")
|
||||
}
|
||||
out <- ggplot2::ggplot(df,
|
||||
mapping = ggplot2::aes(
|
||||
x = ab,
|
||||
y = percentage * 100,
|
||||
fill = if ("syndromic_group" %in% colnames(df)) {
|
||||
syndromic_group
|
||||
} else {
|
||||
NULL
|
||||
}
|
||||
)) +
|
||||
ggplot2::geom_col(position = ggplot2::position_dodge2(preserve = "single")) +
|
||||
ggplot2::facet_wrap("mo") +
|
||||
ggplot2::labs(
|
||||
y = ifelse(isTRUE(attributes(object)$combine_SI), "%SI", "%S"),
|
||||
@ -681,6 +1088,13 @@ autoplot.antibiogram <- function(object, ...) {
|
||||
NULL
|
||||
}
|
||||
)
|
||||
if (isTRUE(attributes(object)$wisca)) {
|
||||
out <- out +
|
||||
ggplot2::geom_errorbar(mapping = ggplot2::aes(ymin = lower * 100, ymax = upper * 100),
|
||||
position = ggplot2::position_dodge2(preserve = "single"),
|
||||
width = 0.5)
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
# will be exported in zzz.R
|
||||
@ -692,17 +1106,17 @@ knit_print.antibiogram <- function(x, italicise = TRUE, na = getOption("knitr.ka
|
||||
stop_ifnot_installed("knitr")
|
||||
meet_criteria(italicise, allow_class = "logical", has_length = 1)
|
||||
meet_criteria(na, allow_class = "character", has_length = 1, allow_NA = TRUE)
|
||||
|
||||
if (isTRUE(italicise)) {
|
||||
|
||||
if (isTRUE(italicise) && "mo" %in% colnames(attributes(x)$out_numeric)) {
|
||||
# make all microorganism names italic, according to nomenclature
|
||||
names_col <- ifelse(isTRUE(attributes(x)$has_syndromic_group), 2, 1)
|
||||
x[[names_col]] <- italicise_taxonomy(x[[names_col]], type = "markdown")
|
||||
}
|
||||
|
||||
|
||||
old_option <- getOption("knitr.kable.NA")
|
||||
options(knitr.kable.NA = na)
|
||||
on.exit(options(knitr.kable.NA = old_option))
|
||||
|
||||
|
||||
out <- paste(c("", "", knitr::kable(x, ..., output = FALSE)), collapse = "\n")
|
||||
knitr::asis_output(out)
|
||||
}
|
||||
|
@ -70,6 +70,7 @@
|
||||
bug_drug_combinations <- function(x,
|
||||
col_mo = NULL,
|
||||
FUN = mo_shortname,
|
||||
include_n_rows = FALSE,
|
||||
...) {
|
||||
meet_criteria(x, allow_class = "data.frame")
|
||||
x <- ascertain_sir_classes(x, "x")
|
||||
@ -110,6 +111,7 @@ bug_drug_combinations <- function(x,
|
||||
I = integer(0),
|
||||
R = integer(0),
|
||||
total = integer(0),
|
||||
total_rows = integer(0),
|
||||
stringsAsFactors = FALSE
|
||||
)
|
||||
if (data_has_groups) {
|
||||
@ -123,8 +125,14 @@ bug_drug_combinations <- function(x,
|
||||
x_mo_filter <- x[which(x[, col_mo, drop = TRUE] == unique_mo[i]), names(which(vapply(FUN.VALUE = logical(1), x, is.sir))), drop = FALSE]
|
||||
# turn and merge everything
|
||||
pivot <- lapply(x_mo_filter, function(x) {
|
||||
m <- as.matrix(table(as.sir(x)))
|
||||
data.frame(S = m["S", ], SDD = m["SDD", ], I = m["I", ], R = m["R", ], NI = m["NI", ], stringsAsFactors = FALSE)
|
||||
m <- as.matrix(table(as.sir(x), useNA = "always"))
|
||||
data.frame(S = m["S", ],
|
||||
SDD = m["SDD", ],
|
||||
I = m["I", ],
|
||||
R = m["R", ],
|
||||
NI = m["NI", ],
|
||||
na = m[which(is.na(rownames(m))), ],
|
||||
stringsAsFactors = FALSE)
|
||||
})
|
||||
merged <- do.call(rbind_AMR, pivot)
|
||||
out_group <- data.frame(
|
||||
@ -136,6 +144,7 @@ bug_drug_combinations <- function(x,
|
||||
R = merged$R,
|
||||
NI = merged$NI,
|
||||
total = merged$S + merged$SDD + merged$I + merged$R + merged$NI,
|
||||
total_rows = merged$S + merged$SDD + merged$I + merged$R + merged$NI + merged$na,
|
||||
stringsAsFactors = FALSE
|
||||
)
|
||||
if (data_has_groups) {
|
||||
@ -162,16 +171,21 @@ bug_drug_combinations <- function(x,
|
||||
}
|
||||
res
|
||||
}
|
||||
|
||||
|
||||
if (include_n_rows == FALSE) {
|
||||
out <- out[, colnames(out)[colnames(out) != "total_rows"], drop = FALSE]
|
||||
}
|
||||
|
||||
if (data_has_groups) {
|
||||
out <- apply_group(x, "run_it", groups)
|
||||
} else {
|
||||
out <- run_it(x)
|
||||
}
|
||||
|
||||
out <- out %pm>% pm_arrange(mo, ab)
|
||||
out <- as_original_data_class(out, class(x.bak)) # will remove tibble groups
|
||||
rownames(out) <- NULL
|
||||
structure(out, class = c("bug_drug_combinations", ifelse(data_has_groups, "grouped", character(0)), class(out)))
|
||||
structure(out, class = c("bug_drug_combinations", if(data_has_groups) "grouped" else NULL, class(out)))
|
||||
}
|
||||
|
||||
#' @method format bug_drug_combinations
|
||||
|
@ -51,6 +51,8 @@
|
||||
#' @param include_untested_sir a [logical] to indicate whether also rows without antibiotic results are still eligible for becoming a first isolate. Use `include_untested_sir = FALSE` to always return `FALSE` for such rows. This checks the data set for columns of class `sir` and consequently requires transforming columns with antibiotic results using [as.sir()] first.
|
||||
#' @param ... arguments passed on to [first_isolate()] when using [filter_first_isolate()], otherwise arguments passed on to [key_antimicrobials()] (such as `universal`, `gram_negative`, `gram_positive`)
|
||||
#' @details
|
||||
#' The methodology implemented in these functions is based on the research overview by Hindler *et al.* (2007, \doi{10.1086/511864}) and the recommendations outlined in the [CLSI Guideline M39](https://clsi.org/standards/products/microbiology/documents/m39).
|
||||
|
||||
#' To conduct epidemiological analyses on antimicrobial resistance data, only so-called first isolates should be included to prevent overestimation and underestimation of antimicrobial resistance. Different methods can be used to do so, see below.
|
||||
#'
|
||||
#' These functions are context-aware. This means that the `x` argument can be left blank if used inside a [data.frame] call, see *Examples*.
|
||||
@ -61,7 +63,7 @@
|
||||
#'
|
||||
#' ### Different methods
|
||||
#'
|
||||
#' According to Hindler *et al.* (2007, \doi{10.1086/511864}), there are different methods (algorithms) to select first isolates with increasing reliability: isolate-based, patient-based, episode-based and phenotype-based. All methods select on a combination of the taxonomic genus and species (not subspecies).
|
||||
#' According to previously-mentioned sources, there are different methods (algorithms) to select first isolates with increasing reliability: isolate-based, patient-based, episode-based and phenotype-based. All methods select on a combination of the taxonomic genus and species (not subspecies).
|
||||
#'
|
||||
#' All mentioned methods are covered in the [first_isolate()] function:
|
||||
#'
|
||||
@ -93,11 +95,11 @@
|
||||
#'
|
||||
#' ### Patient-based
|
||||
#'
|
||||
#' To include every genus-species combination per patient once, set the `episode_days` to `Inf`. Although often inappropriate, this method makes sure that no duplicate isolates are selected from the same patient. In a large longitudinal data set, this could mean that isolates are *excluded* that were found years after the initial isolate.
|
||||
#' To include every genus-species combination per patient once, set the `episode_days` to `Inf`. This method makes sure that no duplicate isolates are selected from the same patient. This method is preferred to e.g. identify the first MRSA finding of each patient to determine the incidence. Conversely, in a large longitudinal data set, this could mean that isolates are *excluded* that were found years after the initial isolate.
|
||||
#'
|
||||
#' ### Episode-based
|
||||
#'
|
||||
#' To include every genus-species combination per patient episode once, set the `episode_days` to a sensible number of days. Depending on the type of analysis, this could be 14, 30, 60 or 365. Short episodes are common for analysing specific hospital or ward data, long episodes are common for analysing regional and national data.
|
||||
#' To include every genus-species combination per patient episode once, set the `episode_days` to a sensible number of days. Depending on the type of analysis, this could be 14, 30, 60 or 365. Short episodes are common for analysing specific hospital or ward data or ICU cases, long episodes are common for analysing regional and national data.
|
||||
#'
|
||||
#' This is the most common method to correct for duplicate isolates. Patients are categorised into episodes based on their ID and dates (e.g., the date of specimen receipt or laboratory result). While this is a common method, it does not take into account antimicrobial test results. This means that e.g. a methicillin-resistant *Staphylococcus aureus* (MRSA) isolate cannot be differentiated from a wildtype *Staphylococcus aureus* isolate.
|
||||
#'
|
||||
|
97
R/top_n_microorganisms.R
Executable file
97
R/top_n_microorganisms.R
Executable file
@ -0,0 +1,97 @@
|
||||
# ==================================================================== #
|
||||
# TITLE: #
|
||||
# AMR: An R Package for Working with Antimicrobial Resistance Data #
|
||||
# #
|
||||
# SOURCE CODE: #
|
||||
# https://github.com/msberends/AMR #
|
||||
# #
|
||||
# PLEASE CITE THIS SOFTWARE AS: #
|
||||
# Berends MS, Luz CF, Friedrich AW, et al. (2022). #
|
||||
# AMR: An R Package for Working with Antimicrobial Resistance Data. #
|
||||
# Journal of Statistical Software, 104(3), 1-31. #
|
||||
# https://doi.org/10.18637/jss.v104.i03 #
|
||||
# #
|
||||
# Developed at the University of Groningen and the University Medical #
|
||||
# Center Groningen in The Netherlands, in collaboration with many #
|
||||
# colleagues from around the world, see our website. #
|
||||
# #
|
||||
# This R package is free software; you can freely use and distribute #
|
||||
# it for both personal and commercial purposes under the terms of the #
|
||||
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
|
||||
# the Free Software Foundation. #
|
||||
# We created this package for both routine data analysis and academic #
|
||||
# research and it was publicly released in the hope that it will be #
|
||||
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
|
||||
# #
|
||||
# Visit our website for the full manual and a complete tutorial about #
|
||||
# how to conduct AMR data analysis: https://msberends.github.io/AMR/ #
|
||||
# ==================================================================== #
|
||||
|
||||
#' Filter Top *n* Microorganisms
|
||||
#'
|
||||
#' This function filters a data set to include only the top *n* microorganisms based on a specified property, such as taxonomic family or genus. For example, it can filter a data set to the top 3 species, or to any species in the top 5 genera, or to the top 3 species in each of the top 5 genera.
|
||||
#' @param x a data frame containing microbial data
|
||||
#' @param n an integer specifying the maximum number of unique values of the `property` to include in the output
|
||||
#' @param property a character string indicating the microorganism property to use for filtering. Must be one of the column names of the [microorganisms] data set: `r vector_or(colnames(microorganisms), sort = FALSE, quotes = TRUE)`. If `NULL`, the raw values from `col_mo` will be used without transformation.
|
||||
#' @param n_for_each an optional integer specifying the maximum number of rows to retain for each value of the selected property. If `NULL`, all rows within the top *n* groups will be included.
|
||||
#' @param col_mo A character string indicating the column in `x` that contains microorganism names or codes. Defaults to the first column of class [`mo`]. Values will be coerced using [as.mo()].
|
||||
#' @param ... Additional arguments passed on to [mo_property()] when `property` is not `NULL`.
|
||||
#' @details This function is useful for preprocessing data before creating [antibiograms][antibiograms()] or other analyses that require focused subsets of microbial data. For example, it can filter a data set to only include isolates from the top 10 species.
|
||||
#' @export
|
||||
#' @seealso [mo_property()], [as.mo()], [antibiogram()]
|
||||
#' @examples
|
||||
#' # filter to the top 3 species:
|
||||
#' top_n_microorganisms(example_isolates,
|
||||
#' n = 3)
|
||||
#'
|
||||
#' # filter to any species in the top 5 genera:
|
||||
#' top_n_microorganisms(example_isolates,
|
||||
#' n = 5, property = "genus")
|
||||
#'
|
||||
#' # filter to the top 3 species in each of the top 5 genera:
|
||||
#' top_n_microorganisms(example_isolates,
|
||||
#' n = 5, property = "genus", n_for_each = 3)
|
||||
top_n_microorganisms <- function(x, n, property = "fullname", n_for_each = NULL, col_mo = NULL, ...) {
|
||||
meet_criteria(x, allow_class = "data.frame") # also checks dimensions to be >0
|
||||
meet_criteria(n, allow_class = c("numeric", "integer"), has_length = 1, is_finite = TRUE, is_positive = TRUE)
|
||||
meet_criteria(property, allow_class = "character", has_length = 1, is_in = colnames(AMR::microorganisms))
|
||||
meet_criteria(n_for_each, allow_class = c("numeric", "integer"), has_length = 1, is_finite = TRUE, is_positive = TRUE, allow_NULL = TRUE)
|
||||
meet_criteria(col_mo, allow_class = "character", has_length = 1, allow_NULL = TRUE, is_in = colnames(x))
|
||||
if (is.null(col_mo)) {
|
||||
col_mo <- search_type_in_df(x = x, type = "mo", info = TRUE)
|
||||
stop_if(is.null(col_mo), "`col_mo` must be set")
|
||||
}
|
||||
|
||||
x.bak <- x
|
||||
|
||||
x[, col_mo] <- as.mo(x[, col_mo, drop = TRUE], keep_synonyms = TRUE)
|
||||
|
||||
if (is.null(property)) {
|
||||
x$prop_val <- x[[col_mo]]
|
||||
} else {
|
||||
x$prop_val <- mo_property(x[[col_mo]], property = property, ...)
|
||||
}
|
||||
counts <- sort(table(x$prop_val), decreasing = TRUE)
|
||||
|
||||
n <- as.integer(n)
|
||||
if (length(counts) < n) {
|
||||
n <- length(counts)
|
||||
}
|
||||
count_values <- names(counts)[seq_len(n)]
|
||||
filtered_rows <- which(x$prop_val %in% count_values)
|
||||
|
||||
if (!is.null(n_for_each)) {
|
||||
n_for_each <- as.integer(n_for_each)
|
||||
filtered_x <- x[filtered_rows, , drop = FALSE]
|
||||
filtered_rows <- do.call(
|
||||
c,
|
||||
lapply(split(filtered_x, filtered_x$prop_val), function(group) {
|
||||
top_values <- names(sort(table(group[[col_mo]]), decreasing = TRUE)[seq_len(n_for_each)])
|
||||
top_values <- top_values[!is.na(top_values)]
|
||||
which(x[[col_mo]] %in% top_values)
|
||||
})
|
||||
)
|
||||
}
|
||||
|
||||
x.bak[filtered_rows, , drop = FALSE]
|
||||
}
|
Reference in New Issue
Block a user