1
0
mirror of https://github.com/msberends/AMR.git synced 2025-04-22 17:43:56 +02:00
AMR/R/sir.R

2014 lines
98 KiB
R
Executable File

# ==================================================================== #
# TITLE: #
# AMR: An R Package for Working with Antimicrobial Resistance Data #
# #
# SOURCE CODE: #
# https://github.com/msberends/AMR #
# #
# PLEASE CITE THIS SOFTWARE AS: #
# Berends MS, Luz CF, Friedrich AW, et al. (2022). #
# AMR: An R Package for Working with Antimicrobial Resistance Data. #
# Journal of Statistical Software, 104(3), 1-31. #
# https://doi.org/10.18637/jss.v104.i03 #
# #
# Developed at the University of Groningen and the University Medical #
# Center Groningen in The Netherlands, in collaboration with many #
# colleagues from around the world, see our website. #
# #
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
# the Free Software Foundation. #
# We created this package for both routine data analysis and academic #
# research and it was publicly released in the hope that it will be #
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
# #
# Visit our website for the full manual and a complete tutorial about #
# how to conduct AMR data analysis: https://amr-for-r.org #
# ==================================================================== #
#' Interpret MIC and Disk Diffusion as SIR, or Clean Existing SIR Data
#'
#' @description Clean up existing SIR values, or interpret minimum inhibitory concentration (MIC) values and disk diffusion diameters according to EUCAST or CLSI. [as.sir()] transforms the input to a new class [`sir`], which is an ordered [factor] containing the levels `S`, `SDD`, `I`, `R`, `NI`.
#'
#' Breakpoints are currently implemented from EUCAST (`r min(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "EUCAST")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "EUCAST")$guideline)))`) and CLSI (`r min(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "CLSI")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "CLSI")$guideline)))`), see *Details*. All breakpoints used for interpretation are available in our [clinical_breakpoints] data set.
#' @rdname as.sir
#' @param x Vector of values (for class [`mic`]: MIC values in mg/L, for class [`disk`]: a disk diffusion radius in millimetres).
#' @param mo A vector (or column name) with [character]s that can be coerced to valid microorganism codes with [as.mo()], can be left empty to determine it automatically.
#' @param ab A vector (or column name) with [character]s that can be coerced to a valid antimicrobial drug code with [as.ab()].
#' @param uti (Urinary Tract Infection) a vector (or column name) with [logical]s (`TRUE` or `FALSE`) to specify whether a UTI specific interpretation from the guideline should be chosen. For using [as.sir()] on a [data.frame], this can also be a column containing [logical]s or when left blank, the data set will be searched for a column 'specimen', and rows within this column containing 'urin' (such as 'urine', 'urina') will be regarded isolates from a UTI. See *Examples*.
#' @inheritParams first_isolate
#' @param guideline A guideline name (or column name) to use for SIR interpretation. Defaults to `r AMR::clinical_breakpoints$guideline[1]` (the latest implemented EUCAST guideline in the [AMR::clinical_breakpoints] data set), but can be set with the package option [`AMR_guideline`][AMR-options]. Currently supports EUCAST (`r min(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "EUCAST")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "EUCAST")$guideline)))`) and CLSI (`r min(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "CLSI")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "CLSI")$guideline)))`), see *Details*. Using a column name allows for straightforward interpretation of historical data, which must be analysed in the context of, for example, different years.
#' @param capped_mic_handling A [character] string that controls how MIC values with a cap (i.e., starting with `<`, `<=`, `>`, or `>=`) are interpreted. Supports the following options:
#'
#' `"none"`
#' * `<=` and `>=` are treated as-is.
#' * `<` and `>` are treated as-is.
#'
#' `"conservative"`
#' * `<=` and `>=` return `"NI"` (non-interpretable) if the MIC is within the breakpoint guideline range.
#' * `<` always returns `"S"`, and `>` always returns `"R"`.
#'
#' `"standard"` (default)
#' * `<=` and `>=` return `"NI"` (non-interpretable) if the MIC is within the breakpoint guideline range.
#' * `<` and `>` are treated as-is.
#'
#' `"inverse"`
#' * `<=` and `>=` are treated as-is.
#' * `<` always returns `"S"`, and `>` always returns `"R"`.
#'
#' The default `"standard"` setting ensures cautious handling of uncertain values while preserving interpretability. This option can also be set with the package option [`AMR_capped_mic_handling`][AMR-options].
#' @param add_intrinsic_resistance *(only useful when using a EUCAST guideline)* a [logical] to indicate whether intrinsic antibiotic resistance must also be considered for applicable bug-drug combinations, meaning that e.g. ampicillin will always return "R" in *Klebsiella* species. Determination is based on the [intrinsic_resistant] data set, that itself is based on `r format_eucast_version_nr(3.3)`.
#' @param substitute_missing_r_breakpoint A [logical] to indicate that a missing clinical breakpoints for R (resistant) must be substituted with R - the default is `FALSE`. Some (especially CLSI) breakpoints only have a breakpoint for S, meaning the outcome can only be `"S"` or `NA`. Setting this to `TRUE` will convert the `NA`s to `"R"` only if the R breakpoint is missing. Can also be set with the package option [`AMR_substitute_missing_r_breakpoint`][AMR-options].
#' @param include_screening A [logical] to indicate that clinical breakpoints for screening are allowed - the default is `FALSE`. Can also be set with the package option [`AMR_include_screening`][AMR-options].
#' @param include_PKPD A [logical] to indicate that PK/PD clinical breakpoints must be applied as a last resort - the default is `TRUE`. Can also be set with the package option [`AMR_include_PKPD`][AMR-options].
#' @param breakpoint_type The type of breakpoints to use, either `r vector_or(clinical_breakpoints$type)`. ECOFF stands for Epidemiological Cut-Off values. The default is `"human"`, which can also be set with the package option [`AMR_breakpoint_type`][AMR-options]. If `host` is set to values of veterinary species, this will automatically be set to `"animal"`.
#' @param host A vector (or column name) with [character]s to indicate the host. Only useful for veterinary breakpoints, as it requires `breakpoint_type = "animal"`. The values can be any text resembling the animal species, even in any of the `r length(LANGUAGES_SUPPORTED)` supported languages of this package. For foreign languages, be sure to set the language with [set_AMR_locale()] (though it will be automatically guessed based on the system language).
#' @param verbose A [logical] to indicate that all notes should be printed during interpretation of MIC values or disk diffusion values.
#' @param reference_data A [data.frame] to be used for interpretation, which defaults to the [clinical_breakpoints] data set. Changing this argument allows for using own interpretation guidelines. This argument must contain a data set that is equal in structure to the [clinical_breakpoints] data set (same column names and column types). Please note that the `guideline` argument will be ignored when `reference_data` is manually set.
#' @param threshold Maximum fraction of invalid antimicrobial interpretations of `x`, see *Examples*.
#' @param conserve_capped_values Deprecated, use `capped_mic_handling` instead.
#' @param ... For using on a [data.frame]: names of columns to apply [as.sir()] on (supports tidy selection such as `column1:column4`). Otherwise: arguments passed on to methods.
#' @details
#' *Note: The clinical breakpoints in this package were validated through, and imported from, [WHONET](https://whonet.org). The public use of this `AMR` package has been endorsed by both CLSI and EUCAST. See [clinical_breakpoints] for more information.*
#'
#' ### How it Works
#'
#' The [as.sir()] function can work in four ways:
#'
#' 1. For **cleaning raw / untransformed data**. The data will be cleaned to only contain valid values, namely: **S** for susceptible, **I** for intermediate or 'susceptible, increased exposure', **R** for resistant, **NI** for non-interpretable, and **SDD** for susceptible dose-dependent. Each of these can be set using a [regular expression][base::regex]. Furthermore, [as.sir()] will try its best to clean with some intelligence. For example, mixed values with SIR interpretations and MIC values such as `"<0.25; S"` will be coerced to `"S"`. Combined interpretations for multiple test methods (as seen in laboratory records) such as `"S; S"` will be coerced to `"S"`, but a value like `"S; I"` will return `NA` with a warning that the input is invalid.
#'
#' 2. For **interpreting minimum inhibitory concentration (MIC) values** according to EUCAST or CLSI. You must clean your MIC values first using [as.mic()], that also gives your columns the new data class [`mic`]. Also, be sure to have a column with microorganism names or codes. It will be found automatically, but can be set manually using the `mo` argument.
#' * Example to apply using `dplyr`:
#' ```r
#' your_data %>% mutate_if(is.mic, as.sir)
#' your_data %>% mutate(across(where(is.mic), as.sir))
#' your_data %>% mutate_if(is.mic, as.sir, ab = "column_with_antibiotics", mo = "column_with_microorganisms")
#' your_data %>% mutate_if(is.mic, as.sir, ab = c("cipro", "ampicillin", ...), mo = c("E. coli", "K. pneumoniae", ...))
#'
#' # for veterinary breakpoints, also set `host`:
#' your_data %>% mutate_if(is.mic, as.sir, host = "column_with_animal_species", guideline = "CLSI")
#' ```
#' * Operators like "<=" will be stripped before interpretation. When using `capped_mic_handling = "conservative"`, an MIC value of e.g. ">2" will always return "R", even if the breakpoint according to the chosen guideline is ">=4". This is to prevent that capped values from raw laboratory data would not be treated conservatively. The default behaviour (`capped_mic_handling = "standard"`) considers ">2" to be lower than ">=4" and might in this case return "S" or "I".
#' * **Note:** When using CLSI as the guideline, MIC values must be log2-based doubling dilutions. Values not in this format, will be automatically rounded up to the nearest log2 level as CLSI instructs, and a warning will be thrown.
#'
#' 3. For **interpreting disk diffusion diameters** according to EUCAST or CLSI. You must clean your disk zones first using [as.disk()], that also gives your columns the new data class [`disk`]. Also, be sure to have a column with microorganism names or codes. It will be found automatically, but can be set manually using the `mo` argument.
#' * Example to apply using `dplyr`:
#' ```r
#' your_data %>% mutate_if(is.disk, as.sir)
#' your_data %>% mutate(across(where(is.disk), as.sir))
#' your_data %>% mutate_if(is.disk, as.sir, ab = "column_with_antibiotics", mo = "column_with_microorganisms")
#' your_data %>% mutate_if(is.disk, as.sir, ab = c("cipro", "ampicillin", ...), mo = c("E. coli", "K. pneumoniae", ...))
#'
#' # for veterinary breakpoints, also set `host`:
#' your_data %>% mutate_if(is.disk, as.sir, host = "column_with_animal_species", guideline = "CLSI")
#' ```
#'
#' 4. For **interpreting a complete data set**, with automatic determination of MIC values, disk diffusion diameters, microorganism names or codes, and antimicrobial test results. This is done very simply by running `as.sir(your_data)`.
#'
#' **For points 2, 3 and 4: Use [sir_interpretation_history()]** to retrieve a [data.frame] with all results of all previous [as.sir()] calls. It also contains notes about interpretation, and the exact input and output values.
#'
#' ### Supported Guidelines
#'
#' For interpreting MIC values as well as disk diffusion diameters, currently implemented guidelines are:
#' - For **clinical microbiology**: EUCAST `r min(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "EUCAST" & type == "human")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "EUCAST" & type == "human")$guideline)))` and CLSI `r min(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "CLSI" & type == "human")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "CLSI" & type == "human")$guideline)))`;
#' - For **veterinary microbiology**: EUCAST `r min(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "EUCAST" & type == "animal")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "EUCAST" & type == "animal")$guideline)))` and CLSI `r min(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "CLSI" & type == "animal")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "CLSI" & type == "animal")$guideline)))`;
#' - For **ECOFFs** (Epidemiological Cut-off Values): EUCAST `r min(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "EUCAST" & type == "ECOFF")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "EUCAST" & type == "ECOFF")$guideline)))` and CLSI `r min(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "CLSI" & type == "ECOFF")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "CLSI" & type == "ECOFF")$guideline)))`.
#'
#' The `guideline` argument must be set to e.g., ``r paste0('"', subset(AMR::clinical_breakpoints, guideline %like% "EUCAST")$guideline[1], '"')`` or ``r paste0('"', subset(AMR::clinical_breakpoints, guideline %like% "CLSI")$guideline[1], '"')``. By simply using `"EUCAST"` (the default) or `"CLSI"` as input, the latest included version of that guideline will automatically be selected. Importantly, using a column name of your data instead, allows for straightforward interpretation of historical data that must be analysed in the context of, for example, different years.
#'
#' You can set your own data set using the `reference_data` argument. The `guideline` argument will then be ignored.
#'
#' It is also possible to set the default guideline with the package option [`AMR_guideline`][AMR-options] (e.g. in your `.Rprofile` file), such as:
#'
#' ```
#' options(AMR_guideline = "CLSI")
#' options(AMR_guideline = "CLSI 2018")
#' options(AMR_guideline = "EUCAST 2020")
#' # or to reset:
#' options(AMR_guideline = NULL)
#' ```
#'
#' For veterinary guidelines, these might be the best options:
#'
#' ```
#' options(AMR_guideline = "CLSI")
#' options(AMR_breakpoint_type = "animal")
#' ```
#'
###### TODO #187 When applying veterinary breakpoints (by setting `host` or by setting `breakpoint_type = "animal"`), the [CLSI VET09 guideline](https://clsi.org/standards/products/veterinary-medicine/documents/vet09/) will be applied to cope with missing animal species-specific breakpoints.
#'
#' ### After Interpretation
#'
#' After using [as.sir()], you can use the [eucast_rules()] defined by EUCAST to (1) apply inferred susceptibility and resistance based on results of other antimicrobials and (2) apply intrinsic resistance based on taxonomic properties of a microorganism.
#'
#' To determine which isolates are multi-drug resistant, be sure to run [mdro()] (which applies the MDR/PDR/XDR guideline from 2012 at default) on a data set that contains S/I/R values. Read more about [interpreting multidrug-resistant organisms here][mdro()].
#'
#' ### Other
#'
#' The function [is.sir()] detects if the input contains class `sir`. If the input is a [data.frame] or [list], it iterates over all columns/items and returns a [logical] vector.
#'
#' The base R function [as.double()] can be used to retrieve quantitative values from a `sir` object: `"S"` = 1, `"I"`/`"SDD"` = 2, `"R"` = 3. All other values are rendered `NA` . **Note:** Do not use `as.integer()`, since that (because of how R works internally) will return the factor level indices, and not these aforementioned quantitative values.
#'
#' The function [is_sir_eligible()] returns `TRUE` when a column contains at most 5% invalid antimicrobial interpretations (not S and/or I and/or R and/or NI and/or SDD), and `FALSE` otherwise. The threshold of 5% can be set with the `threshold` argument. If the input is a [data.frame], it iterates over all columns and returns a [logical] vector.
#' @section Interpretation of SIR:
#' In 2019, the European Committee on Antimicrobial Susceptibility Testing (EUCAST) has decided to change the definitions of susceptibility testing categories S, I, and R (<https://www.eucast.org/newsiandr>).
#'
#' This AMR package follows insight; use [susceptibility()] (equal to [proportion_SI()]) to determine antimicrobial susceptibility and [count_susceptible()] (equal to [count_SI()]) to count susceptible isolates.
#' @return Ordered [factor] with new class `sir`
#' @aliases sir
#' @export
#' @seealso [as.mic()], [as.disk()], [as.mo()]
#' @source
#' For interpretations of minimum inhibitory concentration (MIC) values and disk diffusion diameters:
#'
#' - **CLSI M39: Analysis and Presentation of Cumulative Antimicrobial Susceptibility Test Data**, `r min(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "CLSI")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "CLSI")$guideline)))`, *Clinical and Laboratory Standards Institute* (CLSI). <https://clsi.org/standards/products/microbiology/documents/m39/>.
#' - **CLSI M100: Performance Standard for Antimicrobial Susceptibility Testing**, `r min(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "CLSI" & type != "animal")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "CLSI" & type != "animal")$guideline)))`, *Clinical and Laboratory Standards Institute* (CLSI). <https://clsi.org/standards/products/microbiology/documents/m100/>.
#' - **CLSI VET01: Performance Standards for Antimicrobial Disk and Dilution Susceptibility Tests for Bacteria Isolated From Animals**, `r min(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "CLSI" & type == "animal")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "CLSI" & type == "animal")$guideline)))`, *Clinical and Laboratory Standards Institute* (CLSI). <https://clsi.org/standards/products/veterinary-medicine/documents/vet01/>.
###### TODO - **CLSI VET09: Understanding Susceptibility Test Data as a Component of Antimicrobial Stewardship in Veterinary Settings**, `r min(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "CLSI" & type == "animal")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "CLSI" & type == "animal")$guideline)))`, *Clinical and Laboratory Standards Institute* (CLSI). <https://clsi.org/standards/products/veterinary-medicine/documents/vet09/>.
#' - **EUCAST Breakpoint tables for interpretation of MICs and zone diameters**, `r min(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "EUCAST")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "EUCAST")$guideline)))`, *European Committee on Antimicrobial Susceptibility Testing* (EUCAST). <https://www.eucast.org/clinical_breakpoints>.
#' - **WHONET** as a source for machine-reading the clinical breakpoints ([read more here](https://amr-for-r.org/reference/clinical_breakpoints.html#imported-from-whonet)), 1989-`r max(as.integer(gsub("[^0-9]", "", AMR::clinical_breakpoints$guideline)))`, *WHO Collaborating Centre for Surveillance of Antimicrobial Resistance*. <https://whonet.org/>.
#'
#' @inheritSection AMR Download Our Reference Data
#' @examples
#' example_isolates
#' summary(example_isolates) # see all SIR results at a glance
#'
#' # For INTERPRETING disk diffusion and MIC values -----------------------
#'
#' # example data sets, with combined MIC values and disk zones
#' df_wide <- data.frame(
#' microorganism = "Escherichia coli",
#' amoxicillin = as.mic(8),
#' cipro = as.mic(0.256),
#' tobra = as.disk(16),
#' genta = as.disk(18),
#' ERY = "R"
#' )
#' df_long <- data.frame(
#' bacteria = rep("Escherichia coli", 4),
#' antibiotic = c("amoxicillin", "cipro", "tobra", "genta"),
#' mics = as.mic(c(0.01, 1, 4, 8)),
#' disks = as.disk(c(6, 10, 14, 18)),
#' guideline = c("EUCAST 2021", "EUCAST 2022", "EUCAST 2023", "EUCAST 2024")
#' )
#'
#' \donttest{
#' ## Using dplyr -------------------------------------------------
#' if (require("dplyr")) {
#' # approaches that all work without additional arguments:
#' df_wide %>% mutate_if(is.mic, as.sir)
#' df_wide %>% mutate_if(function(x) is.mic(x) | is.disk(x), as.sir)
#' df_wide %>% mutate(across(where(is.mic), as.sir))
#' df_wide %>% mutate_at(vars(amoxicillin:tobra), as.sir)
#' df_wide %>% mutate(across(amoxicillin:tobra, as.sir))
#'
#' # approaches that all work with additional arguments:
#' df_long %>%
#' # given a certain data type, e.g. MIC values
#' mutate_if(is.mic, as.sir,
#' mo = "bacteria",
#' ab = "antibiotic",
#' guideline = "guideline"
#' )
#' df_long %>%
#' mutate(across(
#' where(is.mic),
#' function(x) {
#' as.sir(x,
#' mo = "bacteria",
#' ab = "antibiotic",
#' guideline = "CLSI"
#' )
#' }
#' ))
#' df_wide %>%
#' # given certain columns, e.g. from 'cipro' to 'genta'
#' mutate_at(vars(cipro:genta), as.sir,
#' mo = "bacteria",
#' guideline = "CLSI"
#' )
#' df_wide %>%
#' mutate(across(
#' cipro:genta,
#' function(x) {
#' as.sir(x,
#' mo = "bacteria",
#' guideline = "CLSI"
#' )
#' }
#' ))
#'
#' # for veterinary breakpoints, add 'host':
#' df_long$animal_species <- c("cats", "dogs", "horses", "cattle")
#' df_long %>%
#' # given a certain data type, e.g. MIC values
#' mutate_if(is.mic, as.sir,
#' mo = "bacteria",
#' ab = "antibiotic",
#' host = "animal_species",
#' guideline = "CLSI"
#' )
#' df_long %>%
#' mutate(across(
#' where(is.mic),
#' function(x) {
#' as.sir(x,
#' mo = "bacteria",
#' ab = "antibiotic",
#' host = "animal_species",
#' guideline = "CLSI"
#' )
#' }
#' ))
#' df_wide %>%
#' mutate_at(vars(cipro:genta), as.sir,
#' mo = "bacteria",
#' ab = "antibiotic",
#' host = "animal_species",
#' guideline = "CLSI"
#' )
#' df_wide %>%
#' mutate(across(
#' cipro:genta,
#' function(x) {
#' as.sir(x,
#' mo = "bacteria",
#' host = "animal_species",
#' guideline = "CLSI"
#' )
#' }
#' ))
#'
#' # to include information about urinary tract infections (UTI)
#' data.frame(
#' mo = "E. coli",
#' nitrofuratoin = c("<= 2", 32),
#' from_the_bladder = c(TRUE, FALSE)
#' ) %>%
#' as.sir(uti = "from_the_bladder")
#'
#' data.frame(
#' mo = "E. coli",
#' nitrofuratoin = c("<= 2", 32),
#' specimen = c("urine", "blood")
#' ) %>%
#' as.sir() # automatically determines urine isolates
#'
#' df_wide %>%
#' mutate_at(vars(cipro:genta), as.sir, mo = "E. coli", uti = TRUE)
#' }
#'
#'
#' ## Using base R ------------------------------------------------
#'
#' as.sir(df_wide)
#'
#' # return a 'logbook' about the results:
#' sir_interpretation_history()
#'
#' # for single values
#' as.sir(
#' x = as.mic(2),
#' mo = as.mo("S. pneumoniae"),
#' ab = "AMP",
#' guideline = "EUCAST"
#' )
#'
#' as.sir(
#' x = as.disk(18),
#' mo = "Strep pneu", # `mo` will be coerced with as.mo()
#' ab = "ampicillin", # and `ab` with as.ab()
#' guideline = "EUCAST"
#' )
#'
#'
#' # For CLEANING existing SIR values ------------------------------------
#'
#' as.sir(c("S", "SDD", "I", "R", "NI", "A", "B", "C"))
#' as.sir("<= 0.002; S") # will return "S"
#' sir_data <- as.sir(c(rep("S", 474), rep("I", 36), rep("R", 370)))
#' is.sir(sir_data)
#' plot(sir_data) # for percentages
#' barplot(sir_data) # for frequencies
#'
#' # as common in R, you can use as.integer() to return factor indices:
#' as.integer(as.sir(c("S", "SDD", "I", "R", "NI", NA)))
#' # but for computational use, as.double() will return 1 for S, 2 for I/SDD, and 3 for R:
#' as.double(as.sir(c("S", "SDD", "I", "R", "NI", NA)))
#'
#' # the dplyr way
#' if (require("dplyr")) {
#' example_isolates %>%
#' mutate_at(vars(PEN:RIF), as.sir)
#' # same:
#' example_isolates %>%
#' as.sir(PEN:RIF)
#'
#' # fastest way to transform all columns with already valid AMR results to class `sir`:
#' example_isolates %>%
#' mutate_if(is_sir_eligible, as.sir)
#'
#' # since dplyr 1.0.0, this can also be:
#' # example_isolates %>%
#' # mutate(across(where(is_sir_eligible), as.sir))
#' }
#' }
as.sir <- function(x, ...) {
UseMethod("as.sir")
}
as_sir_structure <- function(x,
guideline = NULL,
mo = NULL,
ab = NULL,
method = NULL,
ref_tbl = NULL,
ref_breakpoints = NULL) {
out <- structure(
factor(as.character(unlist(unname(x))),
levels = c("S", "SDD", "I", "R", "NI"),
ordered = TRUE
),
# TODO for #170
# guideline = guideline,
# mo = mo,
# ab = ab,
# method = method,
# ref_tbl = ref_tbl,
# ref_breakpoints = ref_breakpoints,
class = c("sir", "ordered", "factor")
)
}
#' @rdname as.sir
#' @details `NA_sir_` is a missing value of the new `sir` class, analogous to e.g. base \R's [`NA_character_`][base::NA].
#' @format NULL
#' @export
NA_sir_ <- as_sir_structure(NA_character_)
#' @rdname as.sir
#' @export
is.sir <- function(x) {
if (identical(typeof(x), "list")) {
unname(vapply(FUN.VALUE = logical(1), x, is.sir))
} else {
isTRUE(inherits(x, "sir"))
}
}
#' @rdname as.sir
#' @export
is_sir_eligible <- function(x, threshold = 0.05) {
meet_criteria(threshold, allow_class = "numeric", has_length = 1)
if (identical(typeof(x), "list")) {
# iterate this function over all columns
return(unname(vapply(FUN.VALUE = logical(1), x, is_sir_eligible)))
}
stop_if(NCOL(x) > 1, "`x` must be a one-dimensional vector.")
if (any(c(
"numeric",
"integer",
"mo",
"ab",
"Date",
"POSIXt",
"raw",
"hms",
"mic",
"disk"
)
%in% class(x))) {
# no transformation needed
return(FALSE)
} else if (all(x %in% c("S", "SDD", "I", "R", "NI", NA)) & !all(is.na(x))) {
return(TRUE)
} else if (!any(c("S", "SDD", "I", "R", "NI") %in% x, na.rm = TRUE) & !all(is.na(x))) {
return(FALSE)
} else {
x <- x[!is.na(x) & !is.null(x) & !x %in% c("", "-", "NULL")]
if (length(x) == 0) {
# no other values than empty
cur_col <- get_current_column()
if (!is.null(cur_col)) {
ab <- suppressWarnings(as.ab(cur_col, fast_mode = TRUE, info = FALSE))
if (!is.na(ab)) {
# this is a valid antibiotic drug code
message_(
"Column '", font_bold(cur_col), "' is SIR eligible (despite only having empty values), since it seems to be ",
ab_name(ab, language = NULL, tolower = TRUE), " (", ab, ")"
)
return(TRUE)
}
}
# all values empty and no antibiotic col name - return FALSE
return(FALSE)
}
# transform all values and see if it meets the set threshold
checked <- suppressWarnings(as.sir(x))
outcome <- sum(is.na(checked)) / length(x)
outcome <= threshold
}
}
#' @rdname as.sir
#' @export
#' @param S,I,R,NI,SDD A case-independent [regular expression][base::regex] to translate input to this result. This regular expression will be run *after* all non-letters and whitespaces are removed from the input.
#' @param info A [logical] to print information about the process.
# extra param: warn (logical, to never throw a warning)
as.sir.default <- function(x,
S = "^(S|U)+$",
I = "^(I)+$",
R = "^(R)+$",
NI = "^(N|NI|V)+$",
SDD = "^(SDD|D|H)+$",
info = TRUE,
...) {
meet_criteria(S, allow_class = "character", has_length = 1)
meet_criteria(I, allow_class = "character", has_length = 1)
meet_criteria(R, allow_class = "character", has_length = 1)
meet_criteria(NI, allow_class = "character", has_length = 1)
meet_criteria(SDD, allow_class = "character", has_length = 1)
meet_criteria(info, allow_class = "logical", has_length = 1)
if (inherits(x, "sir")) {
return(as_sir_structure(x))
}
x.bak <- x
x <- as.character(x) # this is needed to prevent the vctrs pkg from throwing an error
if (inherits(x.bak, c("numeric", "integer")) && all(x %in% c(1:3, NA))) {
# support haven package for importing e.g., from SPSS - it adds the 'labels' attribute
lbls <- attributes(x.bak)$labels
if (!is.null(lbls) && all(c("S", "I", "R") %in% names(lbls)) && all(c(1:3) %in% lbls)) {
x[x.bak == 1] <- names(lbls[lbls == 1])
x[x.bak == 2] <- names(lbls[lbls == 2])
x[x.bak == 3] <- names(lbls[lbls == 3])
} else {
x[x.bak == 1] <- "S"
x[x.bak == 2] <- "I"
x[x.bak == 3] <- "R"
}
} else if (inherits(x.bak, "character") && all(x %in% c("1", "2", "3", "S", "I", "R", NA_character_))) {
x[x.bak == "1"] <- "S"
x[x.bak == "2"] <- "I"
x[x.bak == "3"] <- "R"
} else if (inherits(x.bak, "character") && all(x %in% c("1", "2", "3", "4", "5", "S", "SDD", "I", "R", "NI", NA_character_))) {
x[x.bak == "1"] <- "S"
x[x.bak == "2"] <- "SDD"
x[x.bak == "3"] <- "I"
x[x.bak == "4"] <- "R"
x[x.bak == "5"] <- "NI"
} else if (!all(is.na(x)) && !identical(levels(x), c("S", "SDD", "I", "R", "NI")) && !all(x %in% c("S", "SDD", "I", "R", "NI", NA))) {
if (all(x %unlike% "(S|I|R)", na.rm = TRUE)) {
# check if they are actually MICs or disks
if (all_valid_mics(x)) {
warning_("in `as.sir()`: input values were guessed to be MIC values - preferably transform them with `as.mic()` before running `as.sir()`.")
return(as.sir(as.mic(x), ...))
} else if (all_valid_disks(x)) {
warning_("in `as.sir()`: input values were guessed to be disk diffusion values - preferably transform them with `as.disk()` before running `as.sir()`.")
return(as.sir(as.disk(x), ...))
}
}
# trim leading and trailing spaces, new lines, etc.
x <- trimws2(as.character(unlist(x)))
x[x %in% c(NA, "", "-", "NULL")] <- NA_character_
x.bak <- x
na_before <- length(x[is.na(x)])
# correct for translations
trans_R <- unlist(TRANSLATIONS[
which(TRANSLATIONS$pattern == "Resistant"),
LANGUAGES_SUPPORTED[LANGUAGES_SUPPORTED %in% colnames(TRANSLATIONS)]
])
trans_S <- unlist(TRANSLATIONS[
which(TRANSLATIONS$pattern == "Susceptible"),
LANGUAGES_SUPPORTED[LANGUAGES_SUPPORTED %in% colnames(TRANSLATIONS)]
])
trans_I <- unlist(TRANSLATIONS[
which(TRANSLATIONS$pattern %in% c("Incr. exposure", "Susceptible, incr. exp.", "Intermediate")),
LANGUAGES_SUPPORTED[LANGUAGES_SUPPORTED %in% colnames(TRANSLATIONS)]
])
x <- gsub(paste0(unique(trans_R[!is.na(trans_R)]), collapse = "|"), "R", x, ignore.case = TRUE)
x <- gsub(paste0(unique(trans_S[!is.na(trans_S)]), collapse = "|"), "S", x, ignore.case = TRUE)
x <- gsub(paste0(unique(trans_I[!is.na(trans_I)]), collapse = "|"), "I", x, ignore.case = TRUE)
# replace all English textual input
x[x %like% "([^a-z]|^)res(is(tant)?)?"] <- "R"
x[x %like% "([^a-z]|^)sus(cep(tible)?)?"] <- "S"
x[x %like% "not|non"] <- "NI"
x[x %like% "([^a-z]|^)int(er(mediate)?)?|incr.*exp"] <- "I"
x[x %like% "dose"] <- "SDD"
x <- gsub("[^A-Z]+", "", x, perl = TRUE)
# apply regexes set by user
x[x %like% S] <- "S"
x[x %like% I] <- "I"
x[x %like% R] <- "R"
x[x %like% NI] <- "NI"
x[x %like% SDD] <- "SDD"
x[!x %in% c("S", "SDD", "I", "R", "NI")] <- NA_character_
na_after <- length(x[is.na(x) | x == ""])
if (!isFALSE(list(...)$warn)) { # so as.sir(..., warn = FALSE) will never throw a warning
if (na_before != na_after) {
list_missing <- x.bak[is.na(x) & !is.na(x.bak) & x.bak != ""] %pm>%
unique() %pm>%
sort() %pm>%
vector_and(quotes = TRUE)
cur_col <- get_current_column()
warning_("in `as.sir()`: ", na_after - na_before, " result",
ifelse(na_after - na_before > 1, "s", ""),
ifelse(is.null(cur_col), "", paste0(" in index '", cur_col, "'")),
" truncated (",
round(((na_after - na_before) / length(x)) * 100),
"%) that were invalid antimicrobial interpretations: ",
list_missing,
call = FALSE
)
}
}
}
as_sir_structure(x)
}
#' @rdname as.sir
#' @export
as.sir.mic <- function(x,
mo = NULL,
ab = deparse(substitute(x)),
guideline = getOption("AMR_guideline", "EUCAST"),
uti = NULL,
capped_mic_handling = getOption("AMR_capped_mic_handling", "standard"),
add_intrinsic_resistance = FALSE,
reference_data = AMR::clinical_breakpoints,
substitute_missing_r_breakpoint = getOption("AMR_substitute_missing_r_breakpoint", FALSE),
include_screening = getOption("AMR_include_screening", FALSE),
include_PKPD = getOption("AMR_include_PKPD", TRUE),
breakpoint_type = getOption("AMR_breakpoint_type", "human"),
host = NULL,
verbose = FALSE,
info = TRUE,
conserve_capped_values = NULL,
...) {
as_sir_method(
method_short = "mic",
method_long = "MIC values",
x = x,
mo = mo,
ab = ab,
guideline = guideline,
uti = uti,
capped_mic_handling = capped_mic_handling,
add_intrinsic_resistance = add_intrinsic_resistance,
reference_data = reference_data,
substitute_missing_r_breakpoint = substitute_missing_r_breakpoint,
include_screening = include_screening,
include_PKPD = include_PKPD,
breakpoint_type = breakpoint_type,
host = host,
verbose = verbose,
info = info,
conserve_capped_values = conserve_capped_values,
...
)
}
#' @rdname as.sir
#' @export
as.sir.disk <- function(x,
mo = NULL,
ab = deparse(substitute(x)),
guideline = getOption("AMR_guideline", "EUCAST"),
uti = NULL,
add_intrinsic_resistance = FALSE,
reference_data = AMR::clinical_breakpoints,
substitute_missing_r_breakpoint = getOption("AMR_substitute_missing_r_breakpoint", FALSE),
include_screening = getOption("AMR_include_screening", FALSE),
include_PKPD = getOption("AMR_include_PKPD", TRUE),
breakpoint_type = getOption("AMR_breakpoint_type", "human"),
host = NULL,
verbose = FALSE,
info = TRUE,
...) {
as_sir_method(
method_short = "disk",
method_long = "disk diffusion zones",
x = x,
mo = mo,
ab = ab,
guideline = guideline,
uti = uti,
capped_mic_handling = "standard", # will be ignored for non-MIC anyway
add_intrinsic_resistance = add_intrinsic_resistance,
reference_data = reference_data,
substitute_missing_r_breakpoint = substitute_missing_r_breakpoint,
include_screening = include_screening,
include_PKPD = include_PKPD,
breakpoint_type = breakpoint_type,
host = host,
verbose = verbose,
info = info,
...
)
}
#' @rdname as.sir
#' @export
as.sir.data.frame <- function(x,
...,
col_mo = NULL,
guideline = getOption("AMR_guideline", "EUCAST"),
uti = NULL,
capped_mic_handling = getOption("AMR_capped_mic_handling", "standard"),
add_intrinsic_resistance = FALSE,
reference_data = AMR::clinical_breakpoints,
substitute_missing_r_breakpoint = getOption("AMR_substitute_missing_r_breakpoint", FALSE),
include_screening = getOption("AMR_include_screening", FALSE),
include_PKPD = getOption("AMR_include_PKPD", TRUE),
breakpoint_type = getOption("AMR_breakpoint_type", "human"),
host = NULL,
verbose = FALSE,
info = TRUE,
conserve_capped_values = NULL) {
meet_criteria(x, allow_class = "data.frame") # will also check for dimensions > 0
meet_criteria(col_mo, allow_class = "character", is_in = colnames(x), allow_NULL = TRUE)
meet_criteria(guideline, allow_class = "character")
meet_criteria(uti, allow_class = c("logical", "character"), allow_NULL = TRUE, allow_NA = TRUE)
meet_criteria(capped_mic_handling, allow_class = "character", has_length = 1, is_in = c("standard", "conservative", "none", "inverse"))
meet_criteria(add_intrinsic_resistance, allow_class = "logical", has_length = 1)
meet_criteria(reference_data, allow_class = "data.frame")
meet_criteria(substitute_missing_r_breakpoint, allow_class = "logical", has_length = 1)
meet_criteria(include_screening, allow_class = "logical", has_length = 1)
meet_criteria(include_PKPD, allow_class = "logical", has_length = 1)
meet_criteria(breakpoint_type, allow_class = "character", is_in = reference_data$type, has_length = 1)
meet_criteria(host, allow_class = c("character", "factor"), allow_NULL = TRUE, allow_NA = TRUE)
meet_criteria(verbose, allow_class = "logical", has_length = 1)
meet_criteria(info, allow_class = "logical", has_length = 1)
x.bak <- x
for (i in seq_len(ncol(x))) {
# don't keep factors, overwriting them is hard
if (is.factor(x[, i, drop = TRUE])) {
x[, i] <- as.character(x[, i, drop = TRUE])
}
}
# -- MO
col_mo.bak <- col_mo
if (is.null(col_mo)) {
col_mo <- search_type_in_df(x = x, type = "mo", info = FALSE)
}
# -- host
if (missing(breakpoint_type) && any(host %in% clinical_breakpoints$host[!clinical_breakpoints$host %in% c("human", "ECOFF")], na.rm = TRUE)) {
if (isTRUE(info)) message_("Assuming `breakpoint_type = \"animal\"` since `host` contains animal species.")
breakpoint_type <- "animal"
} else if (any(!suppressMessages(convert_host(host)) %in% c("human", "ECOFF"), na.rm = TRUE)) {
if (isTRUE(info)) message_("Assuming `breakpoint_type = \"animal\"`.")
breakpoint_type <- "animal"
}
if (breakpoint_type == "animal") {
if (is.null(host)) {
host <- search_type_in_df(x = x, type = "host", add_col_prefix = FALSE)
} else if (length(host) == 1 && as.character(host) %in% colnames(x)) {
host <- x[[as.character(host)]]
}
} else {
host <- breakpoint_type
}
# -- UTIs
col_uti <- uti
if (is.null(col_uti)) {
col_uti <- search_type_in_df(x = x, type = "uti", add_col_prefix = FALSE)
}
if (!is.null(col_uti)) {
if (is.logical(col_uti)) {
# already a logical vector as input
if (length(col_uti) == 1) {
uti <- rep(col_uti, NROW(x))
} else {
uti <- col_uti
}
} else {
# column found, transform to logical
stop_if(
length(col_uti) != 1 | !col_uti %in% colnames(x),
"argument `uti` must be a [logical] vector, of must be a single column name of `x`"
)
uti <- as.logical(x[, col_uti, drop = TRUE])
}
} else {
# col_uti is still NULL - look for specimen column and make logicals of the urines
col_specimen <- suppressMessages(search_type_in_df(x = x, type = "specimen"))
if (!is.null(col_specimen)) {
uti <- x[, col_specimen, drop = TRUE] %like% "urin"
values <- sort(unique(x[uti, col_specimen, drop = TRUE]))
if (length(values) > 1) {
plural <- c("s", "", "")
} else {
plural <- c("", "s", "a ")
}
if (isTRUE(info)) {
message_(
"Assuming value", plural[1], " ",
vector_and(values, quotes = TRUE),
" in column '", font_bold(col_specimen),
"' reflect", plural[2], " ", plural[3], "urinary tract infection", plural[1],
".\n Use `as.sir(uti = FALSE)` to prevent this."
)
}
} else {
# no data about UTI's found
uti <- NULL
}
}
i <- 0
if (tryCatch(length(list(...)) > 0, error = function(e) TRUE)) {
sel <- colnames(pm_select(x, ...))
} else {
sel <- colnames(x)
}
if (!is.null(col_mo)) {
sel <- sel[sel != col_mo]
}
ab_cols <- colnames(x)[vapply(FUN.VALUE = logical(1), x, function(y) {
i <<- i + 1
check <- is.mic(y) | is.disk(y)
ab <- colnames(x)[i]
if (!is.null(col_mo) && ab == col_mo) {
return(FALSE)
}
if (!is.null(col_uti) && ab == col_uti) {
return(FALSE)
}
if (length(sel) == 0 || (length(sel) > 0 && ab %in% sel)) {
ab_coerced <- suppressWarnings(as.ab(ab))
if (is.na(ab_coerced) || (length(sel) > 0 & !ab %in% sel)) {
# not even a valid AB code
return(FALSE)
} else {
return(TRUE)
}
} else {
return(FALSE)
}
})]
stop_if(
length(ab_cols) == 0,
"no columns with MIC values, disk zones or antibiotic column names found in this data set. Use as.mic() or as.disk() to transform antimicrobial columns."
)
# set type per column
types <- character(length(ab_cols))
types[vapply(FUN.VALUE = logical(1), x.bak[, ab_cols, drop = FALSE], is.disk)] <- "disk"
types[vapply(FUN.VALUE = logical(1), x.bak[, ab_cols, drop = FALSE], is.mic)] <- "mic"
types[types == "" & vapply(FUN.VALUE = logical(1), x[, ab_cols, drop = FALSE], all_valid_disks)] <- "disk"
types[types == "" & vapply(FUN.VALUE = logical(1), x[, ab_cols, drop = FALSE], all_valid_mics)] <- "mic"
types[types == "" & !vapply(FUN.VALUE = logical(1), x.bak[, ab_cols, drop = FALSE], is.sir)] <- "sir"
if (any(types %in% c("mic", "disk"), na.rm = TRUE)) {
# now we need an mo column
stop_if(is.null(col_mo), "`col_mo` must be set")
# if not null, we already found it, now find again so a message will show
if (is.null(col_mo.bak)) {
col_mo <- search_type_in_df(x = x, type = "mo")
}
x_mo <- as.mo(x[, col_mo, drop = TRUE])
}
for (i in seq_along(ab_cols)) {
if (types[i] == "mic") {
x[, ab_cols[i]] <- x %pm>%
pm_pull(ab_cols[i]) %pm>%
as.character() %pm>%
as.mic() %pm>%
as.sir(
mo = x_mo,
mo.bak = x[, col_mo, drop = TRUE],
ab = ab_cols[i],
guideline = guideline,
uti = uti,
capped_mic_handling = capped_mic_handling,
add_intrinsic_resistance = add_intrinsic_resistance,
reference_data = reference_data,
substitute_missing_r_breakpoint = substitute_missing_r_breakpoint,
include_screening = include_screening,
include_PKPD = include_PKPD,
breakpoint_type = breakpoint_type,
host = host,
verbose = verbose,
info = info,
conserve_capped_values = conserve_capped_values,
is_data.frame = TRUE
)
} else if (types[i] == "disk") {
x[, ab_cols[i]] <- x %pm>%
pm_pull(ab_cols[i]) %pm>%
as.character() %pm>%
as.disk() %pm>%
as.sir(
mo = x_mo,
mo.bak = x[, col_mo, drop = TRUE],
ab = ab_cols[i],
guideline = guideline,
uti = uti,
add_intrinsic_resistance = add_intrinsic_resistance,
reference_data = reference_data,
substitute_missing_r_breakpoint = substitute_missing_r_breakpoint,
include_screening = include_screening,
include_PKPD = include_PKPD,
breakpoint_type = breakpoint_type,
host = host,
verbose = verbose,
info = info,
is_data.frame = TRUE
)
} else if (types[i] == "sir") {
show_message <- FALSE
ab <- ab_cols[i]
ab_coerced <- suppressWarnings(as.ab(ab))
if (!all(x[, ab_cols[i], drop = TRUE] %in% c("S", "SDD", "I", "R", "NI", NA), na.rm = TRUE)) {
show_message <- TRUE
# only print message if values are not already clean
if (isTRUE(info)) {
message_("Cleaning values in column '", font_bold(ab), "' (",
ifelse(ab_coerced != toupper(ab), paste0(ab_coerced, ", "), ""),
ab_name(ab_coerced, tolower = TRUE), ")... ",
appendLF = FALSE,
as_note = FALSE
)
}
} else if (!is.sir(x.bak[, ab_cols[i], drop = TRUE])) {
show_message <- TRUE
# only print message if class not already set
if (isTRUE(info)) {
message_("Assigning class 'sir' to already clean column '", font_bold(ab), "' (",
ifelse(ab_coerced != toupper(ab), paste0(ab_coerced, ", "), ""),
ab_name(ab_coerced, tolower = TRUE, language = NULL), ")... ",
appendLF = FALSE,
as_note = FALSE
)
}
}
x[, ab_cols[i]] <- as.sir.default(x = as.character(x[, ab_cols[i], drop = TRUE]))
if (show_message == TRUE && isTRUE(info)) {
message(font_green_bg(" OK "))
}
}
}
x
}
get_guideline <- function(guideline, reference_data) {
if (!identical(reference_data, AMR::clinical_breakpoints)) {
return(guideline)
}
guideline_param <- trimws2(toupper(guideline))
latest_clsi <- rev(sort(subset(reference_data, guideline %like% "CLSI")$guideline))[1L]
latest_eucast <- rev(sort(subset(reference_data, guideline %like% "EUCAST")$guideline))[1L]
guideline_param[guideline_param == "CLSI"] <- latest_clsi
guideline_param[guideline_param == "EUCAST"] <- latest_eucast
# like 'EUCAST2020', should be 'EUCAST 2020'
guideline_param[guideline_param %unlike% " "] <- gsub("([a-z]+)([0-9]+)", "\\1 \\2", guideline_param[guideline_param %unlike% " "], ignore.case = TRUE)
stop_ifnot(guideline_param %in% reference_data$guideline,
"invalid guideline: '", guideline,
"'.\nValid guidelines are: ", vector_and(reference_data$guideline, quotes = TRUE, reverse = TRUE),
call = FALSE
)
guideline_param
}
convert_host <- function(x, lang = get_AMR_locale()) {
x <- gsub("[^a-zA-Z ]", "", trimws2(tolower(as.character(x))), perl = TRUE)
x_out <- rep(NA_character_, length(x))
x_out[trimws2(tolower(x)) == "human"] <- "human"
x_out[trimws2(tolower(x)) == "ecoff"] <- "ecoff"
# this order is based on: clinical_breakpoints |> filter(type == "animal") |> count(host, sort = TRUE)
x_out[is.na(x_out) & (x %like% "dog|canine|Canis lupus" | x %like% translate_AMR("dog|dogs|canine", lang))] <- "dogs"
x_out[is.na(x_out) & (x %like% "cattle|bovine|Bos taurus" | x %like% translate_AMR("cattle|bovine", lang))] <- "cattle"
x_out[is.na(x_out) & (x %like% "swine|suida(e)?|Sus scrofa" | x %like% translate_AMR("swine|swines", lang))] <- "swine"
x_out[is.na(x_out) & (x %like% "cat|feline|Felis catus" | x %like% translate_AMR("cat|cats|feline", lang))] <- "cats"
x_out[is.na(x_out) & (x %like% "horse|equine|Equus ferus" | x %like% translate_AMR("horse|horses|equine", lang))] <- "horse"
x_out[is.na(x_out) & (x %like% "aqua|fish|Pisces" | x %like% translate_AMR("aquatic|fish", lang))] <- "aquatic"
x_out[is.na(x_out) & (x %like% "bird|chicken|poultry|avia|Gallus gallus" | x %like% translate_AMR("bird|birds|poultry", lang))] <- "poultry"
# additional animals, not necessarily currently in breakpoint guidelines:
x_out[is.na(x_out) & (x %like% "camel|camelid|Camelus dromedarius" | x %like% translate_AMR("camel|camels|camelid", lang))] <- "camels"
x_out[is.na(x_out) & (x %like% "deer|cervine|Cervidae" | x %like% translate_AMR("deer|deers|cervine", lang))] <- "deer"
x_out[is.na(x_out) & (x %like% "donkey|asinine|Equus africanus" | x %like% translate_AMR("donkey|donkeys|asinine", lang))] <- "donkeys"
x_out[is.na(x_out) & (x %like% "ferret|musteline|Mustela putorius" | x %like% translate_AMR("ferret|ferrets|musteline", lang))] <- "ferrets"
x_out[is.na(x_out) & (x %like% "goat|caprine|Capra aegagrus" | x %like% translate_AMR("goat|goats|caprine", lang))] <- "goats"
x_out[is.na(x_out) & (x %like% "guinea pig|caviine|Cavia porcellus" | x %like% translate_AMR("guinea pig|guinea pigs|caviine", lang))] <- "guinea pigs"
x_out[is.na(x_out) & (x %like% "hamster|cricetine|Cricetinae" | x %like% translate_AMR("hamster|hamsters|cricetine", lang))] <- "hamsters"
x_out[is.na(x_out) & (x %like% "monkey|simian|Simia" | x %like% translate_AMR("monkey|monkeys|simian", lang))] <- "monkeys"
x_out[is.na(x_out) & (x %like% "mouse|murine|Mus musculus" | x %like% translate_AMR("mouse|mice|murine", lang))] <- "mice"
x_out[is.na(x_out) & (x %like% "pig|porcine|Sus scrofa" | x %like% translate_AMR("pig|pigs|porcine", lang))] <- "pigs"
x_out[is.na(x_out) & (x %like% "rabbit|leporine|Oryctolagus cuniculus" | x %like% translate_AMR("rabbit|rabbits|leporine", lang))] <- "rabbits"
x_out[is.na(x_out) & (x %like% "rat|ratine|Rattus" | x %like% translate_AMR("rat|rats|ratine", lang))] <- "rats"
x_out[is.na(x_out) & (x %like% "sheep|ovine|Ovis aries" | x %like% translate_AMR("sheep|sheeps|ovine", lang))] <- "sheep"
x_out[is.na(x_out) & (x %like% "snake|serpentine|Serpentes" | x %like% translate_AMR("snake|snakes|serpentine", lang))] <- "snakes"
x_out[is.na(x_out) & (x %like% "turkey|meleagrine|Meleagris gallopavo" | x %like% translate_AMR("turkey|turkeys|meleagrine", lang))] <- "turkey"
x_out[x_out == "ecoff"] <- "ECOFF"
x_out
}
as_sir_method <- function(method_short,
method_long,
x,
mo,
ab,
guideline,
uti,
capped_mic_handling,
add_intrinsic_resistance,
reference_data,
substitute_missing_r_breakpoint,
include_screening,
include_PKPD,
breakpoint_type,
host,
verbose,
info,
conserve_capped_values = NULL,
...) {
if (isTRUE(conserve_capped_values)) {
deprecation_warning(old = "conserve_capped_values", new = "capped_mic_handling", fn = "as.sir", is_argument = TRUE)
capped_mic_handling <- "conservative"
}
meet_criteria(x, allow_NA = TRUE, .call_depth = -2)
meet_criteria(mo, allow_class = c("mo", "character"), has_length = c(1, length(x)), allow_NULL = TRUE, .call_depth = -2)
meet_criteria(ab, allow_class = c("ab", "character"), has_length = c(1, length(x)), .call_depth = -2)
meet_criteria(guideline, allow_class = "character", has_length = c(1, length(x)), .call_depth = -2)
meet_criteria(uti, allow_class = c("logical", "character"), has_length = c(1, length(x)), allow_NULL = TRUE, allow_NA = TRUE, .call_depth = -2)
meet_criteria(capped_mic_handling, allow_class = "character", has_length = 1, is_in = c("standard", "conservative", "none", "inverse"), .call_depth = -2)
meet_criteria(add_intrinsic_resistance, allow_class = "logical", has_length = 1, .call_depth = -2)
meet_criteria(reference_data, allow_class = "data.frame", .call_depth = -2)
meet_criteria(substitute_missing_r_breakpoint, allow_class = "logical", has_length = 1, .call_depth = -2)
meet_criteria(include_screening, allow_class = "logical", has_length = 1, .call_depth = -2)
meet_criteria(include_PKPD, allow_class = "logical", has_length = 1, .call_depth = -2)
check_reference_data(reference_data, .call_depth = -2)
meet_criteria(breakpoint_type, allow_class = "character", is_in = reference_data$type, has_length = 1, .call_depth = -2)
meet_criteria(host, allow_class = c("character", "factor"), allow_NULL = TRUE, allow_NA = TRUE, .call_depth = -2)
meet_criteria(verbose, allow_class = "logical", has_length = 1, .call_depth = -2)
meet_criteria(info, allow_class = "logical", has_length = 1, .call_depth = -2)
# backward compatibilty
dots <- list(...)
dots <- dots[which(!names(dots) %in% c("warn", "mo.bak", "is_data.frame"))]
if (length(dots) != 0) {
warning_("These arguments in `as.sir()` are no longer used: ", vector_and(names(dots), quotes = "`"), ".", call = FALSE)
}
current_sir_interpretation_history <- NROW(AMR_env$sir_interpretation_history)
if (isTRUE(info) && message_not_thrown_before("as.sir", "sir_interpretation_history")) {
message()
message_("Run `sir_interpretation_history()` afterwards to retrieve a logbook with all the details of the breakpoint interpretations.\n\n", add_fn = font_green)
}
current_df <- tryCatch(get_current_data(NA, 0), error = function(e) NULL)
# get guideline
if (!is.null(current_df) && length(guideline) == 1 && guideline %in% colnames(current_df) && any(current_df[[guideline]] %like% "CLSI|EUCAST", na.rm = TRUE)) {
guideline <- current_df[[guideline]]
}
guideline_coerced <- get_guideline(guideline, reference_data)
# get host
if (breakpoint_type == "animal") {
if (is.null(host)) {
host <- "dogs"
if (isTRUE(info) && message_not_thrown_before("as.sir", "host_missing")) {
message_("Animal hosts not set in `host`, assuming `host = \"dogs\"`, since these have the highest breakpoint availability.\n\n")
}
}
} else {
if (!is.null(host) && !all(toupper(as.character(host)) %in% c("HUMAN", "ECOFF"))) {
if (isTRUE(info) && message_not_thrown_before("as.sir", "assumed_breakpoint_animal")) {
message_("Assuming `breakpoint_type = \"animal\"`, since `host` is set.", ifelse(guideline_coerced %like% "EUCAST", " Do you also need to set `guideline = \"CLSI\"`?", ""), "\n\n")
}
breakpoint_type <- "animal"
} else {
host <- breakpoint_type
}
}
if (!is.null(host) && !all(toupper(as.character(host)) %in% c("HUMAN", "ECOFF"))) {
if (!is.null(current_df) && length(host) == 1 && host %in% colnames(current_df) && any(current_df[[host]] %like% "[A-Z]", na.rm = TRUE)) {
host <- current_df[[host]]
} else if (length(host) != length(x)) {
# for dplyr's across()
cur_column_dplyr <- import_fn("cur_column", "dplyr", error_on_fail = FALSE)
if (!is.null(cur_column_dplyr) && is.data.frame(current_df)) {
# try to get current column, which will only be available when in across()
host <- tryCatch(cur_column_dplyr(),
error = function(e) host
)
}
}
}
host.bak <- host
host <- convert_host(host)
if (any(is.na(host) & !is.na(host.bak)) && isTRUE(info) && message_not_thrown_before("as.sir", "missing_hosts")) {
warning_("The following animal host(s) could not be coerced: ", vector_and(host.bak[is.na(host) & !is.na(host.bak)]), immediate = TRUE)
message() # new line
}
# TODO add a switch to turn this off? In interactive sessions perhaps ask the user. Default should be On.
# if (breakpoint_type == "animal" && isTRUE(info) && message_not_thrown_before("as.sir", "host_missing_breakpoints")) {
# if (guideline_coerced %like% "CLSI") {
# message_("Please note that in the absence of specific veterinary breakpoints for certain animal hosts, the CLSI guideline VET09 will be applied where possible.\n\n")
# }
# }
# get ab
if (!is.null(current_df) && length(ab) == 1 && ab %in% colnames(current_df) && any(current_df[[ab]] %like% "[A-Z]", na.rm = TRUE)) {
ab <- current_df[[ab]]
} else if (length(ab) != length(x)) {
# for dplyr's across()
cur_column_dplyr <- import_fn("cur_column", "dplyr", error_on_fail = FALSE)
if (!is.null(cur_column_dplyr) && is.data.frame(current_df)) {
# try to get current column, which will only be available when in across()
ab <- tryCatch(cur_column_dplyr(),
error = function(e) ab
)
}
}
# get mo
if (!is.null(current_df) && length(mo) == 1 && mo %in% colnames(current_df)) {
mo_var_found <- paste0(" based on column '", font_bold(mo), "'")
mo <- current_df[[mo]]
} else if (length(mo) != length(x)) {
mo_var_found <- ""
if (is.null(mo)) {
tryCatch(
{
df <- get_current_data(arg_name = "mo", call = -3) # will return an error if not found
mo <- NULL
try(
{
mo <- suppressMessages(search_type_in_df(df, "mo", add_col_prefix = FALSE))
},
silent = TRUE
)
if (!is.null(df) && !is.null(mo) && is.data.frame(df)) {
mo_var_found <- paste0(" based on column '", font_bold(mo), "'")
mo <- df[, mo, drop = TRUE]
}
},
error = function(e) {
mo <- NULL
}
)
}
} else {
mo_var_found <- ""
}
if (is.null(mo)) {
stop_("No information was supplied about the microorganisms (missing argument `mo` and no column of class 'mo' found). See ?as.sir.\n\n",
"To transform certain columns with e.g. mutate(), use `data %>% mutate(across(..., as.sir, mo = x))`, where x is your column with microorganisms.\n",
"To transform all ", method_long, " in a data set, use `data %>% as.sir()` or `data %>% mutate_if(is.", method_short, ", as.sir)`.",
call = FALSE
)
}
# get uti
if (!is.null(current_df) && length(uti) == 1 && uti %in% colnames(current_df)) {
uti <- current_df[[uti]]
} else if (length(uti) != length(x)) {
if (is.null(uti)) {
tryCatch(
{
df <- get_current_data(arg_name = "uti", call = -3) # will return an error if not found
uti <- NULL
try(
{
uti <- suppressMessages(search_type_in_df(df, "uti", add_col_prefix = FALSE))
},
silent = TRUE
)
if (!is.null(df) && !is.null(uti) && is.data.frame(df)) {
uti <- df[, uti, drop = TRUE]
}
},
error = function(e) {
uti <- NULL
}
)
}
}
# TODO set uti to specimen column here
if (length(ab) == 1 && ab %like% paste0("as.", method_short)) {
stop_("No unambiguous name was supplied about the antibiotic (argument `ab`). See ?as.sir.", call = FALSE)
}
ab.bak <- trimws2(ab)
ab <- suppressWarnings(as.ab(ab))
if (!is.null(list(...)$mo.bak)) {
mo.bak <- list(...)$mo.bak
} else {
mo.bak <- mo
}
mo.bak <- trimws2(mo.bak)
# be sure to take current taxonomy, as the 'clinical_breakpoints' data set only contains current taxonomy
mo <- suppressWarnings(suppressMessages(as.mo(mo, keep_synonyms = FALSE, info = FALSE)))
if (all(is.na(ab))) {
if (isTRUE(info)) {
message_("Returning NAs for unknown antibiotic: ", vector_and(ab.bak, sort = FALSE, quotes = TRUE),
". Rename this column to a valid name or code, and check the output with `as.ab()`.",
add_fn = font_red,
as_note = FALSE
)
}
return(as.sir(rep(NA, length(x))))
}
if (length(mo) == 1) {
mo <- rep(mo, length(x))
}
if (length(ab) == 1) {
ab <- rep(ab, length(x))
ab.bak <- rep(ab.bak, length(ab))
}
if (length(host) == 1) {
host <- rep(host, length(x))
}
if (is.null(uti)) {
uti <- NA
}
if (length(uti) == 1) {
uti <- rep(uti, length(x))
}
if (isTRUE(add_intrinsic_resistance) && guideline_coerced %unlike% "EUCAST") {
if (isTRUE(info) && message_not_thrown_before("as.sir", "intrinsic")) {
message_("in `as.sir()`: using 'add_intrinsic_resistance' is only useful when using EUCAST guidelines, since the rules for intrinsic resistance are based on EUCAST.",
add_fn = font_red
)
}
}
# format agents ----
agent_formatted <- paste0("'", font_bold(ab.bak, collapse = NULL), "'")
agent_name <- ab_name(ab, tolower = TRUE, language = NULL)
same_ab <- generalise_antibiotic_name(ab) == generalise_antibiotic_name(agent_name)
same_ab.bak <- generalise_antibiotic_name(ab.bak) == generalise_antibiotic_name(agent_name)
agent_formatted[same_ab.bak] <- paste0(agent_formatted[same_ab.bak], " (", ab[same_ab.bak], ")")
agent_formatted[!same_ab.bak & !same_ab] <- paste0(
agent_formatted[!same_ab.bak & !same_ab],
" (", ifelse(ab.bak[!same_ab.bak & !same_ab] == ab[!same_ab.bak & !same_ab],
"",
paste0(ab[!same_ab.bak & !same_ab], ", ")
),
agent_name[!same_ab.bak & !same_ab],
")"
)
# this intro text will also be printed in the progress bar if the `progress` package is installed
intro_txt <- paste0(
"Interpreting ", method_long, ": ", ifelse(isTRUE(list(...)$is_data.frame), "column ", ""),
ifelse(length(unique(agent_formatted)) == 1, unique(agent_formatted), paste0(vector_and(agent_formatted, quotes = FALSE, sort = FALSE))),
mo_var_found,
ifelse(identical(reference_data, AMR::clinical_breakpoints),
paste0(", ", vector_and(font_bold(guideline_coerced, collapse = NULL), quotes = FALSE)),
""
),
"... "
)
# prepare used arguments ----
method <- method_short
metadata_mo <- get_mo_uncertainties()
rise_warning <- FALSE
rise_notes <- FALSE
method_coerced <- toupper(method)
ab_coerced <- as.ab(ab)
if (identical(reference_data, AMR::clinical_breakpoints)) {
breakpoints <- reference_data %pm>%
subset(guideline %in% guideline_coerced & method == method_coerced & ab %in% ab_coerced)
if (any(ab_coerced == "AMX") && nrow(breakpoints[breakpoints$ab == "AMX", , drop = FALSE]) == 0) {
ab_coerced[ab_coerced == "AMX"] <- "AMP"
breakpoints <- reference_data %pm>%
subset(guideline %in% guideline_coerced & method == method_coerced & ab %in% ab_coerced)
}
} else {
breakpoints <- reference_data %pm>%
subset(method == method_coerced & ab %in% ab_coerced)
}
# create the unique data frame to be filled to save time
df <- data.frame(
values = x,
values_bak = x,
guideline = guideline_coerced,
mo = mo,
ab = ab,
result = NA_sir_,
uti = uti,
host = host,
stringsAsFactors = FALSE
)
if (method == "mic") {
if (any(guideline_coerced %like% "CLSI")) {
# CLSI says: if MIC is not a log2 value it must be rounded up to the nearest log2 value
log2_levels <- 2^c(-9:12)
df$values[which(df$guideline %like% "CLSI")] <- vapply(
FUN.VALUE = character(1),
df$values[which(df$guideline %like% "CLSI")],
function(mic_val) {
if (is.na(mic_val)) {
return(NA_character_)
} else {
# find the smallest log2 level that is >= mic_val
log2_val <- log2_levels[which(log2_levels >= as.double(mic_val))][1]
if (!is.na(log2_val) && as.double(mic_val) != log2_val) {
if (message_not_thrown_before("as.sir", "CLSI", "MICupscaling")) {
warning_("Some MICs were converted to the nearest higher log2 level, following the CLSI interpretation guideline.")
}
return(as.character(log2_val)) # will be MIC later
} else {
return(as.character(mic_val))
}
}
}
)
}
df$values <- as.mic(df$values)
} else if (method == "disk") {
# when as.sir.disk is called directly
df$values <- as.disk(df$values)
}
df_unique <- unique(df[, c("guideline", "mo", "ab", "uti", "host"), drop = FALSE])
mo_grams <- suppressWarnings(suppressMessages(mo_gramstain(df_unique$mo, language = NULL, keep_synonyms = FALSE)))
# get all breakpoints, use humans as backup for animals
breakpoint_type_lookup <- breakpoint_type
if (breakpoint_type == "animal") {
breakpoint_type_lookup <- c(breakpoint_type, "human")
}
breakpoints <- breakpoints %pm>%
subset(type %in% breakpoint_type_lookup)
if (isFALSE(include_screening)) {
# remove screening rules from the breakpoints table
breakpoints <- breakpoints %pm>%
subset(site %unlike% "screen" & ref_tbl %unlike% "screen")
}
if (isFALSE(include_PKPD)) {
# remove PKPD rules from the breakpoints table
breakpoints <- breakpoints %pm>%
subset(mo != "UNKNOWN" & ref_tbl %unlike% "PK.*PD")
}
notes <- character(0)
if (any(guideline_coerced %like% "EUCAST")) {
any_is_intrinsic_resistant <- FALSE
add_intrinsic_resistance_to_AMR_env()
}
if (isTRUE(info) && nrow(df_unique) < 10 || nrow(breakpoints) == 0) {
# only print intro under 10 items, otherwise progressbar will print this and then it will be printed double
message_(intro_txt, appendLF = FALSE, as_note = FALSE)
}
p <- progress_ticker(n = nrow(df_unique), n_min = 10, title = font_blue(intro_txt), only_bar_percent = TRUE)
has_progress_bar <- !is.null(import_fn("progress_bar", "progress", error_on_fail = FALSE)) && nrow(df_unique) >= 10
on.exit(close(p))
if (nrow(breakpoints) == 0) {
# apparently no breakpoints found
if (isTRUE(info)) {
message(
paste0(font_rose_bg(" WARNING "), "\n"),
font_black(paste0(
" ", AMR_env$bullet_icon, " No ", method_coerced, " breakpoints available for ",
suppressMessages(suppressWarnings(ab_name(unique(ab_coerced), language = NULL, tolower = TRUE))),
" (", unique(ab_coerced), ")."
), collapse = "\n")
)
}
load_mo_uncertainties(metadata_mo)
return(rep(NA_sir_, nrow(df)))
}
vectorise_log_entry <- function(x, len) {
if (length(x) == 1 && len > 1) {
rep(x, len)
} else {
x
}
}
# run the rules (df_unique is a row combination per mo/ab/uti/host) ----
for (i in seq_len(nrow(df_unique))) {
p$tick()
guideline_current <- df_unique[i, "guideline", drop = TRUE]
mo_current <- df_unique[i, "mo", drop = TRUE]
mo_gram_current <- mo_grams[i]
ab_current <- df_unique[i, "ab", drop = TRUE]
host_current <- df_unique[i, "host", drop = TRUE]
uti_current <- df_unique[i, "uti", drop = TRUE]
notes_current <- character(0)
rows <- which(as.character(df$mo) == mo_current & df$ab == ab_current & df$host == host_current & df$guideline == guideline_current)
if (!is.na(uti_current)) {
# also filter on UTIs
rows <- rows[df$uti[rows] == uti_current]
}
if (length(rows) == 0) {
# this can happen if a host is unavailable, just continue with the next one, since a note about hosts having NA are already given at this point
next
}
values <- df[rows, "values", drop = TRUE]
values_bak <- df[rows, "values_bak", drop = TRUE]
notes_current <- rep("", length(rows))
new_sir <- rep(NA_sir_, length(rows))
# find different mo properties, as fast as possible
# TODO in case of VET09, we need to keep E. coli, also when users have Proteus in their data set
# TODO look up which species, at least E. coli - also Staph or Strep?
mo_current_genus <- AMR_env$MO_lookup$mo[match(AMR_env$MO_lookup$genus[match(mo_current, AMR_env$MO_lookup$mo)], AMR_env$MO_lookup$fullname)]
mo_current_family <- AMR_env$MO_lookup$mo[match(AMR_env$MO_lookup$family[match(mo_current, AMR_env$MO_lookup$mo)], AMR_env$MO_lookup$fullname)]
mo_current_order <- AMR_env$MO_lookup$mo[match(AMR_env$MO_lookup$order[match(mo_current, AMR_env$MO_lookup$mo)], AMR_env$MO_lookup$fullname)]
mo_current_class <- AMR_env$MO_lookup$mo[match(AMR_env$MO_lookup$class[match(mo_current, AMR_env$MO_lookup$mo)], AMR_env$MO_lookup$fullname)]
mo_current_rank <- AMR_env$MO_lookup$rank[match(mo_current, AMR_env$MO_lookup$mo)]
mo_current_name <- AMR_env$MO_lookup$fullname[match(mo_current, AMR_env$MO_lookup$mo)]
mo_current_oxygen_tolerance <- AMR_env$MO_lookup$oxygen_tolerance[match(mo_current, AMR_env$MO_lookup$mo)]
if (mo_current %in% AMR::microorganisms.groups$mo) {
# get the species group (might be more than 1 entry)
mo_current_species_group <- AMR::microorganisms.groups$mo_group[which(AMR::microorganisms.groups$mo == mo_current)]
} else {
mo_current_species_group <- NULL
}
mo_current_gram <- structure(character(0), class = c("mo", "character"))
if (identical(mo_gram_current, "Gram-negative")) {
mo_current_gram <- c(mo_current_gram, "B_GRAMN")
if (identical(mo_current_oxygen_tolerance, "anaerobe")) {
mo_current_gram <- c(mo_current_gram, "B_ANAER", "B_ANAER-NEG")
}
} else if (identical(mo_gram_current, "Gram-positive")) {
mo_current_gram <- c(mo_current_gram, "B_GRAMP")
if (identical(mo_current_oxygen_tolerance, "anaerobe")) {
mo_current_gram <- c(mo_current_gram, "B_ANAER", "B_ANAER-POS")
}
}
mo_current_other <- structure("UNKNOWN", class = c("mo", "character"))
# formatted for notes
mo_formatted <- mo_current_name
if (!mo_current_rank %in% c("kingdom", "phylum", "class", "order")) {
mo_formatted <- font_italic(mo_formatted, collapse = NULL)
}
ab_formatted <- paste0(
suppressMessages(suppressWarnings(ab_name(ab_current, language = NULL, tolower = TRUE))),
" (", ab_current, ")"
)
# gather all available breakpoints for current MO
# TODO for VET09 do not filter out E. coli and such
breakpoints_current <- breakpoints %pm>%
subset(ab == ab_current & guideline == guideline_current) %pm>%
subset(mo %in% c(
mo_current, mo_current_genus, mo_current_family,
mo_current_order, mo_current_class,
mo_current_species_group,
mo_current_gram,
mo_current_other
))
# TODO are operators considered??
# This seems to not work well: as.sir(as.mic(c(4, ">4", ">=4", 8, ">8", ">=8")), ab = "AMC", mo = "E. coli", breakpoint_type = "animal", host = "dogs", guideline = "CLSI 2024")
if (breakpoint_type == "animal") {
# 2025-03-13 for now, only strictly follow guideline for current host, no extrapolation
breakpoints_current <- breakpoints_current[which(breakpoints_current$host == host_current), , drop = FALSE]
}
## fall-back methods for veterinary guidelines ----
## TODO actually implement this well
if (FALSE) {
# if (breakpoint_type == "animal" && !host_current %in% breakpoints_current$host) {
if (guideline_coerced %like% "CLSI") {
# VET09 says that staph/strep/enterococcus BP can be extrapolated to all Gr+ cocci except for intrinsic resistance, so take all Gr+ cocci:
gram_plus_cocci_vet09 <- microorganisms$mo[microorganisms$genus %in% c("Staphylococcus", "Streptococcus", "Peptostreptococcus", "Aerococcus", "Micrococcus") & microorganisms$rank == "genus"] # TODO should probably include genera that were either of these before
# HUMAN SUBSTITUTES
if (ab_current == "AZM" && mo_current_genus %in% gram_plus_cocci_vet09 && host_current %in% c("dogs", "cats", "horse")) {
# azithro can take human breakpoints for these agents
breakpoints_current <- breakpoints_current %pm>% subset(host == "human")
notes_current <- c(notes_current, paste0("Using ", font_bold("human"), " breakpoints for ", ab_formatted, " in Gram-positive cocci based on CLSI VET09."))
} else if (ab_current == "CTX" && mo_current_order == "B_[ORD]_ENTRBCTR" && host_current %in% c("dogs", "cats", "horse")) {
# cefotax can take human breakpoints for these agents
breakpoints_current <- breakpoints_current %pm>% subset(host == "human")
notes_current <- c(notes_current, paste0("Using ", font_bold("human"), " breakpoints for ", ab_formatted, " in Enterobacterales based on CLSI VET09."))
} else if (ab_current == "CAZ" && (mo_current_order == "B_[ORD]_ENTRBCTR" | mo_current == "B_PSDMN_AERG") && host_current %in% c("dogs", "cats", "horse")) {
# cefta can take human breakpoints for these agents
breakpoints_current <- breakpoints_current %pm>% subset(host == "human")
notes_current <- c(notes_current, paste0("Using ", font_bold("human"), " breakpoints for ", ab_formatted, " in Enterobacterales and ", font_italic("P. aeruginosa"), " based on CLSI VET09."))
} else if (ab_current == "ERY" && mo_current_genus %in% gram_plus_cocci_vet09 && host_current %in% c("dogs", "cats", "horse")) {
# erythro can take human breakpoints for these agents
breakpoints_current <- breakpoints_current %pm>% subset(host == "human")
notes_current <- c(notes_current, paste0("Using ", font_bold("human"), " breakpoints for ", ab_formatted, " in Gram-positive cocci based on CLSI VET09."))
} else if (ab_current == "IPM" && (mo_current_order == "B_[ORD]_ENTRBCTR" | mo_current == "B_PSDMN_AERG") && host_current %in% c("dogs", "cats", "horse")) {
# imipenem can take human breakpoints for these agents
breakpoints_current <- breakpoints_current %pm>% subset(host == "human")
notes_current <- c(notes_current, paste0("Using ", font_bold("human"), " breakpoints for ", ab_formatted, " in Enterobacterales and ", font_italic("P. aeruginosa"), " based on CLSI VET09."))
} else if (ab_current == "LNZ" && mo_current_genus %in% gram_plus_cocci_vet09 && host_current %in% c("dogs", "cats")) {
# linezolid can take human breakpoints for these agents
breakpoints_current <- breakpoints_current %pm>% subset(host == "human")
notes_current <- c(notes_current, paste0("Using ", font_bold("human"), " breakpoints for ", ab_formatted, " in staphylococci/enterococci based on CLSI VET09."))
} else if (ab_current == "NIT" && host_current %in% c("dogs", "cats")) {
# nitro can take human breakpoints for these agents
breakpoints_current <- breakpoints_current %pm>% subset(host == "human")
notes_current <- c(notes_current, paste0("Using ", font_bold("human"), " breakpoints for ", ab_formatted, " based on CLSI VET09."))
} else if (ab_current == "PEN" && mo_current_genus %in% gram_plus_cocci_vet09 && host_current %in% c("dogs", "cats")) {
# penicillin can take human breakpoints for these agents
breakpoints_current <- breakpoints_current %pm>% subset(host == "human")
notes_current <- c(notes_current, paste0("Using ", font_bold("human"), " breakpoints for ", ab_formatted, " in Gram-positive cocci based on CLSI VET09."))
} else if (ab_current == "RIF" && mo_current_genus %in% gram_plus_cocci_vet09 && host_current %in% c("dogs", "cats")) {
# rifampicin can take human breakpoints for staphylococci
breakpoints_current <- breakpoints_current %pm>% subset(host == "human")
notes_current <- c(notes_current, paste0("Using ", font_bold("human"), " breakpoints for ", ab_formatted, " in staphylococci based on CLSI VET09."))
} else if (ab_current == "SXT" && host_current %in% c("dogs", "cats", "horse")) {
# trimethoprim-sulfamethoxazole (TMS) can take human breakpoints for these agents
breakpoints_current <- breakpoints_current %pm>% subset(host == "human")
notes_current <- c(notes_current, paste0("Using ", font_bold("human"), " breakpoints for ", ab_formatted, " based on CLSI VET09."))
} else if (ab_current == "VAN" && host_current %in% c("dogs", "cats", "horse")) {
# vancomycin can take human breakpoints in these hosts
breakpoints_current <- breakpoints_current %pm>% subset(host == "human")
notes_current <- c(notes_current, paste0("Using ", font_bold("human"), " breakpoints for ", ab_formatted, " based on CLSI VET09."))
} else if (host_current %in% c("dogs", "cats") && (mo_current_genus %in% c("B_AMYCS", "B_NOCRD", "B_CMPYL", "B_CRYNB", "B_ENTRC", "B_MYCBC", "B_PSDMN", "B_AERMN") | mo_current_class == "B_[CLS]_BTPRTBCT" | mo_current == "B_LISTR_MNCY")) {
# dog breakpoints if no canine/feline
# TODO do we still have dogs breakpoints at this point???
breakpoints_current <- breakpoints_current %pm>% subset(host == "human") # WRONG
notes_current <- c(notes_current, paste0("Using ", font_bold("human"), " breakpoints for ", mo_formatted, " based on CLSI VET09."))
} else {
# no specific CLSI solution for this, so only filter on current host (if no breakpoints available -> too bad)
breakpoints_current <- breakpoints_current %pm>%
subset(host == host_current)
}
}
}
if (NROW(breakpoints_current) == 0) {
out <- data.frame(
# recycling 1 to 2 rows does not always seem to work, which is why vectorise_log_entry() was added
datetime = vectorise_log_entry(Sys.time(), length(rows)),
index = rows,
method = vectorise_log_entry(method_coerced, length(rows)),
ab_given = vectorise_log_entry(ab.bak[match(ab_current, df$ab)][1], length(rows)),
mo_given = vectorise_log_entry(mo.bak[match(mo_current, df$mo)][1], length(rows)),
host_given = vectorise_log_entry(host.bak[match(host_current, df$host)][1], length(rows)),
input_given = vectorise_log_entry(as.character(values_bak), length(rows)),
ab = vectorise_log_entry(ab_current, length(rows)),
mo = vectorise_log_entry(mo_current, length(rows)),
host = vectorise_log_entry(host_current, length(rows)),
input = vectorise_log_entry(as.character(values), length(rows)),
outcome = vectorise_log_entry(NA_sir_, length(rows)),
notes = vectorise_log_entry("No breakpoint available", length(rows)),
guideline = vectorise_log_entry(guideline_current, length(rows)),
ref_table = vectorise_log_entry(NA_character_, length(rows)),
uti = vectorise_log_entry(uti_current, length(rows)),
breakpoint_S_R = vectorise_log_entry(NA_character_, length(rows)),
stringsAsFactors = FALSE
)
out <- subset(out, !is.na(input_given))
AMR_env$sir_interpretation_history <- rbind_AMR(AMR_env$sir_interpretation_history, out)
notes <- c(notes, notes_current)
next
}
# sort on host and taxonomic rank
# (this will e.g. prefer 'species' breakpoints over 'order' breakpoints)
if (is.na(uti_current)) {
breakpoints_current <- breakpoints_current %pm>%
# `uti` is a column in the data set
# this will put UTI = FALSE first, then UTI = NA, then UTI = TRUE
pm_mutate(uti_index = ifelse(!is.na(uti) & uti == FALSE, 1,
ifelse(is.na(uti), 2,
3
)
)) %pm>%
# be as specific as possible (i.e. prefer species over genus):
pm_arrange(rank_index, uti_index)
} else if (uti_current == TRUE) {
breakpoints_current <- breakpoints_current %pm>%
subset(uti == TRUE) %pm>%
# be as specific as possible (i.e. prefer species over genus):
pm_arrange(rank_index)
}
# throw messages for different body sites
site <- breakpoints_current[1L, "site", drop = FALSE] # this is the one we'll take
if (is.na(site)) {
site <- paste0("an unspecified body site")
} else {
site <- paste0("body site '", site, "'")
}
if (nrow(breakpoints_current) == 1 && all(breakpoints_current$uti == TRUE) && is.na(uti_current) && message_not_thrown_before("as.sir", "uti", ab_current)) {
# only UTI breakpoints available
notes_current <- paste0(
notes_current, "\n",
paste0("Breakpoints for ", font_bold(ab_formatted), " in ", mo_formatted, " are only available for (uncomplicated) urinary tract infections (UTI); assuming `uti = TRUE`.")
)
} else if (nrow(breakpoints_current) > 1 && length(unique(breakpoints_current$site)) > 1 && any(is.na(uti_current)) && all(c(TRUE, FALSE) %in% breakpoints_current$uti, na.rm = TRUE) && message_not_thrown_before("as.sir", "siteUTI", mo_current, ab_current)) {
# both UTI and Non-UTI breakpoints available
breakpoints_current <- breakpoints_current %pm>%
pm_filter(uti == FALSE)
notes_current <- paste0(
notes_current, "\n",
paste0("Breakpoints for UTI ", font_bold("and"), " non-UTI available for ", ab_formatted, " in ", mo_formatted, " - assuming ", site, ". Use argument `uti` to set which isolates are from urine. See `?as.sir`.")
)
} else if (nrow(breakpoints_current) > 1 && length(unique(breakpoints_current$site)) > 1 && all(breakpoints_current$uti == FALSE, na.rm = TRUE) && message_not_thrown_before("as.sir", "siteOther", mo_current, ab_current)) {
# breakpoints for multiple body sites available
notes_current <- paste0(
notes_current, "\n",
paste0("Multiple breakpoints available for ", font_bold(ab_formatted), " in ", mo_formatted, " - assuming ", site, ".")
)
}
# first check if mo is intrinsic resistant
if (isTRUE(add_intrinsic_resistance) && guideline_current %like% "EUCAST" && paste(mo_current, ab_current) %in% AMR_env$intrinsic_resistant) {
new_sir <- rep(as.sir("R"), length(rows))
notes_current <- paste0(
notes_current, "\n",
paste0("Intrinsic resistance applied for ", ab_formatted, " in ", mo_formatted, "")
)
} else if (nrow(breakpoints_current) == 0) {
# no rules available
new_sir <- rep(NA_sir_, length(rows))
} else {
# then run the rules
breakpoints_current <- breakpoints_current[1L, , drop = FALSE]
notes_current <- paste0(
notes_current, "\n",
ifelse(breakpoints_current$mo == "UNKNOWN" | breakpoints_current$ref_tbl %like% "PK.*PD",
"Some PK/PD breakpoints were applied - use `include_PKPD = FALSE` to prevent this",
""
),
"\n",
ifelse(breakpoints_current$site %like% "screen" | breakpoints_current$ref_tbl %like% "screen",
"Some screening breakpoints were applied - use `include_screening = FALSE` to prevent this",
""
),
"\n",
ifelse(method == "mic" & capped_mic_handling %in% c("conservative", "inverse") & as.character(values_bak) %like% "^[<][0-9]",
paste0("MIC values with the operator '<' are all considered 'S' since capped_mic_handling = \"", capped_mic_handling, "\""),
""
),
"\n",
ifelse(method == "mic" & capped_mic_handling %in% c("conservative", "inverse") & as.character(values_bak) %like% "^[>][0-9]",
paste0("MIC values with the operator '>' are all considered 'R' since capped_mic_handling = \"", capped_mic_handling, "\""),
""
),
"\n",
ifelse(method == "mic" & capped_mic_handling %in% c("conservative", "standard") & as.character(values_bak) %like% "^[><]=[0-9]" & as.double(values) > breakpoints_current$breakpoint_S & as.double(values) < breakpoints_current$breakpoint_R,
paste0("MIC values within the breakpoint guideline range with the operator '<=' or '>=' are considered 'NI' since capped_mic_handling = \"", capped_mic_handling, "\""),
""
)
)
if (isTRUE(substitute_missing_r_breakpoint) && !is.na(breakpoints_current$breakpoint_S) && is.na(breakpoints_current$breakpoint_R)) {
# breakpoints_current only has 1 row at this moment
breakpoints_current$breakpoint_R <- breakpoints_current$breakpoint_S
notes_current <- paste0(
notes_current, "\n",
ifelse(!is.na(breakpoints_current$breakpoint_S) & is.na(breakpoints_current$breakpoint_R),
"NAs because of missing R breakpoints were substituted with R since substitute_missing_r_breakpoint = TRUE",
""
)
)
}
## actual interpretation ----
if (method == "mic") {
new_sir <- case_when_AMR(
is.na(values) ~ NA_sir_,
capped_mic_handling %in% c("conservative", "inverse") & as.character(values_bak) %like% "^[<][0-9]" ~ as.sir("S"),
capped_mic_handling %in% c("conservative", "inverse") & as.character(values_bak) %like% "^[>][0-9]" ~ as.sir("R"),
capped_mic_handling %in% c("conservative", "standard") & as.character(values_bak) %like% "^[><]=[0-9]" & as.double(values) > breakpoints_current$breakpoint_S & as.double(values) < breakpoints_current$breakpoint_R ~ as.sir("NI"),
values <= breakpoints_current$breakpoint_S ~ as.sir("S"),
guideline_current %like% "EUCAST" & values > breakpoints_current$breakpoint_R ~ as.sir("R"),
guideline_current %like% "CLSI" & values >= breakpoints_current$breakpoint_R ~ as.sir("R"),
# return "I" or "SDD" when breakpoints are in the middle
!is.na(breakpoints_current$breakpoint_S) & !is.na(breakpoints_current$breakpoint_R) & breakpoints_current$is_SDD == TRUE ~ as.sir("SDD"),
!is.na(breakpoints_current$breakpoint_S) & !is.na(breakpoints_current$breakpoint_R) & breakpoints_current$is_SDD == FALSE ~ as.sir("I"),
# and NA otherwise
TRUE ~ NA_sir_
)
} else if (method == "disk") {
new_sir <- case_when_AMR(
is.na(values) ~ NA_sir_,
as.double(values) >= as.double(breakpoints_current$breakpoint_S) ~ as.sir("S"),
guideline_current %like% "EUCAST" & as.double(values) < as.double(breakpoints_current$breakpoint_R) ~ as.sir("R"),
guideline_current %like% "CLSI" & as.double(values) <= as.double(breakpoints_current$breakpoint_R) ~ as.sir("R"),
# return "I" or "SDD" when breakpoints are in the middle
!is.na(breakpoints_current$breakpoint_S) & !is.na(breakpoints_current$breakpoint_R) & breakpoints_current$is_SDD == TRUE ~ as.sir("SDD"),
!is.na(breakpoints_current$breakpoint_S) & !is.na(breakpoints_current$breakpoint_R) & breakpoints_current$is_SDD == FALSE ~ as.sir("I"),
# and NA otherwise
TRUE ~ NA_sir_
)
}
# write to verbose output
notes_current <- trimws2(notes_current)
notes_current[notes_current == ""] <- NA_character_
out <- data.frame(
# recycling 1 to 2 rows does not always seem to work, which is why vectorise_log_entry() was added
datetime = vectorise_log_entry(Sys.time(), length(rows)),
index = rows,
method = vectorise_log_entry(method_coerced, length(rows)),
ab_given = vectorise_log_entry(ab.bak[match(ab_current, df$ab)][1], length(rows)),
mo_given = vectorise_log_entry(mo.bak[match(mo_current, df$mo)][1], length(rows)),
host_given = vectorise_log_entry(host.bak[match(host_current, df$host)][1], length(rows)),
input_given = vectorise_log_entry(as.character(values_bak), length(rows)),
ab = vectorise_log_entry(breakpoints_current[, "ab", drop = TRUE], length(rows)),
mo = vectorise_log_entry(breakpoints_current[, "mo", drop = TRUE], length(rows)),
host = vectorise_log_entry(breakpoints_current[, "host", drop = TRUE], length(rows)),
input = vectorise_log_entry(as.character(values), length(rows)),
outcome = vectorise_log_entry(as.sir(new_sir), length(rows)),
notes = font_stripstyle(notes_current), # vectorise_log_entry(paste0(font_stripstyle(notes_current), collapse = "\n"), length(rows)),
guideline = vectorise_log_entry(guideline_current, length(rows)),
ref_table = vectorise_log_entry(breakpoints_current[, "ref_tbl", drop = TRUE], length(rows)),
uti = vectorise_log_entry(breakpoints_current[, "uti", drop = TRUE], length(rows)),
breakpoint_S_R = vectorise_log_entry(paste0(breakpoints_current[, "breakpoint_S", drop = TRUE], "-", breakpoints_current[, "breakpoint_R", drop = TRUE]), length(rows)),
stringsAsFactors = FALSE
)
out <- subset(out, !is.na(input_given))
AMR_env$sir_interpretation_history <- rbind_AMR(AMR_env$sir_interpretation_history, out)
}
notes <- c(notes, notes_current)
df[rows, "result"] <- new_sir
}
close(p)
# printing messages
if (isTRUE(info)) {
if (has_progress_bar == TRUE) {
# the progress bar has overwritten the intro text, so:
message_(intro_txt, appendLF = FALSE, as_note = FALSE)
}
notes <- notes[!trimws2(notes) %in% c("", NA_character_)]
if (length(notes) > 0) {
if (isTRUE(rise_warning)) {
message(font_rose_bg(" WARNING "))
} else {
message(font_yellow_bg(" NOTE "))
}
notes <- unique(notes)
if (isTRUE(verbose) || length(notes) == 1 || NROW(AMR_env$sir_interpretation_history) == 0) {
for (i in seq_along(notes)) {
message(word_wrap(" ", AMR_env$bullet_icon, " ", notes[i], add_fn = font_black))
}
} else {
message(word_wrap(" ", AMR_env$bullet_icon, " There were multiple notes. Print or View `sir_interpretation_history()` to examine them, or use `as.sir(..., verbose = TRUE)` next time to directly print them here.", add_fn = font_black))
}
} else {
message(font_green_bg(" OK "))
}
}
load_mo_uncertainties(metadata_mo)
# reorder AMR_env$sir_interpretation_history to get a clean ordering on index
old_part <- AMR_env$sir_interpretation_history[seq_len(current_sir_interpretation_history), , drop = FALSE]
new_part <- AMR_env$sir_interpretation_history[c((current_sir_interpretation_history + 1):NROW(AMR_env$sir_interpretation_history)), , drop = FALSE]
new_part <- new_part[order(new_part$index), , drop = FALSE]
AMR_env$sir_interpretation_history <- rbind_AMR(old_part, new_part)
df$result
}
#' @rdname as.sir
#' @param clean A [logical] to indicate whether previously stored results should be forgotten after returning the 'logbook' with results.
#' @export
sir_interpretation_history <- function(clean = FALSE) {
meet_criteria(clean, allow_class = "logical", has_length = 1)
out <- AMR_env$sir_interpretation_history
out$outcome <- as.sir(out$outcome)
if (isTRUE(clean)) {
AMR_env$sir_interpretation_history <- AMR_env$sir_interpretation_history[0, , drop = FALSE]
}
if (pkg_is_available("tibble")) {
out <- import_fn("as_tibble", "tibble")(out)
}
as_original_data_class(out, class(out), extra_class = "sir_log")
}
#' @method print sir_log
#' @export
#' @noRd
print.sir_log <- function(x, ...) {
if (NROW(x) == 0) {
message_("No results to print. Run `as.sir()` on MIC values or disk diffusion zones first to print a 'logbook' data set here.")
return(invisible(NULL))
}
class(x) <- class(x)[class(x) != "sir_log"]
print(x, ...)
}
# will be exported using s3_register() in R/zzz.R
pillar_shaft.sir <- function(x, ...) {
out <- trimws(format(x))
if (has_colour()) {
# colours will anyway not work when has_colour() == FALSE,
# but then the indentation should also not be applied
out[is.na(x)] <- font_grey(" NA")
out[x == "NI"] <- font_grey_bg(" NI ")
out[x == "S"] <- font_green_bg(" S ")
out[x == "I"] <- font_orange_bg(" I ")
out[x == "SDD"] <- font_orange_bg(" SDD ")
out[x == "R"] <- font_rose_bg(" R ")
}
create_pillar_column(out, align = "left", width = 5)
}
# will be exported using s3_register() in R/zzz.R
type_sum.sir <- function(x, ...) {
"sir"
}
# will be exported using s3_register() in R/zzz.R
freq.sir <- function(x, ...) {
x_name <- deparse(substitute(x))
x_name <- gsub(".*[$]", "", x_name)
if (x_name %in% c("x", ".")) {
# try again going through system calls
x_name <- stats::na.omit(vapply(
FUN.VALUE = character(1),
sys.calls(),
function(call) {
call_txt <- as.character(call)
ifelse(call_txt[1] %like% "freq$", call_txt[length(call_txt)], character(0))
}
))[1L]
}
ab <- suppressMessages(suppressWarnings(as.ab(x_name)))
digits <- list(...)$digits
if (is.null(digits)) {
digits <- 2
}
if (!is.na(ab)) {
cleaner::freq.default(
x = x, ...,
.add_header = list(
Drug = paste0(ab_name(ab, language = NULL), " (", ab, ", ", paste(ab_atc(ab), collapse = "/"), ")"),
`Drug group` = ab_group(ab, language = NULL),
`%SI` = trimws(percentage(susceptibility(x, minimum = 0, as_percent = FALSE),
digits = digits
))
)
)
} else {
cleaner::freq.default(
x = x, ...,
.add_header = list(
`%SI` = trimws(percentage(susceptibility(x, minimum = 0, as_percent = FALSE),
digits = digits
))
)
)
}
}
# will be exported using s3_register() in R/zzz.R
get_skimmers.sir <- function(column) {
# get the variable name 'skim_variable'
name_call <- function(.data) {
calls <- sys.calls()
frms <- sys.frames()
calls_txt <- vapply(calls, function(x) paste(deparse(x), collapse = ""), FUN.VALUE = character(1))
if (any(calls_txt %like% "skim_variable", na.rm = TRUE)) {
ind <- which(calls_txt %like% "skim_variable")[1L]
vars <- tryCatch(eval(parse(text = ".data$skim_variable$sir"), envir = frms[[ind]]),
error = function(e) NULL
)
tryCatch(ab_name(as.character(calls[[length(calls)]][[2]]), language = NULL),
error = function(e) NA_character_
)
} else {
NA_character_
}
}
skimr::sfl(
skim_type = "sir",
ab_name = name_call,
count_R = count_R,
count_S = count_susceptible,
count_I = count_I,
prop_R = ~ proportion_R(., minimum = 0),
prop_S = ~ susceptibility(., minimum = 0),
prop_I = ~ proportion_I(., minimum = 0)
)
}
#' @method print sir
#' @export
#' @noRd
print.sir <- function(x, ...) {
x_name <- deparse(substitute(x))
cat("Class 'sir'\n")
# TODO for #170
# if (!is.null(attributes(x)$guideline) && !all(is.na(attributes(x)$guideline))) {
# cat(font_blue(word_wrap("These values were interpreted using ",
# font_bold(vector_and(attributes(x)$guideline, quotes = FALSE)),
# " based on ",
# vector_and(attributes(x)$method, quotes = FALSE),
# " values. ",
# "Use `sir_interpretation_history(", x_name, ")` to return a full logbook.")))
# cat("\n")
# }
print(as.character(x), quote = FALSE)
}
#' @method as.double sir
#' @export
as.double.sir <- function(x, ...) {
dbls <- rep(NA_real_, length(x))
dbls[x == "S"] <- 1
dbls[x %in% c("SDD", "I")] <- 2
dbls[x == "R"] <- 3
dbls
}
#' @method droplevels sir
#' @export
#' @noRd
droplevels.sir <- function(x, exclude = if (any(is.na(levels(x)))) NULL else NA, ...) {
x <- droplevels.factor(x, exclude = exclude, ...)
class(x) <- c("sir", "ordered", "factor")
x
}
#' @method summary sir
#' @export
#' @noRd
summary.sir <- function(object, ...) {
x <- object
n <- sum(!is.na(x))
S <- sum(x == "S", na.rm = TRUE)
SDD <- sum(x == "SDD", na.rm = TRUE)
I <- sum(x == "I", na.rm = TRUE)
R <- sum(x == "R", na.rm = TRUE)
NI <- sum(x == "NI", na.rm = TRUE)
pad <- function(x) {
if (is.na(x)) {
return("??")
}
if (x == "0%") {
x <- " 0.0%"
}
if (nchar(x) < 5) {
x <- paste0(rep(" ", 5 - nchar(x)), x)
}
x
}
value <- c(
"Class" = "sir",
"%S" = paste0(pad(percentage(S / n, digits = 1)), " (n=", S, ")"),
"%SDD" = paste0(pad(percentage(SDD / n, digits = 1)), " (n=", SDD, ")"),
"%I" = paste0(pad(percentage(I / n, digits = 1)), " (n=", I, ")"),
"%R" = paste0(pad(percentage(R / n, digits = 1)), " (n=", R, ")"),
"%NI" = paste0(pad(percentage(NI / n, digits = 1)), " (n=", NI, ")")
)
class(value) <- c("summaryDefault", "table")
value
}
#' @method [<- sir
#' @export
#' @noRd
"[<-.sir" <- function(i, j, ..., value) {
value <- as.sir(value)
y <- NextMethod()
attributes(y) <- attributes(i)
y
}
#' @method [[<- sir
#' @export
#' @noRd
"[[<-.sir" <- function(i, j, ..., value) {
value <- as.sir(value)
y <- NextMethod()
attributes(y) <- attributes(i)
y
}
#' @method c sir
#' @export
#' @noRd
c.sir <- function(...) {
lst <- list(...)
# TODO for #170
# guideline <- vapply(FUN.VALUE = character(1), lst, function(x) attributes(x)$guideline %or% NA_character_)
# mo <- vapply(FUN.VALUE = character(1), lst, function(x) attributes(x)$mo %or% NA_character_)
# ab <- vapply(FUN.VALUE = character(1), lst, function(x) attributes(x)$ab %or% NA_character_)
# method <- vapply(FUN.VALUE = character(1), lst, function(x) attributes(x)$method %or% NA_character_)
# ref_tbl <- vapply(FUN.VALUE = character(1), lst, function(x) attributes(x)$ref_tbl %or% NA_character_)
# ref_breakpoints <- vapply(FUN.VALUE = character(1), lst, function(x) attributes(x)$ref_breakpoints %or% NA_character_)
out <- as.sir(unlist(lapply(list(...), as.character)))
# TODO for #170
# if (!all(is.na(guideline))) {
# attributes(out)$guideline <- guideline
# attributes(out)$mo <- mo
# attributes(out)$ab <- ab
# attributes(out)$method <- method
# attributes(out)$ref_tbl <- ref_tbl
# attributes(out)$ref_breakpoints <- ref_breakpoints
# }
out
}
#' @method unique sir
#' @export
#' @noRd
unique.sir <- function(x, incomparables = FALSE, ...) {
y <- NextMethod()
attributes(y) <- attributes(x)
y
}
#' @method rep sir
#' @export
#' @noRd
rep.sir <- function(x, ...) {
y <- NextMethod()
attributes(y) <- attributes(x)
y
}
check_reference_data <- function(reference_data, .call_depth) {
if (!identical(reference_data, AMR::clinical_breakpoints)) {
class_sir <- vapply(FUN.VALUE = character(1), AMR::clinical_breakpoints, function(x) paste0("<", class(x), ">", collapse = " and "))
class_ref <- vapply(FUN.VALUE = character(1), reference_data, function(x) paste0("<", class(x), ">", collapse = " and "))
if (!all(names(class_sir) == names(class_ref))) {
stop_("`reference_data` must have the same column names as the 'clinical_breakpoints' data set.", call = .call_depth)
}
if (!all(class_sir == class_ref)) {
stop_("`reference_data` must be the same structure as the 'clinical_breakpoints' data set. Column '", names(class_ref[class_sir != class_ref][1]), "' is of class ", class_ref[class_sir != class_ref][1], ", but should be of class ", class_sir[class_sir != class_ref][1], ".", call = .call_depth)
}
}
}