AMR/R/sir.R

# ==================================================================== #
# TITLE:                                                               #
# AMR: An R Package for Working with Antimicrobial Resistance Data     #
#                                                                      #
# SOURCE CODE:                                                         #
# https://github.com/msberends/AMR                                     #
#                                                                      #
# PLEASE CITE THIS SOFTWARE AS:                                        #
# Berends MS, Luz CF, Friedrich AW, et al. (2022).                     #
# AMR: An R Package for Working with Antimicrobial Resistance Data.    #
# Journal of Statistical Software, 104(3), 1-31.                       #
# https://doi.org/10.18637/jss.v104.i03                                #
#                                                                      #
# Developed at the University of Groningen and the University Medical  #
# Center Groningen in The Netherlands, in collaboration with many      #
# colleagues from around the world, see our website.                   #
#                                                                      #
# This R package is free software; you can freely use and distribute   #
# it for both personal and commercial purposes under the terms of the  #
# GNU General Public License version 2.0 (GNU GPL-2), as published by  #
# the Free Software Foundation.                                        #
# We created this package for both routine data analysis and academic  #
# research and it was publicly released in the hope that it will be    #
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY.              #
#                                                                      #
# Visit our website for the full manual and a complete tutorial about  #
# how to conduct AMR data analysis: https://amr-for-r.org              #
# ==================================================================== #

#' Interpret MIC and Disk Diffusion as SIR, or Clean Existing SIR Data
#'
#' @description Clean up existing SIR values, or interpret minimum inhibitory concentration (MIC) values and disk diffusion diameters according to EUCAST or CLSI. [as.sir()] transforms the input to a new class [`sir`], which is an ordered [factor] containing the levels `S`, `SDD`, `I`, `R`, `NI`.
#'
#' Breakpoints are currently implemented from EUCAST (`r min(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "EUCAST")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "EUCAST")$guideline)))`) and CLSI (`r min(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "CLSI")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "CLSI")$guideline)))`), see *Details*. All breakpoints used for interpretation are available in our [clinical_breakpoints] data set.
#' @rdname as.sir
#' @param x Vector of values (for class [`mic`]: MIC values in mg/L, for class [`disk`]: a disk diffusion radius in millimetres).
#' @param mo A vector (or column name) with [character]s that can be coerced to valid microorganism codes with [as.mo()], can be left empty to determine it automatically.
#' @param ab A vector (or column name) with [character]s that can be coerced to a valid antimicrobial drug code with [as.ab()].
#' @param uti (Urinary Tract Infection) a vector (or column name) with [logical]s (`TRUE` or `FALSE`) to specify whether a UTI specific interpretation from the guideline should be chosen. For using [as.sir()] on a [data.frame], this can also be a column containing [logical]s or when left blank, the data set will be searched for a column 'specimen', and rows within this column containing 'urin' (such as 'urine', 'urina') will be regarded isolates from a UTI. See *Examples*.
#' @inheritParams first_isolate
#' @param guideline A guideline name (or column name) to use for SIR interpretation. Defaults to `r AMR::clinical_breakpoints$guideline[1]` (the latest implemented EUCAST guideline in the [AMR::clinical_breakpoints] data set), but can be set with the package option [`AMR_guideline`][AMR-options]. Currently supports EUCAST (`r min(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "EUCAST")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "EUCAST")$guideline)))`) and CLSI (`r min(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "CLSI")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "CLSI")$guideline)))`), see *Details*. Using a column name allows for straightforward interpretation of historical data, which must be analysed in the context of, for example, different years.
#' @param capped_mic_handling A [character] string that controls how MIC values with a cap (i.e., starting with `<`, `<=`, `>`, or `>=`) are interpreted. Supports the following options:
#'
#' `"none"`
#'   * `<=` and `>=` are treated as-is.
#'   * `<` and `>` are treated as-is.
#'
#' `"conservative"`
#'   * `<=` and `>=` return `"NI"` (non-interpretable) if the MIC is within the breakpoint guideline range.
#'   * `<` always returns `"S"`, and `>` always returns `"R"`.
#'
#' `"standard"` (default)
#'   * `<=` and `>=` return `"NI"` (non-interpretable) if the MIC is within the breakpoint guideline range.
#'   * `<` and `>` are treated as-is.
#'
#' `"inverse"`
#'   * `<=` and `>=` are treated as-is.
#'   * `<` always returns `"S"`, and `>` always returns `"R"`.
#'
#' The default `"standard"` setting ensures cautious handling of uncertain values while preserving interpretability. This option can also be set with the package option [`AMR_capped_mic_handling`][AMR-options].
#' @param add_intrinsic_resistance *(only useful when using a EUCAST guideline)* a [logical] to indicate whether intrinsic antibiotic resistance must also be considered for applicable bug-drug combinations, meaning that e.g. ampicillin will always return "R" in *Klebsiella* species. Determination is based on the [intrinsic_resistant] data set, that itself is based on `r format_eucast_version_nr(3.3)`.
#' @param substitute_missing_r_breakpoint A [logical] to indicate that a missing clinical breakpoints for R (resistant) must be substituted with R - the default is `FALSE`. Some (especially CLSI) breakpoints only have a breakpoint for S, meaning the outcome can only be `"S"` or `NA`. Setting this to `TRUE` will convert the `NA`s to `"R"` only if the R breakpoint is missing. Can also be set with the package option [`AMR_substitute_missing_r_breakpoint`][AMR-options].
#' @param include_screening A [logical] to indicate that clinical breakpoints for screening are allowed - the default is `FALSE`. Can also be set with the package option [`AMR_include_screening`][AMR-options].
#' @param include_PKPD A [logical] to indicate that PK/PD clinical breakpoints must be applied as a last resort - the default is `TRUE`. Can also be set with the package option [`AMR_include_PKPD`][AMR-options].
#' @param breakpoint_type The type of breakpoints to use, either `r vector_or(clinical_breakpoints$type)`. ECOFF stands for Epidemiological Cut-Off values. The default is `"human"`, which can also be set with the package option [`AMR_breakpoint_type`][AMR-options]. If `host` is set to values of veterinary species, this will automatically be set to `"animal"`.
#' @param host A vector (or column name) with [character]s to indicate the host. Only useful for veterinary breakpoints, as it requires `breakpoint_type = "animal"`. The values can be any text resembling the animal species, even in any of the `r length(LANGUAGES_SUPPORTED)` supported languages of this package. For foreign languages, be sure to set the language with [set_AMR_locale()] (though it will be automatically guessed based on the system language).
#' @param verbose A [logical] to indicate that all notes should be printed during interpretation of MIC values or disk diffusion values.
#' @param reference_data A [data.frame] to be used for interpretation, which defaults to the [clinical_breakpoints] data set. Changing this argument allows for using own interpretation guidelines. This argument must contain a data set that is equal in structure to the [clinical_breakpoints] data set (same column names and column types). Please note that the `guideline` argument will be ignored when `reference_data` is manually set.
#' @param threshold Maximum fraction of invalid antimicrobial interpretations of `x`, see *Examples*.
#' @param conserve_capped_values Deprecated, use `capped_mic_handling` instead.
#' @param ... For using on a [data.frame]: names of columns to apply [as.sir()] on (supports tidy selection such as `column1:column4`). Otherwise: arguments passed on to methods.
#' @details
#' *Note: The clinical breakpoints in this package were validated through, and imported from, [WHONET](https://whonet.org). The public use of this `AMR` package has been endorsed by both CLSI and EUCAST. See [clinical_breakpoints] for more information.*
#'
#' ### How it Works
#'
#' The [as.sir()] function can work in four ways:
#'
#' 1. For **cleaning raw / untransformed data**. The data will be cleaned to only contain valid values, namely: **S** for susceptible, **I** for intermediate or 'susceptible, increased exposure', **R** for resistant, **NI** for non-interpretable, and **SDD** for susceptible dose-dependent. Each of these can be set using a [regular expression][base::regex]. Furthermore, [as.sir()] will try its best to clean with some intelligence. For example, mixed values with SIR interpretations and MIC values such as `"<0.25; S"` will be coerced to `"S"`. Combined interpretations for multiple test methods (as seen in laboratory records) such as `"S; S"` will be coerced to `"S"`, but a value like `"S; I"` will return `NA` with a warning that the input is invalid.
#'
#' 2. For **interpreting minimum inhibitory concentration (MIC) values** according to EUCAST or CLSI. You must clean your MIC values first using [as.mic()], that also gives your columns the new data class [`mic`]. Also, be sure to have a column with microorganism names or codes. It will be found automatically, but can be set manually using the `mo` argument.
#'    * Example to apply using `dplyr`:
#'      ```r
#'      your_data %>% mutate_if(is.mic, as.sir)
#'      your_data %>% mutate(across(where(is.mic), as.sir))
#'      your_data %>% mutate_if(is.mic, as.sir, ab = "column_with_antibiotics", mo = "column_with_microorganisms")
#'      your_data %>% mutate_if(is.mic, as.sir, ab = c("cipro", "ampicillin", ...), mo = c("E. coli", "K. pneumoniae", ...))
#'
#'      # for veterinary breakpoints, also set `host`:
#'      your_data %>% mutate_if(is.mic, as.sir, host = "column_with_animal_species", guideline = "CLSI")
#'      ```
#'    * Operators like "<=" will be stripped before interpretation. When using `capped_mic_handling = "conservative"`, an MIC value of e.g. ">2" will always return "R", even if the breakpoint according to the chosen guideline is ">=4". This is to prevent that capped values from raw laboratory data would not be treated conservatively. The default behaviour (`capped_mic_handling = "standard"`) considers ">2" to be lower than ">=4" and might in this case return "S" or "I".
#'    * **Note:** When using CLSI as the guideline, MIC values must be log2-based doubling dilutions. Values not in this format, will be automatically rounded up to the nearest log2 level as CLSI instructs, and a warning will be thrown.
#'
#' 3. For **interpreting disk diffusion diameters** according to EUCAST or CLSI. You must clean your disk zones first using [as.disk()], that also gives your columns the new data class [`disk`]. Also, be sure to have a column with microorganism names or codes. It will be found automatically, but can be set manually using the `mo` argument.
#'    * Example to apply using `dplyr`:
#'      ```r
#'      your_data %>% mutate_if(is.disk, as.sir)
#'      your_data %>% mutate(across(where(is.disk), as.sir))
#'      your_data %>% mutate_if(is.disk, as.sir, ab = "column_with_antibiotics", mo = "column_with_microorganisms")
#'      your_data %>% mutate_if(is.disk, as.sir, ab = c("cipro", "ampicillin", ...), mo = c("E. coli", "K. pneumoniae", ...))
#'
#'      # for veterinary breakpoints, also set `host`:
#'      your_data %>% mutate_if(is.disk, as.sir, host = "column_with_animal_species", guideline = "CLSI")
#'      ```
#'
#' 4. For **interpreting a complete data set**, with automatic determination of MIC values, disk diffusion diameters, microorganism names or codes, and antimicrobial test results. This is done very simply by running `as.sir(your_data)`.
#'
#' **For points 2, 3 and 4: Use [sir_interpretation_history()]** to retrieve a [data.frame] with all results of all previous [as.sir()] calls. It also contains notes about interpretation, and the exact input and output values.
#'
#' ### Supported Guidelines
#'
#' For interpreting MIC values as well as disk diffusion diameters, currently implemented guidelines are:
#' - For **clinical microbiology**: EUCAST `r min(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "EUCAST" & type == "human")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "EUCAST" & type == "human")$guideline)))` and CLSI `r min(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "CLSI" & type == "human")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "CLSI" & type == "human")$guideline)))`;
#' - For **veterinary microbiology**: EUCAST `r min(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "EUCAST" & type == "animal")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "EUCAST" & type == "animal")$guideline)))` and CLSI `r min(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "CLSI" & type == "animal")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "CLSI" & type == "animal")$guideline)))`;
#' - For **ECOFFs** (Epidemiological Cut-off Values): EUCAST `r min(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "EUCAST" & type == "ECOFF")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "EUCAST" & type == "ECOFF")$guideline)))` and CLSI `r min(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "CLSI" & type == "ECOFF")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "CLSI" & type == "ECOFF")$guideline)))`.
#'
#' The `guideline` argument must be set to e.g., ``r paste0('"', subset(AMR::clinical_breakpoints, guideline %like% "EUCAST")$guideline[1], '"')`` or ``r paste0('"', subset(AMR::clinical_breakpoints, guideline %like% "CLSI")$guideline[1], '"')``. By simply using `"EUCAST"` (the default) or `"CLSI"` as input, the latest included version of that guideline will automatically be selected. Importantly, using a column name of your data instead, allows for straightforward interpretation of historical data that must be analysed in the context of, for example, different years.
#'
#' You can set your own data set using the `reference_data` argument. The `guideline` argument will then be ignored.
#'
#' It is also possible to set the default guideline with the package option [`AMR_guideline`][AMR-options] (e.g. in your `.Rprofile` file), such as:
#'
#' ```
#'   options(AMR_guideline = "CLSI")
#'   options(AMR_guideline = "CLSI 2018")
#'   options(AMR_guideline = "EUCAST 2020")
#'   # or to reset:
#'   options(AMR_guideline = NULL)
#' ```
#'
#' For veterinary guidelines, these might be the best options:
#'
#' ```
#'   options(AMR_guideline = "CLSI")
#'   options(AMR_breakpoint_type = "animal")
#' ```
#'
###### TODO #187 When applying veterinary breakpoints (by setting `host` or by setting `breakpoint_type = "animal"`), the [CLSI VET09 guideline](https://clsi.org/standards/products/veterinary-medicine/documents/vet09/) will be applied to cope with missing animal species-specific breakpoints.
#'
#' ### After Interpretation
#'
#' After using [as.sir()], you can use the [eucast_rules()] defined by EUCAST to (1) apply inferred susceptibility and resistance based on results of other antimicrobials and (2) apply intrinsic resistance based on taxonomic properties of a microorganism.
#'
#' To determine which isolates are multi-drug resistant, be sure to run [mdro()] (which applies the MDR/PDR/XDR guideline from 2012 at default) on a data set that contains S/I/R values. Read more about [interpreting multidrug-resistant organisms here][mdro()].
#'
#' ### Other
#'
#' The function [is.sir()] detects if the input contains class `sir`. If the input is a [data.frame] or [list], it iterates over all columns/items and returns a [logical] vector.
#'
#' The base R function [as.double()] can be used to retrieve quantitative values from a `sir` object: `"S"` = 1, `"I"`/`"SDD"` = 2, `"R"` = 3. All other values are rendered `NA` . **Note:** Do not use `as.integer()`, since that (because of how R works internally) will return the factor level indices, and not these aforementioned quantitative values.
#'
#' The function [is_sir_eligible()] returns `TRUE` when a column contains at most 5% invalid antimicrobial interpretations (not S and/or I and/or R and/or NI and/or SDD), and `FALSE` otherwise. The threshold of 5% can be set with the `threshold` argument. If the input is a [data.frame], it iterates over all columns and returns a [logical] vector.
#' @section Interpretation of SIR:
#' In 2019, the European Committee on Antimicrobial Susceptibility Testing (EUCAST) has decided to change the definitions of susceptibility testing categories S, I, and R (<https://www.eucast.org/newsiandr>).
#'
#' This AMR package follows insight; use [susceptibility()] (equal to [proportion_SI()]) to determine antimicrobial susceptibility and [count_susceptible()] (equal to [count_SI()]) to count susceptible isolates.
#' @return Ordered [factor] with new class `sir`
#' @aliases sir
#' @export
#' @seealso [as.mic()], [as.disk()], [as.mo()]
#' @source
#' For interpretations of minimum inhibitory concentration (MIC) values and disk diffusion diameters:
#'
#' - **CLSI M39: Analysis and Presentation of Cumulative Antimicrobial Susceptibility Test Data**, `r min(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "CLSI")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "CLSI")$guideline)))`, *Clinical and Laboratory Standards Institute* (CLSI). <https://clsi.org/standards/products/microbiology/documents/m39/>.
#' - **CLSI M100: Performance Standard for Antimicrobial Susceptibility Testing**, `r min(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "CLSI" & type != "animal")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "CLSI" & type != "animal")$guideline)))`, *Clinical and Laboratory Standards Institute* (CLSI). <https://clsi.org/standards/products/microbiology/documents/m100/>.
#' - **CLSI VET01: Performance Standards for Antimicrobial Disk and Dilution Susceptibility Tests for Bacteria Isolated From Animals**, `r min(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "CLSI" & type == "animal")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "CLSI" & type == "animal")$guideline)))`, *Clinical and Laboratory Standards Institute* (CLSI). <https://clsi.org/standards/products/veterinary-medicine/documents/vet01/>.
###### TODO  - **CLSI VET09: Understanding Susceptibility Test Data as a Component of Antimicrobial Stewardship in Veterinary Settings**, `r min(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "CLSI" & type == "animal")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "CLSI" & type == "animal")$guideline)))`, *Clinical and Laboratory Standards Institute* (CLSI). <https://clsi.org/standards/products/veterinary-medicine/documents/vet09/>.
#' - **EUCAST Breakpoint tables for interpretation of MICs and zone diameters**, `r min(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "EUCAST")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "EUCAST")$guideline)))`, *European Committee on Antimicrobial Susceptibility Testing* (EUCAST). <https://www.eucast.org/clinical_breakpoints>.
#' - **WHONET** as a source for machine-reading the clinical breakpoints ([read more here](https://amr-for-r.org/reference/clinical_breakpoints.html#imported-from-whonet)), 1989-`r max(as.integer(gsub("[^0-9]", "", AMR::clinical_breakpoints$guideline)))`, *WHO Collaborating Centre for Surveillance of Antimicrobial Resistance*. <https://whonet.org/>.
#'
#' @inheritSection AMR Download Our Reference Data
#' @examples
#' example_isolates
#' summary(example_isolates) # see all SIR results at a glance
#'
#' # For INTERPRETING disk diffusion and MIC values -----------------------
#'
#' # example data sets, with combined MIC values and disk zones
#' df_wide <- data.frame(
#'   microorganism = "Escherichia coli",
#'   amoxicillin = as.mic(8),
#'   cipro = as.mic(0.256),
#'   tobra = as.disk(16),
#'   genta = as.disk(18),
#'   ERY = "R"
#' )
#' df_long <- data.frame(
#'   bacteria = rep("Escherichia coli", 4),
#'   antibiotic = c("amoxicillin", "cipro", "tobra", "genta"),
#'   mics = as.mic(c(0.01, 1, 4, 8)),
#'   disks = as.disk(c(6, 10, 14, 18)),
#'   guideline = c("EUCAST 2021", "EUCAST 2022", "EUCAST 2023", "EUCAST 2024")
#' )
#'
#' \donttest{
#' ## Using dplyr -------------------------------------------------
#' if (require("dplyr")) {
#'   # approaches that all work without additional arguments:
#'   df_wide %>% mutate_if(is.mic, as.sir)
#'   df_wide %>% mutate_if(function(x) is.mic(x) | is.disk(x), as.sir)
#'   df_wide %>% mutate(across(where(is.mic), as.sir))
#'   df_wide %>% mutate_at(vars(amoxicillin:tobra), as.sir)
#'   df_wide %>% mutate(across(amoxicillin:tobra, as.sir))
#'
#'   # approaches that all work with additional arguments:
#'   df_long %>%
#'     # given a certain data type, e.g. MIC values
#'     mutate_if(is.mic, as.sir,
#'       mo = "bacteria",
#'       ab = "antibiotic",
#'       guideline = "guideline"
#'     )
#'   df_long %>%
#'     mutate(across(
#'       where(is.mic),
#'       function(x) {
#'         as.sir(x,
#'           mo = "bacteria",
#'           ab = "antibiotic",
#'           guideline = "CLSI"
#'         )
#'       }
#'     ))
#'   df_wide %>%
#'     # given certain columns, e.g. from 'cipro' to 'genta'
#'     mutate_at(vars(cipro:genta), as.sir,
#'       mo = "bacteria",
#'       guideline = "CLSI"
#'     )
#'   df_wide %>%
#'     mutate(across(
#'       cipro:genta,
#'       function(x) {
#'         as.sir(x,
#'           mo = "bacteria",
#'           guideline = "CLSI"
#'         )
#'       }
#'     ))
#'
#'   # for veterinary breakpoints, add 'host':
#'   df_long$animal_species <- c("cats", "dogs", "horses", "cattle")
#'   df_long %>%
#'     # given a certain data type, e.g. MIC values
#'     mutate_if(is.mic, as.sir,
#'       mo = "bacteria",
#'       ab = "antibiotic",
#'       host = "animal_species",
#'       guideline = "CLSI"
#'     )
#'   df_long %>%
#'     mutate(across(
#'       where(is.mic),
#'       function(x) {
#'         as.sir(x,
#'           mo = "bacteria",
#'           ab = "antibiotic",
#'           host = "animal_species",
#'           guideline = "CLSI"
#'         )
#'       }
#'     ))
#'   df_wide %>%
#'     mutate_at(vars(cipro:genta), as.sir,
#'       mo = "bacteria",
#'       ab = "antibiotic",
#'       host = "animal_species",
#'       guideline = "CLSI"
#'     )
#'   df_wide %>%
#'     mutate(across(
#'       cipro:genta,
#'       function(x) {
#'         as.sir(x,
#'           mo = "bacteria",
#'           host = "animal_species",
#'           guideline = "CLSI"
#'         )
#'       }
#'     ))
#'
#'   # to include information about urinary tract infections (UTI)
#'   data.frame(
#'     mo = "E. coli",
#'     nitrofuratoin = c("<= 2", 32),
#'     from_the_bladder = c(TRUE, FALSE)
#'   ) %>%
#'     as.sir(uti = "from_the_bladder")
#'
#'   data.frame(
#'     mo = "E. coli",
#'     nitrofuratoin = c("<= 2", 32),
#'     specimen = c("urine", "blood")
#'   ) %>%
#'     as.sir() # automatically determines urine isolates
#'
#'   df_wide %>%
#'     mutate_at(vars(cipro:genta), as.sir, mo = "E. coli", uti = TRUE)
#' }
#'
#'
#' ## Using base R ------------------------------------------------
#'
#' as.sir(df_wide)
#'
#' # return a 'logbook' about the results:
#' sir_interpretation_history()
#'
#' # for single values
#' as.sir(
#'   x = as.mic(2),
#'   mo = as.mo("S. pneumoniae"),
#'   ab = "AMP",
#'   guideline = "EUCAST"
#' )
#'
#' as.sir(
#'   x = as.disk(18),
#'   mo = "Strep pneu", # `mo` will be coerced with as.mo()
#'   ab = "ampicillin", # and `ab` with as.ab()
#'   guideline = "EUCAST"
#' )
#'
#'
#' # For CLEANING existing SIR values ------------------------------------
#'
#' as.sir(c("S", "SDD", "I", "R", "NI", "A", "B", "C"))
#' as.sir("<= 0.002; S") # will return "S"
#' sir_data <- as.sir(c(rep("S", 474), rep("I", 36), rep("R", 370)))
#' is.sir(sir_data)
#' plot(sir_data) # for percentages
#' barplot(sir_data) # for frequencies
#'
#' # as common in R, you can use as.integer() to return factor indices:
#' as.integer(as.sir(c("S", "SDD", "I", "R", "NI", NA)))
#' # but for computational use, as.double() will return 1 for S, 2 for I/SDD, and 3 for R:
#' as.double(as.sir(c("S", "SDD", "I", "R", "NI", NA)))
#'
#' # the dplyr way
#' if (require("dplyr")) {
#'   example_isolates %>%
#'     mutate_at(vars(PEN:RIF), as.sir)
#'   # same:
#'   example_isolates %>%
#'     as.sir(PEN:RIF)
#'
#'   # fastest way to transform all columns with already valid AMR results to class `sir`:
#'   example_isolates %>%
#'     mutate_if(is_sir_eligible, as.sir)
#'
#'   # since dplyr 1.0.0, this can also be:
#'   # example_isolates %>%
#'   #   mutate(across(where(is_sir_eligible), as.sir))
#' }
#' }
as.sir <- function(x, ...) {
  UseMethod("as.sir")
}

as_sir_structure <- function(x,
                             guideline = NULL,
                             mo = NULL,
                             ab = NULL,
                             method = NULL,
                             ref_tbl = NULL,
                             ref_breakpoints = NULL) {
  out <- structure(
    factor(as.character(unlist(unname(x))),
      levels = c("S", "SDD", "I", "R", "NI"),
      ordered = TRUE
    ),
    # TODO for #170
    # guideline = guideline,
    # mo = mo,
    # ab = ab,
    # method = method,
    # ref_tbl = ref_tbl,
    # ref_breakpoints = ref_breakpoints,
    class = c("sir", "ordered", "factor")
  )
}

#' @rdname as.sir
#' @details `NA_sir_` is a missing value of the new `sir` class, analogous to e.g. base \R's [`NA_character_`][base::NA].
#' @format NULL
#' @export
NA_sir_ <- as_sir_structure(NA_character_)

#' @rdname as.sir
#' @export
is.sir <- function(x) {
  if (identical(typeof(x), "list")) {
    unname(vapply(FUN.VALUE = logical(1), x, is.sir))
  } else {
    isTRUE(inherits(x, "sir"))
  }
}

#' @rdname as.sir
#' @export
is_sir_eligible <- function(x, threshold = 0.05) {
  meet_criteria(threshold, allow_class = "numeric", has_length = 1)

  if (identical(typeof(x), "list")) {
    # iterate this function over all columns
    return(unname(vapply(FUN.VALUE = logical(1), x, is_sir_eligible)))
  }

  stop_if(NCOL(x) > 1, "`x` must be a one-dimensional vector.")
  if (any(c(
    "numeric",
    "integer",
    "mo",
    "ab",
    "Date",
    "POSIXt",
    "raw",
    "hms",
    "mic",
    "disk"
  )
  %in% class(x))) {
    # no transformation needed
    return(FALSE)
  } else if (all(x %in% c("S", "SDD", "I", "R", "NI", NA)) & !all(is.na(x))) {
    return(TRUE)
  } else if (!any(c("S", "SDD", "I", "R", "NI") %in% x, na.rm = TRUE) & !all(is.na(x))) {
    return(FALSE)
  } else {
    x <- x[!is.na(x) & !is.null(x) & !x %in% c("", "-", "NULL")]
    if (length(x) == 0) {
      # no other values than empty
      cur_col <- get_current_column()
      if (!is.null(cur_col)) {
        ab <- suppressWarnings(as.ab(cur_col, fast_mode = TRUE, info = FALSE))
        if (!is.na(ab)) {
          # this is a valid antibiotic drug code
          message_(
            "Column '", font_bold(cur_col), "' is SIR eligible (despite only having empty values), since it seems to be ",
            ab_name(ab, language = NULL, tolower = TRUE), " (", ab, ")"
          )
          return(TRUE)
        }
      }
      # all values empty and no antibiotic col name - return FALSE
      return(FALSE)
    }
    # transform all values and see if it meets the set threshold
    checked <- suppressWarnings(as.sir(x))
    outcome <- sum(is.na(checked)) / length(x)
    outcome <= threshold
  }
}

#' @rdname as.sir
#' @export
#' @param S,I,R,NI,SDD A case-independent [regular expression][base::regex] to translate input to this result. This regular expression will be run *after* all non-letters and whitespaces are removed from the input.
#' @param info A [logical] to print information about the process.
# extra param: warn (logical, to never throw a warning)
as.sir.default <- function(x,
                           S = "^(S|U)+$",
                           I = "^(I)+$",
                           R = "^(R)+$",
                           NI = "^(N|NI|V)+$",
                           SDD = "^(SDD|D|H)+$",
                           info = TRUE,
                           ...) {
  meet_criteria(S, allow_class = "character", has_length = 1)
  meet_criteria(I, allow_class = "character", has_length = 1)
  meet_criteria(R, allow_class = "character", has_length = 1)
  meet_criteria(NI, allow_class = "character", has_length = 1)
  meet_criteria(SDD, allow_class = "character", has_length = 1)
  meet_criteria(info, allow_class = "logical", has_length = 1)
  if (inherits(x, "sir")) {
    return(as_sir_structure(x))
  }

  x.bak <- x
  x <- as.character(x) # this is needed to prevent the vctrs pkg from throwing an error

  if (inherits(x.bak, c("numeric", "integer")) && all(x %in% c(1:3, NA))) {
    # support haven package for importing e.g., from SPSS - it adds the 'labels' attribute
    lbls <- attributes(x.bak)$labels
    if (!is.null(lbls) && all(c("S", "I", "R") %in% names(lbls)) && all(c(1:3) %in% lbls)) {
      x[x.bak == 1] <- names(lbls[lbls == 1])
      x[x.bak == 2] <- names(lbls[lbls == 2])
      x[x.bak == 3] <- names(lbls[lbls == 3])
    } else {
      x[x.bak == 1] <- "S"
      x[x.bak == 2] <- "I"
      x[x.bak == 3] <- "R"
    }
  } else if (inherits(x.bak, "character") && all(x %in% c("1", "2", "3", "S", "I", "R", NA_character_))) {
    x[x.bak == "1"] <- "S"
    x[x.bak == "2"] <- "I"
    x[x.bak == "3"] <- "R"
  } else if (inherits(x.bak, "character") && all(x %in% c("1", "2", "3", "4", "5", "S", "SDD", "I", "R", "NI", NA_character_))) {
    x[x.bak == "1"] <- "S"
    x[x.bak == "2"] <- "SDD"
    x[x.bak == "3"] <- "I"
    x[x.bak == "4"] <- "R"
    x[x.bak == "5"] <- "NI"
  } else if (!all(is.na(x)) && !identical(levels(x), c("S", "SDD", "I", "R", "NI")) && !all(x %in% c("S", "SDD", "I", "R", "NI", NA))) {
    if (all(x %unlike% "(S|I|R)", na.rm = TRUE)) {
      # check if they are actually MICs or disks
      if (all_valid_mics(x)) {
        warning_("in `as.sir()`: input values were guessed to be MIC values - preferably transform them with `as.mic()` before running `as.sir()`.")
        return(as.sir(as.mic(x), ...))
      } else if (all_valid_disks(x)) {
        warning_("in `as.sir()`: input values were guessed to be disk diffusion values - preferably transform them with `as.disk()` before running `as.sir()`.")
        return(as.sir(as.disk(x), ...))
      }
    }

    # trim leading and trailing spaces, new lines, etc.
    x <- trimws2(as.character(unlist(x)))
    x[x %in% c(NA, "", "-", "NULL")] <- NA_character_
    x.bak <- x

    na_before <- length(x[is.na(x)])

    # correct for translations
    trans_R <- unlist(TRANSLATIONS[
      which(TRANSLATIONS$pattern == "Resistant"),
      LANGUAGES_SUPPORTED[LANGUAGES_SUPPORTED %in% colnames(TRANSLATIONS)]
    ])
    trans_S <- unlist(TRANSLATIONS[
      which(TRANSLATIONS$pattern == "Susceptible"),
      LANGUAGES_SUPPORTED[LANGUAGES_SUPPORTED %in% colnames(TRANSLATIONS)]
    ])
    trans_I <- unlist(TRANSLATIONS[
      which(TRANSLATIONS$pattern %in% c("Incr. exposure", "Susceptible, incr. exp.", "Intermediate")),
      LANGUAGES_SUPPORTED[LANGUAGES_SUPPORTED %in% colnames(TRANSLATIONS)]
    ])
    x <- gsub(paste0(unique(trans_R[!is.na(trans_R)]), collapse = "|"), "R", x, ignore.case = TRUE)
    x <- gsub(paste0(unique(trans_S[!is.na(trans_S)]), collapse = "|"), "S", x, ignore.case = TRUE)
    x <- gsub(paste0(unique(trans_I[!is.na(trans_I)]), collapse = "|"), "I", x, ignore.case = TRUE)
    # replace all English textual input
    x[x %like% "([^a-z]|^)res(is(tant)?)?"] <- "R"
    x[x %like% "([^a-z]|^)sus(cep(tible)?)?"] <- "S"
    x[x %like% "not|non"] <- "NI"
    x[x %like% "([^a-z]|^)int(er(mediate)?)?|incr.*exp"] <- "I"
    x[x %like% "dose"] <- "SDD"
    x <- gsub("[^A-Z]+", "", x, perl = TRUE)
    # apply regexes set by user
    x[x %like% S] <- "S"
    x[x %like% I] <- "I"
    x[x %like% R] <- "R"
    x[x %like% NI] <- "NI"
    x[x %like% SDD] <- "SDD"
    x[!x %in% c("S", "SDD", "I", "R", "NI")] <- NA_character_
    na_after <- length(x[is.na(x) | x == ""])

    if (!isFALSE(list(...)$warn)) { # so as.sir(..., warn = FALSE) will never throw a warning
      if (na_before != na_after) {
        list_missing <- x.bak[is.na(x) & !is.na(x.bak) & x.bak != ""] %pm>%
          unique() %pm>%
          sort() %pm>%
          vector_and(quotes = TRUE)
        cur_col <- get_current_column()
        warning_("in `as.sir()`: ", na_after - na_before, " result",
          ifelse(na_after - na_before > 1, "s", ""),
          ifelse(is.null(cur_col), "", paste0(" in index '", cur_col, "'")),
          " truncated (",
          round(((na_after - na_before) / length(x)) * 100),
          "%) that were invalid antimicrobial interpretations: ",
          list_missing,
          call = FALSE
        )
      }
    }
  }

  as_sir_structure(x)
}

#' @rdname as.sir
#' @export
as.sir.mic <- function(x,
                       mo = NULL,
                       ab = deparse(substitute(x)),
                       guideline = getOption("AMR_guideline", "EUCAST"),
                       uti = NULL,
                       capped_mic_handling = getOption("AMR_capped_mic_handling", "standard"),
                       add_intrinsic_resistance = FALSE,
                       reference_data = AMR::clinical_breakpoints,
                       substitute_missing_r_breakpoint = getOption("AMR_substitute_missing_r_breakpoint", FALSE),
                       include_screening = getOption("AMR_include_screening", FALSE),
                       include_PKPD = getOption("AMR_include_PKPD", TRUE),
                       breakpoint_type = getOption("AMR_breakpoint_type", "human"),
                       host = NULL,
                       verbose = FALSE,
                       info = TRUE,
                       conserve_capped_values = NULL,
                       ...) {
  as_sir_method(
    method_short = "mic",
    method_long = "MIC values",
    x = x,
    mo = mo,
    ab = ab,
    guideline = guideline,
    uti = uti,
    capped_mic_handling = capped_mic_handling,
    add_intrinsic_resistance = add_intrinsic_resistance,
    reference_data = reference_data,
    substitute_missing_r_breakpoint = substitute_missing_r_breakpoint,
    include_screening = include_screening,
    include_PKPD = include_PKPD,
    breakpoint_type = breakpoint_type,
    host = host,
    verbose = verbose,
    info = info,
    conserve_capped_values = conserve_capped_values,
    ...
  )
}

#' @rdname as.sir
#' @export
as.sir.disk <- function(x,
                        mo = NULL,
                        ab = deparse(substitute(x)),
                        guideline = getOption("AMR_guideline", "EUCAST"),
                        uti = NULL,
                        add_intrinsic_resistance = FALSE,
                        reference_data = AMR::clinical_breakpoints,
                        substitute_missing_r_breakpoint = getOption("AMR_substitute_missing_r_breakpoint", FALSE),
                        include_screening = getOption("AMR_include_screening", FALSE),
                        include_PKPD = getOption("AMR_include_PKPD", TRUE),
                        breakpoint_type = getOption("AMR_breakpoint_type", "human"),
                        host = NULL,
                        verbose = FALSE,
                        info = TRUE,
                        ...) {
  as_sir_method(
    method_short = "disk",
    method_long = "disk diffusion zones",
    x = x,
    mo = mo,
    ab = ab,
    guideline = guideline,
    uti = uti,
    capped_mic_handling = "standard", # will be ignored for non-MIC anyway
    add_intrinsic_resistance = add_intrinsic_resistance,
    reference_data = reference_data,
    substitute_missing_r_breakpoint = substitute_missing_r_breakpoint,
    include_screening = include_screening,
    include_PKPD = include_PKPD,
    breakpoint_type = breakpoint_type,
    host = host,
    verbose = verbose,
    info = info,
    ...
  )
}

#' @rdname as.sir
#' @export
as.sir.data.frame <- function(x,
                              ...,
                              col_mo = NULL,
                              guideline = getOption("AMR_guideline", "EUCAST"),
                              uti = NULL,
                              capped_mic_handling = getOption("AMR_capped_mic_handling", "standard"),
                              add_intrinsic_resistance = FALSE,
                              reference_data = AMR::clinical_breakpoints,
                              substitute_missing_r_breakpoint = getOption("AMR_substitute_missing_r_breakpoint", FALSE),
                              include_screening = getOption("AMR_include_screening", FALSE),
                              include_PKPD = getOption("AMR_include_PKPD", TRUE),
                              breakpoint_type = getOption("AMR_breakpoint_type", "human"),
                              host = NULL,
                              verbose = FALSE,
                              info = TRUE,
                              conserve_capped_values = NULL) {
  meet_criteria(x, allow_class = "data.frame") # will also check for dimensions > 0
  meet_criteria(col_mo, allow_class = "character", is_in = colnames(x), allow_NULL = TRUE)
  meet_criteria(guideline, allow_class = "character")
  meet_criteria(uti, allow_class = c("logical", "character"), allow_NULL = TRUE, allow_NA = TRUE)
  meet_criteria(capped_mic_handling, allow_class = "character", has_length = 1, is_in = c("standard", "conservative", "none", "inverse"))
  meet_criteria(add_intrinsic_resistance, allow_class = "logical", has_length = 1)
  meet_criteria(reference_data, allow_class = "data.frame")
  meet_criteria(substitute_missing_r_breakpoint, allow_class = "logical", has_length = 1)
  meet_criteria(include_screening, allow_class = "logical", has_length = 1)
  meet_criteria(include_PKPD, allow_class = "logical", has_length = 1)
  meet_criteria(breakpoint_type, allow_class = "character", is_in = reference_data$type, has_length = 1)
  meet_criteria(host, allow_class = c("character", "factor"), allow_NULL = TRUE, allow_NA = TRUE)
  meet_criteria(verbose, allow_class = "logical", has_length = 1)
  meet_criteria(info, allow_class = "logical", has_length = 1)
  x.bak <- x
  for (i in seq_len(ncol(x))) {
    # don't keep factors, overwriting them is hard
    if (is.factor(x[, i, drop = TRUE])) {
      x[, i] <- as.character(x[, i, drop = TRUE])
    }
  }

  # -- MO
  col_mo.bak <- col_mo
  if (is.null(col_mo)) {
    col_mo <- search_type_in_df(x = x, type = "mo", info = FALSE)
  }

  # -- host
  if (missing(breakpoint_type) && any(host %in% clinical_breakpoints$host[!clinical_breakpoints$host %in% c("human", "ECOFF")], na.rm = TRUE)) {
    if (isTRUE(info)) message_("Assuming `breakpoint_type = \"animal\"` since `host` contains animal species.")
    breakpoint_type <- "animal"
  } else if (any(!suppressMessages(convert_host(host)) %in% c("human", "ECOFF"), na.rm = TRUE)) {
    if (isTRUE(info)) message_("Assuming `breakpoint_type = \"animal\"`.")
    breakpoint_type <- "animal"
  }
  if (breakpoint_type == "animal") {
    if (is.null(host)) {
      host <- search_type_in_df(x = x, type = "host", add_col_prefix = FALSE)
    } else if (length(host) == 1 && as.character(host) %in% colnames(x)) {
      host <- x[[as.character(host)]]
    }
  } else {
    host <- breakpoint_type
  }

  # -- UTIs
  col_uti <- uti
  if (is.null(col_uti)) {
    col_uti <- search_type_in_df(x = x, type = "uti", add_col_prefix = FALSE)
  }
  if (!is.null(col_uti)) {
    if (is.logical(col_uti)) {
      # already a logical vector as input
      if (length(col_uti) == 1) {
        uti <- rep(col_uti, NROW(x))
      } else {
        uti <- col_uti
      }
    } else {
      # column found, transform to logical
      stop_if(
        length(col_uti) != 1 | !col_uti %in% colnames(x),
        "argument `uti` must be a [logical] vector, of must be a single column name of `x`"
      )
      uti <- as.logical(x[, col_uti, drop = TRUE])
    }
  } else {
    # col_uti is still NULL - look for specimen column and make logicals of the urines
    col_specimen <- suppressMessages(search_type_in_df(x = x, type = "specimen"))
    if (!is.null(col_specimen)) {
      uti <- x[, col_specimen, drop = TRUE] %like% "urin"
      values <- sort(unique(x[uti, col_specimen, drop = TRUE]))
      if (length(values) > 1) {
        plural <- c("s", "", "")
      } else {
        plural <- c("", "s", "a ")
      }
      if (isTRUE(info)) {
        message_(
          "Assuming value", plural[1], " ",
          vector_and(values, quotes = TRUE),
          " in column '", font_bold(col_specimen),
          "' reflect", plural[2], " ", plural[3], "urinary tract infection", plural[1],
          ".\n  Use `as.sir(uti = FALSE)` to prevent this."
        )
      }
    } else {
      # no data about UTI's found
      uti <- NULL
    }
  }

  i <- 0
  if (tryCatch(length(list(...)) > 0, error = function(e) TRUE)) {
    sel <- colnames(pm_select(x, ...))
  } else {
    sel <- colnames(x)
  }
  if (!is.null(col_mo)) {
    sel <- sel[sel != col_mo]
  }

  ab_cols <- colnames(x)[vapply(FUN.VALUE = logical(1), x, function(y) {
    i <<- i + 1
    check <- is.mic(y) | is.disk(y)
    ab <- colnames(x)[i]
    if (!is.null(col_mo) && ab == col_mo) {
      return(FALSE)
    }
    if (!is.null(col_uti) && ab == col_uti) {
      return(FALSE)
    }
    if (length(sel) == 0 || (length(sel) > 0 && ab %in% sel)) {
      ab_coerced <- suppressWarnings(as.ab(ab))
      if (is.na(ab_coerced) || (length(sel) > 0 & !ab %in% sel)) {
        # not even a valid AB code
        return(FALSE)
      } else {
        return(TRUE)
      }
    } else {
      return(FALSE)
    }
  })]

  stop_if(
    length(ab_cols) == 0,
    "no columns with MIC values, disk zones or antibiotic column names found in this data set. Use as.mic() or as.disk() to transform antimicrobial columns."
  )
  # set type per column
  types <- character(length(ab_cols))
  types[vapply(FUN.VALUE = logical(1), x.bak[, ab_cols, drop = FALSE], is.disk)] <- "disk"
  types[vapply(FUN.VALUE = logical(1), x.bak[, ab_cols, drop = FALSE], is.mic)] <- "mic"
  types[types == "" & vapply(FUN.VALUE = logical(1), x[, ab_cols, drop = FALSE], all_valid_disks)] <- "disk"
  types[types == "" & vapply(FUN.VALUE = logical(1), x[, ab_cols, drop = FALSE], all_valid_mics)] <- "mic"
  types[types == "" & !vapply(FUN.VALUE = logical(1), x.bak[, ab_cols, drop = FALSE], is.sir)] <- "sir"
  if (any(types %in% c("mic", "disk"), na.rm = TRUE)) {
    # now we need an mo column
    stop_if(is.null(col_mo), "`col_mo` must be set")
    # if not null, we already found it, now find again so a message will show
    if (is.null(col_mo.bak)) {
      col_mo <- search_type_in_df(x = x, type = "mo")
    }
    x_mo <- as.mo(x[, col_mo, drop = TRUE])
  }

  for (i in seq_along(ab_cols)) {
    if (types[i] == "mic") {
      x[, ab_cols[i]] <- x %pm>%
        pm_pull(ab_cols[i]) %pm>%
        as.character() %pm>%
        as.mic() %pm>%
        as.sir(
          mo = x_mo,
          mo.bak = x[, col_mo, drop = TRUE],
          ab = ab_cols[i],
          guideline = guideline,
          uti = uti,
          capped_mic_handling = capped_mic_handling,
          add_intrinsic_resistance = add_intrinsic_resistance,
          reference_data = reference_data,
          substitute_missing_r_breakpoint = substitute_missing_r_breakpoint,
          include_screening = include_screening,
          include_PKPD = include_PKPD,
          breakpoint_type = breakpoint_type,
          host = host,
          verbose = verbose,
          info = info,
          conserve_capped_values = conserve_capped_values,
          is_data.frame = TRUE
        )
    } else if (types[i] == "disk") {
      x[, ab_cols[i]] <- x %pm>%
        pm_pull(ab_cols[i]) %pm>%
        as.character() %pm>%
        as.disk() %pm>%
        as.sir(
          mo = x_mo,
          mo.bak = x[, col_mo, drop = TRUE],
          ab = ab_cols[i],
          guideline = guideline,
          uti = uti,
          add_intrinsic_resistance = add_intrinsic_resistance,
          reference_data = reference_data,
          substitute_missing_r_breakpoint = substitute_missing_r_breakpoint,
          include_screening = include_screening,
          include_PKPD = include_PKPD,
          breakpoint_type = breakpoint_type,
          host = host,
          verbose = verbose,
          info = info,
          is_data.frame = TRUE
        )
    } else if (types[i] == "sir") {
      show_message <- FALSE
      ab <- ab_cols[i]
      ab_coerced <- suppressWarnings(as.ab(ab))
      if (!all(x[, ab_cols[i], drop = TRUE] %in% c("S", "SDD", "I", "R", "NI", NA), na.rm = TRUE)) {
        show_message <- TRUE
        # only print message if values are not already clean
        if (isTRUE(info)) {
          message_("Cleaning values in column '", font_bold(ab), "' (",
            ifelse(ab_coerced != toupper(ab), paste0(ab_coerced, ", "), ""),
            ab_name(ab_coerced, tolower = TRUE), ")... ",
            appendLF = FALSE,
            as_note = FALSE
          )
        }
      } else if (!is.sir(x.bak[, ab_cols[i], drop = TRUE])) {
        show_message <- TRUE
        # only print message if class not already set
        if (isTRUE(info)) {
          message_("Assigning class 'sir' to already clean column '", font_bold(ab), "' (",
            ifelse(ab_coerced != toupper(ab), paste0(ab_coerced, ", "), ""),
            ab_name(ab_coerced, tolower = TRUE, language = NULL), ")... ",
            appendLF = FALSE,
            as_note = FALSE
          )
        }
      }
      x[, ab_cols[i]] <- as.sir.default(x = as.character(x[, ab_cols[i], drop = TRUE]))
      if (show_message == TRUE && isTRUE(info)) {
        message(font_green_bg(" OK "))
      }
    }
  }

  x
}

get_guideline <- function(guideline, reference_data) {
  if (!identical(reference_data, AMR::clinical_breakpoints)) {
    return(guideline)
  }
  guideline_param <- trimws2(toupper(guideline))
  latest_clsi <- rev(sort(subset(reference_data, guideline %like% "CLSI")$guideline))[1L]
  latest_eucast <- rev(sort(subset(reference_data, guideline %like% "EUCAST")$guideline))[1L]
  guideline_param[guideline_param == "CLSI"] <- latest_clsi
  guideline_param[guideline_param == "EUCAST"] <- latest_eucast
  # like 'EUCAST2020', should be 'EUCAST 2020'
  guideline_param[guideline_param %unlike% " "] <- gsub("([a-z]+)([0-9]+)", "\\1 \\2", guideline_param[guideline_param %unlike% " "], ignore.case = TRUE)

  stop_ifnot(guideline_param %in% reference_data$guideline,
    "invalid guideline: '", guideline,
    "'.\nValid guidelines are: ", vector_and(reference_data$guideline, quotes = TRUE, reverse = TRUE),
    call = FALSE
  )

  guideline_param
}

convert_host <- function(x, lang = get_AMR_locale()) {
  x <- gsub("[^a-zA-Z ]", "", trimws2(tolower(as.character(x))), perl = TRUE)
  x_out <- rep(NA_character_, length(x))
  x_out[trimws2(tolower(x)) == "human"] <- "human"
  x_out[trimws2(tolower(x)) == "ecoff"] <- "ecoff"
  # this order is based on: clinical_breakpoints |> filter(type == "animal") |> count(host, sort = TRUE)
  x_out[is.na(x_out) & (x %like% "dog|canine|Canis lupus" | x %like% translate_AMR("dog|dogs|canine", lang))] <- "dogs"
  x_out[is.na(x_out) & (x %like% "cattle|bovine|Bos taurus" | x %like% translate_AMR("cattle|bovine", lang))] <- "cattle"
  x_out[is.na(x_out) & (x %like% "swine|suida(e)?|Sus scrofa" | x %like% translate_AMR("swine|swines", lang))] <- "swine"
  x_out[is.na(x_out) & (x %like% "cat|feline|Felis catus" | x %like% translate_AMR("cat|cats|feline", lang))] <- "cats"
  x_out[is.na(x_out) & (x %like% "horse|equine|Equus ferus" | x %like% translate_AMR("horse|horses|equine", lang))] <- "horse"
  x_out[is.na(x_out) & (x %like% "aqua|fish|Pisces" | x %like% translate_AMR("aquatic|fish", lang))] <- "aquatic"
  x_out[is.na(x_out) & (x %like% "bird|chicken|poultry|avia|Gallus gallus" | x %like% translate_AMR("bird|birds|poultry", lang))] <- "poultry"

  # additional animals, not necessarily currently in breakpoint guidelines:
  x_out[is.na(x_out) & (x %like% "camel|camelid|Camelus dromedarius" | x %like% translate_AMR("camel|camels|camelid", lang))] <- "camels"
  x_out[is.na(x_out) & (x %like% "deer|cervine|Cervidae" | x %like% translate_AMR("deer|deers|cervine", lang))] <- "deer"
  x_out[is.na(x_out) & (x %like% "donkey|asinine|Equus africanus" | x %like% translate_AMR("donkey|donkeys|asinine", lang))] <- "donkeys"
  x_out[is.na(x_out) & (x %like% "ferret|musteline|Mustela putorius" | x %like% translate_AMR("ferret|ferrets|musteline", lang))] <- "ferrets"
  x_out[is.na(x_out) & (x %like% "goat|caprine|Capra aegagrus" | x %like% translate_AMR("goat|goats|caprine", lang))] <- "goats"
  x_out[is.na(x_out) & (x %like% "guinea pig|caviine|Cavia porcellus" | x %like% translate_AMR("guinea pig|guinea pigs|caviine", lang))] <- "guinea pigs"
  x_out[is.na(x_out) & (x %like% "hamster|cricetine|Cricetinae" | x %like% translate_AMR("hamster|hamsters|cricetine", lang))] <- "hamsters"
  x_out[is.na(x_out) & (x %like% "monkey|simian|Simia" | x %like% translate_AMR("monkey|monkeys|simian", lang))] <- "monkeys"
  x_out[is.na(x_out) & (x %like% "mouse|murine|Mus musculus" | x %like% translate_AMR("mouse|mice|murine", lang))] <- "mice"
  x_out[is.na(x_out) & (x %like% "pig|porcine|Sus scrofa" | x %like% translate_AMR("pig|pigs|porcine", lang))] <- "pigs"
  x_out[is.na(x_out) & (x %like% "rabbit|leporine|Oryctolagus cuniculus" | x %like% translate_AMR("rabbit|rabbits|leporine", lang))] <- "rabbits"
  x_out[is.na(x_out) & (x %like% "rat|ratine|Rattus" | x %like% translate_AMR("rat|rats|ratine", lang))] <- "rats"
  x_out[is.na(x_out) & (x %like% "sheep|ovine|Ovis aries" | x %like% translate_AMR("sheep|sheeps|ovine", lang))] <- "sheep"
  x_out[is.na(x_out) & (x %like% "snake|serpentine|Serpentes" | x %like% translate_AMR("snake|snakes|serpentine", lang))] <- "snakes"
  x_out[is.na(x_out) & (x %like% "turkey|meleagrine|Meleagris gallopavo" | x %like% translate_AMR("turkey|turkeys|meleagrine", lang))] <- "turkey"


  x_out[x_out == "ecoff"] <- "ECOFF"
  x_out
}

as_sir_method <- function(method_short,
                          method_long,
                          x,
                          mo,
                          ab,
                          guideline,
                          uti,
                          capped_mic_handling,
                          add_intrinsic_resistance,
                          reference_data,
                          substitute_missing_r_breakpoint,
                          include_screening,
                          include_PKPD,
                          breakpoint_type,
                          host,
                          verbose,
                          info,
                          conserve_capped_values = NULL,
                          ...) {
  if (isTRUE(conserve_capped_values)) {
    deprecation_warning(old = "conserve_capped_values", new = "capped_mic_handling", fn = "as.sir", is_argument = TRUE)
    capped_mic_handling <- "conservative"
  }
  meet_criteria(x, allow_NA = TRUE, .call_depth = -2)
  meet_criteria(mo, allow_class = c("mo", "character"), has_length = c(1, length(x)), allow_NULL = TRUE, .call_depth = -2)
  meet_criteria(ab, allow_class = c("ab", "character"), has_length = c(1, length(x)), .call_depth = -2)
  meet_criteria(guideline, allow_class = "character", has_length = c(1, length(x)), .call_depth = -2)
  meet_criteria(uti, allow_class = c("logical", "character"), has_length = c(1, length(x)), allow_NULL = TRUE, allow_NA = TRUE, .call_depth = -2)
  meet_criteria(capped_mic_handling, allow_class = "character", has_length = 1, is_in = c("standard", "conservative", "none", "inverse"), .call_depth = -2)
  meet_criteria(add_intrinsic_resistance, allow_class = "logical", has_length = 1, .call_depth = -2)
  meet_criteria(reference_data, allow_class = "data.frame", .call_depth = -2)
  meet_criteria(substitute_missing_r_breakpoint, allow_class = "logical", has_length = 1, .call_depth = -2)
  meet_criteria(include_screening, allow_class = "logical", has_length = 1, .call_depth = -2)
  meet_criteria(include_PKPD, allow_class = "logical", has_length = 1, .call_depth = -2)
  check_reference_data(reference_data, .call_depth = -2)
  meet_criteria(breakpoint_type, allow_class = "character", is_in = reference_data$type, has_length = 1, .call_depth = -2)
  meet_criteria(host, allow_class = c("character", "factor"), allow_NULL = TRUE, allow_NA = TRUE, .call_depth = -2)
  meet_criteria(verbose, allow_class = "logical", has_length = 1, .call_depth = -2)
  meet_criteria(info, allow_class = "logical", has_length = 1, .call_depth = -2)

  # backward compatibilty
  dots <- list(...)
  dots <- dots[which(!names(dots) %in% c("warn", "mo.bak", "is_data.frame"))]
  if (length(dots) != 0) {
    warning_("These arguments in `as.sir()` are no longer used: ", vector_and(names(dots), quotes = "`"), ".", call = FALSE)
  }

  current_sir_interpretation_history <- NROW(AMR_env$sir_interpretation_history)

  if (isTRUE(info) && message_not_thrown_before("as.sir", "sir_interpretation_history")) {
    message()
    message_("Run `sir_interpretation_history()` afterwards to retrieve a logbook with all the details of the breakpoint interpretations.\n\n", add_fn = font_green)
  }

  current_df <- tryCatch(get_current_data(NA, 0), error = function(e) NULL)

  # get guideline
  if (!is.null(current_df) && length(guideline) == 1 && guideline %in% colnames(current_df) && any(current_df[[guideline]] %like% "CLSI|EUCAST", na.rm = TRUE)) {
    guideline <- current_df[[guideline]]
  }
  guideline_coerced <- get_guideline(guideline, reference_data)

  # get host
  if (breakpoint_type == "animal") {
    if (is.null(host)) {
      host <- "dogs"
      if (isTRUE(info) && message_not_thrown_before("as.sir", "host_missing")) {
        message_("Animal hosts not set in `host`, assuming `host = \"dogs\"`, since these have the highest breakpoint availability.\n\n")
      }
    }
  } else {
    if (!is.null(host) && !all(toupper(as.character(host)) %in% c("HUMAN", "ECOFF"))) {
      if (isTRUE(info) && message_not_thrown_before("as.sir", "assumed_breakpoint_animal")) {
        message_("Assuming `breakpoint_type = \"animal\"`, since `host` is set.", ifelse(guideline_coerced %like% "EUCAST", " Do you also need to set `guideline = \"CLSI\"`?", ""), "\n\n")
      }
      breakpoint_type <- "animal"
    } else {
      host <- breakpoint_type
    }
  }

  if (!is.null(host) && !all(toupper(as.character(host)) %in% c("HUMAN", "ECOFF"))) {
    if (!is.null(current_df) && length(host) == 1 && host %in% colnames(current_df) && any(current_df[[host]] %like% "[A-Z]", na.rm = TRUE)) {
      host <- current_df[[host]]
    } else if (length(host) != length(x)) {
      # for dplyr's across()
      cur_column_dplyr <- import_fn("cur_column", "dplyr", error_on_fail = FALSE)
      if (!is.null(cur_column_dplyr) && is.data.frame(current_df)) {
        # try to get current column, which will only be available when in across()
        host <- tryCatch(cur_column_dplyr(),
          error = function(e) host
        )
      }
    }
  }
  host.bak <- host
  host <- convert_host(host)
  if (any(is.na(host) & !is.na(host.bak)) && isTRUE(info) && message_not_thrown_before("as.sir", "missing_hosts")) {
    warning_("The following animal host(s) could not be coerced: ", vector_and(host.bak[is.na(host) & !is.na(host.bak)]), immediate = TRUE)
    message() # new line
  }
  # TODO add a switch to turn this off? In interactive sessions perhaps ask the user. Default should be On.
  # if (breakpoint_type == "animal" && isTRUE(info) && message_not_thrown_before("as.sir", "host_missing_breakpoints")) {
  #   if (guideline_coerced %like% "CLSI") {
  #     message_("Please note that in the absence of specific veterinary breakpoints for certain animal hosts, the CLSI guideline VET09 will be applied where possible.\n\n")
  #   }
  # }

  # get ab
  if (!is.null(current_df) && length(ab) == 1 && ab %in% colnames(current_df) && any(current_df[[ab]] %like% "[A-Z]", na.rm = TRUE)) {
    ab <- current_df[[ab]]
  } else if (length(ab) != length(x)) {
    # for dplyr's across()
    cur_column_dplyr <- import_fn("cur_column", "dplyr", error_on_fail = FALSE)
    if (!is.null(cur_column_dplyr) && is.data.frame(current_df)) {
      # try to get current column, which will only be available when in across()
      ab <- tryCatch(cur_column_dplyr(),
        error = function(e) ab
      )
    }
  }

  # get mo
  if (!is.null(current_df) && length(mo) == 1 && mo %in% colnames(current_df)) {
    mo_var_found <- paste0(" based on column '", font_bold(mo), "'")
    mo <- current_df[[mo]]
  } else if (length(mo) != length(x)) {
    mo_var_found <- ""
    if (is.null(mo)) {
      tryCatch(
        {
          df <- get_current_data(arg_name = "mo", call = -3) # will return an error if not found
          mo <- NULL
          try(
            {
              mo <- suppressMessages(search_type_in_df(df, "mo", add_col_prefix = FALSE))
            },
            silent = TRUE
          )
          if (!is.null(df) && !is.null(mo) && is.data.frame(df)) {
            mo_var_found <- paste0(" based on column '", font_bold(mo), "'")
            mo <- df[, mo, drop = TRUE]
          }
        },
        error = function(e) {
          mo <- NULL
        }
      )
    }
  } else {
    mo_var_found <- ""
  }
  if (is.null(mo)) {
    stop_("No information was supplied about the microorganisms (missing argument `mo` and no column of class 'mo' found). See ?as.sir.\n\n",
      "To transform certain columns with e.g. mutate(), use `data %>% mutate(across(..., as.sir, mo = x))`, where x is your column with microorganisms.\n",
      "To transform all ", method_long, " in a data set, use `data %>% as.sir()` or `data %>% mutate_if(is.", method_short, ", as.sir)`.",
      call = FALSE
    )
  }

  # get uti
  if (!is.null(current_df) && length(uti) == 1 && uti %in% colnames(current_df)) {
    uti <- current_df[[uti]]
  } else if (length(uti) != length(x)) {
    if (is.null(uti)) {
      tryCatch(
        {
          df <- get_current_data(arg_name = "uti", call = -3) # will return an error if not found
          uti <- NULL
          try(
            {
              uti <- suppressMessages(search_type_in_df(df, "uti", add_col_prefix = FALSE))
            },
            silent = TRUE
          )
          if (!is.null(df) && !is.null(uti) && is.data.frame(df)) {
            uti <- df[, uti, drop = TRUE]
          }
        },
        error = function(e) {
          uti <- NULL
        }
      )
    }
  }
  # TODO set uti to specimen column here


  if (length(ab) == 1 && ab %like% paste0("as.", method_short)) {
    stop_("No unambiguous name was supplied about the antibiotic (argument `ab`). See ?as.sir.", call = FALSE)
  }

  ab.bak <- trimws2(ab)
  ab <- suppressWarnings(as.ab(ab))
  if (!is.null(list(...)$mo.bak)) {
    mo.bak <- list(...)$mo.bak
  } else {
    mo.bak <- mo
  }
  mo.bak <- trimws2(mo.bak)
  # be sure to take current taxonomy, as the 'clinical_breakpoints' data set only contains current taxonomy
  mo <- suppressWarnings(suppressMessages(as.mo(mo, keep_synonyms = FALSE, info = FALSE)))
  if (all(is.na(ab))) {
    if (isTRUE(info)) {
      message_("Returning NAs for unknown antibiotic: ", vector_and(ab.bak, sort = FALSE, quotes = TRUE),
        ". Rename this column to a valid name or code, and check the output with `as.ab()`.",
        add_fn = font_red,
        as_note = FALSE
      )
    }
    return(as.sir(rep(NA, length(x))))
  }
  if (length(mo) == 1) {
    mo <- rep(mo, length(x))
  }
  if (length(ab) == 1) {
    ab <- rep(ab, length(x))
    ab.bak <- rep(ab.bak, length(ab))
  }
  if (length(host) == 1) {
    host <- rep(host, length(x))
  }
  if (is.null(uti)) {
    uti <- NA
  }
  if (length(uti) == 1) {
    uti <- rep(uti, length(x))
  }
  if (isTRUE(add_intrinsic_resistance) && guideline_coerced %unlike% "EUCAST") {
    if (isTRUE(info) && message_not_thrown_before("as.sir", "intrinsic")) {
      message_("in `as.sir()`: using 'add_intrinsic_resistance' is only useful when using EUCAST guidelines, since the rules for intrinsic resistance are based on EUCAST.",
        add_fn = font_red
      )
    }
  }

  # format agents ----
  agent_formatted <- paste0("'", font_bold(ab.bak, collapse = NULL), "'")
  agent_name <- ab_name(ab, tolower = TRUE, language = NULL)
  same_ab <- generalise_antibiotic_name(ab) == generalise_antibiotic_name(agent_name)
  same_ab.bak <- generalise_antibiotic_name(ab.bak) == generalise_antibiotic_name(agent_name)
  agent_formatted[same_ab.bak] <- paste0(agent_formatted[same_ab.bak], " (", ab[same_ab.bak], ")")
  agent_formatted[!same_ab.bak & !same_ab] <- paste0(
    agent_formatted[!same_ab.bak & !same_ab],
    " (", ifelse(ab.bak[!same_ab.bak & !same_ab] == ab[!same_ab.bak & !same_ab],
      "",
      paste0(ab[!same_ab.bak & !same_ab], ", ")
    ),
    agent_name[!same_ab.bak & !same_ab],
    ")"
  )
  # this intro text will also be printed in the progress bar if the `progress` package is installed
  intro_txt <- paste0(
    "Interpreting ", method_long, ": ", ifelse(isTRUE(list(...)$is_data.frame), "column ", ""),
    ifelse(length(unique(agent_formatted)) == 1, unique(agent_formatted), paste0(vector_and(agent_formatted, quotes = FALSE, sort = FALSE))),
    mo_var_found,
    ifelse(identical(reference_data, AMR::clinical_breakpoints),
      paste0(", ", vector_and(font_bold(guideline_coerced, collapse = NULL), quotes = FALSE)),
      ""
    ),
    "... "
  )

  # prepare used arguments ----
  method <- method_short

  metadata_mo <- get_mo_uncertainties()

  rise_warning <- FALSE
  rise_notes <- FALSE
  method_coerced <- toupper(method)
  ab_coerced <- as.ab(ab)

  if (identical(reference_data, AMR::clinical_breakpoints)) {
    breakpoints <- reference_data %pm>%
      subset(guideline %in% guideline_coerced & method == method_coerced & ab %in% ab_coerced)
    if (any(ab_coerced == "AMX") && nrow(breakpoints[breakpoints$ab == "AMX", , drop = FALSE]) == 0) {
      ab_coerced[ab_coerced == "AMX"] <- "AMP"
      breakpoints <- reference_data %pm>%
        subset(guideline %in% guideline_coerced & method == method_coerced & ab %in% ab_coerced)
    }
  } else {
    breakpoints <- reference_data %pm>%
      subset(method == method_coerced & ab %in% ab_coerced)
  }


  # create the unique data frame to be filled to save time
  df <- data.frame(
    values = x,
    values_bak = x,
    guideline = guideline_coerced,
    mo = mo,
    ab = ab,
    result = NA_sir_,
    uti = uti,
    host = host,
    stringsAsFactors = FALSE
  )
  if (method == "mic") {
    if (any(guideline_coerced %like% "CLSI")) {
      # CLSI says: if MIC is not a log2 value it must be rounded up to the nearest log2 value
      log2_levels <- 2^c(-9:12)
      df$values[which(df$guideline %like% "CLSI")] <- vapply(
        FUN.VALUE = character(1),
        df$values[which(df$guideline %like% "CLSI")],
        function(mic_val) {
          if (is.na(mic_val)) {
            return(NA_character_)
          } else {
            # find the smallest log2 level that is >= mic_val
            log2_val <- log2_levels[which(log2_levels >= as.double(mic_val))][1]
            if (!is.na(log2_val) && as.double(mic_val) != log2_val) {
              if (message_not_thrown_before("as.sir", "CLSI", "MICupscaling")) {
                warning_("Some MICs were converted to the nearest higher log2 level, following the CLSI interpretation guideline.")
              }
              return(as.character(log2_val)) # will be MIC later
            } else {
              return(as.character(mic_val))
            }
          }
        }
      )
    }
    df$values <- as.mic(df$values)
  } else if (method == "disk") {
    # when as.sir.disk is called directly
    df$values <- as.disk(df$values)
  }

  df_unique <- unique(df[, c("guideline", "mo", "ab", "uti", "host"), drop = FALSE])
  mo_grams <- suppressWarnings(suppressMessages(mo_gramstain(df_unique$mo, language = NULL, keep_synonyms = FALSE)))

  # get all breakpoints, use humans as backup for animals
  breakpoint_type_lookup <- breakpoint_type
  if (breakpoint_type == "animal") {
    breakpoint_type_lookup <- c(breakpoint_type, "human")
  }
  breakpoints <- breakpoints %pm>%
    subset(type %in% breakpoint_type_lookup)

  if (isFALSE(include_screening)) {
    # remove screening rules from the breakpoints table
    breakpoints <- breakpoints %pm>%
      subset(site %unlike% "screen" & ref_tbl %unlike% "screen")
  }
  if (isFALSE(include_PKPD)) {
    # remove PKPD rules from the breakpoints table
    breakpoints <- breakpoints %pm>%
      subset(mo != "UNKNOWN" & ref_tbl %unlike% "PK.*PD")
  }

  notes <- character(0)

  if (any(guideline_coerced %like% "EUCAST")) {
    any_is_intrinsic_resistant <- FALSE
    add_intrinsic_resistance_to_AMR_env()
  }

  if (isTRUE(info) && nrow(df_unique) < 10 || nrow(breakpoints) == 0) {
    # only print intro under 10 items, otherwise progressbar will print this and then it will be printed double
    message_(intro_txt, appendLF = FALSE, as_note = FALSE)
  }
  p <- progress_ticker(n = nrow(df_unique), n_min = 10, title = font_blue(intro_txt), only_bar_percent = TRUE)
  has_progress_bar <- !is.null(import_fn("progress_bar", "progress", error_on_fail = FALSE)) && nrow(df_unique) >= 10
  on.exit(close(p))

  if (nrow(breakpoints) == 0) {
    # apparently no breakpoints found
    if (isTRUE(info)) {
      message(
        paste0(font_rose_bg(" WARNING "), "\n"),
        font_black(paste0(
          "  ", AMR_env$bullet_icon, " No ", method_coerced, " breakpoints available for ",
          suppressMessages(suppressWarnings(ab_name(unique(ab_coerced), language = NULL, tolower = TRUE))),
          " (", unique(ab_coerced), ")."
        ), collapse = "\n")
      )
    }

    load_mo_uncertainties(metadata_mo)
    return(rep(NA_sir_, nrow(df)))
  }

  vectorise_log_entry <- function(x, len) {
    if (length(x) == 1 && len > 1) {
      rep(x, len)
    } else {
      x
    }
  }

  # run the rules (df_unique is a row combination per mo/ab/uti/host) ----
  for (i in seq_len(nrow(df_unique))) {
    p$tick()
    guideline_current <- df_unique[i, "guideline", drop = TRUE]
    mo_current <- df_unique[i, "mo", drop = TRUE]
    mo_gram_current <- mo_grams[i]
    ab_current <- df_unique[i, "ab", drop = TRUE]
    host_current <- df_unique[i, "host", drop = TRUE]
    uti_current <- df_unique[i, "uti", drop = TRUE]
    notes_current <- character(0)
    rows <- which(as.character(df$mo) == mo_current & df$ab == ab_current & df$host == host_current & df$guideline == guideline_current)
    if (!is.na(uti_current)) {
      # also filter on UTIs
      rows <- rows[df$uti[rows] == uti_current]
    }

    if (length(rows) == 0) {
      # this can happen if a host is unavailable, just continue with the next one, since a note about hosts having NA are already given at this point
      next
    }
    values <- df[rows, "values", drop = TRUE]
    values_bak <- df[rows, "values_bak", drop = TRUE]
    notes_current <- rep("", length(rows))
    new_sir <- rep(NA_sir_, length(rows))

    # find different mo properties, as fast as possible
    # TODO in case of VET09, we need to keep E. coli, also when users have Proteus in their data set
    # TODO look up which species, at least E. coli - also Staph or Strep?
    mo_current_genus <- AMR_env$MO_lookup$mo[match(AMR_env$MO_lookup$genus[match(mo_current, AMR_env$MO_lookup$mo)], AMR_env$MO_lookup$fullname)]
    mo_current_family <- AMR_env$MO_lookup$mo[match(AMR_env$MO_lookup$family[match(mo_current, AMR_env$MO_lookup$mo)], AMR_env$MO_lookup$fullname)]
    mo_current_order <- AMR_env$MO_lookup$mo[match(AMR_env$MO_lookup$order[match(mo_current, AMR_env$MO_lookup$mo)], AMR_env$MO_lookup$fullname)]
    mo_current_class <- AMR_env$MO_lookup$mo[match(AMR_env$MO_lookup$class[match(mo_current, AMR_env$MO_lookup$mo)], AMR_env$MO_lookup$fullname)]
    mo_current_rank <- AMR_env$MO_lookup$rank[match(mo_current, AMR_env$MO_lookup$mo)]
    mo_current_name <- AMR_env$MO_lookup$fullname[match(mo_current, AMR_env$MO_lookup$mo)]
    mo_current_oxygen_tolerance <- AMR_env$MO_lookup$oxygen_tolerance[match(mo_current, AMR_env$MO_lookup$mo)]
    if (mo_current %in% AMR::microorganisms.groups$mo) {
      # get the species group (might be more than 1 entry)
      mo_current_species_group <- AMR::microorganisms.groups$mo_group[which(AMR::microorganisms.groups$mo == mo_current)]
    } else {
      mo_current_species_group <- NULL
    }
    mo_current_gram <- structure(character(0), class = c("mo", "character"))
    if (identical(mo_gram_current, "Gram-negative")) {
      mo_current_gram <- c(mo_current_gram, "B_GRAMN")
      if (identical(mo_current_oxygen_tolerance, "anaerobe")) {
        mo_current_gram <- c(mo_current_gram, "B_ANAER", "B_ANAER-NEG")
      }
    } else if (identical(mo_gram_current, "Gram-positive")) {
      mo_current_gram <- c(mo_current_gram, "B_GRAMP")
      if (identical(mo_current_oxygen_tolerance, "anaerobe")) {
        mo_current_gram <- c(mo_current_gram, "B_ANAER", "B_ANAER-POS")
      }
    }
    mo_current_other <- structure("UNKNOWN", class = c("mo", "character"))
    # formatted for notes
    mo_formatted <- mo_current_name
    if (!mo_current_rank %in% c("kingdom", "phylum", "class", "order")) {
      mo_formatted <- font_italic(mo_formatted, collapse = NULL)
    }
    ab_formatted <- paste0(
      suppressMessages(suppressWarnings(ab_name(ab_current, language = NULL, tolower = TRUE))),
      " (", ab_current, ")"
    )

    # gather all available breakpoints for current MO
    # TODO for VET09 do not filter out E. coli and such
    breakpoints_current <- breakpoints %pm>%
      subset(ab == ab_current & guideline == guideline_current) %pm>%
      subset(mo %in% c(
        mo_current, mo_current_genus, mo_current_family,
        mo_current_order, mo_current_class,
        mo_current_species_group,
        mo_current_gram,
        mo_current_other
      ))


    # TODO are operators considered??
    # This seems to not work well: as.sir(as.mic(c(4, ">4", ">=4", 8, ">8", ">=8")), ab = "AMC", mo = "E. coli", breakpoint_type = "animal", host = "dogs", guideline = "CLSI 2024")

    if (breakpoint_type == "animal") {
      # 2025-03-13 for now, only strictly follow guideline for current host, no extrapolation
      breakpoints_current <- breakpoints_current[which(breakpoints_current$host == host_current), , drop = FALSE]
    }

    ## fall-back methods for veterinary guidelines ----
    ## TODO actually implement this well
    if (FALSE) {
      # if (breakpoint_type == "animal" && !host_current %in% breakpoints_current$host) {
      if (guideline_coerced %like% "CLSI") {
        # VET09 says that staph/strep/enterococcus BP can be extrapolated to all Gr+ cocci except for intrinsic resistance, so take all Gr+ cocci:
        gram_plus_cocci_vet09 <- microorganisms$mo[microorganisms$genus %in% c("Staphylococcus", "Streptococcus", "Peptostreptococcus", "Aerococcus", "Micrococcus") & microorganisms$rank == "genus"] # TODO should probably include genera that were either of these before

        # HUMAN SUBSTITUTES
        if (ab_current == "AZM" && mo_current_genus %in% gram_plus_cocci_vet09 && host_current %in% c("dogs", "cats", "horse")) {
          # azithro can take human breakpoints for these agents
          breakpoints_current <- breakpoints_current %pm>% subset(host == "human")
          notes_current <- c(notes_current, paste0("Using ", font_bold("human"), " breakpoints for ", ab_formatted, " in Gram-positive cocci based on CLSI VET09."))
        } else if (ab_current == "CTX" && mo_current_order == "B_[ORD]_ENTRBCTR" && host_current %in% c("dogs", "cats", "horse")) {
          # cefotax can take human breakpoints for these agents
          breakpoints_current <- breakpoints_current %pm>% subset(host == "human")
          notes_current <- c(notes_current, paste0("Using ", font_bold("human"), " breakpoints for ", ab_formatted, " in Enterobacterales based on CLSI VET09."))
        } else if (ab_current == "CAZ" && (mo_current_order == "B_[ORD]_ENTRBCTR" | mo_current == "B_PSDMN_AERG") && host_current %in% c("dogs", "cats", "horse")) {
          # cefta can take human breakpoints for these agents
          breakpoints_current <- breakpoints_current %pm>% subset(host == "human")
          notes_current <- c(notes_current, paste0("Using ", font_bold("human"), " breakpoints for ", ab_formatted, " in Enterobacterales and ", font_italic("P. aeruginosa"), " based on CLSI VET09."))
        } else if (ab_current == "ERY" && mo_current_genus %in% gram_plus_cocci_vet09 && host_current %in% c("dogs", "cats", "horse")) {
          # erythro can take human breakpoints for these agents
          breakpoints_current <- breakpoints_current %pm>% subset(host == "human")
          notes_current <- c(notes_current, paste0("Using ", font_bold("human"), " breakpoints for ", ab_formatted, " in Gram-positive cocci based on CLSI VET09."))
        } else if (ab_current == "IPM" && (mo_current_order == "B_[ORD]_ENTRBCTR" | mo_current == "B_PSDMN_AERG") && host_current %in% c("dogs", "cats", "horse")) {
          # imipenem can take human breakpoints for these agents
          breakpoints_current <- breakpoints_current %pm>% subset(host == "human")
          notes_current <- c(notes_current, paste0("Using ", font_bold("human"), " breakpoints for ", ab_formatted, " in Enterobacterales and ", font_italic("P. aeruginosa"), " based on CLSI VET09."))
        } else if (ab_current == "LNZ" && mo_current_genus %in% gram_plus_cocci_vet09 && host_current %in% c("dogs", "cats")) {
          # linezolid can take human breakpoints for these agents
          breakpoints_current <- breakpoints_current %pm>% subset(host == "human")
          notes_current <- c(notes_current, paste0("Using ", font_bold("human"), " breakpoints for ", ab_formatted, " in staphylococci/enterococci based on CLSI VET09."))
        } else if (ab_current == "NIT" && host_current %in% c("dogs", "cats")) {
          # nitro can take human breakpoints for these agents
          breakpoints_current <- breakpoints_current %pm>% subset(host == "human")
          notes_current <- c(notes_current, paste0("Using ", font_bold("human"), " breakpoints for ", ab_formatted, " based on CLSI VET09."))
        } else if (ab_current == "PEN" && mo_current_genus %in% gram_plus_cocci_vet09 && host_current %in% c("dogs", "cats")) {
          # penicillin can take human breakpoints for these agents
          breakpoints_current <- breakpoints_current %pm>% subset(host == "human")
          notes_current <- c(notes_current, paste0("Using ", font_bold("human"), " breakpoints for ", ab_formatted, " in Gram-positive cocci based on CLSI VET09."))
        } else if (ab_current == "RIF" && mo_current_genus %in% gram_plus_cocci_vet09 && host_current %in% c("dogs", "cats")) {
          # rifampicin can take human breakpoints for staphylococci
          breakpoints_current <- breakpoints_current %pm>% subset(host == "human")
          notes_current <- c(notes_current, paste0("Using ", font_bold("human"), " breakpoints for ", ab_formatted, " in staphylococci based on CLSI VET09."))
        } else if (ab_current == "SXT" && host_current %in% c("dogs", "cats", "horse")) {
          # trimethoprim-sulfamethoxazole (TMS) can take human breakpoints for these agents
          breakpoints_current <- breakpoints_current %pm>% subset(host == "human")
          notes_current <- c(notes_current, paste0("Using ", font_bold("human"), " breakpoints for ", ab_formatted, " based on CLSI VET09."))
        } else if (ab_current == "VAN" && host_current %in% c("dogs", "cats", "horse")) {
          # vancomycin can take human breakpoints in these hosts
          breakpoints_current <- breakpoints_current %pm>% subset(host == "human")
          notes_current <- c(notes_current, paste0("Using ", font_bold("human"), " breakpoints for ", ab_formatted, " based on CLSI VET09."))
        } else if (host_current %in% c("dogs", "cats") && (mo_current_genus %in% c("B_AMYCS", "B_NOCRD", "B_CMPYL", "B_CRYNB", "B_ENTRC", "B_MYCBC", "B_PSDMN", "B_AERMN") | mo_current_class == "B_[CLS]_BTPRTBCT" | mo_current == "B_LISTR_MNCY")) {
          # dog breakpoints if no canine/feline
          # TODO do we still have dogs breakpoints at this point???
          breakpoints_current <- breakpoints_current %pm>% subset(host == "human") # WRONG
          notes_current <- c(notes_current, paste0("Using ", font_bold("human"), " breakpoints for ", mo_formatted, " based on CLSI VET09."))
        } else {
          # no specific CLSI solution for this, so only filter on current host (if no breakpoints available -> too bad)
          breakpoints_current <- breakpoints_current %pm>%
            subset(host == host_current)
        }
      }
    }

    if (NROW(breakpoints_current) == 0) {
      out <- data.frame(
        # recycling 1 to 2 rows does not always seem to work, which is why vectorise_log_entry() was added
        datetime = vectorise_log_entry(Sys.time(), length(rows)),
        index = rows,
        method = vectorise_log_entry(method_coerced, length(rows)),
        ab_given = vectorise_log_entry(ab.bak[match(ab_current, df$ab)][1], length(rows)),
        mo_given = vectorise_log_entry(mo.bak[match(mo_current, df$mo)][1], length(rows)),
        host_given = vectorise_log_entry(host.bak[match(host_current, df$host)][1], length(rows)),
        input_given = vectorise_log_entry(as.character(values_bak), length(rows)),
        ab = vectorise_log_entry(ab_current, length(rows)),
        mo = vectorise_log_entry(mo_current, length(rows)),
        host = vectorise_log_entry(host_current, length(rows)),
        input = vectorise_log_entry(as.character(values), length(rows)),
        outcome = vectorise_log_entry(NA_sir_, length(rows)),
        notes = vectorise_log_entry("No breakpoint available", length(rows)),
        guideline = vectorise_log_entry(guideline_current, length(rows)),
        ref_table = vectorise_log_entry(NA_character_, length(rows)),
        uti = vectorise_log_entry(uti_current, length(rows)),
        breakpoint_S_R = vectorise_log_entry(NA_character_, length(rows)),
        stringsAsFactors = FALSE
      )
      out <- subset(out, !is.na(input_given))
      AMR_env$sir_interpretation_history <- rbind_AMR(AMR_env$sir_interpretation_history, out)
      notes <- c(notes, notes_current)
      next
    }

    # sort on host and taxonomic rank
    # (this will e.g. prefer 'species' breakpoints over 'order' breakpoints)
    if (is.na(uti_current)) {
      breakpoints_current <- breakpoints_current %pm>%
        #  `uti` is a column in the data set
        # this will put UTI = FALSE first, then UTI = NA, then UTI = TRUE
        pm_mutate(uti_index = ifelse(!is.na(uti) & uti == FALSE, 1,
          ifelse(is.na(uti), 2,
            3
          )
        )) %pm>%
        # be as specific as possible (i.e. prefer species over genus):
        pm_arrange(rank_index, uti_index)
    } else if (uti_current == TRUE) {
      breakpoints_current <- breakpoints_current %pm>%
        subset(uti == TRUE) %pm>%
        # be as specific as possible (i.e. prefer species over genus):
        pm_arrange(rank_index)
    }

    # throw messages for different body sites
    site <- breakpoints_current[1L, "site", drop = FALSE] # this is the one we'll take
    if (is.na(site)) {
      site <- paste0("an unspecified body site")
    } else {
      site <- paste0("body site '", site, "'")
    }
    if (nrow(breakpoints_current) == 1 && all(breakpoints_current$uti == TRUE) && is.na(uti_current) && message_not_thrown_before("as.sir", "uti", ab_current)) {
      # only UTI breakpoints available
      notes_current <- paste0(
        notes_current, "\n",
        paste0("Breakpoints for ", font_bold(ab_formatted), " in ", mo_formatted, " are only available for (uncomplicated) urinary tract infections (UTI); assuming `uti = TRUE`.")
      )
    } else if (nrow(breakpoints_current) > 1 && length(unique(breakpoints_current$site)) > 1 && any(is.na(uti_current)) && all(c(TRUE, FALSE) %in% breakpoints_current$uti, na.rm = TRUE) && message_not_thrown_before("as.sir", "siteUTI", mo_current, ab_current)) {
      # both UTI and Non-UTI breakpoints available
      breakpoints_current <- breakpoints_current %pm>%
        pm_filter(uti == FALSE)
      notes_current <- paste0(
        notes_current, "\n",
        paste0("Breakpoints for UTI ", font_bold("and"), " non-UTI available for ", ab_formatted, " in ", mo_formatted, " - assuming ", site, ". Use argument `uti` to set which isolates are from urine. See `?as.sir`.")
      )
    } else if (nrow(breakpoints_current) > 1 && length(unique(breakpoints_current$site)) > 1 && all(breakpoints_current$uti == FALSE, na.rm = TRUE) && message_not_thrown_before("as.sir", "siteOther", mo_current, ab_current)) {
      # breakpoints for multiple body sites available
      notes_current <- paste0(
        notes_current, "\n",
        paste0("Multiple breakpoints available for ", font_bold(ab_formatted), " in ", mo_formatted, " - assuming ", site, ".")
      )
    }

    # first check if mo is intrinsic resistant
    if (isTRUE(add_intrinsic_resistance) && guideline_current %like% "EUCAST" && paste(mo_current, ab_current) %in% AMR_env$intrinsic_resistant) {
      new_sir <- rep(as.sir("R"), length(rows))
      notes_current <- paste0(
        notes_current, "\n",
        paste0("Intrinsic resistance applied for ", ab_formatted, " in ", mo_formatted, "")
      )
    } else if (nrow(breakpoints_current) == 0) {
      # no rules available
      new_sir <- rep(NA_sir_, length(rows))
    } else {
      # then run the rules
      breakpoints_current <- breakpoints_current[1L, , drop = FALSE]

      notes_current <- paste0(
        notes_current, "\n",
        ifelse(breakpoints_current$mo == "UNKNOWN" | breakpoints_current$ref_tbl %like% "PK.*PD",
          "Some PK/PD breakpoints were applied - use `include_PKPD = FALSE` to prevent this",
          ""
        ),
        "\n",
        ifelse(breakpoints_current$site %like% "screen" | breakpoints_current$ref_tbl %like% "screen",
          "Some screening breakpoints were applied - use `include_screening = FALSE` to prevent this",
          ""
        ),
        "\n",
        ifelse(method == "mic" & capped_mic_handling %in% c("conservative", "inverse") & as.character(values_bak) %like% "^[<][0-9]",
          paste0("MIC values with the operator '<' are all considered 'S' since capped_mic_handling = \"", capped_mic_handling, "\""),
          ""
        ),
        "\n",
        ifelse(method == "mic" & capped_mic_handling %in% c("conservative", "inverse") & as.character(values_bak) %like% "^[>][0-9]",
          paste0("MIC values with the operator '>' are all considered 'R' since capped_mic_handling = \"", capped_mic_handling, "\""),
          ""
        ),
        "\n",
        ifelse(method == "mic" & capped_mic_handling %in% c("conservative", "standard") & as.character(values_bak) %like% "^[><]=[0-9]" & as.double(values) > breakpoints_current$breakpoint_S & as.double(values) < breakpoints_current$breakpoint_R,
          paste0("MIC values within the breakpoint guideline range with the operator '<=' or '>=' are considered 'NI' since capped_mic_handling = \"", capped_mic_handling, "\""),
          ""
        )
      )
      if (isTRUE(substitute_missing_r_breakpoint) && !is.na(breakpoints_current$breakpoint_S) && is.na(breakpoints_current$breakpoint_R)) {
        # breakpoints_current only has 1 row at this moment
        breakpoints_current$breakpoint_R <- breakpoints_current$breakpoint_S
        notes_current <- paste0(
          notes_current, "\n",
          ifelse(!is.na(breakpoints_current$breakpoint_S) & is.na(breakpoints_current$breakpoint_R),
            "NAs because of missing R breakpoints were substituted with R since substitute_missing_r_breakpoint = TRUE",
            ""
          )
        )
      }

      ## actual interpretation ----
      if (method == "mic") {
        new_sir <- case_when_AMR(
          is.na(values) ~ NA_sir_,
          capped_mic_handling %in% c("conservative", "inverse") & as.character(values_bak) %like% "^[<][0-9]" ~ as.sir("S"),
          capped_mic_handling %in% c("conservative", "inverse") & as.character(values_bak) %like% "^[>][0-9]" ~ as.sir("R"),
          capped_mic_handling %in% c("conservative", "standard") & as.character(values_bak) %like% "^[><]=[0-9]" & as.double(values) > breakpoints_current$breakpoint_S & as.double(values) < breakpoints_current$breakpoint_R ~ as.sir("NI"),
          values <= breakpoints_current$breakpoint_S ~ as.sir("S"),
          guideline_current %like% "EUCAST" & values > breakpoints_current$breakpoint_R ~ as.sir("R"),
          guideline_current %like% "CLSI" & values >= breakpoints_current$breakpoint_R ~ as.sir("R"),
          # return "I" or "SDD" when breakpoints are in the middle
          !is.na(breakpoints_current$breakpoint_S) & !is.na(breakpoints_current$breakpoint_R) & breakpoints_current$is_SDD == TRUE ~ as.sir("SDD"),
          !is.na(breakpoints_current$breakpoint_S) & !is.na(breakpoints_current$breakpoint_R) & breakpoints_current$is_SDD == FALSE ~ as.sir("I"),
          # and NA otherwise
          TRUE ~ NA_sir_
        )
      } else if (method == "disk") {
        new_sir <- case_when_AMR(
          is.na(values) ~ NA_sir_,
          as.double(values) >= as.double(breakpoints_current$breakpoint_S) ~ as.sir("S"),
          guideline_current %like% "EUCAST" & as.double(values) < as.double(breakpoints_current$breakpoint_R) ~ as.sir("R"),
          guideline_current %like% "CLSI" & as.double(values) <= as.double(breakpoints_current$breakpoint_R) ~ as.sir("R"),
          # return "I" or "SDD" when breakpoints are in the middle
          !is.na(breakpoints_current$breakpoint_S) & !is.na(breakpoints_current$breakpoint_R) & breakpoints_current$is_SDD == TRUE ~ as.sir("SDD"),
          !is.na(breakpoints_current$breakpoint_S) & !is.na(breakpoints_current$breakpoint_R) & breakpoints_current$is_SDD == FALSE ~ as.sir("I"),
          # and NA otherwise
          TRUE ~ NA_sir_
        )
      }

      # write to verbose output
      notes_current <- trimws2(notes_current)
      notes_current[notes_current == ""] <- NA_character_
      out <- data.frame(
        # recycling 1 to 2 rows does not always seem to work, which is why vectorise_log_entry() was added
        datetime = vectorise_log_entry(Sys.time(), length(rows)),
        index = rows,
        method = vectorise_log_entry(method_coerced, length(rows)),
        ab_given = vectorise_log_entry(ab.bak[match(ab_current, df$ab)][1], length(rows)),
        mo_given = vectorise_log_entry(mo.bak[match(mo_current, df$mo)][1], length(rows)),
        host_given = vectorise_log_entry(host.bak[match(host_current, df$host)][1], length(rows)),
        input_given = vectorise_log_entry(as.character(values_bak), length(rows)),
        ab = vectorise_log_entry(breakpoints_current[, "ab", drop = TRUE], length(rows)),
        mo = vectorise_log_entry(breakpoints_current[, "mo", drop = TRUE], length(rows)),
        host = vectorise_log_entry(breakpoints_current[, "host", drop = TRUE], length(rows)),
        input = vectorise_log_entry(as.character(values), length(rows)),
        outcome = vectorise_log_entry(as.sir(new_sir), length(rows)),
        notes = font_stripstyle(notes_current), # vectorise_log_entry(paste0(font_stripstyle(notes_current), collapse = "\n"), length(rows)),
        guideline = vectorise_log_entry(guideline_current, length(rows)),
        ref_table = vectorise_log_entry(breakpoints_current[, "ref_tbl", drop = TRUE], length(rows)),
        uti = vectorise_log_entry(breakpoints_current[, "uti", drop = TRUE], length(rows)),
        breakpoint_S_R = vectorise_log_entry(paste0(breakpoints_current[, "breakpoint_S", drop = TRUE], "-", breakpoints_current[, "breakpoint_R", drop = TRUE]), length(rows)),
        stringsAsFactors = FALSE
      )
      out <- subset(out, !is.na(input_given))
      AMR_env$sir_interpretation_history <- rbind_AMR(AMR_env$sir_interpretation_history, out)
    }

    notes <- c(notes, notes_current)
    df[rows, "result"] <- new_sir
  }

  close(p)
  # printing messages
  if (isTRUE(info)) {
    if (has_progress_bar == TRUE) {
      # the progress bar has overwritten the intro text, so:
      message_(intro_txt, appendLF = FALSE, as_note = FALSE)
    }
    notes <- notes[!trimws2(notes) %in% c("", NA_character_)]
    if (length(notes) > 0) {
      if (isTRUE(rise_warning)) {
        message(font_rose_bg(" WARNING "))
      } else {
        message(font_yellow_bg(" NOTE "))
      }
      notes <- unique(notes)
      if (isTRUE(verbose) || length(notes) == 1 || NROW(AMR_env$sir_interpretation_history) == 0) {
        for (i in seq_along(notes)) {
          message(word_wrap("  ", AMR_env$bullet_icon, " ", notes[i], add_fn = font_black))
        }
      } else {
        message(word_wrap("  ", AMR_env$bullet_icon, " There were multiple notes. Print or View `sir_interpretation_history()` to examine them, or use `as.sir(..., verbose = TRUE)` next time to directly print them here.", add_fn = font_black))
      }
    } else {
      message(font_green_bg(" OK "))
    }
  }

  load_mo_uncertainties(metadata_mo)

  # reorder AMR_env$sir_interpretation_history to get a clean ordering on index
  old_part <- AMR_env$sir_interpretation_history[seq_len(current_sir_interpretation_history), , drop = FALSE]
  new_part <- AMR_env$sir_interpretation_history[c((current_sir_interpretation_history + 1):NROW(AMR_env$sir_interpretation_history)), , drop = FALSE]
  new_part <- new_part[order(new_part$index), , drop = FALSE]
  AMR_env$sir_interpretation_history <- rbind_AMR(old_part, new_part)

  df$result
}

#' @rdname as.sir
#' @param clean A [logical] to indicate whether previously stored results should be forgotten after returning the 'logbook' with results.
#' @export
sir_interpretation_history <- function(clean = FALSE) {
  meet_criteria(clean, allow_class = "logical", has_length = 1)
  out <- AMR_env$sir_interpretation_history
  out$outcome <- as.sir(out$outcome)
  if (isTRUE(clean)) {
    AMR_env$sir_interpretation_history <- AMR_env$sir_interpretation_history[0, , drop = FALSE]
  }
  if (pkg_is_available("tibble")) {
    out <- import_fn("as_tibble", "tibble")(out)
  }
  as_original_data_class(out, class(out), extra_class = "sir_log")
}

#' @method print sir_log
#' @export
#' @noRd
print.sir_log <- function(x, ...) {
  if (NROW(x) == 0) {
    message_("No results to print. Run `as.sir()` on MIC values or disk diffusion zones first to print a 'logbook' data set here.")
    return(invisible(NULL))
  }
  class(x) <- class(x)[class(x) != "sir_log"]
  print(x, ...)
}

# will be exported using s3_register() in R/zzz.R
pillar_shaft.sir <- function(x, ...) {
  out <- trimws(format(x))
  if (has_colour()) {
    # colours will anyway not work when has_colour() == FALSE,
    # but then the indentation should also not be applied
    out[is.na(x)] <- font_grey("  NA")
    out[x == "NI"] <- font_grey_bg("  NI ")
    out[x == "S"] <- font_green_bg("  S  ")
    out[x == "I"] <- font_orange_bg("  I  ")
    out[x == "SDD"] <- font_orange_bg(" SDD ")
    out[x == "R"] <- font_rose_bg("  R  ")
  }
  create_pillar_column(out, align = "left", width = 5)
}

# will be exported using s3_register() in R/zzz.R
type_sum.sir <- function(x, ...) {
  "sir"
}

# will be exported using s3_register() in R/zzz.R
freq.sir <- function(x, ...) {
  x_name <- deparse(substitute(x))
  x_name <- gsub(".*[$]", "", x_name)
  if (x_name %in% c("x", ".")) {
    # try again going through system calls
    x_name <- stats::na.omit(vapply(
      FUN.VALUE = character(1),
      sys.calls(),
      function(call) {
        call_txt <- as.character(call)
        ifelse(call_txt[1] %like% "freq$", call_txt[length(call_txt)], character(0))
      }
    ))[1L]
  }
  ab <- suppressMessages(suppressWarnings(as.ab(x_name)))
  digits <- list(...)$digits
  if (is.null(digits)) {
    digits <- 2
  }
  if (!is.na(ab)) {
    cleaner::freq.default(
      x = x, ...,
      .add_header = list(
        Drug = paste0(ab_name(ab, language = NULL), " (", ab, ", ", paste(ab_atc(ab), collapse = "/"), ")"),
        `Drug group` = ab_group(ab, language = NULL),
        `%SI` = trimws(percentage(susceptibility(x, minimum = 0, as_percent = FALSE),
          digits = digits
        ))
      )
    )
  } else {
    cleaner::freq.default(
      x = x, ...,
      .add_header = list(
        `%SI` = trimws(percentage(susceptibility(x, minimum = 0, as_percent = FALSE),
          digits = digits
        ))
      )
    )
  }
}


# will be exported using s3_register() in R/zzz.R
get_skimmers.sir <- function(column) {
  # get the variable name 'skim_variable'
  name_call <- function(.data) {
    calls <- sys.calls()
    frms <- sys.frames()
    calls_txt <- vapply(calls, function(x) paste(deparse(x), collapse = ""), FUN.VALUE = character(1))
    if (any(calls_txt %like% "skim_variable", na.rm = TRUE)) {
      ind <- which(calls_txt %like% "skim_variable")[1L]
      vars <- tryCatch(eval(parse(text = ".data$skim_variable$sir"), envir = frms[[ind]]),
        error = function(e) NULL
      )
      tryCatch(ab_name(as.character(calls[[length(calls)]][[2]]), language = NULL),
        error = function(e) NA_character_
      )
    } else {
      NA_character_
    }
  }

  skimr::sfl(
    skim_type = "sir",
    ab_name = name_call,
    count_R = count_R,
    count_S = count_susceptible,
    count_I = count_I,
    prop_R = ~ proportion_R(., minimum = 0),
    prop_S = ~ susceptibility(., minimum = 0),
    prop_I = ~ proportion_I(., minimum = 0)
  )
}

#' @method print sir
#' @export
#' @noRd
print.sir <- function(x, ...) {
  x_name <- deparse(substitute(x))
  cat("Class 'sir'\n")
  # TODO for #170
  # if (!is.null(attributes(x)$guideline) && !all(is.na(attributes(x)$guideline))) {
  #   cat(font_blue(word_wrap("These values were interpreted using ",
  #                           font_bold(vector_and(attributes(x)$guideline, quotes = FALSE)),
  #                           " based on ",
  #                           vector_and(attributes(x)$method, quotes = FALSE),
  #                           " values. ",
  #                           "Use `sir_interpretation_history(", x_name, ")` to return a full logbook.")))
  #   cat("\n")
  # }
  print(as.character(x), quote = FALSE)
}


#' @method as.double sir
#' @export
as.double.sir <- function(x, ...) {
  dbls <- rep(NA_real_, length(x))
  dbls[x == "S"] <- 1
  dbls[x %in% c("SDD", "I")] <- 2
  dbls[x == "R"] <- 3
  dbls
}

#' @method droplevels sir
#' @export
#' @noRd
droplevels.sir <- function(x, exclude = if (any(is.na(levels(x)))) NULL else NA, ...) {
  x <- droplevels.factor(x, exclude = exclude, ...)
  class(x) <- c("sir", "ordered", "factor")
  x
}

#' @method summary sir
#' @export
#' @noRd
summary.sir <- function(object, ...) {
  x <- object
  n <- sum(!is.na(x))
  S <- sum(x == "S", na.rm = TRUE)
  SDD <- sum(x == "SDD", na.rm = TRUE)
  I <- sum(x == "I", na.rm = TRUE)
  R <- sum(x == "R", na.rm = TRUE)
  NI <- sum(x == "NI", na.rm = TRUE)
  pad <- function(x) {
    if (is.na(x)) {
      return("??")
    }
    if (x == "0%") {
      x <- " 0.0%"
    }
    if (nchar(x) < 5) {
      x <- paste0(rep(" ", 5 - nchar(x)), x)
    }
    x
  }
  value <- c(
    "Class" = "sir",
    "%S" = paste0(pad(percentage(S / n, digits = 1)), " (n=", S, ")"),
    "%SDD" = paste0(pad(percentage(SDD / n, digits = 1)), " (n=", SDD, ")"),
    "%I" = paste0(pad(percentage(I / n, digits = 1)), " (n=", I, ")"),
    "%R" = paste0(pad(percentage(R / n, digits = 1)), " (n=", R, ")"),
    "%NI" = paste0(pad(percentage(NI / n, digits = 1)), " (n=", NI, ")")
  )
  class(value) <- c("summaryDefault", "table")
  value
}

#' @method [<- sir
#' @export
#' @noRd
"[<-.sir" <- function(i, j, ..., value) {
  value <- as.sir(value)
  y <- NextMethod()
  attributes(y) <- attributes(i)
  y
}
#' @method [[<- sir
#' @export
#' @noRd
"[[<-.sir" <- function(i, j, ..., value) {
  value <- as.sir(value)
  y <- NextMethod()
  attributes(y) <- attributes(i)
  y
}
#' @method c sir
#' @export
#' @noRd
c.sir <- function(...) {
  lst <- list(...)

  # TODO for #170
  # guideline <- vapply(FUN.VALUE = character(1), lst, function(x) attributes(x)$guideline %or% NA_character_)
  # mo <- vapply(FUN.VALUE = character(1), lst, function(x) attributes(x)$mo %or% NA_character_)
  # ab <- vapply(FUN.VALUE = character(1), lst, function(x) attributes(x)$ab %or% NA_character_)
  # method <- vapply(FUN.VALUE = character(1), lst, function(x) attributes(x)$method %or% NA_character_)
  # ref_tbl <- vapply(FUN.VALUE = character(1), lst, function(x) attributes(x)$ref_tbl %or% NA_character_)
  # ref_breakpoints <- vapply(FUN.VALUE = character(1), lst, function(x) attributes(x)$ref_breakpoints %or% NA_character_)

  out <- as.sir(unlist(lapply(list(...), as.character)))

  # TODO for #170
  # if (!all(is.na(guideline))) {
  #   attributes(out)$guideline <- guideline
  #   attributes(out)$mo <- mo
  #   attributes(out)$ab <- ab
  #   attributes(out)$method <- method
  #   attributes(out)$ref_tbl <- ref_tbl
  #   attributes(out)$ref_breakpoints <- ref_breakpoints
  # }

  out
}

#' @method unique sir
#' @export
#' @noRd
unique.sir <- function(x, incomparables = FALSE, ...) {
  y <- NextMethod()
  attributes(y) <- attributes(x)
  y
}

#' @method rep sir
#' @export
#' @noRd
rep.sir <- function(x, ...) {
  y <- NextMethod()
  attributes(y) <- attributes(x)
  y
}

check_reference_data <- function(reference_data, .call_depth) {
  if (!identical(reference_data, AMR::clinical_breakpoints)) {
    class_sir <- vapply(FUN.VALUE = character(1), AMR::clinical_breakpoints, function(x) paste0("<", class(x), ">", collapse = " and "))
    class_ref <- vapply(FUN.VALUE = character(1), reference_data, function(x) paste0("<", class(x), ">", collapse = " and "))
    if (!all(names(class_sir) == names(class_ref))) {
      stop_("`reference_data` must have the same column names as the 'clinical_breakpoints' data set.", call = .call_depth)
    }
    if (!all(class_sir == class_ref)) {
      stop_("`reference_data` must be the same structure as the 'clinical_breakpoints' data set. Column '", names(class_ref[class_sir != class_ref][1]), "' is of class ", class_ref[class_sir != class_ref][1], ", but should be of class ", class_sir[class_sir != class_ref][1], ".", call = .call_depth)
    }
  }
}