1
0
mirror of https://github.com/msberends/AMR.git synced 2025-07-08 17:21:49 +02:00

5 Commits

29 changed files with 815 additions and 115 deletions

View File

@ -1,6 +1,6 @@
Package: AMR Package: AMR
Version: 3.0.0 Version: 3.0.0.9004
Date: 2025-06-01 Date: 2025-06-13
Title: Antimicrobial Resistance Data Analysis Title: Antimicrobial Resistance Data Analysis
Description: Functions to simplify and standardise antimicrobial resistance (AMR) Description: Functions to simplify and standardise antimicrobial resistance (AMR)
data analysis and to work with microbial and antimicrobial properties by data analysis and to work with microbial and antimicrobial properties by
@ -51,6 +51,8 @@ Suggests:
pillar, pillar,
progress, progress,
readxl, readxl,
recipes,
rlang,
rmarkdown, rmarkdown,
rstudioapi, rstudioapi,
rvest, rvest,

View File

@ -106,6 +106,8 @@ S3method(print,mo_uncertainties)
S3method(print,pca) S3method(print,pca)
S3method(print,sir) S3method(print,sir)
S3method(print,sir_log) S3method(print,sir_log)
S3method(print,step_mic_log2)
S3method(print,step_sir_numeric)
S3method(quantile,mic) S3method(quantile,mic)
S3method(rep,ab) S3method(rep,ab)
S3method(rep,av) S3method(rep,av)
@ -159,6 +161,10 @@ export(administrable_per_os)
export(age) export(age)
export(age_groups) export(age_groups)
export(all_antimicrobials) export(all_antimicrobials)
export(all_mic)
export(all_mic_predictors)
export(all_sir)
export(all_sir_predictors)
export(aminoglycosides) export(aminoglycosides)
export(aminopenicillins) export(aminopenicillins)
export(amr_class) export(amr_class)
@ -352,6 +358,8 @@ export(sir_df)
export(sir_interpretation_history) export(sir_interpretation_history)
export(sir_predict) export(sir_predict)
export(skewness) export(skewness)
export(step_mic_log2)
export(step_sir_numeric)
export(streptogramins) export(streptogramins)
export(sulfonamides) export(sulfonamides)
export(susceptibility) export(susceptibility)
@ -388,6 +396,12 @@ if(getRversion() >= "3.0.0") S3method(pillar::type_sum, av)
if(getRversion() >= "3.0.0") S3method(pillar::type_sum, mic) if(getRversion() >= "3.0.0") S3method(pillar::type_sum, mic)
if(getRversion() >= "3.0.0") S3method(pillar::type_sum, mo) if(getRversion() >= "3.0.0") S3method(pillar::type_sum, mo)
if(getRversion() >= "3.0.0") S3method(pillar::type_sum, sir) if(getRversion() >= "3.0.0") S3method(pillar::type_sum, sir)
if(getRversion() >= "3.0.0") S3method(recipes::bake, step_mic_log2)
if(getRversion() >= "3.0.0") S3method(recipes::bake, step_sir_numeric)
if(getRversion() >= "3.0.0") S3method(recipes::prep, step_mic_log2)
if(getRversion() >= "3.0.0") S3method(recipes::prep, step_sir_numeric)
if(getRversion() >= "3.0.0") S3method(recipes::tidy, step_mic_log2)
if(getRversion() >= "3.0.0") S3method(recipes::tidy, step_sir_numeric)
if(getRversion() >= "3.0.0") S3method(skimr::get_skimmers, disk) if(getRversion() >= "3.0.0") S3method(skimr::get_skimmers, disk)
if(getRversion() >= "3.0.0") S3method(skimr::get_skimmers, mic) if(getRversion() >= "3.0.0") S3method(skimr::get_skimmers, mic)
if(getRversion() >= "3.0.0") S3method(skimr::get_skimmers, mo) if(getRversion() >= "3.0.0") S3method(skimr::get_skimmers, mo)

18
NEWS.md
View File

@ -1,3 +1,19 @@
# AMR 3.0.0.9004
### New
* Integration with the **tidymodels** framework to allow seamless use of MIC and SIR data in modelling pipelines via `recipes`
- `step_mic_log2()` to transform `<mic>` columns with log2, and `step_sir_numeric()` to convert `<sir>` columns to numeric
- `tidyselect` helpers: `all_mic()`, `all_mic_predictors()`, `all_sir()`, `all_sir_predictors()`
- Enables seamless use of MIC and SIR data in modelling pipelines via `recipes`
### Changed
* Fixed a bug in `antibiogram()` for when no antimicrobials are set
* Fixed a bug in `as.ab()` for antimicrobial codes with a number in it if they are preceded by a space
* Fixed a bug in `eucast_rules()` for using specific custom rules
* Fixed some specific Dutch translations for antimicrobials
* Updated `random_mic()` and `random_disk()` to set skewedness of the distribution and allow multiple microorganisms
# AMR 3.0.0 # AMR 3.0.0
This package now supports not only tools for AMR data analysis in clinical settings, but also for veterinary and environmental microbiology. This was made possible through a collaboration with the [University of Prince Edward Island's Atlantic Veterinary College](https://www.upei.ca/avc), Canada. To celebrate this great improvement of the package, we also updated the package logo to reflect this change. This package now supports not only tools for AMR data analysis in clinical settings, but also for veterinary and environmental microbiology. This was made possible through a collaboration with the [University of Prince Edward Island's Atlantic Veterinary College](https://www.upei.ca/avc), Canada. To celebrate this great improvement of the package, we also updated the package logo to reflect this change.
@ -122,7 +138,7 @@ This package now supports not only tools for AMR data analysis in clinical setti
## Older Versions ## Older Versions
This changelog only contains changes from AMR v3.0 (March 2025) and later. This changelog only contains changes from AMR v3.0 (June 2025) and later.
* For prior v2 versions, please see [our v2 archive](https://github.com/msberends/AMR/blob/v2.1.1/NEWS.md). * For prior v2 versions, please see [our v2 archive](https://github.com/msberends/AMR/blob/v2.1.1/NEWS.md).
* For prior v1 versions, please see [our v1 archive](https://github.com/msberends/AMR/blob/v1.8.2/NEWS.md). * For prior v1 versions, please see [our v1 archive](https://github.com/msberends/AMR/blob/v1.8.2/NEWS.md).

View File

@ -1244,7 +1244,9 @@ try_colour <- function(..., before, after, collapse = " ") {
} }
} }
is_dark <- function() { is_dark <- function() {
if (is.null(AMR_env$is_dark_theme)) { AMR_env$current_theme <- tryCatch(getExportedValue("getThemeInfo", ns = asNamespace("rstudioapi"))()$editor, error = function(e) NULL)
if (!identical(AMR_env$current_theme, AMR_env$former_theme) || is.null(AMR_env$is_dark_theme)) {
AMR_env$former_theme <- AMR_env$current_theme
AMR_env$is_dark_theme <- !has_colour() || tryCatch(isTRUE(getExportedValue("getThemeInfo", ns = asNamespace("rstudioapi"))()$dark), error = function(e) FALSE) AMR_env$is_dark_theme <- !has_colour() || tryCatch(isTRUE(getExportedValue("getThemeInfo", ns = asNamespace("rstudioapi"))()$dark), error = function(e) FALSE)
} }
isTRUE(AMR_env$is_dark_theme) isTRUE(AMR_env$is_dark_theme)

4
R/ab.R
View File

@ -655,7 +655,9 @@ generalise_antibiotic_name <- function(x) {
x <- trimws(gsub(" +", " ", x, perl = TRUE)) x <- trimws(gsub(" +", " ", x, perl = TRUE))
# remove last couple of words if they numbers or units # remove last couple of words if they numbers or units
x <- gsub("( ([0-9]{3,}|U?M?C?G|L))+$", "", x, perl = TRUE) x <- gsub("( ([0-9]{3,}|U?M?C?G|L))+$", "", x, perl = TRUE)
# move HIGH to end # remove whitespace prior to numbers if preceded by A-Z
x <- gsub("([A-Z]+) +([0-9]+)", "\\1\\2", x, perl = TRUE)
# move HIGH to the end
x <- trimws(gsub("(.*) HIGH(.*)", "\\1\\2 HIGH", x, perl = TRUE)) x <- trimws(gsub("(.*) HIGH(.*)", "\\1\\2 HIGH", x, perl = TRUE))
x x
} }

View File

@ -208,7 +208,7 @@ age_groups <- function(x, split_at = c(12, 25, 55, 75), na.rm = FALSE) {
split_at <- c(0, split_at) split_at <- c(0, split_at)
} }
split_at <- split_at[!is.na(split_at)] split_at <- split_at[!is.na(split_at)]
stop_if(length(split_at) == 1, "invalid value for `split_at`") # only 0 is available stop_if(length(split_at) == 1, "invalid value for `split_at`.") # only 0 is available
# turn input values to 'split_at' indices # turn input values to 'split_at' indices
y <- x y <- x

View File

@ -40,6 +40,7 @@
#' - A combination of the above, using `c()`, e.g.: #' - A combination of the above, using `c()`, e.g.:
#' - `c(aminoglycosides(), "AMP", "AMC")` #' - `c(aminoglycosides(), "AMP", "AMC")`
#' - `c(aminoglycosides(), carbapenems())` #' - `c(aminoglycosides(), carbapenems())`
#' - Column indices using numbers
#' - Combination therapy, indicated by using `"+"`, with or without [antimicrobial selectors][antimicrobial_selectors], e.g.: #' - Combination therapy, indicated by using `"+"`, with or without [antimicrobial selectors][antimicrobial_selectors], e.g.:
#' - `"cipro + genta"` #' - `"cipro + genta"`
#' - `"TZP+TOB"` #' - `"TZP+TOB"`
@ -452,7 +453,7 @@ antibiogram.default <- function(x,
deprecation_warning("antibiotics", "antimicrobials", fn = "antibiogram", is_argument = TRUE) deprecation_warning("antibiotics", "antimicrobials", fn = "antibiogram", is_argument = TRUE)
antimicrobials <- list(...)$antibiotics antimicrobials <- list(...)$antibiotics
} }
meet_criteria(antimicrobials, allow_class = "character", allow_NA = FALSE, allow_NULL = FALSE) meet_criteria(antimicrobials, allow_class = c("character", "numeric", "integer"), allow_NA = FALSE, allow_NULL = FALSE)
if (!is.function(mo_transform)) { if (!is.function(mo_transform)) {
meet_criteria(mo_transform, allow_class = "character", has_length = 1, is_in = c("name", "shortname", "gramstain", colnames(AMR::microorganisms)), allow_NULL = TRUE, allow_NA = TRUE) meet_criteria(mo_transform, allow_class = "character", has_length = 1, is_in = c("name", "shortname", "gramstain", colnames(AMR::microorganisms)), allow_NULL = TRUE, allow_NA = TRUE)
} }
@ -1194,12 +1195,13 @@ retrieve_wisca_parameters <- function(wisca_model, ...) {
#' @rawNamespace if(getRversion() >= "3.0.0") S3method(pillar::tbl_sum, antibiogram) #' @rawNamespace if(getRversion() >= "3.0.0") S3method(pillar::tbl_sum, antibiogram)
tbl_sum.antibiogram <- function(x, ...) { tbl_sum.antibiogram <- function(x, ...) {
dims <- paste(format(NROW(x), big.mark = ","), AMR_env$cross_icon, format(NCOL(x), big.mark = ",")) dims <- paste(format(NROW(x), big.mark = ","), AMR_env$cross_icon, format(NCOL(x), big.mark = ","))
names(dims) <- "An Antibiogram"
if (isTRUE(attributes(x)$wisca)) { if (isTRUE(attributes(x)$wisca)) {
names(dims) <- paste0("An Antibiogram (WISCA / ", attributes(x)$conf_interval * 100, "% CI)") dims <- c(dims, Type = paste0("WISCA with ", attributes(x)$conf_interval * 100, "% CI"))
} else if (isTRUE(attributes(x)$formatting_type >= 13)) { } else if (isTRUE(attributes(x)$formatting_type >= 13)) {
names(dims) <- paste0("An Antibiogram (non-WISCA / ", attributes(x)$conf_interval * 100, "% CI)") dims <- c(dims, Type = paste0("Non-WISCA with ", attributes(x)$conf_interval * 100, "% CI"))
} else { } else {
names(dims) <- paste0("An Antibiogram (non-WISCA)") dims <- c(dims, Type = paste0("Non-WISCA without CI"))
} }
dims dims
} }

View File

@ -361,3 +361,15 @@
#' @examples #' @examples
#' dosage #' dosage
"dosage" "dosage"
#' Data Set with `r format(nrow(esbl_isolates), big.mark = " ")` ESBL Isolates
#'
#' A data set containing `r format(nrow(esbl_isolates), big.mark = " ")` microbial isolates with MIC values of common antibiotics and a binary `esbl` column for extended-spectrum beta-lactamase (ESBL) production. This data set contains randomised fictitious data but reflects reality and can be used to practise AMR-related machine learning, e.g., classification modelling with [tidymodels](https://amr-for-r.org/articles/AMR_with_tidymodels.html).
#' @format A [tibble][tibble::tibble] with `r format(nrow(esbl_isolates), big.mark = " ")` observations and `r ncol(esbl_isolates)` variables:
#' - `esbl`\cr Logical indicator if the isolate is ESBL-producing
#' - `genus`\cr Genus of the microorganism
#' - `AMC:COL`\cr MIC values for 17 antimicrobial agents, transformed to class [`mic`] (see [as.mic()])
#' @details See our [tidymodels integration][amr-tidymodels] for an example using this data set.
#' @examples
#' esbl_isolates
"esbl_isolates"

View File

@ -442,7 +442,7 @@ eucast_rules <- function(x,
# big speed gain! only analyse unique rows: # big speed gain! only analyse unique rows:
pm_distinct(`.rowid`, .keep_all = TRUE) %pm>% pm_distinct(`.rowid`, .keep_all = TRUE) %pm>%
as.data.frame(stringsAsFactors = FALSE) as.data.frame(stringsAsFactors = FALSE)
x[, col_mo] <- as.mo(as.character(x[, col_mo, drop = TRUE]), info = info) x[, col_mo] <- as.mo(as.character(x[, col_mo, drop = TRUE]), info = FALSE)
# rename col_mo to prevent interference with joined columns # rename col_mo to prevent interference with joined columns
colnames(x)[colnames(x) == col_mo] <- ".col_mo" colnames(x)[colnames(x) == col_mo] <- ".col_mo"
col_mo <- ".col_mo" col_mo <- ".col_mo"
@ -450,13 +450,20 @@ eucast_rules <- function(x,
x <- left_join_microorganisms(x, by = col_mo, suffix = c("_oldcols", "")) x <- left_join_microorganisms(x, by = col_mo, suffix = c("_oldcols", ""))
x$gramstain <- mo_gramstain(x[, col_mo, drop = TRUE], language = NULL, info = FALSE) x$gramstain <- mo_gramstain(x[, col_mo, drop = TRUE], language = NULL, info = FALSE)
x$genus_species <- trimws(paste(x$genus, x$species)) x$genus_species <- trimws(paste(x$genus, x$species))
if (isTRUE(info) && NROW(x) > 10000) { if (isTRUE(info) && NROW(x.bak) > 10000) {
message_(" OK.", add_fn = list(font_green, font_bold), as_note = FALSE) message_("OK.", add_fn = list(font_green, font_bold), as_note = FALSE)
} }
n_added <- 0 n_added <- 0
n_changed <- 0 n_changed <- 0
rule_current <- ""
rule_group_current <- ""
rule_group_previous <- ""
rule_next <- ""
rule_previous <- ""
rule_text <- ""
# >>> Apply Other rules: enzyme inhibitors <<< ------------------------------------------ # >>> Apply Other rules: enzyme inhibitors <<< ------------------------------------------
if (any(c("all", "other") %in% rules)) { if (any(c("all", "other") %in% rules)) {
if (isTRUE(info)) { if (isTRUE(info)) {
@ -617,31 +624,16 @@ eucast_rules <- function(x,
eucast_rules_df <- eucast_rules_df %pm>% eucast_rules_df <- eucast_rules_df %pm>%
rbind_AMR(eucast_rules_df_total %pm>% rbind_AMR(eucast_rules_df_total %pm>%
subset(reference.rule_group %like% "breakpoint" & reference.version == version_breakpoints)) subset(reference.rule_group %like% "breakpoint" & reference.version == version_breakpoints))
# eucast_rules_df <- subset(
# eucast_rules_df,
# reference.rule_group %unlike% "breakpoint" |
# (reference.rule_group %like% "breakpoint" & reference.version == version_breakpoints)
# )
} }
if (any(c("all", "expected_phenotypes") %in% rules)) { if (any(c("all", "expected_phenotypes") %in% rules)) {
eucast_rules_df <- eucast_rules_df %pm>% eucast_rules_df <- eucast_rules_df %pm>%
rbind_AMR(eucast_rules_df_total %pm>% rbind_AMR(eucast_rules_df_total %pm>%
subset(reference.rule_group %like% "expected" & reference.version == version_expected_phenotypes)) subset(reference.rule_group %like% "expected" & reference.version == version_expected_phenotypes))
# eucast_rules_df <- subset(
# eucast_rules_df,
# reference.rule_group %unlike% "expected" |
# (reference.rule_group %like% "expected" & reference.version == version_expected_phenotypes)
# )
} }
if (any(c("all", "expert") %in% rules)) { if (any(c("all", "expert") %in% rules)) {
eucast_rules_df <- eucast_rules_df %pm>% eucast_rules_df <- eucast_rules_df %pm>%
rbind_AMR(eucast_rules_df_total %pm>% rbind_AMR(eucast_rules_df_total %pm>%
subset(reference.rule_group %like% "expert" & reference.version == version_expertrules)) subset(reference.rule_group %like% "expert" & reference.version == version_expertrules))
# eucast_rules_df <- subset(
# eucast_rules_df,
# reference.rule_group %unlike% "expert" |
# (reference.rule_group %like% "expert" & reference.version == version_expertrules)
# )
} }
## filter out AmpC de-repressed cephalosporin-resistant mutants ---- ## filter out AmpC de-repressed cephalosporin-resistant mutants ----
# no need to filter on version number here - the rules contain these version number, so are inherently filtered # no need to filter on version number here - the rules contain these version number, so are inherently filtered
@ -664,6 +656,9 @@ eucast_rules <- function(x,
# we only hints on remaining rows in `eucast_rules_df` # we only hints on remaining rows in `eucast_rules_df`
screening_abx <- as.character(AMR::antimicrobials$ab[which(AMR::antimicrobials$ab %like% "-S$")]) screening_abx <- as.character(AMR::antimicrobials$ab[which(AMR::antimicrobials$ab %like% "-S$")])
screening_abx <- screening_abx[screening_abx %in% unique(unlist(strsplit(EUCAST_RULES_DF$and_these_antibiotics[!is.na(EUCAST_RULES_DF$and_these_antibiotics)], ", *")))] screening_abx <- screening_abx[screening_abx %in% unique(unlist(strsplit(EUCAST_RULES_DF$and_these_antibiotics[!is.na(EUCAST_RULES_DF$and_these_antibiotics)], ", *")))]
if (isTRUE(info)) {
cat("\n")
}
for (ab_s in screening_abx) { for (ab_s in screening_abx) {
ab <- gsub("-S$", "", ab_s) ab <- gsub("-S$", "", ab_s)
if (ab %in% names(cols_ab) && !ab_s %in% names(cols_ab)) { if (ab %in% names(cols_ab) && !ab_s %in% names(cols_ab)) {
@ -894,7 +889,9 @@ eucast_rules <- function(x,
} }
for (i in seq_len(length(custom_rules))) { for (i in seq_len(length(custom_rules))) {
rule <- custom_rules[[i]] rule <- custom_rules[[i]]
rows <- which(eval(parse(text = rule$query), envir = x)) rows <- tryCatch(which(eval(parse(text = rule$query), envir = x)),
error = function(e) stop_(paste0(conditionMessage(e), font_red(" (check available data and compare with the custom rules set)")), call = FALSE)
)
cols <- as.character(rule$result_group) cols <- as.character(rule$result_group)
cols <- c( cols <- c(
cols[cols %in% colnames(x)], # direct column names cols[cols %in% colnames(x)], # direct column names
@ -908,9 +905,8 @@ eucast_rules <- function(x,
get_antibiotic_names(cols) get_antibiotic_names(cols)
) )
if (isTRUE(info)) { if (isTRUE(info)) {
# print rule
cat(italicise_taxonomy( cat(italicise_taxonomy(
word_wrap(format_custom_query_rule(rule$query, colours = FALSE), word_wrap(rule_text,
width = getOption("width") - 30, width = getOption("width") - 30,
extra_indent = 6 extra_indent = 6
), ),

View File

@ -432,11 +432,17 @@ pillar_shaft.mic <- function(x, ...) {
} }
crude_numbers <- as.double(x) crude_numbers <- as.double(x)
operators <- gsub("[^<=>]+", "", as.character(x)) operators <- gsub("[^<=>]+", "", as.character(x))
# colourise operators
operators[!is.na(operators) & operators != ""] <- font_silver(operators[!is.na(operators) & operators != ""], collapse = NULL) operators[!is.na(operators) & operators != ""] <- font_silver(operators[!is.na(operators) & operators != ""], collapse = NULL)
out <- trimws(paste0(operators, trimws(format(crude_numbers)))) out <- trimws(paste0(operators, trimws(format(crude_numbers))))
out[is.na(x)] <- font_na(NA) out[is.na(x)] <- font_na(NA)
# make trailing zeroes less visible # make trailing zeroes less visible
out[out %like% "[.]"] <- gsub("([.]?0+)$", font_silver("\\1"), out[out %like% "[.]"], perl = TRUE) if (is_dark()) {
fn <- font_silver
} else {
fn <- font_white
}
out[out %like% "[.]"] <- gsub("([.]?0+)$", fn("\\1"), out[out %like% "[.]"], perl = TRUE)
create_pillar_column(out, align = "right", width = max(nchar(font_stripstyle(out)))) create_pillar_column(out, align = "right", width = max(nchar(font_stripstyle(out))))
} }

View File

@ -31,13 +31,17 @@
#' #'
#' These functions can be used for generating random MIC values and disk diffusion diameters, for AMR data analysis practice. By providing a microorganism and antimicrobial drug, the generated results will reflect reality as much as possible. #' These functions can be used for generating random MIC values and disk diffusion diameters, for AMR data analysis practice. By providing a microorganism and antimicrobial drug, the generated results will reflect reality as much as possible.
#' @param size Desired size of the returned vector. If used in a [data.frame] call or `dplyr` verb, will get the current (group) size if left blank. #' @param size Desired size of the returned vector. If used in a [data.frame] call or `dplyr` verb, will get the current (group) size if left blank.
#' @param mo Any [character] that can be coerced to a valid microorganism code with [as.mo()]. #' @param mo Any [character] that can be coerced to a valid microorganism code with [as.mo()]. Can be the same length as `size`.
#' @param ab Any [character] that can be coerced to a valid antimicrobial drug code with [as.ab()]. #' @param ab Any [character] that can be coerced to a valid antimicrobial drug code with [as.ab()].
#' @param prob_SIR A vector of length 3: the probabilities for "S" (1st value), "I" (2nd value) and "R" (3rd value). #' @param prob_SIR A vector of length 3: the probabilities for "S" (1st value), "I" (2nd value) and "R" (3rd value).
#' @param skew Direction of skew for MIC or disk values, either `"right"` or `"left"`. A left-skewed distribution has the majority of the data on the right.
#' @param severity Skew severity; higher values will increase the skewedness. Default is `2`; use `0` to prevent skewedness.
#' @param ... Ignored, only in place to allow future extensions. #' @param ... Ignored, only in place to allow future extensions.
#' @details The base \R function [sample()] is used for generating values. #' @details
#' #' Internally, MIC and disk zone values are sampled based on clinical breakpoints defined in the [clinical_breakpoints] data set. To create specific generated values per bug or drug, set the `mo` and/or `ab` argument. The MICs are sampled on a log2 scale and disks linearly, using weighted probabilities. The weights are based on the `skew` and `severity` arguments:
#' Generated values are based on the EUCAST `r max(as.integer(gsub("[^0-9]", "", subset(clinical_breakpoints, guideline %like% "EUCAST")$guideline)))` guideline as implemented in the [clinical_breakpoints] data set. To create specific generated values per bug or drug, set the `mo` and/or `ab` argument. #' * `skew = "right"` places more emphasis on lower MIC or higher disk values.
#' * `skew = "left"` places more emphasis on higher MIC or lower disk values.
#' * `severity` controls the exponential bias applied.
#' @return class `mic` for [random_mic()] (see [as.mic()]) and class `disk` for [random_disk()] (see [as.disk()]) #' @return class `mic` for [random_mic()] (see [as.mic()]) and class `disk` for [random_disk()] (see [as.disk()])
#' @name random #' @name random
#' @rdname random #' @rdname random
@ -47,8 +51,13 @@
#' random_disk(25) #' random_disk(25)
#' random_sir(25) #' random_sir(25)
#' #'
#' # add more skewedness, make more realistic by setting a bug and/or drug:
#' disks <- random_disk(100, severity = 2, mo = "Escherichia coli", ab = "CIP")
#' plot(disks)
#' # `plot()` and `ggplot2::autoplot()` allow for coloured bars if `mo` and `ab` are set
#' plot(disks, mo = "Escherichia coli", ab = "CIP", guideline = "CLSI 2025")
#'
#' \donttest{ #' \donttest{
#' # make the random generation more realistic by setting a bug and/or drug:
#' random_mic(25, "Klebsiella pneumoniae") # range 0.0625-64 #' random_mic(25, "Klebsiella pneumoniae") # range 0.0625-64
#' random_mic(25, "Klebsiella pneumoniae", "meropenem") # range 0.0625-16 #' random_mic(25, "Klebsiella pneumoniae", "meropenem") # range 0.0625-16
#' random_mic(25, "Streptococcus pneumoniae", "meropenem") # range 0.0625-4 #' random_mic(25, "Streptococcus pneumoniae", "meropenem") # range 0.0625-4
@ -57,26 +66,61 @@
#' random_disk(25, "Klebsiella pneumoniae", "ampicillin") # range 11-17 #' random_disk(25, "Klebsiella pneumoniae", "ampicillin") # range 11-17
#' random_disk(25, "Streptococcus pneumoniae", "ampicillin") # range 12-27 #' random_disk(25, "Streptococcus pneumoniae", "ampicillin") # range 12-27
#' } #' }
random_mic <- function(size = NULL, mo = NULL, ab = NULL, ...) { random_mic <- function(size = NULL, mo = NULL, ab = NULL, skew = "right", severity = 1, ...) {
meet_criteria(size, allow_class = c("numeric", "integer"), has_length = 1, is_positive = TRUE, is_finite = TRUE, allow_NULL = TRUE) meet_criteria(size, allow_class = c("numeric", "integer"), has_length = 1, is_positive = TRUE, is_finite = TRUE, allow_NULL = TRUE)
meet_criteria(mo, allow_class = "character", has_length = 1, allow_NULL = TRUE) meet_criteria(mo, allow_class = "character", has_length = c(1, size), allow_NULL = TRUE)
meet_criteria(ab, allow_class = "character", has_length = 1, allow_NULL = TRUE) meet_criteria(ab, allow_class = "character", has_length = 1, allow_NULL = TRUE)
meet_criteria(skew, allow_class = "character", is_in = c("right", "left"), has_length = 1)
meet_criteria(severity, allow_class = c("numeric", "integer"), has_length = 1, is_positive_or_zero = TRUE, is_finite = TRUE)
if (is.null(size)) { if (is.null(size)) {
size <- NROW(get_current_data(arg_name = "size", call = -3)) size <- NROW(get_current_data(arg_name = "size", call = -3))
} }
random_exec("MIC", size = size, mo = mo, ab = ab) if (length(mo) > 1) {
out <- rep(NA_mic_, length(size))
p <- progress_ticker(n = length(unique(mo)), n_min = 10, title = "Generating random MIC values")
for (mo_ in unique(mo)) {
p$tick()
out[which(mo == mo_)] <- random_exec("MIC", size = sum(mo == mo_), mo = mo_, ab = ab, skew = skew, severity = severity)
}
out <- as.mic(out, keep_operators = "none")
if (stats::runif(1) > 0.5 && length(unique(out)) > 1) {
out[out == min(out)] <- paste0("<=", out[out == min(out)])
}
if (stats::runif(1) > 0.5 && length(unique(out)) > 1) {
out[out == max(out) & out %unlike% "<="] <- paste0(">=", out[out == max(out) & out %unlike% "<="])
}
return(out)
} else {
random_exec("MIC", size = size, mo = mo, ab = ab, skew = skew, severity = severity)
}
} }
#' @rdname random #' @rdname random
#' @export #' @export
random_disk <- function(size = NULL, mo = NULL, ab = NULL, ...) { random_disk <- function(size = NULL, mo = NULL, ab = NULL, skew = "left", severity = 1, ...) {
meet_criteria(size, allow_class = c("numeric", "integer"), has_length = 1, is_positive = TRUE, is_finite = TRUE, allow_NULL = TRUE) meet_criteria(size, allow_class = c("numeric", "integer"), has_length = 1, is_positive = TRUE, is_finite = TRUE, allow_NULL = TRUE)
meet_criteria(mo, allow_class = "character", has_length = 1, allow_NULL = TRUE) meet_criteria(mo, allow_class = "character", has_length = c(1, size), allow_NULL = TRUE)
meet_criteria(ab, allow_class = "character", has_length = 1, allow_NULL = TRUE) meet_criteria(ab, allow_class = "character", has_length = 1, allow_NULL = TRUE)
meet_criteria(skew, allow_class = "character", is_in = c("right", "left"), has_length = 1)
meet_criteria(severity, allow_class = c("numeric", "integer"), has_length = 1, is_positive_or_zero = TRUE, is_finite = TRUE)
if (is.null(size)) { if (is.null(size)) {
size <- NROW(get_current_data(arg_name = "size", call = -3)) size <- NROW(get_current_data(arg_name = "size", call = -3))
} }
random_exec("DISK", size = size, mo = mo, ab = ab) if (length(mo) > 1) {
out <- rep(NA_mic_, length(size))
p <- progress_ticker(n = length(unique(mo)), n_min = 10, title = "Generating random MIC values")
for (mo_ in unique(mo)) {
p$tick()
out[which(mo == mo_)] <- random_exec("DISK", size = sum(mo == mo_), mo = mo_, ab = ab, skew = skew, severity = severity)
}
out <- as.disk(out)
return(out)
} else {
random_exec("DISK", size = size, mo = mo, ab = ab, skew = skew, severity = severity)
}
} }
#' @rdname random #' @rdname random
@ -90,78 +134,60 @@ random_sir <- function(size = NULL, prob_SIR = c(0.33, 0.33, 0.33), ...) {
sample(as.sir(c("S", "I", "R")), size = size, replace = TRUE, prob = prob_SIR) sample(as.sir(c("S", "I", "R")), size = size, replace = TRUE, prob = prob_SIR)
} }
random_exec <- function(method_type, size, mo = NULL, ab = NULL) {
df <- AMR::clinical_breakpoints %pm>% random_exec <- function(method_type, size, mo = NULL, ab = NULL, skew = "right", severity = 1) {
pm_filter(guideline %like% "EUCAST") %pm>% df <- AMR::clinical_breakpoints %pm>% subset(method == method_type & type == "human")
pm_arrange(pm_desc(guideline)) %pm>%
subset(guideline == max(guideline) &
method == method_type &
type == "human")
if (!is.null(mo)) { if (!is.null(mo)) {
mo_coerced <- as.mo(mo) mo_coerced <- as.mo(mo, info = FALSE)
mo_include <- c( mo_include <- c(mo_coerced, as.mo(mo_genus(mo_coerced)), as.mo(mo_family(mo_coerced)), as.mo(mo_order(mo_coerced)))
mo_coerced, df_new <- df %pm>% subset(mo %in% mo_include)
as.mo(mo_genus(mo_coerced)), if (nrow(df_new) > 0) df <- df_new
as.mo(mo_family(mo_coerced)),
as.mo(mo_order(mo_coerced))
)
df_new <- df %pm>%
subset(mo %in% mo_include)
if (nrow(df_new) > 0) {
df <- df_new
} else {
warning_("in `random_", tolower(method_type), "()`: no rows found that match mo '", mo, "', ignoring argument `mo`")
}
} }
if (!is.null(ab)) { if (!is.null(ab)) {
ab_coerced <- as.ab(ab) ab_coerced <- as.ab(ab)
df_new <- df %pm>% df_new <- df %pm>% subset(ab %in% ab_coerced)
subset(ab %in% ab_coerced) if (nrow(df_new) > 0) df <- df_new
if (nrow(df_new) > 0) {
df <- df_new
} else {
warning_("in `random_", tolower(method_type), "()`: no rows found that match ab '", ab, "' (", ab_name(ab_coerced, tolower = TRUE, language = NULL), "), ignoring argument `ab`")
}
} }
if (method_type == "MIC") { if (method_type == "MIC") {
# set range lowest_mic <- min(df$breakpoint_S, na.rm = TRUE)
mic_range <- c(0.001, 0.002, 0.005, 0.010, 0.025, 0.0625, 0.125, 0.250, 0.5, 1, 2, 4, 8, 16, 32, 64, 128, 256) lowest_mic <- log2(lowest_mic) + sample(c(-3:2), 1)
lowest_mic <- 2^lowest_mic
highest_mic <- max(df$breakpoint_R, na.rm = TRUE)
highest_mic <- log2(highest_mic) + sample(c(-3:1), 1)
highest_mic <- max(lowest_mic * 2, 2^highest_mic)
# get highest/lowest +/- random 1 to 3 higher factors of two out <- skewed_values(COMMON_MIC_VALUES, size = size, min = lowest_mic, max = highest_mic, skew = skew, severity = severity)
max_range <- mic_range[min(
length(mic_range),
which(mic_range == max(df$breakpoint_R[!is.na(df$breakpoint_R)], na.rm = TRUE)) + sample(c(1:3), 1)
)]
min_range <- mic_range[max(
1,
which(mic_range == min(df$breakpoint_S, na.rm = TRUE)) - sample(c(1:3), 1)
)]
mic_range_new <- mic_range[mic_range <= max_range & mic_range >= min_range]
if (length(mic_range_new) == 0) {
mic_range_new <- mic_range
}
out <- as.mic(sample(mic_range_new, size = size, replace = TRUE))
# 50% chance that lowest will get <= and highest will get >=
if (stats::runif(1) > 0.5 && length(unique(out)) > 1) { if (stats::runif(1) > 0.5 && length(unique(out)) > 1) {
out[out == min(out)] <- paste0("<=", out[out == min(out)]) out[out == min(out)] <- paste0("<=", out[out == min(out)])
} }
if (stats::runif(1) > 0.5 && length(unique(out)) > 1) { if (stats::runif(1) > 0.5 && length(unique(out)) > 1) {
out[out == max(out)] <- paste0(">=", out[out == max(out)]) out[out == max(out) & out %unlike% "<="] <- paste0(">=", out[out == max(out) & out %unlike% "<="])
} }
return(out) return(as.mic(out))
} else if (method_type == "DISK") { } else if (method_type == "DISK") {
set_range <- seq( disk_range <- seq(
from = as.integer(min(df$breakpoint_R[!is.na(df$breakpoint_R)], na.rm = TRUE) / 1.25), from = floor(min(df$breakpoint_R[!is.na(df$breakpoint_R)], na.rm = TRUE) / 1.25),
to = as.integer(max(df$breakpoint_S, na.rm = TRUE) * 1.25), to = ceiling(max(df$breakpoint_S[df$breakpoint_S != 50], na.rm = TRUE) * 1.25),
by = 1 by = 1
) )
out <- sample(set_range, size = size, replace = TRUE) disk_range <- disk_range[disk_range >= 6 & disk_range <= 50]
out[out < 6] <- sample(c(6:10), length(out[out < 6]), replace = TRUE) out <- skewed_values(disk_range, size = size, min = min(disk_range), max = max(disk_range), skew = skew, severity = severity)
out[out > 50] <- sample(c(40:50), length(out[out > 50]), replace = TRUE)
return(as.disk(out)) return(as.disk(out))
} }
} }
skewed_values <- function(values, size, min, max, skew = c("right", "left"), severity = 1) {
skew <- match.arg(skew)
range_vals <- values[values >= min & values <= max]
if (length(range_vals) < 2) range_vals <- values
ranks <- seq_along(range_vals)
weights <- switch(skew,
right = rev(ranks)^severity,
left = ranks^severity
)
weights <- weights / sum(weights)
sample(range_vals, size = size, replace = TRUE, prob = weights)
}

View File

@ -159,7 +159,7 @@
#' #'
#' The function [is.sir()] detects if the input contains class `sir`. If the input is a [data.frame] or [list], it iterates over all columns/items and returns a [logical] vector. #' The function [is.sir()] detects if the input contains class `sir`. If the input is a [data.frame] or [list], it iterates over all columns/items and returns a [logical] vector.
#' #'
#' The base R function [as.double()] can be used to retrieve quantitative values from a `sir` object: `"S"` = 1, `"I"`/`"SDD"` = 2, `"R"` = 3. All other values are rendered `NA` . **Note:** Do not use `as.integer()`, since that (because of how R works internally) will return the factor level indices, and not these aforementioned quantitative values. #' The base R function [as.double()] can be used to retrieve quantitative values from a `sir` object: `"S"` = 1, `"I"`/`"SDD"` = 2, `"R"` = 3. All other values are rendered `NA`. **Note:** Do not use `as.integer()`, since that (because of how R works internally) will return the factor level indices, and not these aforementioned quantitative values.
#' #'
#' The function [is_sir_eligible()] returns `TRUE` when a column contains at most 5% potentially invalid antimicrobial interpretations, and `FALSE` otherwise. The threshold of 5% can be set with the `threshold` argument. If the input is a [data.frame], it iterates over all columns and returns a [logical] vector. #' The function [is_sir_eligible()] returns `TRUE` when a column contains at most 5% potentially invalid antimicrobial interpretations, and `FALSE` otherwise. The threshold of 5% can be set with the `threshold` argument. If the input is a [data.frame], it iterates over all columns and returns a [logical] vector.
#' @section Interpretation of SIR: #' @section Interpretation of SIR:

Binary file not shown.

262
R/tidymodels.R Normal file
View File

@ -0,0 +1,262 @@
#' AMR Extensions for Tidymodels
#'
#' This family of functions allows using AMR-specific data types such as `<mic>` and `<sir>` inside `tidymodels` pipelines.
#' @inheritParams recipes::step_center
#' @details
#' You can read more in our online [AMR with tidymodels introduction](https://amr-for-r.org/articles/AMR_with_tidymodels.html).
#'
#' Tidyselect helpers include:
#' - [all_mic()] and [all_mic_predictors()] to select `<mic>` columns
#' - [all_sir()] and [all_sir_predictors()] to select `<sir>` columns
#'
#' Pre-processing pipeline steps include:
#' - [step_mic_log2()] to convert MIC columns to numeric (via `as.numeric()`) and apply a log2 transform, to be used with [all_mic_predictors()]
#' - [step_sir_numeric()] to convert SIR columns to numeric (via `as.numeric()`), to be used with [all_sir_predictors()]: `"S"` = 1, `"I"`/`"SDD"` = 2, `"R"` = 3. All other values are rendered `NA`. Keep this in mind for further processing, especially if the model does not allow for `NA` values.
#'
#' These steps integrate with `recipes::recipe()` and work like standard preprocessing steps. They are useful for preparing data for modelling, especially with classification models.
#' @seealso [recipes::recipe()], [as.mic()], [as.sir()]
#' @name amr-tidymodels
#' @keywords internal
#' @export
#' @examples
#' library(tidymodels)
#'
#' # The below approach formed the basis for this paper: DOI 10.3389/fmicb.2025.1582703
#' # Presence of ESBL genes was predicted based on raw MIC values.
#'
#'
#' # example data set in the AMR package
#' esbl_isolates
#'
#' # Prepare a binary outcome and convert to ordered factor
#' data <- esbl_isolates %>%
#' mutate(esbl = factor(esbl, levels = c(FALSE, TRUE), ordered = TRUE))
#'
#' # Split into training and testing sets
#' split <- initial_split(data)
#' training_data <- training(split)
#' testing_data <- testing(split)
#'
#' # Create and prep a recipe with MIC log2 transformation
#' mic_recipe <- recipe(esbl ~ ., data = training_data) %>%
#' # Optionally remove non-predictive variables
#' remove_role(genus, old_role = "predictor") %>%
#' # Apply the log2 transformation to all MIC predictors
#' step_mic_log2(all_mic_predictors()) %>%
#' prep()
#'
#' # View prepped recipe
#' mic_recipe
#'
#' # Apply the recipe to training and testing data
#' out_training <- bake(mic_recipe, new_data = NULL)
#' out_testing <- bake(mic_recipe, new_data = testing_data)
#'
#' # Fit a logistic regression model
#' fitted <- logistic_reg(mode = "classification") %>%
#' set_engine("glm") %>%
#' fit(esbl ~ ., data = out_training)
#'
#' # Generate predictions on the test set
#' predictions <- predict(fitted, out_testing) %>%
#' bind_cols(out_testing)
#'
#' # Evaluate predictions using standard classification metrics
#' our_metrics <- metric_set(accuracy, kap, ppv, npv)
#' metrics <- our_metrics(predictions, truth = esbl, estimate = .pred_class)
#'
#' # Show performance:
#' # - negative predictive value (NPV) of ~98%
#' # - positive predictive value (PPV) of ~94%
#' metrics
all_mic <- function() {
x <- tidymodels_amr_select(levels(NA_mic_))
names(x)
}
#' @rdname amr-tidymodels
#' @export
all_mic_predictors <- function() {
x <- tidymodels_amr_select(levels(NA_mic_))
intersect(x, recipes::has_role("predictor"))
}
#' @rdname amr-tidymodels
#' @export
all_sir <- function() {
x <- tidymodels_amr_select(levels(NA_sir_))
names(x)
}
#' @rdname amr-tidymodels
#' @export
all_sir_predictors <- function() {
x <- tidymodels_amr_select(levels(NA_sir_))
intersect(x, recipes::has_role("predictor"))
}
#' @rdname amr-tidymodels
#' @export
step_mic_log2 <- function(
recipe,
...,
role = NA,
trained = FALSE,
columns = NULL,
skip = FALSE,
id = recipes::rand_id("mic_log2")) {
recipes::add_step(
recipe,
step_mic_log2_new(
terms = rlang::enquos(...),
role = role,
trained = trained,
columns = columns,
skip = skip,
id = id
)
)
}
step_mic_log2_new <- function(terms, role, trained, columns, skip, id) {
recipes::step(
subclass = "mic_log2",
terms = terms,
role = role,
trained = trained,
columns = columns,
skip = skip,
id = id
)
}
#' @rawNamespace if(getRversion() >= "3.0.0") S3method(recipes::prep, step_mic_log2)
prep.step_mic_log2 <- function(x, training, info = NULL, ...) {
col_names <- recipes::recipes_eval_select(x$terms, training, info)
recipes::check_type(training[, col_names], types = "ordered")
step_mic_log2_new(
terms = x$terms,
role = x$role,
trained = TRUE,
columns = col_names,
skip = x$skip,
id = x$id
)
}
#' @rawNamespace if(getRversion() >= "3.0.0") S3method(recipes::bake, step_mic_log2)
bake.step_mic_log2 <- function(object, new_data, ...) {
recipes::check_new_data(object$columns, object, new_data)
for (col in object$columns) {
new_data[[col]] <- log2(as.numeric(as.mic(new_data[[col]])))
}
new_data
}
#' @export
print.step_mic_log2 <- function(x, width = max(20, options()$width - 35), ...) {
title <- "Log2 transformation of MIC columns"
recipes::print_step(x$columns, x$terms, x$trained, title, width)
invisible(x)
}
#' @rawNamespace if(getRversion() >= "3.0.0") S3method(recipes::tidy, step_mic_log2)
tidy.step_mic_log2 <- function(x, ...) {
if (recipes::is_trained(x)) {
res <- tibble::tibble(terms = x$columns)
} else {
res <- tibble::tibble(terms = recipes::sel2char(x$terms))
}
res$id <- x$id
res
}
#' @rdname amr-tidymodels
#' @export
step_sir_numeric <- function(
recipe,
...,
role = NA,
trained = FALSE,
columns = NULL,
skip = FALSE,
id = recipes::rand_id("sir_numeric")) {
recipes::add_step(
recipe,
step_sir_numeric_new(
terms = rlang::enquos(...),
role = role,
trained = trained,
columns = columns,
skip = skip,
id = id
)
)
}
step_sir_numeric_new <- function(terms, role, trained, columns, skip, id) {
recipes::step(
subclass = "sir_numeric",
terms = terms,
role = role,
trained = trained,
columns = columns,
skip = skip,
id = id
)
}
#' @rawNamespace if(getRversion() >= "3.0.0") S3method(recipes::prep, step_sir_numeric)
prep.step_sir_numeric <- function(x, training, info = NULL, ...) {
col_names <- recipes::recipes_eval_select(x$terms, training, info)
recipes::check_type(training[, col_names], types = "ordered")
step_sir_numeric_new(
terms = x$terms,
role = x$role,
trained = TRUE,
columns = col_names,
skip = x$skip,
id = x$id
)
}
#' @rawNamespace if(getRversion() >= "3.0.0") S3method(recipes::bake, step_sir_numeric)
bake.step_sir_numeric <- function(object, new_data, ...) {
recipes::check_new_data(object$columns, object, new_data)
for (col in object$columns) {
new_data[[col]] <- as.numeric(as.sir(new_data[[col]]))
}
new_data
}
#' @export
print.step_sir_numeric <- function(x, width = max(20, options()$width - 35), ...) {
title <- "Numeric transformation of SIR columns"
recipes::print_step(x$columns, x$terms, x$trained, title, width)
invisible(x)
}
#' @rawNamespace if(getRversion() >= "3.0.0") S3method(recipes::tidy, step_sir_numeric)
tidy.step_sir_numeric <- function(x, ...) {
if (recipes::is_trained(x)) {
res <- tibble::tibble(terms = x$columns)
} else {
res <- tibble::tibble(terms = recipes::sel2char(x$terms))
}
res$id <- x$id
res
}
tidymodels_amr_select <- function(check_vector) {
df <- get_current_data()
ind <- which(
vapply(
FUN.VALUE = logical(1),
df,
function(x) all(x %in% c(check_vector, NA), na.rm = TRUE) & any(x %in% check_vector),
USE.NAMES = TRUE
),
useNames = TRUE
)
ind
}

View File

@ -258,6 +258,11 @@ translate_into_language <- function(from,
return(from) return(from)
} }
if (only_affect_ab_names == TRUE) {
df_trans$pattern[df_trans$regular_expr == TRUE] <- paste0(df_trans$pattern[df_trans$regular_expr == TRUE], "$")
df_trans$pattern[df_trans$regular_expr == TRUE] <- gsub("$$", "$", df_trans$pattern[df_trans$regular_expr == TRUE], fixed = TRUE)
}
lapply( lapply(
# starting with longest pattern, since more general translations are shorter, such as 'Group' # starting with longest pattern, since more general translations are shorter, such as 'Group'
order(nchar(df_trans$pattern), decreasing = TRUE), order(nchar(df_trans$pattern), decreasing = TRUE),

View File

@ -234,6 +234,7 @@ reference:
- "`antimicrobials`" - "`antimicrobials`"
- "`clinical_breakpoints`" - "`clinical_breakpoints`"
- "`example_isolates`" - "`example_isolates`"
- "`esbl_isolates`"
- "`microorganisms.codes`" - "`microorganisms.codes`"
- "`microorganisms.groups`" - "`microorganisms.groups`"
- "`intrinsic_resistant`" - "`intrinsic_resistant`"

View File

@ -663,7 +663,9 @@ if (files_changed()) {
} }
# Update index.md and README.md ------------------------------------------- # Update index.md and README.md -------------------------------------------
if (files_changed("man/microorganisms.Rd") || if (files_changed("README.Rmd") ||
files_changed("index.Rmd") ||
files_changed("man/microorganisms.Rd") ||
files_changed("man/antimicrobials.Rd") || files_changed("man/antimicrobials.Rd") ||
files_changed("man/clinical_breakpoints.Rd") || files_changed("man/clinical_breakpoints.Rd") ||
files_changed("man/antibiogram.Rd") || files_changed("man/antibiogram.Rd") ||

BIN
data/esbl_isolates.rda Normal file

Binary file not shown.

View File

@ -28,8 +28,8 @@ AMR:::reset_all_thrown_messages()
> Now available for Python too! [Click here](./articles/AMR_for_Python.html) to read more. > Now available for Python too! [Click here](./articles/AMR_for_Python.html) to read more.
<div style="display: flex; font-size: 0.8em;"> <div style="display: flex; font-size: 0.8em;">
<p style="text-align:left; width: 50%;"><small><a href="https://amr-for-r.org/">https://amr-for-r.org</a></small></p> <p style="text-align:left; width: 50%;"><small><a href="https://amr-for-r.org/">amr-for-r.org</a></small></p>
<p style="text-align:right; width: 50%;"><small><a href="https://doi.org/10.18637/jss.v104.i03" target="_blank">https://doi.org/10.18637/jss.v104.i03</a></small></p> <p style="text-align:right; width: 50%;"><small><a href="https://doi.org/10.18637/jss.v104.i03" target="_blank">doi.org/10.18637/jss.v104.i03</a></small></p>
</div> </div>
<a href="./reference/clinical_breakpoints.html#response-from-clsi-and-eucast"><img src="./endorsement_clsi_eucast.jpg" class="endorse_img" align="right" height="120" /></a> <a href="./reference/clinical_breakpoints.html#response-from-clsi-and-eucast"><img src="./endorsement_clsi_eucast.jpg" class="endorse_img" align="right" height="120" /></a>

122
man/amr-tidymodels.Rd Normal file
View File

@ -0,0 +1,122 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tidymodels.R
\name{amr-tidymodels}
\alias{amr-tidymodels}
\alias{all_mic}
\alias{all_mic_predictors}
\alias{all_sir}
\alias{all_sir_predictors}
\alias{step_mic_log2}
\alias{step_sir_numeric}
\title{AMR Extensions for Tidymodels}
\usage{
all_mic()
all_mic_predictors()
all_sir()
all_sir_predictors()
step_mic_log2(recipe, ..., role = NA, trained = FALSE, columns = NULL,
skip = FALSE, id = recipes::rand_id("mic_log2"))
step_sir_numeric(recipe, ..., role = NA, trained = FALSE, columns = NULL,
skip = FALSE, id = recipes::rand_id("sir_numeric"))
}
\arguments{
\item{recipe}{A recipe object. The step will be added to the sequence of
operations for this recipe.}
\item{...}{One or more selector functions to choose variables for this step.
See \code{\link[recipes:selections]{selections()}} for more details.}
\item{role}{Not used by this step since no new variables are created.}
\item{trained}{A logical to indicate if the quantities for preprocessing have
been estimated.}
\item{skip}{A logical. Should the step be skipped when the recipe is baked by
\code{\link[recipes:bake]{bake()}}? While all operations are baked when \code{\link[recipes:prep]{prep()}} is run, some
operations may not be able to be conducted on new data (e.g. processing the
outcome variable(s)). Care should be taken when using \code{skip = TRUE} as it
may affect the computations for subsequent operations.}
\item{id}{A character string that is unique to this step to identify it.}
}
\description{
This family of functions allows using AMR-specific data types such as \verb{<mic>} and \verb{<sir>} inside \code{tidymodels} pipelines.
}
\details{
You can read more in our online \href{https://amr-for-r.org/articles/AMR_with_tidymodels.html}{AMR with tidymodels introduction}.
Tidyselect helpers include:
\itemize{
\item \code{\link[=all_mic]{all_mic()}} and \code{\link[=all_mic_predictors]{all_mic_predictors()}} to select \verb{<mic>} columns
\item \code{\link[=all_sir]{all_sir()}} and \code{\link[=all_sir_predictors]{all_sir_predictors()}} to select \verb{<sir>} columns
}
Pre-processing pipeline steps include:
\itemize{
\item \code{\link[=step_mic_log2]{step_mic_log2()}} to convert MIC columns to numeric (via \code{as.numeric()}) and apply a log2 transform, to be used with \code{\link[=all_mic_predictors]{all_mic_predictors()}}
\item \code{\link[=step_sir_numeric]{step_sir_numeric()}} to convert SIR columns to numeric (via \code{as.numeric()}), to be used with \code{\link[=all_sir_predictors]{all_sir_predictors()}}: \code{"S"} = 1, \code{"I"}/\code{"SDD"} = 2, \code{"R"} = 3. All other values are rendered \code{NA}. Keep this in mind for further processing, especially if the model does not allow for \code{NA} values.
}
These steps integrate with \code{recipes::recipe()} and work like standard preprocessing steps. They are useful for preparing data for modelling, especially with classification models.
}
\examples{
library(tidymodels)
# The below approach formed the basis for this paper: DOI 10.3389/fmicb.2025.1582703
# Presence of ESBL genes was predicted based on raw MIC values.
# example data set in the AMR package
esbl_isolates
# Prepare a binary outcome and convert to ordered factor
data <- esbl_isolates \%>\%
mutate(esbl = factor(esbl, levels = c(FALSE, TRUE), ordered = TRUE))
# Split into training and testing sets
split <- initial_split(data)
training_data <- training(split)
testing_data <- testing(split)
# Create and prep a recipe with MIC log2 transformation
mic_recipe <- recipe(esbl ~ ., data = training_data) \%>\%
# Optionally remove non-predictive variables
remove_role(genus, old_role = "predictor") \%>\%
# Apply the log2 transformation to all MIC predictors
step_mic_log2(all_mic_predictors()) \%>\%
prep()
# View prepped recipe
mic_recipe
# Apply the recipe to training and testing data
out_training <- bake(mic_recipe, new_data = NULL)
out_testing <- bake(mic_recipe, new_data = testing_data)
# Fit a logistic regression model
fitted <- logistic_reg(mode = "classification") \%>\%
set_engine("glm") \%>\%
fit(esbl ~ ., data = out_training)
# Generate predictions on the test set
predictions <- predict(fitted, out_testing) \%>\%
bind_cols(out_testing)
# Evaluate predictions using standard classification metrics
our_metrics <- metric_set(accuracy, kap, ppv, npv)
metrics <- our_metrics(predictions, truth = esbl, estimate = .pred_class)
# Show performance:
# - negative predictive value (NPV) of ~98\%
# - positive predictive value (PPV) of ~94\%
metrics
}
\seealso{
\code{\link[recipes:recipe]{recipes::recipe()}}, \code{\link[=as.mic]{as.mic()}}, \code{\link[=as.sir]{as.sir()}}
}
\keyword{internal}

View File

@ -56,6 +56,7 @@ retrieve_wisca_parameters(wisca_model, ...)
\item \code{c(aminoglycosides(), "AMP", "AMC")} \item \code{c(aminoglycosides(), "AMP", "AMC")}
\item \code{c(aminoglycosides(), carbapenems())} \item \code{c(aminoglycosides(), carbapenems())}
} }
\item Column indices using numbers
\item Combination therapy, indicated by using \code{"+"}, with or without \link[=antimicrobial_selectors]{antimicrobial selectors}, e.g.: \item Combination therapy, indicated by using \code{"+"}, with or without \link[=antimicrobial_selectors]{antimicrobial selectors}, e.g.:
\itemize{ \itemize{
\item \code{"cipro + genta"} \item \code{"cipro + genta"}

View File

@ -247,7 +247,7 @@ To determine which isolates are multi-drug resistant, be sure to run \code{\link
The function \code{\link[=is.sir]{is.sir()}} detects if the input contains class \code{sir}. If the input is a \link{data.frame} or \link{list}, it iterates over all columns/items and returns a \link{logical} vector. The function \code{\link[=is.sir]{is.sir()}} detects if the input contains class \code{sir}. If the input is a \link{data.frame} or \link{list}, it iterates over all columns/items and returns a \link{logical} vector.
The base R function \code{\link[=as.double]{as.double()}} can be used to retrieve quantitative values from a \code{sir} object: \code{"S"} = 1, \code{"I"}/\code{"SDD"} = 2, \code{"R"} = 3. All other values are rendered \code{NA} . \strong{Note:} Do not use \code{as.integer()}, since that (because of how R works internally) will return the factor level indices, and not these aforementioned quantitative values. The base R function \code{\link[=as.double]{as.double()}} can be used to retrieve quantitative values from a \code{sir} object: \code{"S"} = 1, \code{"I"}/\code{"SDD"} = 2, \code{"R"} = 3. All other values are rendered \code{NA}. \strong{Note:} Do not use \code{as.integer()}, since that (because of how R works internally) will return the factor level indices, and not these aforementioned quantitative values.
The function \code{\link[=is_sir_eligible]{is_sir_eligible()}} returns \code{TRUE} when a column contains at most 5\% potentially invalid antimicrobial interpretations, and \code{FALSE} otherwise. The threshold of 5\% can be set with the \code{threshold} argument. If the input is a \link{data.frame}, it iterates over all columns and returns a \link{logical} vector. The function \code{\link[=is_sir_eligible]{is_sir_eligible()}} returns \code{TRUE} when a column contains at most 5\% potentially invalid antimicrobial interpretations, and \code{FALSE} otherwise. The threshold of 5\% can be set with the \code{threshold} argument. If the input is a \link{data.frame}, it iterates over all columns and returns a \link{logical} vector.
} }

27
man/esbl_isolates.Rd Normal file
View File

@ -0,0 +1,27 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/data.R
\docType{data}
\name{esbl_isolates}
\alias{esbl_isolates}
\title{Data Set with 500 ESBL Isolates}
\format{
A \link[tibble:tibble]{tibble} with 500 observations and 19 variables:
\itemize{
\item \code{esbl}\cr Logical indicator if the isolate is ESBL-producing
\item \code{genus}\cr Genus of the microorganism
\item \code{AMC:COL}\cr MIC values for 17 antimicrobial agents, transformed to class \code{\link{mic}} (see \code{\link[=as.mic]{as.mic()}})
}
}
\usage{
esbl_isolates
}
\description{
A data set containing 500 microbial isolates with MIC values of common antibiotics and a binary \code{esbl} column for extended-spectrum beta-lactamase (ESBL) production. This data set contains randomised fictitious data but reflects reality and can be used to practise AMR-related machine learning, e.g., classification modelling with \href{https://amr-for-r.org/articles/AMR_with_tidymodels.html}{tidymodels}.
}
\details{
See our \link[=amr-tidymodels]{tidymodels integration} for an example using this data set.
}
\examples{
esbl_isolates
}
\keyword{datasets}

View File

@ -7,19 +7,25 @@
\alias{random_sir} \alias{random_sir}
\title{Random MIC Values/Disk Zones/SIR Generation} \title{Random MIC Values/Disk Zones/SIR Generation}
\usage{ \usage{
random_mic(size = NULL, mo = NULL, ab = NULL, ...) random_mic(size = NULL, mo = NULL, ab = NULL, skew = "right",
severity = 1, ...)
random_disk(size = NULL, mo = NULL, ab = NULL, ...) random_disk(size = NULL, mo = NULL, ab = NULL, skew = "left",
severity = 1, ...)
random_sir(size = NULL, prob_SIR = c(0.33, 0.33, 0.33), ...) random_sir(size = NULL, prob_SIR = c(0.33, 0.33, 0.33), ...)
} }
\arguments{ \arguments{
\item{size}{Desired size of the returned vector. If used in a \link{data.frame} call or \code{dplyr} verb, will get the current (group) size if left blank.} \item{size}{Desired size of the returned vector. If used in a \link{data.frame} call or \code{dplyr} verb, will get the current (group) size if left blank.}
\item{mo}{Any \link{character} that can be coerced to a valid microorganism code with \code{\link[=as.mo]{as.mo()}}.} \item{mo}{Any \link{character} that can be coerced to a valid microorganism code with \code{\link[=as.mo]{as.mo()}}. Can be the same length as \code{size}.}
\item{ab}{Any \link{character} that can be coerced to a valid antimicrobial drug code with \code{\link[=as.ab]{as.ab()}}.} \item{ab}{Any \link{character} that can be coerced to a valid antimicrobial drug code with \code{\link[=as.ab]{as.ab()}}.}
\item{skew}{Direction of skew for MIC or disk values, either \code{"right"} or \code{"left"}. A left-skewed distribution has the majority of the data on the right.}
\item{severity}{Skew severity; higher values will increase the skewedness. Default is \code{2}; use \code{0} to prevent skewedness.}
\item{...}{Ignored, only in place to allow future extensions.} \item{...}{Ignored, only in place to allow future extensions.}
\item{prob_SIR}{A vector of length 3: the probabilities for "S" (1st value), "I" (2nd value) and "R" (3rd value).} \item{prob_SIR}{A vector of length 3: the probabilities for "S" (1st value), "I" (2nd value) and "R" (3rd value).}
@ -31,17 +37,25 @@ class \code{mic} for \code{\link[=random_mic]{random_mic()}} (see \code{\link[=a
These functions can be used for generating random MIC values and disk diffusion diameters, for AMR data analysis practice. By providing a microorganism and antimicrobial drug, the generated results will reflect reality as much as possible. These functions can be used for generating random MIC values and disk diffusion diameters, for AMR data analysis practice. By providing a microorganism and antimicrobial drug, the generated results will reflect reality as much as possible.
} }
\details{ \details{
The base \R function \code{\link[=sample]{sample()}} is used for generating values. Internally, MIC and disk zone values are sampled based on clinical breakpoints defined in the \link{clinical_breakpoints} data set. To create specific generated values per bug or drug, set the \code{mo} and/or \code{ab} argument. The MICs are sampled on a log2 scale and disks linearly, using weighted probabilities. The weights are based on the \code{skew} and \code{severity} arguments:
\itemize{
Generated values are based on the EUCAST 2025 guideline as implemented in the \link{clinical_breakpoints} data set. To create specific generated values per bug or drug, set the \code{mo} and/or \code{ab} argument. \item \code{skew = "right"} places more emphasis on lower MIC or higher disk values.
\item \code{skew = "left"} places more emphasis on higher MIC or lower disk values.
\item \code{severity} controls the exponential bias applied.
}
} }
\examples{ \examples{
random_mic(25) random_mic(25)
random_disk(25) random_disk(25)
random_sir(25) random_sir(25)
# add more skewedness, make more realistic by setting a bug and/or drug:
disks <- random_disk(100, severity = 2, mo = "Escherichia coli", ab = "CIP")
plot(disks)
# `plot()` and `ggplot2::autoplot()` allow for coloured bars if `mo` and `ab` are set
plot(disks, mo = "Escherichia coli", ab = "CIP", guideline = "CLSI 2025")
\donttest{ \donttest{
# make the random generation more realistic by setting a bug and/or drug:
random_mic(25, "Klebsiella pneumoniae") # range 0.0625-64 random_mic(25, "Klebsiella pneumoniae") # range 0.0625-64
random_mic(25, "Klebsiella pneumoniae", "meropenem") # range 0.0625-16 random_mic(25, "Klebsiella pneumoniae", "meropenem") # range 0.0625-16
random_mic(25, "Streptococcus pneumoniae", "meropenem") # range 0.0625-4 random_mic(25, "Streptococcus pneumoniae", "meropenem") # range 0.0625-4

View File

@ -41,7 +41,7 @@
--bs-success: var(--amr-green-dark) !important; --bs-success: var(--amr-green-dark) !important;
--bs-light: var(--amr-green-light) !important; --bs-light: var(--amr-green-light) !important;
/* --bs-light was this: #128f76a6; that's success with 60% alpha */ /* --bs-light was this: #128f76a6; that's bs-success with 60% alpha */
--bs-info: var(--amr-green-middle) !important; --bs-info: var(--amr-green-middle) !important;
--bs-link-color: var(--amr-green-dark) !important; --bs-link-color: var(--amr-green-dark) !important;
--bs-link-color-rgb: var(--amr-green-dark-rgb) !important; --bs-link-color-rgb: var(--amr-green-dark-rgb) !important;
@ -104,6 +104,16 @@ body.amr-for-python * {
.navbar .algolia-autocomplete .aa-dropdown-menu { .navbar .algolia-autocomplete .aa-dropdown-menu {
background-color: var(--amr-green-dark) !important; background-color: var(--amr-green-dark) !important;
} }
.version-main {
font-weight: bold;
color: var(--bs-navbar-brand-color);
}
.version-build {
font-weight: normal;
opacity: 0.75;
font-size: 0.85em;
}
input[type="search"] { input[type="search"] {
color: var(--bs-tertiary-bg) !important; color: var(--bs-tertiary-bg) !important;
background-color: var(--amr-green-light) !important; background-color: var(--amr-green-light) !important;
@ -149,6 +159,7 @@ this shows on top of every sidebar to the right
margin-top: 10px; margin-top: 10px;
border: 2px dashed var(--amr-green-dark); border: 2px dashed var(--amr-green-dark);
text-align: center; text-align: center;
background: var(--bs-body-bg);
} }
.amr-gpt-assistant * { .amr-gpt-assistant * {
width: 90%; width: 90%;

View File

@ -29,10 +29,22 @@
# ==================================================================== # # ==================================================================== #
*/ */
$(document).ready(function() { $(function () {
// add GPT assistant info // add GPT assistant info
$('aside').prepend('<div class="amr-gpt-assistant"><a target="_blank" href="https://chat.amr-for-r.org"><img src="https://amr-for-r.org/AMRforRGPT.svg"></a></div>'); $('aside').prepend('<div class="amr-gpt-assistant"><a target="_blank" href="https://chat.amr-for-r.org"><img src="https://amr-for-r.org/AMRforRGPT.svg"></a></div>');
// split version number in navbar into main version and build number
$('.nav-text').each(function () {
const $el = $(this);
const text = $.trim($el.text());
const lastDotIndex = text.lastIndexOf('.');
if (lastDotIndex > -1) {
const main = text.substring(0, lastDotIndex);
const build = text.substring(lastDotIndex);
$el.html(`<span class="version-main">${main}</span><span class="version-build">${build}</span>`);
}
});
// replace 'Developers' with 'Maintainers' on the main page, and "Contributors" on the Authors page // replace 'Developers' with 'Maintainers' on the main page, and "Contributors" on the Authors page
$(".developers h2").text("Maintainers"); $(".developers h2").text("Maintainers");
$(".template-citation-authors h1:nth(0)").text("Contributors and Citation"); $(".template-citation-authors h1:nth(0)").text("Contributors and Citation");

View File

@ -127,6 +127,24 @@ test_that("test-zzz.R", {
"type_sum" = "pillar", "type_sum" = "pillar",
# readxl # readxl
"read_excel" = "readxl", "read_excel" = "readxl",
# recipes
"add_step" = "recipes",
"bake" = "recipes",
"check_new_data" = "recipes",
"check_type" = "recipes",
"has_role" = "recipes",
"is_trained" = "recipes",
"prep" = "recipes",
"print_step" = "recipes",
"rand_id" = "recipes",
"recipe" = "recipes",
"recipes_eval_select" = "recipes",
"sel2char" = "recipes",
"step" = "recipes",
"step_center" = "recipes",
"tidy" = "recipes",
# rlang
"enquos" = "rlang",
# rmarkdown # rmarkdown
"html_vignette" = "rmarkdown", "html_vignette" = "rmarkdown",
# skimr # skimr

View File

@ -26,7 +26,14 @@ knitr::opts_chunk$set(
Antimicrobial resistance (AMR) is a global health crisis, and understanding resistance patterns is crucial for managing effective treatments. The `AMR` R package provides robust tools for analysing AMR data, including convenient antimicrobial selector functions like `aminoglycosides()` and `betalactams()`. Antimicrobial resistance (AMR) is a global health crisis, and understanding resistance patterns is crucial for managing effective treatments. The `AMR` R package provides robust tools for analysing AMR data, including convenient antimicrobial selector functions like `aminoglycosides()` and `betalactams()`.
In this post, we will explore how to use the `tidymodels` framework to predict resistance patterns in the `example_isolates` dataset in two examples. In this post, we will explore how to use the `tidymodels` framework to predict resistance patterns in the `example_isolates` dataset in two examples.
This post contains the following examples:
1. Using Antimicrobial Selectors
2. Predicting ESBL Presence Using Raw MICs
3. Predicting AMR Over Time
## Example 1: Using Antimicrobial Selectors ## Example 1: Using Antimicrobial Selectors
@ -208,10 +215,150 @@ This workflow is extensible to other antimicrobial classes and resistance patter
--- ---
## Example 2: Predicting ESBL Presence Using Raw MICs
## Example 2: Predicting AMR Over Time In this second example, we demonstrate how to use `<mic>` columns directly in `tidymodels` workflows using AMR-specific recipe steps. This includes a transformation to `log2` scale using `step_mic_log2()`, which prepares MIC values for use in classification models.
In this second example, we aim to predict antimicrobial resistance (AMR) trends over time using `tidymodels`. We will model resistance to three antibiotics (amoxicillin `AMX`, amoxicillin-clavulanic acid `AMC`, and ciprofloxacin `CIP`), based on historical data grouped by year and hospital ward. This approach and idea formed the basis for the publication [DOI: 10.3389/fmicb.2025.1582703](https://doi.org/10.3389/fmicb.2025.1582703) to model the presence of extended-spectrum beta-lactamases (ESBL).
### **Objective**
Our goal is to:
1. Use raw MIC values to predict whether a bacterial isolate produces ESBL.
2. Apply AMR-aware preprocessing in a `tidymodels` recipe.
3. Train a classification model and evaluate its predictive performance.
### **Data Preparation**
We use the `esbl_isolates` dataset that comes with the AMR package.
```{r}
# Load required libraries
library(AMR)
library(tidymodels)
# View the esbl_isolates data set
esbl_isolates
# Prepare a binary outcome and convert to ordered factor
data <- esbl_isolates %>%
mutate(esbl = factor(esbl, levels = c(FALSE, TRUE), ordered = TRUE))
```
**Explanation:**
- `esbl_isolates`: Contains MIC test results and ESBL status for each isolate.
- `mutate(esbl = ...)`: Converts the target column to an ordered factor for classification.
### **Defining the Workflow**
#### 1. Preprocessing with a Recipe
We use our `step_mic_log2()` function to log2-transform MIC values, ensuring that MICs are numeric and properly scaled. All MIC predictors can easily and agnostically selected using the new `all_mic_predictors()`:
```{r}
# Split into training and testing sets
set.seed(123)
split <- initial_split(data)
training_data <- training(split)
testing_data <- testing(split)
# Define the recipe
mic_recipe <- recipe(esbl ~ ., data = training_data) %>%
remove_role(genus, old_role = "predictor") %>% # Remove non-informative variable
step_mic_log2(all_mic_predictors()) #%>% # Log2 transform all MIC predictors
# prep()
mic_recipe
```
**Explanation:**
- `remove_role()`: Removes irrelevant variables like genus.
- `step_mic_log2()`: Applies `log2(as.numeric(...))` to all MIC predictors in one go.
- `prep()`: Finalises the recipe based on training data.
#### 2. Specifying the Model
We use a simple logistic regression to model ESBL presence, though recent models such as xgboost ([link to `parsnip` manual](https://parsnip.tidymodels.org/reference/details_boost_tree_xgboost.html)) could be much more precise.
```{r}
# Define the model
model <- logistic_reg(mode = "classification") %>%
set_engine("glm")
model
```
**Explanation:**
- `logistic_reg()`: Specifies a binary classification model.
- `set_engine("glm")`: Uses the base R GLM engine.
#### 3. Building the Workflow
```{r}
# Create workflow
workflow_model <- workflow() %>%
add_recipe(mic_recipe) %>%
add_model(model)
workflow_model
```
### **Training and Evaluating the Model**
```{r}
# Fit the model
fitted <- fit(workflow_model, training_data)
# Generate predictions
predictions <- predict(fitted, testing_data) %>%
bind_cols(testing_data)
# Evaluate model performance
our_metrics <- metric_set(accuracy, kap, ppv, npv)
metrics <- our_metrics(predictions, truth = esbl, estimate = .pred_class)
metrics
```
**Explanation:**
- `fit()`: Trains the model on the processed training data.
- `predict()`: Produces predictions for unseen test data.
- `metric_set()`: Allows evaluating multiple classification metrics.
It appears we can predict ESBL gene presence with a positive predictive value (PPV) of `r round(metrics$.estimate[3], 3) * 100`% and a negative predictive value (NPV) of `r round(metrics$.estimate[4], 3) * 100` using a simplistic logistic regression model.
### **Visualising Predictions**
We can visualise predictions by comparing predicted and actual ESBL status.
```{r}
library(ggplot2)
ggplot(predictions, aes(x = esbl, fill = .pred_class)) +
geom_bar(position = "stack") +
labs(title = "Predicted vs Actual ESBL Status",
x = "Actual ESBL",
y = "Count") +
theme_minimal()
```
### **Conclusion**
In this example, we showcased how the new `AMR`-specific recipe steps simplify working with `<mic>` columns in `tidymodels`. The `step_mic_log2()` transformation converts ordered MICs to log2-transformed numerics, improving compatibility with classification models.
This pipeline enables realistic, reproducible, and interpretable modelling of antimicrobial resistance data.
---
## Example 3: Predicting AMR Over Time
In this third example, we aim to predict antimicrobial resistance (AMR) trends over time using `tidymodels`. We will model resistance to three antibiotics (amoxicillin `AMX`, amoxicillin-clavulanic acid `AMC`, and ciprofloxacin `CIP`), based on historical data grouped by year and hospital ward.
### **Objective** ### **Objective**

View File

@ -28,7 +28,7 @@ Note: to keep the package size as small as possible, we only include this vignet
The `AMR` package is a peer-reviewed, [free and open-source](https://amr-for-r.org/#copyright) R package with [zero dependencies](https://en.wikipedia.org/wiki/Dependency_hell) to simplify the analysis and prediction of Antimicrobial Resistance (AMR) and to work with microbial and antimicrobial data and properties, by using evidence-based methods. **Our aim is to provide a standard** for clean and reproducible AMR data analysis, that can therefore empower epidemiological analyses to continuously enable surveillance and treatment evaluation in any setting. We are a team of [many different researchers](https://amr-for-r.org/authors.html) from around the globe to make this a successful and durable project! The `AMR` package is a peer-reviewed, [free and open-source](https://amr-for-r.org/#copyright) R package with [zero dependencies](https://en.wikipedia.org/wiki/Dependency_hell) to simplify the analysis and prediction of Antimicrobial Resistance (AMR) and to work with microbial and antimicrobial data and properties, by using evidence-based methods. **Our aim is to provide a standard** for clean and reproducible AMR data analysis, that can therefore empower epidemiological analyses to continuously enable surveillance and treatment evaluation in any setting. We are a team of [many different researchers](https://amr-for-r.org/authors.html) from around the globe to make this a successful and durable project!
This work was published in the Journal of Statistical Software (Volume 104(3); \doi{10.18637/jss.v104.i03}) and formed the basis of two PhD theses (\doi{10.33612/diss.177417131} and \doi{10.33612/diss.192486375}). This work was published in the Journal of Statistical Software (Volume 104(3); [DOI 10.18637/jss.v104.i03](https://doi.org/10.18637/jss.v104.i03)) and formed the basis of two PhD theses ([DOI 10.33612/diss.177417131](https://doi.org/10.33612/diss.177417131) and [DOI 10.33612/diss.192486375](https://doi.org/10.33612/diss.192486375)).
After installing this package, R knows [**`r AMR:::format_included_data_number(AMR::microorganisms)` distinct microbial species**](https://amr-for-r.org/reference/microorganisms.html) (updated June 2024) and all [**`r AMR:::format_included_data_number(NROW(AMR::antimicrobials) + NROW(AMR::antivirals))` antimicrobial and antiviral drugs**](https://amr-for-r.org/reference/antimicrobials.html) by name and code (including ATC, EARS-Net, ASIARS-Net, PubChem, LOINC and SNOMED CT), and knows all about valid SIR and MIC values. The integral clinical breakpoint guidelines from CLSI `r min(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, grepl("CLSI", guideline))$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, grepl("CLSI", guideline))$guideline)))` and EUCAST `r min(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, grepl("EUCAST", guideline))$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, grepl("EUCAST", guideline))$guideline)))` are included, even with epidemiological cut-off (ECOFF) values. It supports and can read any data format, including WHONET data. This package works on Windows, macOS and Linux with all versions of R since R-3.0 (April 2013). **It was designed to work in any setting, including those with very limited resources**. It was created for both routine data analysis and academic research at the Faculty of Medical Sciences of the [University of Groningen](https://www.rug.nl) and the [University Medical Center Groningen](https://www.umcg.nl). After installing this package, R knows [**`r AMR:::format_included_data_number(AMR::microorganisms)` distinct microbial species**](https://amr-for-r.org/reference/microorganisms.html) (updated June 2024) and all [**`r AMR:::format_included_data_number(NROW(AMR::antimicrobials) + NROW(AMR::antivirals))` antimicrobial and antiviral drugs**](https://amr-for-r.org/reference/antimicrobials.html) by name and code (including ATC, EARS-Net, ASIARS-Net, PubChem, LOINC and SNOMED CT), and knows all about valid SIR and MIC values. The integral clinical breakpoint guidelines from CLSI `r min(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, grepl("CLSI", guideline))$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, grepl("CLSI", guideline))$guideline)))` and EUCAST `r min(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, grepl("EUCAST", guideline))$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, grepl("EUCAST", guideline))$guideline)))` are included, even with epidemiological cut-off (ECOFF) values. It supports and can read any data format, including WHONET data. This package works on Windows, macOS and Linux with all versions of R since R-3.0 (April 2013). **It was designed to work in any setting, including those with very limited resources**. It was created for both routine data analysis and academic research at the Faculty of Medical Sciences of the [University of Groningen](https://www.rug.nl) and the [University Medical Center Groningen](https://www.umcg.nl).