1
0
mirror of https://github.com/msberends/AMR.git synced 2025-07-09 13:42:04 +02:00

(v2.1.1.9260) fix antibiogram

This commit is contained in:
2025-04-29 16:15:18 +02:00
parent 6819473457
commit faa9ae0d85
31 changed files with 195 additions and 2283 deletions

View File

@ -32,11 +32,11 @@
#' @description
#' Welcome to the `AMR` package.
#'
#' The `AMR` package is a peer-reviewed, [free and open-source](https://amr-for-r.org/#copyright) R package with [zero dependencies](https://en.wikipedia.org/wiki/Dependency_hell) to simplify the analysis and prediction of Antimicrobial Resistance (AMR) and to work with microbial and antimicrobial data and properties, by using evidence-based methods. **Our aim is to provide a standard** for clean and reproducible AMR data analysis, that can therefore empower epidemiological analyses to continuously enable surveillance and treatment evaluation in any setting. [Many different researchers](https://amr-for-r.org/authors.html) from around the globe are continually helping us to make this a successful and durable project!
#' The `AMR` package is a peer-reviewed, [free and open-source](https://amr-for-r.org/#copyright) R package with [zero dependencies](https://en.wikipedia.org/wiki/Dependency_hell) to simplify the analysis and prediction of Antimicrobial Resistance (AMR) and to work with microbial and antimicrobial data and properties, by using evidence-based methods. **Our aim is to provide a standard** for clean and reproducible AMR data analysis, that can therefore empower epidemiological analyses to continuously enable surveillance and treatment evaluation in any setting. We are a team of [many different researchers](https://amr-for-r.org/authors.html) from around the globe, working together to make this a successful and durable project!
#'
#' This work was published in the Journal of Statistical Software (Volume 104(3); \doi{10.18637/jss.v104.i03}) and formed the basis of two PhD theses (\doi{10.33612/diss.177417131} and \doi{10.33612/diss.192486375}).
#'
#' After installing this package, R knows [**`r format_included_data_number(AMR::microorganisms)` microorganisms**](https://amr-for-r.org/reference/microorganisms.html) (updated `r format(TAXONOMY_VERSION$GBIF$accessed_date, "%B %Y")`) and all [**`r format_included_data_number(nrow(AMR::antimicrobials) + nrow(AMR::antivirals))` antibiotic, antimycotic and antiviral drugs**](https://amr-for-r.org/reference/antimicrobials.html) by name and code (including ATC, EARS-Net, ASIARS-Net, PubChem, LOINC and SNOMED CT), and knows all about valid SIR and MIC values. The integral clinical breakpoint guidelines from CLSI and EUCAST are included, even with epidemiological cut-off (ECOFF) values. It supports and can read any data format, including WHONET data. This package works on Windows, macOS and Linux with all versions of R since R-3.0 (April 2013). **It was designed to work in any setting, including those with very limited resources**. It was created for both routine data analysis and academic research at the Faculty of Medical Sciences of the public [University of Groningen](https://www.rug.nl), in collaboration with non-profit organisations [Certe Medical Diagnostics and Advice Foundation](https://www.certe.nl) and [University Medical Center Groningen](https://www.umcg.nl).
#' After installing this package, R knows [**`r AMR:::format_included_data_number(AMR::microorganisms)` distinct microbial species**](https://amr-for-r.org/reference/microorganisms.html) (updated June 2024) and all [**`r AMR:::format_included_data_number(NROW(AMR::antimicrobials) + NROW(AMR::antivirals))` antimicrobial and antiviral drugs**](https://amr-for-r.org/reference/antimicrobials.html) by name and code (including ATC, EARS-Net, ASIARS-Net, PubChem, LOINC and SNOMED CT), and knows all about valid SIR and MIC values. The integral clinical breakpoint guidelines from CLSI `r min(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, grepl("CLSI", guideline))$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, grepl("CLSI", guideline))$guideline)))` and EUCAST `r min(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, grepl("EUCAST", guideline))$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, grepl("EUCAST", guideline))$guideline)))` are included, even with epidemiological cut-off (ECOFF) values. It supports and can read any data format, including WHONET data. This package works on Windows, macOS and Linux with all versions of R since R-3.0 (April 2013). **It was designed to work in any setting, including those with very limited resources**. It was created for both routine data analysis and academic research at the Faculty of Medical Sciences of the [University of Groningen](https://www.rug.nl) and the [University Medical Center Groningen](https://www.umcg.nl).
#'
#' The `AMR` package is available in `r vector_and(vapply(FUN.VALUE = character(1), LANGUAGES_SUPPORTED_NAMES, function(x) x$exonym), quotes = FALSE, sort = FALSE)`. Antimicrobial drug (group) names and colloquial microorganism names are provided in these languages.
#' @section Download Our Reference Data:
@ -44,7 +44,7 @@
#'
#' For maximum compatibility, we also provide machine-readable, tab-separated plain text files suitable for use in any software, including laboratory information systems.
#'
#' Visit [our website for direct download links](https://amr-for-r.org/articles/datasets.html), or explore the actual files in [our GitHub repository](https://github.com/msberends/AMR/tree/main/data-raw).
#' Visit [our website for direct download links](https://amr-for-r.org/articles/datasets.html), or explore the actual files in [our GitHub repository](https://github.com/msberends/AMR/tree/main/data-raw/datasets).
#' @source
#' To cite AMR in publications use:
#'

View File

@ -538,9 +538,9 @@ word_wrap <- function(...,
txt = parts[parts %in% c("antimicrobials", "microorganisms", "microorganisms.codes", "microorganisms.groups")]
)
# text starting with `?` must also lead to the help page
parts[parts %like% "^[?]"] <- font_url(
url = paste0("ide:help:AMR::", gsub("?", "", parts[parts %like% "^[?]"], fixed = TRUE)),
txt = parts[parts %like% "^[?]"]
parts[parts %like% "^[?].+"] <- font_url(
url = paste0("ide:help:AMR::", gsub("?", "", parts[parts %like% "^[?].+"], fixed = TRUE)),
txt = parts[parts %like% "^[?].+"]
)
msg <- paste0(parts, collapse = "`")
}

View File

@ -682,9 +682,8 @@ antibiogram.default <- function(x,
wisca_parameters <- data.frame()
# WISCA START
if (wisca == TRUE) {
# WISCA ----
if (isTRUE(has_syndromic_group)) {
colnames(out)[1] <- "syndromic_group"
out_wisca <- out %pm>%
@ -708,9 +707,6 @@ antibiogram.default <- function(x,
warning_("Number of tested isolates should exceed ", minimum, " for each regimen (and group). WISCA coverage estimates might be inaccurate.", call = FALSE)
}
out_wisca$p_susceptible <- out_wisca$n_susceptible / out_wisca$n_tested
out_wisca$p_susceptible[is.nan(out_wisca$p_susceptible)] <- 0
if (isTRUE(has_syndromic_group)) {
out$group <- paste(out$syndromic_group, out$ab)
out_wisca$group <- paste(out_wisca$syndromic_group, out_wisca$ab)
@ -719,31 +715,6 @@ antibiogram.default <- function(x,
out_wisca$group <- out_wisca$ab
}
# create the WISCA parameters, including our priors/posteriors
out$gamma_posterior <- NA_real_
out$beta_posterior_1 <- NA_real_
out$beta_posterior_2 <- NA_real_
for (i in seq_len(NROW(out))) {
out_current <- out[i, , drop = FALSE]
## calculate priors ----
# pathogen incidence (Dirichlet distribution)
gamma_prior <- rep(1, length(unique(out_current$mo))) # Dirichlet prior
gamma_posterior <- gamma_prior + out_current$n_total # Posterior parameters
# regimen susceptibility (Beta distribution)
beta_prior <- rep(1, length(unique(out_current$mo))) # Beta prior
r <- out_current$n_susceptible
n <- out_current$n_tested
beta_posterior_1 <- beta_prior + r # Posterior alpha
beta_posterior_2 <- beta_prior + (n - r) # Posterior beta
out$gamma_posterior[i] <- gamma_posterior
out$beta_posterior_1[i] <- beta_posterior_1
out$beta_posterior_2[i] <- beta_posterior_2
}
wisca_parameters <- out
progress <- progress_ticker(
@ -754,42 +725,28 @@ antibiogram.default <- function(x,
)
on.exit(close(progress))
# run WISCA
# run WISCA per group
for (group in unique(wisca_parameters$group)) {
params_current <- wisca_parameters[which(wisca_parameters$group == group), , drop = FALSE]
params_current <- wisca_parameters[wisca_parameters$group == group, , drop = FALSE]
if (sum(params_current$n_tested, na.rm = TRUE) == 0) {
next
}
# Monte Carlo simulation
coverage_simulations <- replicate(simulations, {
progress$tick()
# prepare priors
priors_current <- create_wisca_priors(params_current)
# simulate pathogen incidence
# = Dirichlet (Gamma) parameters
random_incidence <- stats::runif(n = 1, min = 0, max = 1)
simulated_incidence <- stats::qgamma(
p = random_incidence,
shape = params_current$gamma_posterior,
scale = 1
)
# Monte Carlo simulations
coverage_simulations <- vapply(
FUN.VALUE = double(1),
seq_len(simulations), function(i) {
progress$tick()
simulate_coverage(priors_current)
}
)
# normalise
simulated_incidence <- simulated_incidence / sum(simulated_incidence, na.rm = TRUE)
# simulate susceptibility
# = Beta parameters
random_susceptibity <- stats::runif(n = 1, min = 0, max = 1)
simulated_susceptibility <- stats::qbeta(
p = random_susceptibity,
shape1 = params_current$beta_posterior_1,
shape2 = params_current$beta_posterior_2
)
sum(simulated_incidence * simulated_susceptibility, na.rm = TRUE)
})
# calculate coverage statistics
# summarise results
coverage_mean <- mean(coverage_simulations)
if (interval_side == "two-tailed") {
probs <- c((1 - conf_interval) / 2, 1 - (1 - conf_interval) / 2)
} else if (interval_side == "left") {
@ -797,17 +754,20 @@ antibiogram.default <- function(x,
} else if (interval_side == "right") {
probs <- c(1 - conf_interval, 1)
}
coverage_ci <- unname(stats::quantile(coverage_simulations, probs = probs))
out_wisca$coverage[which(out_wisca$group == group)] <- coverage_mean
out_wisca$lower_ci[which(out_wisca$group == group)] <- coverage_ci[1]
out_wisca$upper_ci[which(out_wisca$group == group)] <- coverage_ci[2]
out_wisca$coverage[out_wisca$group == group] <- coverage_mean
out_wisca$lower_ci[out_wisca$group == group] <- coverage_ci[1]
out_wisca$upper_ci[out_wisca$group == group] <- coverage_ci[2]
}
# remove progress bar from console
close(progress)
# prepare for definitive output
# final output preparation
out <- out_wisca
wisca_parameters <- wisca_parameters[, colnames(wisca_parameters)[!colnames(wisca_parameters) %in% c(levels(NA_sir_), "lower_ci", "upper_ci", "group")], drop = FALSE]
if (isTRUE(has_syndromic_group)) {
long_numeric <- out_wisca %pm>%
pm_ungroup() %pm>%
@ -1346,3 +1306,56 @@ knit_print.antibiogram <- function(x, italicise = TRUE, na = getOption("knitr.ka
out <- paste(c("", "", knitr::kable(x, ..., output = FALSE)), collapse = "\n")
knitr::asis_output(out)
}
# Build the posterior parameters used by the WISCA Monte Carlo simulation.
#
# `data` must provide the numeric columns `n_total`, `n_susceptible` and
# `n_tested`; presumably it holds one row per pathogen for a single regimen
# (group) -- verify against the caller.
#
# Returns a list with three numeric vectors, each as long as the count
# columns of `data`:
#   - gamma_posterior:  1 + n_total                   (pathogen incidence)
#   - beta_posterior_1: 1 + n_susceptible             (posterior alpha)
#   - beta_posterior_2: 1 + (n_tested - n_susceptible) (posterior beta)
create_wisca_priors <- function(data) {
  # Flat priors: Dirichlet(1, ..., 1) for incidence and Beta(1, 1) for
  # susceptibility. A scalar is used on purpose, so the prior always aligns
  # element-wise with the observed counts. Sizing it by the number of unique
  # `mo` values would recycle with a warning when `mo` contains duplicates and
  # would collapse every posterior to numeric(0) when `mo` is absent.
  flat_prior <- 1

  n_susceptible <- data$n_susceptible
  n_not_susceptible <- data$n_tested - n_susceptible

  list(
    gamma_posterior = flat_prior + data$n_total,
    beta_posterior_1 = flat_prior + n_susceptible,
    beta_posterior_2 = flat_prior + n_not_susceptible
  )
}
# Run one Monte Carlo draw of the WISCA coverage estimate.
#
# `params` is a list with the numeric vectors `gamma_posterior`,
# `beta_posterior_1` and `beta_posterior_2` (the structure returned by
# `create_wisca_priors()`). The number of pathogens is taken from the length
# of `gamma_posterior`.
#
# Returns a single number: the sum over pathogens of the normalised simulated
# incidence multiplied by the simulated susceptibility, ignoring missing
# values.
simulate_coverage <- function(params) {
  n_pathogens <- length(params$gamma_posterior)

  # One uniform draw per pathogen for each distribution. The incidence draws
  # are taken first, so the random number stream is consumed in a fixed order.
  # `stats::` is spelled out, like for qgamma()/qbeta() below, so the function
  # does not depend on the stats package being attached.
  random_incidence <- stats::runif(n = n_pathogens)
  random_susceptibility <- stats::runif(n = n_pathogens)

  # pathogen incidence: Gamma quantiles, normalised to sum to 1
  simulated_incidence <- stats::qgamma(
    p = random_incidence,
    shape = params$gamma_posterior,
    scale = 1
  )
  simulated_incidence <- simulated_incidence / sum(simulated_incidence, na.rm = TRUE)

  # regimen susceptibility per pathogen: Beta quantiles
  simulated_susceptibility <- stats::qbeta(
    p = random_susceptibility,
    shape1 = params$beta_posterior_1,
    shape2 = params$beta_posterior_2
  )

  # incidence-weighted coverage
  sum(simulated_incidence * simulated_susceptibility, na.rm = TRUE)
}

54
R/sir.R
View File

@ -729,7 +729,7 @@ as.sir.data.frame <- function(x,
# -- MO
col_mo.bak <- col_mo
if (is.null(col_mo)) {
col_mo <- search_type_in_df(x = x, type = "mo", info = FALSE)
col_mo <- search_type_in_df(x = x, type = "mo", info = info)
}
# -- host
@ -742,7 +742,7 @@ as.sir.data.frame <- function(x,
}
if (breakpoint_type == "animal") {
if (is.null(host)) {
host <- search_type_in_df(x = x, type = "host", add_col_prefix = FALSE)
host <- search_type_in_df(x = x, type = "host", add_col_prefix = FALSE, info = info)
} else if (length(host) == 1 && as.character(host) %in% colnames(x)) {
host <- x[[as.character(host)]]
}
@ -753,7 +753,7 @@ as.sir.data.frame <- function(x,
# -- UTIs
col_uti <- uti
if (is.null(col_uti)) {
col_uti <- search_type_in_df(x = x, type = "uti", add_col_prefix = FALSE)
col_uti <- search_type_in_df(x = x, type = "uti", add_col_prefix = FALSE, info = info)
}
if (!is.null(col_uti)) {
if (is.logical(col_uti)) {
@ -773,7 +773,7 @@ as.sir.data.frame <- function(x,
}
} else {
# col_uti is still NULL - look for specimen column and make logicals of the urines
col_specimen <- suppressMessages(search_type_in_df(x = x, type = "specimen"))
col_specimen <- suppressMessages(search_type_in_df(x = x, type = "specimen", info = info))
if (!is.null(col_specimen)) {
uti <- x[, col_specimen, drop = TRUE] %like% "urin"
values <- sort(unique(x[uti, col_specimen, drop = TRUE]))
@ -846,7 +846,7 @@ as.sir.data.frame <- function(x,
stop_if(is.null(col_mo), "`col_mo` must be set")
# if not null, we already found it, now find again so a message will show
if (is.null(col_mo.bak)) {
col_mo <- search_type_in_df(x = x, type = "mo")
col_mo <- search_type_in_df(x = x, type = "mo", info = info)
}
x_mo <- as.mo(x[, col_mo, drop = TRUE], info = info)
}
@ -854,10 +854,17 @@ as.sir.data.frame <- function(x,
# set up parallel computing
n_cores <- get_n_cores(max_cores = max_cores)
n_cores <- min(n_cores, length(ab_cols)) # never more cores than variables required
if (isTRUE(parallel) && .Platform$OS.type != "windows" && getRversion() < "4.0.0") {
n_cores <- 1
if (isTRUE(info)) {
warning("Parallel computing is not available on unix in R < 4.0", call. = FALSE)
if (isTRUE(parallel) && (.Platform$OS.type == "windows" || getRversion() < "4.0.0")) {
cl <- tryCatch(parallel::makeCluster(n_cores, type = "PSOCK"),
error = function(e) {
if (isTRUE(info)) {
message_("Could not create parallel cluster, using single-core computation. Error message: ", e$message, add_fn = font_red)
}
return(NULL)
}
)
if (is.null(cl)) {
n_cores <- 1
}
}
@ -959,10 +966,10 @@ as.sir.data.frame <- function(x,
if (isTRUE(parallel) && n_cores > 1 && length(ab_cols) > 1) {
if (isTRUE(info)) {
message()
message_("Running in parallel mode using ", n_cores, " out of ", get_n_cores(Inf), " cores, on columns ", vector_and(font_bold(ab_cols, collapse = NULL), quotes = "'", sort = FALSE), "...", as_note = FALSE, appendLF = FALSE, add_fn = font_red)
message_("Running in parallel mode using ", n_cores, " out of ", get_n_cores(Inf), " cores, on columns ", vector_and(font_bold(ab_cols, collapse = NULL), quotes = "'", sort = FALSE), "...", as_note = FALSE, appendLF = FALSE)
}
if (.Platform$OS.type == "windows" || getRversion() < "4.0.0") {
cl <- parallel::makeCluster(n_cores, type = "PSOCK")
# `cl` has been created in the part above before the `run_as_sir_column` function
on.exit(parallel::stopCluster(cl), add = TRUE)
parallel::clusterExport(cl, varlist = c(
"x", "x.bak", "x_mo", "ab_cols", "types",
@ -974,12 +981,13 @@ as.sir.data.frame <- function(x,
), envir = environment())
result_list <- parallel::parLapply(cl, seq_along(ab_cols), run_as_sir_column)
} else {
# R>=4.0 on unix
result_list <- parallel::mclapply(seq_along(ab_cols), run_as_sir_column, mc.cores = n_cores)
}
if (isTRUE(info)) {
message_(" Done.", appendLF = TRUE, as_note = FALSE, add_fn = font_red)
message_(font_green_bg(" DONE "), as_note = FALSE)
message()
message_("Run `sir_interpretation_history()` to retrieve a logbook with all the details of the breakpoint interpretations.", add_fn = font_green)
message_("Run `sir_interpretation_history()` to retrieve a logbook with all details of the breakpoint interpretations.", add_fn = font_green)
}
} else {
# sequential mode (non-parallel)
@ -1116,7 +1124,7 @@ as_sir_method <- function(method_short,
if (isTRUE(info) && message_not_thrown_before("as.sir", "sir_interpretation_history")) {
message()
message_("Run `sir_interpretation_history()` afterwards to retrieve a logbook with all the details of the breakpoint interpretations.\n\n", add_fn = font_green)
message_("Run `sir_interpretation_history()` afterwards to retrieve a logbook with all details of the breakpoint interpretations.\n\n", add_fn = font_green)
}
current_df <- tryCatch(get_current_data(NA, 0), error = function(e) NULL)
@ -1200,7 +1208,7 @@ as_sir_method <- function(method_short,
mo <- NULL
try(
{
mo <- suppressMessages(search_type_in_df(df, "mo", add_col_prefix = FALSE))
mo <- suppressMessages(search_type_in_df(df, "mo", add_col_prefix = FALSE, info = info))
},
silent = TRUE
)
@ -1236,7 +1244,7 @@ as_sir_method <- function(method_short,
uti <- NULL
try(
{
uti <- suppressMessages(search_type_in_df(df, "uti", add_col_prefix = FALSE))
uti <- suppressMessages(search_type_in_df(df, "uti", add_col_prefix = FALSE, info = info))
},
silent = TRUE
)
@ -1441,14 +1449,7 @@ as_sir_method <- function(method_short,
if (nrow(breakpoints) == 0) {
# apparently no breakpoints found
if (isTRUE(info)) {
message(
paste0(font_rose_bg(" WARNING "), "\n"),
font_black(paste0(
" ", AMR_env$bullet_icon, " No ", method_coerced, " breakpoints available for ",
suppressMessages(suppressWarnings(ab_name(unique(ab_coerced), language = NULL, tolower = TRUE, info = info))),
" (", unique(ab_coerced), ")."
), collapse = "\n")
)
message(font_grey_bg(font_black(" NO BREAKPOINTS ")))
}
load_mo_uncertainties(metadata_mo)
@ -1829,12 +1830,13 @@ as_sir_method <- function(method_short,
message(font_yellow_bg(" NOTE "))
}
notes <- unique(notes)
if (isTRUE(verbose) || length(notes) == 1 || NROW(AMR_env$sir_interpretation_history) == 0) {
# if (isTRUE(verbose) || length(notes) == 1 || NROW(AMR_env$sir_interpretation_history) == 0) {
if (isTRUE(verbose)) {
for (i in seq_along(notes)) {
message(word_wrap(" ", AMR_env$bullet_icon, " ", notes[i], add_fn = font_black))
}
} else {
message(word_wrap(" ", AMR_env$bullet_icon, " There were multiple notes. Print or View `sir_interpretation_history()` to examine them, or use `as.sir(..., verbose = TRUE)` next time to directly print them here.", add_fn = font_black))
# message(word_wrap(" ", AMR_env$bullet_icon, " There were multiple notes. Print or View `sir_interpretation_history()` to examine them, or use `as.sir(..., verbose = TRUE)` next time to directly print them here.", add_fn = font_black))
}
} else {
message(font_green_bg(" OK "))