1
0
mirror of https://github.com/msberends/AMR.git synced 2026-06-24 06:16:24 +02:00

(v3.0.1.9059) Update taxonomy of microorganisms

This commit is contained in:
Matthijs Berends
2026-06-23 01:38:13 +02:00
committed by GitHub
parent 0af3f84655
commit 3f9f931777
123 changed files with 121928 additions and 94162 deletions

View File

@@ -45,7 +45,7 @@
#' For maximum compatibility, we also provide machine-readable, tab-separated plain text files suitable for use in any software, including laboratory information systems.
#'
#' Visit [our website for direct download links](https://amr-for-r.org/articles/datasets.html), or explore the actual files in [our GitHub repository](https://github.com/msberends/AMR/tree/main/data-raw/datasets).
#' @source
#' @references
#' To cite AMR in publications use:
#'
#' Berends MS, Luz CF, Friedrich AW, Sinha BNM, Albers CJ, Glasner C (2022). "AMR: An R Package for Working with Antimicrobial Resistance Data." _Journal of Statistical Software_, *104*(3), 1-31. \doi{10.18637/jss.v104.i03}

View File

@@ -104,26 +104,27 @@ EUCAST_VERSION_EXPECTED_PHENOTYPES <- list(
TAXONOMY_VERSION <- list(
GBIF = list(
name = "Global Biodiversity Information Facility (GBIF)",
accessed_date = as.Date("2024-06-24"),
citation = "GBIF Secretariat (2023). GBIF Backbone Taxonomy. Checklist dataset \\doi{10.15468/39omei}.",
name = "Global Biodiversity Information Facility (GBIF), via Catalogue of Life (COL)",
accessed_date = as.Date("2026-05-07"),
# take the citation from https://www.gbif.org/dataset/7ddf754f-d193-4cc9-b351-99906754a03b#citation
citation = "Banki, O. *et al.* (2026). Catalogue of Life (2026-04-18 XR). Catalogue of Life Foundation, Amsterdam, Netherlands. \\doi{10.48580/dgxjw}.",
url = "https://www.gbif.org"
),
LPSN = list(
name = "List of Prokaryotic names with Standing in Nomenclature (LPSN)",
accessed_date = as.Date("2024-06-24"),
citation = "Parte, AC *et al.* (2020). **List of Prokaryotic names with Standing in Nomenclature (LPSN) moves to the DSMZ.** International Journal of Systematic and Evolutionary Microbiology, 70, 5607-5612; \\doi{10.1099/ijsem.0.004332}.",
accessed_date = as.Date("2026-05-07"),
citation = "Freese, HM *et al.* (2026). **TYGS and LPSN in 2025: a Global Core Biodata Resource for genome-based classification and nomenclature of prokaryotes within DSMZ Digital Diversity.** Nucleic Acids Research, 54, D884\u2013D891; \\doi{10.1093/nar/gkaf1110}.",
url = "https://lpsn.dsmz.de"
),
MycoBank = list(
name = "MycoBank",
accessed_date = as.Date("2024-06-24"),
accessed_date = as.Date("2026-05-07"),
citation = "Vincent, R *et al* (2013). **MycoBank gearing up for new horizons.** IMA Fungus, 4(2), 371-9; \\doi{10.5598/imafungus.2013.04.02.16}.",
url = "https://www.mycobank.org"
),
BacDive = list(
name = "BacDive",
accessed_date = as.Date("2024-07-16"),
accessed_date = as.Date("2026-05-07"),
citation = "Reimer, LC *et al.* (2022). ***BacDive* in 2022: the knowledge base for standardized bacterial and archaeal data.** Nucleic Acids Res., 50(D1):D741-D74; \\doi{10.1093/nar/gkab961}.",
url = "https://bacdive.dsmz.de"
),
@@ -148,10 +149,13 @@ TAXONOMY_VERSION <- list(
)
globalVariables(c(
".coverage",
".GenericCallEnv",
".lower",
".mo",
".rowid",
".syndromic_group",
".upper",
"ab",
"ab_txt",
"affect_ab_name",
@@ -187,6 +191,7 @@ globalVariables(c(
"hjust",
"host_index",
"host_match",
"incidence",
"input",
"input_given",
"intrinsic_resistant",
@@ -214,6 +219,7 @@ globalVariables(c(
"old",
"old_name",
"p_susceptible",
"pathogen",
"pattern",
"R",
"rank_index",

View File

@@ -489,7 +489,11 @@ cli_to_plain <- function(msg, envir = parent.frame()) {
if (length(m) >= 2L) m[2L] else paste0("?", resolve(c))
})
msg <- apply_sub(msg, "\\{\\.url (\\{[^}]+\\}|[^}]+)\\}", function(c) resolve(c))
msg <- apply_sub(msg, "\\{\\.href ([^}]+)\\}", function(c) strsplit(resolve(c), " ", fixed = TRUE)[[1L]][1L])
msg <- apply_sub(msg, "\\{\\.href ([^}]+)\\}", function(c) {
# Handle [display text](url) markdown link format: extract just the URL
m <- regmatches(c, regexec("^\\[.*\\]\\(([^)]+)\\)$", c))[[1L]]
if (length(m) >= 2L) m[2L] else resolve(c)
})
# bare {variable} or {expression} -> evaluate in caller's environment
while (grepl("\\{[^{}]+\\}", msg)) {
@@ -551,7 +555,7 @@ word_wrap <- function(...,
indentation <- 0L + extra_indent
}
if (indentation > 0L) {
wrapped <- gsub("\n", paste0("\n", strrep(" ", indentation)), wrapped, fixed = TRUE)
wrapped <- gsub("\n", paste0("\n", strrep("\u00a0", indentation)), wrapped, fixed = TRUE)
}
gsub("(\n| )+$", "", wrapped)
}
@@ -583,13 +587,27 @@ simplify_help_markup <- function(msg) {
message_ <- function(...,
appendLF = TRUE,
as_note = TRUE) {
as_note = TRUE,
as_check = FALSE,
extra_indent = 0,
with_bullet = FALSE) {
msg <- paste0(c(...), collapse = "")
if (with_bullet == TRUE) {
as_note <- FALSE
msg <- paste0(AMR_env$bullet_icon, "\u00a0", msg)
}
if (extra_indent > 0) {
msg <- paste0(strrep("\u00a0", extra_indent), msg)
}
if (has_cli_rlang()) {
msg <- paste0(c(...), collapse = "")
# prevent errors with single opening curly brackets, we don't evaluate cli's/glue's {} in AMR anyway
msg <- gsub("\\{(?!\\.)", "", msg, perl = TRUE)
if (!cli::ansi_has_hyperlink_support()) {
msg <- simplify_help_markup(msg)
}
if (isTRUE(as_note)) {
if (isTRUE(as_check)) {
cli::cli_inform(c("v" = msg), .envir = parent.frame())
} else if (isTRUE(as_note)) {
cli::cli_inform(c("i" = msg), .envir = parent.frame())
} else if (isTRUE(appendLF)) {
cli::cli_inform(msg, .envir = parent.frame())
@@ -598,22 +616,28 @@ message_ <- function(...,
cat(format_inline_(msg), file = stderr())
}
} else {
plain_msg <- cli_to_plain(paste0(c(...), collapse = ""), envir = parent.frame())
plain_msg <- cli_to_plain(msg, envir = parent.frame())
message(word_wrap(plain_msg, as_note = as_note), appendLF = appendLF)
}
}
warning_ <- function(...,
immediate = FALSE,
call = FALSE) {
call = FALSE,
extra_indent = 0) {
msg <- paste0(c(...), collapse = "")
if (extra_indent > 0) {
msg <- paste0(strrep("\u00a0", extra_indent), msg)
}
if (has_cli_rlang()) {
msg <- paste0(c(...), collapse = "")
# prevent errors with single opening curly brackets, we don't evaluate cli's/glue's {} in AMR anyway
msg <- gsub("\\{(?!\\.)", "", msg, perl = TRUE)
if (!cli::ansi_has_hyperlink_support()) {
msg <- simplify_help_markup(msg)
}
cli::cli_warn(msg, .envir = parent.frame())
} else {
plain_msg <- cli_to_plain(paste0(c(...), collapse = ""), envir = parent.frame())
plain_msg <- cli_to_plain(msg, envir = parent.frame())
warning(trimws2(word_wrap(plain_msg, as_note = FALSE)), immediate. = immediate, call. = call)
}
}
@@ -621,8 +645,15 @@ warning_ <- function(...,
# this alternative to the stop() function:
# - adds the function name where the error was thrown (plain-text fallback)
# - wraps text to never break lines within words (plain-text fallback)
stop_ <- function(..., call = TRUE) {
stop_ <- function(...,
call = TRUE,
extra_indent = 0) {
msg <- paste0(c(...), collapse = "")
if (extra_indent > 0) {
msg <- paste0(strrep("\u00a0", extra_indent), msg)
}
# prevent errors with single opening curly brackets, we don't evaluate cli's/glue's {} in AMR anyway
msg <- gsub("\\{(?!\\.)", "", msg, perl = TRUE)
if (!cli::ansi_has_hyperlink_support()) {
msg <- simplify_help_markup(msg)
}
@@ -727,7 +758,7 @@ documentation_date <- function(d) {
suffix[day %in% c(1, 21, 31)] <- "st"
suffix[day %in% c(2, 22)] <- "nd"
suffix[day %in% c(3, 23)] <- "rd"
paste0(month.name[as.integer(format(d, "%m"))], " ", day, suffix, ", ", format(d, "%Y"))
paste0(day, suffix, " of ", month.name[as.integer(format(d, "%m"))], ", ", format(d, "%Y"))
}
format_included_data_number <- function(data) {
@@ -1635,14 +1666,14 @@ add_MO_lookup_to_AMR_env <- function() {
if (is.null(AMR_env$MO_lookup)) {
MO_lookup <- AMR::microorganisms
MO_lookup$kingdom_index <- NA_real_
MO_lookup[which(MO_lookup$kingdom == "Bacteria" | as.character(MO_lookup$mo) == "UNKNOWN"), "kingdom_index"] <- 1
MO_lookup[which(MO_lookup$kingdom == "Fungi"), "kingdom_index"] <- 1.25
MO_lookup[which(MO_lookup$kingdom == "Protozoa"), "kingdom_index"] <- 1.5
MO_lookup[which(MO_lookup$kingdom == "Chromista"), "kingdom_index"] <- 1.75
MO_lookup[which(MO_lookup$kingdom == "Archaea"), "kingdom_index"] <- 2
MO_lookup$domain_index <- NA_real_
MO_lookup[which(MO_lookup$domain == "Bacteria" | as.character(MO_lookup$mo) == "UNKNOWN"), "domain_index"] <- 1
MO_lookup[which(MO_lookup$domain == "Fungi"), "domain_index"] <- 1.25
MO_lookup[which(MO_lookup$domain == "Protozoa"), "domain_index"] <- 1.5
MO_lookup[which(MO_lookup$domain == "Chromista"), "domain_index"] <- 1.75
MO_lookup[which(MO_lookup$domain == "Archaea"), "domain_index"] <- 2
# all the rest
MO_lookup[which(is.na(MO_lookup$kingdom_index)), "kingdom_index"] <- 3
MO_lookup[which(is.na(MO_lookup$domain_index)), "domain_index"] <- 3
# the fullname lowercase, important for the internal algorithms in as.mo()
MO_lookup$fullname_lower <- tolower(trimws2(paste(

2
R/ab.R
View File

@@ -54,7 +54,7 @@
#' @section Source:
#' World Health Organization (WHO) Collaborating Centre for Drug Statistics Methodology: \url{https://atcddd.fhi.no/atc_ddd_index/}
#'
#' European Commission Public Health PHARMACEUTICALS - COMMUNITY REGISTER: \url{https://health.ec.europa.eu/documents/community-register/html/reg_hum_atc.htm}
#' European Commission Public Health PHARMACEUTICALS - COMMUNITY REGISTER: \url{https://health.ec.europa.eu/documents/community-register/html/index_en.htm}
#' @aliases ab
#' @return A [character] [vector] with additional class [`ab`]
#' @seealso

View File

@@ -61,7 +61,9 @@
#' @param combine_SI A [logical] to indicate whether all susceptibility should be determined by results of either S, SDD, or I, instead of only S (default is `TRUE`).
#' @param sep A separating character for antimicrobial columns in combination antibiograms.
#' @param sort_columns A [logical] to indicate whether the antimicrobial columns must be sorted on name.
#' @param wisca A [logical] to indicate whether a Weighted-Incidence Syndromic Combination Antibiogram (WISCA) must be generated (default is `FALSE`). This will use a Bayesian decision model to estimate regimen coverage probabilities using [Monte Carlo simulations](https://en.wikipedia.org/wiki/Monte_Carlo_method). Set `simulations`, `conf_interval`, and `interval_side` to adjust.
#' @param wisca A [logical] to indicate whether a Weighted-Incidence Syndromic Combination Antibiogram (WISCA) must be generated (default is `FALSE`). This will use a Bayesian decision model to estimate regimen coverage probabilities using [Monte Carlo simulations](https://en.wikipedia.org/wiki/Monte_Carlo_method). Per \doi{10.1093/jac/dkv397}, susceptibility priors are \eqn{\beta(0.5, 0.5)} (Jeffreys) and intrinsically resistant pairs (based on [intrinsic_resistant]) use \eqn{\beta(1, 9999)}.
#'
#' Set `simulations`, `conf_interval`, and `interval_side` to adjust.
#' @param simulations (for WISCA) a numerical value to set the number of Monte Carlo simulations.
#' @param conf_interval A numerical value to set confidence interval (default is `0.95`).
#' @param interval_side The side of the confidence interval, either `"two-tailed"` (default), `"left"` or `"right"`.
@@ -166,6 +168,10 @@
#'
#' WISCA uses a sophisticated Bayesian decision model to combine both local and pooled antimicrobial resistance data. This approach not only evaluates local patterns but can also draw on multi-centre data sets to improve regimen accuracy, even in low-incidence infections like paediatric bloodstream infections (BSIs).
#'
#' **Prior Distributions**
#'
#' When `wisca = TRUE` or when using `wisca()`, pathogen incidence is modelled with a non-informative \eqn{Dirichlet(1, 1, \ldots, 1)} prior. Susceptibility proportions use the Jeffreys prior, \eqn{\beta(0.5, 0.5)}, except for bug-drug combinations with known intrinsic resistance, which use a strongly informative \eqn{\beta(1, 9999)} prior that forces near-zero susceptibility regardless of observed data (Bielicki *et al.*, 2016). Intrinsic resistance is determined using the [intrinsic_resistant] data set, which is based on `r format_eucast_version_nr(names(EUCAST_VERSION_EXPECTED_PHENOTYPES[1]))`.
#'
#' ### Grouped tibbles
#'
#' For any type of antibiogram, grouped [tibbles][tibble::tibble] can also be used to calculate susceptibilities over various groups.
@@ -266,7 +272,7 @@
#' It weights susceptibility by pathogen prevalence within a clinical syndrome and provides credible intervals around the expected coverage.
#'
#' For more background, interpretation, and examples, see [the WISCA vignette](https://amr-for-r.org/articles/WISCA.html).
#' @source
#' @references
#' * Bielicki JA *et al.* (2016). **Selecting appropriate empirical antibiotic regimens for paediatric bloodstream infections: application of a Bayesian decision model to local and pooled antimicrobial resistance surveillance data** *Journal of Antimicrobial Chemotherapy* 71(3); \doi{10.1093/jac/dkv397}
#' * Bielicki JA *et al.* (2020). **Evaluation of the coverage of 3 antibiotic regimens for neonatal sepsis in the hospital setting across Asian countries** *JAMA Netw Open.* 3(2):e1921124; \doi{10.1001/jamanetworkopen.2019.21124}
#' * Klinker KP *et al.* (2021). **Antimicrobial stewardship and antibiograms: importance of moving beyond traditional antibiograms**. *Therapeutic Advances in Infectious Disease*, May 5;8:20499361211011373; \doi{10.1177/20499361211011373}
@@ -446,11 +452,11 @@ antibiogram.default <- function(x,
meet_criteria(x, allow_class = "data.frame")
x <- ascertain_sir_classes(x, "x")
meet_criteria(wisca, allow_class = "logical", has_length = 1)
if (isTRUE(wisca)) {
if (wisca) {
if (!is.null(mo_transform) && !missing(mo_transform)) {
warning_("WISCA must be based on the species level as WISCA parameters are based on this. For that reason, {.arg mo_transform} will be ignored.")
}
mo_transform <- function(x) suppressMessages(suppressWarnings(paste(mo_genus(x, keep_synonyms = TRUE, language = NULL), mo_species(x, keep_synonyms = TRUE, language = NULL))))
mo_transform <- function(x) suppressMessages(suppressWarnings(as.mo(x, keep_synonyms = TRUE, language = NULL, info = FALSE)))
}
if ("antibiotics" %in% names(list(...))) {
deprecation_warning("antibiotics", "antimicrobials", fn = "antibiogram", is_argument = TRUE)
@@ -484,11 +490,39 @@ antibiogram.default <- function(x,
meet_criteria(parallel, allow_class = "logical", has_length = 1)
# parallel gate - identical pattern to as.sir()
if (requireNamespace("future.apply", quietly = TRUE) && !inherits(future::plan(), "sequential")) {
if (isFALSE(parallel)) {
message_("Assuming {.code parallel = TRUE} since parallel computing has been set up using the {.pkg future} package before. Set {.help [{.fun plan}](future::plan)} to sequential to prevent this.")
if (requireNamespace("future.apply", quietly = TRUE)) {
if (!inherits(future::plan(), "sequential")) {
if (isFALSE(parallel)) {
message_("Assuming {.code parallel = TRUE} since parallel computing has been set up using the {.pkg future} package before. Set {.help [{.fun plan}](future::plan)} to sequential to prevent this.")
}
parallel <- TRUE
}
if (wisca && interactive() && inherits(future::plan(), "sequential") && isFALSE(parallel) && simulations > 100) {
advised_multi <- ifelse(.Platform$OS.type == "windows" || in_rstudio(), "multisession", "multicore")
message_("Are you sure you want to run in non-parallel (=sequential) mode?", as_note = FALSE)
message_("WISCA can take a long time for the ", simulations * length(antimicrobials), " simulations you require, and you already have the {.pkg future} package installed.", as_note = FALSE)
q <- utils::menu(c(
"Yes, still run in sequential mode",
format_inline_("No, run in parallel mode and set {.help [future::plan(", advised_multi, ")](future::plan)}, and reset after WISCA finishes"),
format_inline_("No, run in parallel mode and set {.help [future::plan(", advised_multi, ")](future::plan)}, and do not reset afterwards"),
"Cancel WISCA calculation"
), graphics = FALSE, title = "")
if (q %in% c(4, 0)) {
return(invisible(NULL))
} else if (q %in% c(2, 3)) {
parallel <- TRUE
obj <- get(advised_multi, envir = asNamespace("future"))
future::plan(obj)
if (q == 2) {
on.exit({
# clean-up parallel setting
message_("Resetting {.fn future::plan}...", as_note = FALSE)
future::plan(future::sequential)
message_("Parallel setting was reset to `future::plan(future::sequential)`.", as_check = TRUE)
})
}
}
}
parallel <- TRUE
}
if (isTRUE(parallel)) {
stop_ifnot(
@@ -519,21 +553,26 @@ antibiogram.default <- function(x,
# transform MOs
x$`.mo` <- x[, col_mo, drop = TRUE]
if (is.null(mo_transform)) {
# leave as is, no transformation
} else if (is.function(mo_transform)) {
x$`.mo` <- mo_transform(x$`.mo`)
} else if (is.na(mo_transform)) {
x$`.mo` <- NA_character_
} else if (mo_transform == "gramstain") {
x$`.mo` <- mo_gramstain(x$`.mo`, language = language)
} else if (mo_transform == "shortname") {
x$`.mo` <- mo_shortname(x$`.mo`, language = language)
} else if (mo_transform == "name") {
x$`.mo` <- mo_name(x$`.mo`, language = language)
# leave as is, no transformation, but do add backup
x$`.mo.bak` <- x$`.mo`
} else {
x$`.mo` <- mo_property(x$`.mo`, property = mo_transform, language = language)
x$`.mo` <- as.mo(x$`.mo`, keep_synonyms = TRUE, info = FALSE)
x$`.mo.bak` <- x$`.mo`
if (is.function(mo_transform)) {
x$`.mo` <- mo_transform(x$`.mo`)
} else if (is.na(mo_transform)) {
x$`.mo` <- NA_character_
} else if (mo_transform == "gramstain") {
x$`.mo` <- mo_gramstain(x$`.mo`, language = language)
} else if (mo_transform == "shortname") {
x$`.mo` <- mo_shortname(x$`.mo`, language = language)
} else if (mo_transform == "name") {
x$`.mo` <- mo_name(x$`.mo`, language = language)
} else {
x$`.mo` <- mo_property(x$`.mo`, property = mo_transform, language = language)
}
}
x$`.mo`[is.na(x$`.mo`)] <- "(??)"
x$`.mo`[x$`.mo` %in% c(NA, "UNKNOWN")] <- "(??)"
# get syndromic groups
if (!is.null(syndromic_group)) {
@@ -702,7 +741,7 @@ antibiogram.default <- function(x,
wisca_parameters <- data.frame()
# WISCA START
# WISCA START ----
if (wisca == TRUE) {
if (isTRUE(has_syndromic_group)) {
colnames(out)[1] <- "syndromic_group"
@@ -736,6 +775,8 @@ antibiogram.default <- function(x,
}
wisca_parameters <- out
wisca_draws <- list()
wisca_components <- list()
# quantile probabilities are constant across all groups
probs <- if (interval_side == "two-tailed") {
@@ -751,6 +792,7 @@ antibiogram.default <- function(x,
use_parallel_wisca <- isTRUE(parallel) && n_workers > 1L && length(unique_groups) > 0L
if (use_parallel_wisca) {
## WISCA parallel ----
if (isTRUE(info)) {
message_("Running WISCA in parallel mode using ", n_workers, " workers...", as_note = FALSE, appendLF = FALSE)
}
@@ -759,13 +801,17 @@ antibiogram.default <- function(x,
chunks_per_group <- max(1L, ceiling(n_workers / length(unique_groups)))
chunk_sizes <- diff(c(0L, round(seq_len(chunks_per_group) * simulations / chunks_per_group)))
params_g_lookup <- list()
# precompute priors per group and build (group, chunk) job list
jobs <- unlist(lapply(unique_groups, function(g) {
params_g <- wisca_parameters[wisca_parameters$group == g, , drop = FALSE]
if (sum(params_g$n_tested, na.rm = TRUE) == 0L) {
return(NULL)
}
priors_g <- create_wisca_priors(params_g)
# store for later reassembly
params_g_lookup[[g]] <<- params_g
priors_g <- create_wisca_priors(params_g, sep = sep)
lapply(seq_along(chunk_sizes), function(ch) {
list(group = g, priors = priors_g, n_sims = chunk_sizes[ch])
})
@@ -773,24 +819,46 @@ antibiogram.default <- function(x,
jobs <- Filter(Negate(is.null), jobs)
flat <- future.apply::future_lapply(jobs, function(job) {
vapply(FUN.VALUE = double(1), seq_len(job$n_sims), function(i) {
simulate_coverage(job$priors)
})
n_p <- length(job$priors$gamma_posterior)
n_s <- job$n_sims
inc_mat <- matrix(NA_real_, nrow = n_s, ncol = n_p)
susc_mat <- matrix(NA_real_, nrow = n_s, ncol = n_p)
cov_vec <- numeric(n_s)
for (i in seq_len(n_s)) {
inc_raw <- stats::rgamma(n_p, shape = job$priors$gamma_posterior, scale = 1)
inc_norm <- inc_raw / sum(inc_raw)
susc <- stats::rbeta(n_p,
shape1 = job$priors$beta_posterior_1,
shape2 = job$priors$beta_posterior_2
)
inc_mat[i, ] <- inc_norm
susc_mat[i, ] <- susc
cov_vec[i] <- sum(inc_norm * susc)
}
list(coverage = cov_vec, incidence = inc_mat, susceptibility = susc_mat)
}, future.seed = TRUE)
# reassemble per group: concatenate chunks, then summarise
for (g in unique_groups) {
g_idx <- vapply(jobs, function(j) identical(j$group, g), logical(1))
if (!any(g_idx)) next
sims <- unlist(flat[g_idx], use.names = FALSE)
chunks <- flat[g_idx]
sims <- unlist(lapply(chunks, `[[`, "coverage"), use.names = FALSE)
inc_combined <- do.call(rbind, lapply(chunks, `[[`, "incidence"))
susc_combined <- do.call(rbind, lapply(chunks, `[[`, "susceptibility"))
colnames(inc_combined) <- as.character(params_g_lookup[[g]]$mo)
colnames(susc_combined) <- as.character(params_g_lookup[[g]]$mo)
wisca_draws[[g]] <- sims
wisca_components[[g]] <- list(incidence = inc_combined, susceptibility = susc_combined)
out_wisca$coverage[out_wisca$group == g] <- mean(sims)
ci_vals <- unname(stats::quantile(sims, probs = probs))
out_wisca$lower_ci[out_wisca$group == g] <- ci_vals[1]
out_wisca$upper_ci[out_wisca$group == g] <- ci_vals[2]
}
if (isTRUE(info)) message_(font_green_bg(" DONE "), as_note = FALSE)
if (isTRUE(info)) message_(font_green_bg("\u00a0DONE\u00a0"), as_note = FALSE)
} else {
## WISCA sequential ----
progress <- progress_ticker(
n = length(unique_groups) * simulations,
n_min = 25,
@@ -802,16 +870,35 @@ antibiogram.default <- function(x,
for (group in unique_groups) {
params_current <- wisca_parameters[wisca_parameters$group == group, , drop = FALSE]
if (sum(params_current$n_tested, na.rm = TRUE) == 0) next
priors_current <- create_wisca_priors(params_current)
coverage_simulations <- vapply(
FUN.VALUE = double(1),
seq_len(simulations), function(i) {
progress$tick()
simulate_coverage(priors_current)
}
priors_current <- create_wisca_priors(params_current, sep = sep)
# replace the vapply block in the sequential branch with:
n_pathogens_g <- length(priors_current$gamma_posterior)
sim_coverage <- numeric(simulations)
sim_incidence <- matrix(NA_real_, nrow = simulations, ncol = n_pathogens_g)
sim_susceptibility <- matrix(NA_real_, nrow = simulations, ncol = n_pathogens_g)
colnames(sim_incidence) <- as.character(params_current$mo)
colnames(sim_susceptibility) <- as.character(params_current$mo)
for (i in seq_len(simulations)) {
progress$tick()
inc_raw <- stats::rgamma(n_pathogens_g, shape = priors_current$gamma_posterior, scale = 1)
inc_norm <- inc_raw / sum(inc_raw)
susc <- stats::rbeta(n_pathogens_g,
shape1 = priors_current$beta_posterior_1,
shape2 = priors_current$beta_posterior_2
)
sim_incidence[i, ] <- inc_norm
sim_susceptibility[i, ] <- susc
sim_coverage[i] <- sum(inc_norm * susc)
}
wisca_draws[[group]] <- sim_coverage
wisca_components[[group]] <- list(
incidence = sim_incidence,
susceptibility = sim_susceptibility
)
out_wisca$coverage[out_wisca$group == group] <- mean(coverage_simulations)
ci_vals <- unname(stats::quantile(coverage_simulations, probs = probs))
out_wisca$coverage[out_wisca$group == group] <- mean(sim_coverage)
ci_vals <- unname(stats::quantile(sim_coverage, probs = probs))
out_wisca$lower_ci[out_wisca$group == group] <- ci_vals[1]
out_wisca$upper_ci[out_wisca$group == group] <- ci_vals[2]
}
@@ -1039,14 +1126,23 @@ antibiogram.default <- function(x,
}
}
if (wisca) {
names(wisca_draws) <- out$ab
names(wisca_components) <- out$ab
}
out <- structure(as_original_data_class(new_df, class(x), extra_class = "antibiogram"),
has_syndromic_group = has_syndromic_group,
combine_SI = combine_SI,
wisca = wisca,
conf_interval = conf_interval,
simulations = if (isFALSE(wisca)) NULL else simulations,
formatting_type = formatting_type,
wisca_parameters = as_original_data_class(wisca_parameters, class(x)),
long_numeric = as_original_data_class(long_numeric, class(x))
sep = sep,
wisca_parameters = if (isFALSE(wisca)) NULL else as_original_data_class(wisca_parameters, class(x)),
long_numeric = as_original_data_class(long_numeric, class(x)),
wisca_draws = if (isFALSE(wisca)) NULL else wisca_draws,
wisca_components = if (isFALSE(wisca)) NULL else wisca_components
)
rownames(out) <- NULL
out
@@ -1079,6 +1175,7 @@ antibiogram.grouped_df <- function(x,
stop_ifnot(is.null(mo_transform), "{.arg mo_transform} must not be set if creating an antibiogram using a grouped tibble. The groups will become the variables over which the antimicrobials are calculated, which could include the pathogen information (though not necessary). Nonetheless, this makes {.arg mo_transform} redundant.", call = FALSE)
stop_ifnot(is.null(syndromic_group), "{.arg syndromic_group} must not be set if creating an antibiogram using a grouped tibble. The groups will become the variables over which the antimicrobials are calculated, making {.arg syndromic_group} redundant.", call = FALSE)
meet_criteria(parallel, allow_class = "logical", has_length = 1)
meet_criteria(wisca, allow_class = "logical", has_length = 1)
groups <- attributes(x)$groups
n_groups <- NROW(groups)
@@ -1191,7 +1288,7 @@ antibiogram.grouped_df <- function(x,
new_out[, col_name] <- col_value
new_out <- new_out[, c(col_name, setdiff(names(new_out), col_name))] # set place to 1st col
if (isTRUE(wisca)) {
if (wisca) {
new_wisca_parameters[, col_name] <- col_value
new_wisca_parameters <- new_wisca_parameters[, c(col_name, setdiff(names(new_wisca_parameters), col_name))] # set place to 1st col
}
@@ -1211,14 +1308,25 @@ antibiogram.grouped_df <- function(x,
}
}
wisca_draws_all <- NULL
wisca_components_all <- NULL
if (wisca) {
wisca_draws_all <- unlist(lapply(results_raw, function(r) attributes(r)$wisca_draws), recursive = FALSE)
wisca_components_all <- unlist(lapply(results_raw, function(r) attributes(r)$wisca_components), recursive = FALSE)
}
out <- structure(as_original_data_class(out, class(x), extra_class = "antibiogram"),
has_syndromic_group = FALSE,
combine_SI = isTRUE(combine_SI),
wisca = isTRUE(wisca),
wisca = wisca,
conf_interval = conf_interval,
simulations = if (isFALSE(wisca)) NULL else simulations,
formatting_type = formatting_type,
wisca_parameters = as_original_data_class(wisca_parameters, class(x)),
long_numeric = as_original_data_class(long_numeric, class(x))
sep = sep,
wisca_parameters = if (isFALSE(wisca)) NULL else as_original_data_class(wisca_parameters, class(x)),
long_numeric = as_original_data_class(long_numeric, class(x)),
wisca_draws = if (isFALSE(wisca)) NULL else wisca_draws_all,
wisca_components = if (isFALSE(wisca)) NULL else wisca_components_all
)
rownames(out) <- NULL
out
@@ -1269,7 +1377,7 @@ wisca <- function(x,
)
}
create_wisca_priors <- function(data) {
create_wisca_priors <- function(data, sep) {
pathogens <- unique(data$mo)
n_pathogens <- length(pathogens)
@@ -1278,9 +1386,28 @@ create_wisca_priors <- function(data) {
multinomial_obs <- data$n_total
gamma_posterior <- gamma_prior + multinomial_obs
# beta priors
beta_prior_alpha <- rep(1, times = n_pathogens)
beta_prior_beta <- rep(1, times = n_pathogens)
# Beta priors: Jeffreys prior Beta(0.5, 0.5) by default (Bielicki et al., 2016)
beta_prior_alpha <- rep(0.5, n_pathogens)
beta_prior_beta <- rep(0.5, n_pathogens)
# strongly informative Beta(1, 9999) for intrinsically resistant bug-drug pairs (Bielicki et al., 2016)
is_intrinsic <- vapply(
FUN.VALUE = logical(1),
seq_len(nrow(data)),
function(i) {
# split by " + ", or wherever `sep` is set to
ab_components <- as.ab(trimws(strsplit(as.character(data$ab[i]), trimws(sep), fixed = TRUE)[[1]]))
ab_components <- ab_components[!is.na(ab_components)]
length(ab_components) > 0 &&
all(vapply(
FUN.VALUE = logical(1),
ab_components,
function(ab) any(AMR::intrinsic_resistant$mo == data$mo[i] & AMR::intrinsic_resistant$ab == ab)
))
}
)
beta_prior_alpha[is_intrinsic] <- 1
beta_prior_beta[is_intrinsic] <- 9999
r <- data$n_susceptible
n <- data$n_tested
@@ -1334,9 +1461,9 @@ retrieve_wisca_parameters <- function(wisca_model, ...) {
#' @rawNamespace if(getRversion() >= "3.0.0") S3method(pillar::tbl_sum, antibiogram)
tbl_sum.antibiogram <- function(x, ...) {
dims <- paste(format(NROW(x), big.mark = ","), AMR_env$cross_icon, format(NCOL(x), big.mark = ","))
names(dims) <- "An antibiogram"
names(dims) <- "An Antibiogram"
if (isTRUE(attributes(x)$wisca)) {
dims <- c(dims, Type = paste0("WISCA with ", attributes(x)$conf_interval * 100, "% CI"))
dims <- c(dims, Type = paste0("WISCA with ", attributes(x)$conf_interval * 100, "% CI, ", attributes(x)$simulations, " simulations"))
} else if (isTRUE(attributes(x)$formatting_type >= 13)) {
dims <- c(dims, Type = paste0("Non-WISCA with ", attributes(x)$conf_interval * 100, "% CI"))
} else {
@@ -1352,9 +1479,14 @@ tbl_format_footer.antibiogram <- function(x, ...) {
if (NROW(x) == 0) {
return(footer)
}
wisca_text <- ifelse(isTRUE(attributes(x)$wisca),
paste0("\n# ", font_bold("Be aware"), " that in a WISCA, overlapping CIs indicate ", font_bold("non-inferiority"), "."),
""
)
c(footer, font_subtle(paste0(
"# Use `ggplot2::autoplot()` or base R `plot()` to create a plot of this antibiogram,\n",
"# or use it directly in R Markdown or Quarto, see ", word_wrap("?antibiogram")
"# or use it directly in R Markdown or Quarto, see ", word_wrap("?antibiogram"), ".",
wisca_text
)))
}
@@ -1415,8 +1547,29 @@ barplot.antibiogram <- function(height, ...) {
#' @rdname antibiogram
# this prevents the requirement for putting the dependency in Imports:
#' @rawNamespace if(getRversion() >= "3.0.0") S3method(ggplot2::autoplot, antibiogram)
autoplot.antibiogram <- function(object, ...) {
#' @param geom The plotting style for the point estimate. One of `"pointrange"` (default), `"point"`, `"col"`/`"bar"`, or `"errorbar"`. `"pointrange"` is recommended for coverage data: bars imply a meaningful baseline at zero, which coverage estimates rarely have.
#' @param ci Logical, whether to draw the credible/confidence interval. Defaults to `TRUE`. Ignored (forced `TRUE`) when `geom = "pointrange"` or `"errorbar"`, since the interval is intrinsic to those geoms.
#' @param sort Logical, whether to order regimens by coverage. Defaults to `TRUE`. When faceted (per pathogen) or grouped (syndromic), ordering is applied within each panel/group.
#' @param flip Logical, whether to draw regimens on the y-axis (horizontal). Defaults to `NULL`, which flips automatically when any regimen label exceeds 20 characters (long combination names read poorly on the x-axis). Set `TRUE`/`FALSE` to override.
#' @param caption Text to show as caption, will explain non-inferiority for WISCA models.
autoplot.antibiogram <- function(object,
geom = c("pointrange", "point", "col", "bar", "errorbar"),
ci = TRUE,
sort = TRUE,
flip = NULL,
caption = NULL,
...) {
geom <- match.arg(geom)
if (geom == "bar") geom <- "col"
meet_criteria(ci, allow_class = "logical", has_length = 1)
meet_criteria(sort, allow_class = "logical", has_length = 1)
meet_criteria(flip, allow_class = "logical", has_length = 1, allow_NULL = TRUE)
meet_criteria(caption, allow_class = "logical", has_length = 1, allow_NULL = TRUE, allow_NA = TRUE)
df <- attributes(object)$long_numeric
combine_SI <- isTRUE(attributes(object)$combine_SI)
is_wisca <- isTRUE(attributes(object)$wisca)
if (!"mo" %in% colnames(df)) {
df$mo <- ""
}
@@ -1429,36 +1582,273 @@ autoplot.antibiogram <- function(object, ...) {
} else if ("syndromic_group" %in% colnames(df)) {
group_name <- colnames(object)[1]
}
out <- ggplot2::ggplot(df,
has_syndromic <- "syndromic_group" %in% colnames(df)
has_facet <- !all(as.character(df$mo) == "", na.rm = TRUE)
# coverage on the percentage scale
df$.coverage <- df$coverage * 100
df$.lower <- df$lower_ci * 100
df$.upper <- df$upper_ci * 100
# decide orientation: auto-flip when labels are long
if (is.null(flip)) {
flip <- max(nchar(as.character(df$ab)), na.rm = TRUE) > 20
}
# ordering by coverage, applied within facet/group so each panel ranks correctly
if (isTRUE(sort)) {
split_keys <- interaction(
if (has_facet) as.character(df$mo) else rep("", nrow(df)),
if (has_syndromic) df$syndromic_group else rep("", nrow(df)),
drop = TRUE
)
# build a within-group rank, then a global ordered factor whose level order
# respects that rank; reorder_within-style without the tidytext dependency
ord <- order(split_keys, df$.coverage)
df <- df[ord, , drop = FALSE]
df$ab <- factor(df$ab, levels = unique(df$ab[order(split_keys[ord], df$.coverage[ord])]))
# note: with multiple facets the level order is a compromise (one global
# axis), acceptable because each facet shows its own subset in coverage order
}
fill_var <- if (has_syndromic) "syndromic_group" else NULL
out <- ggplot2::ggplot(
df,
mapping = ggplot2::aes(
x = ab,
y = coverage * 100,
fill = if ("syndromic_group" %in% colnames(df)) {
syndromic_group
} else {
NULL
}
y = .coverage,
fill = if (has_syndromic) syndromic_group else NULL,
colour = if (has_syndromic) syndromic_group else NULL
)
) +
ggplot2::geom_col(position = ggplot2::position_dodge2(preserve = "single")) +
ggplot2::geom_errorbar(
mapping = ggplot2::aes(ymin = lower_ci * 100, ymax = upper_ci * 100),
position = ggplot2::position_dodge2(preserve = "single", width = 1)
)
dodge <- ggplot2::position_dodge2(preserve = "single", width = 0.6)
if (geom == "col") {
out <- out + ggplot2::geom_col(position = ggplot2::position_dodge2(preserve = "single"))
if (isTRUE(ci)) {
out <- out + ggplot2::geom_errorbar(
mapping = ggplot2::aes(ymin = .lower, ymax = .upper),
position = ggplot2::position_dodge2(preserve = "single", width = 1),
width = 0.7
)
}
} else if (geom == "point") {
out <- out + ggplot2::geom_point(position = dodge, size = 2)
if (isTRUE(ci)) {
out <- out + ggplot2::geom_errorbar(
mapping = ggplot2::aes(ymin = .lower, ymax = .upper),
position = dodge, width = 0.4
)
}
} else if (geom == "errorbar") {
out <- out + ggplot2::geom_errorbar(
mapping = ggplot2::aes(ymin = .lower, ymax = .upper),
position = dodge, width = 0.4
)
} else {
# pointrange (default)
out <- out + ggplot2::geom_pointrange(
mapping = ggplot2::aes(ymin = .lower, ymax = .upper),
position = dodge, size = 0.5
)
}
if (is.null(caption)) {
if (is_wisca) {
out <- out + ggplot2::labs(caption = "Overlapping credible intervals:\nclinically non-inferior (Bielicki 2020)")
}
} else if (!caption %in% c(FALSE, NA)) {
out <- out + ggplot2::labs(caption = caption)
}
out <- out +
ggplot2::labs(
y = ifelse(combine_SI, "%SI", "%S"),
x = NULL,
fill = if (has_syndromic) group_name else NULL,
colour = if (has_syndromic) group_name else NULL
)
if (isTRUE(flip)) {
out <- out + ggplot2::coord_flip()
}
if (has_facet) {
out <- out + ggplot2::facet_wrap("mo")
}
out
}
#' @param wisca_plot_type Either `"susceptibility_incidence"` (default) or `"posterior_coverage"`.
#' @param ... Currently unused.
#' @rdname antibiogram
#' @export
wisca_plot <- function(wisca_model,
wisca_plot_type = c("susceptibility_incidence", "posterior_coverage"),
...) {
stop_ifnot_installed("ggplot2")
stop_ifnot(
isTRUE(attributes(wisca_model)$wisca),
"This function only applies to WISCA models."
)
wisca_plot_type <- match.arg(wisca_plot_type)
sep <- attributes(wisca_model)$sep %||% " + "
if (wisca_plot_type == "posterior_coverage") {
plot_wisca_posterior_coverage(wisca_model, sep = sep)
} else {
plot_wisca_susceptibility_incidence(wisca_model, sep = sep)
}
}
# ---- posterior_coverage ----
plot_wisca_posterior_coverage <- function(wisca_model, sep) {
draws <- attributes(wisca_model)$wisca_draws
stop_if(
is.null(draws),
"No simulation draws found. Re-run {.fun wisca} with the latest AMR version to retain draws."
)
if (!is.null(sep)) {
names(draws) <- gsub(sep, paste0(trimws(sep, which = "right"), "\n"), names(draws), fixed = TRUE)
}
df <- do.call(rbind, lapply(names(draws), function(nm) {
data.frame(regimen = nm, coverage = draws[[nm]] * 100, stringsAsFactors = FALSE)
}))
medians <- tapply(df$coverage, df$regimen, stats::median)
df$regimen <- factor(df$regimen, levels = names(sort(medians, decreasing = TRUE)))
ggplot2::ggplot(df, ggplot2::aes(x = coverage, fill = regimen, colour = regimen)) +
ggplot2::geom_density(alpha = 0.15, linewidth = 0.7) +
ggplot2::scale_y_continuous(n.breaks = 5, expand = ggplot2::expansion(c(0, 0.05))) +
ggplot2::scale_x_continuous(
labels = function(x) paste0(x, "%"),
n.breaks = 5,
limits = c(NA, 100)
) +
ggplot2::labs(
y = ifelse(isTRUE(attributes(object)$combine_SI), "%SI", "%S"),
x = NULL,
fill = if ("syndromic_group" %in% colnames(df)) {
group_name
} else {
NULL
}
title = "WISCA",
subtitle = "Posteriors coverage",
x = "Coverage",
y = "Relative likelihood",
fill = translate_AMR("Regimen", language = get_AMR_locale()),
colour = translate_AMR("Regimen", language = get_AMR_locale())
) +
ggplot2::theme(
legend.position = "right",
legend.key.spacing.y = ggplot2::unit(0.25, "lines"),
plot.title = ggplot2::element_text(size = 12, face = "bold")
)
if (!all(as.character(df$mo) == "", na.rm = TRUE)) {
out <- out +
ggplot2::facet_wrap("mo")
}
# ---- susceptibility_incidence scatter, faceted by regimen ----
plot_wisca_susceptibility_incidence <- function(wisca_model, sep) {
components <- attributes(wisca_model)$wisca_components
stop_if(
is.null(components),
"No simulation components found. Re-run {.fun wisca} with the latest AMR version to retain draws."
)
df_list <- lapply(names(components), function(g) {
comp <- components[[g]]
n_sims <- nrow(comp$incidence)
n_path <- ncol(comp$incidence)
mo_names <- colnames(comp$incidence)
reg_label <- g
if (!is.null(sep)) {
reg_label <- gsub(sep, paste0(trimws(sep, which = "right"), "\n"), g, fixed = TRUE)
}
data.frame(
regimen = rep(reg_label, n_sims * n_path),
pathogen = rep(mo_names, each = n_sims),
incidence = as.vector(comp$incidence) * 100,
susceptibility = as.vector(comp$susceptibility) * 100,
stringsAsFactors = FALSE
)
})
df <- do.call(rbind, df_list)
df$pathogen <- mo_shortname(df$pathogen, keep_synonyms = TRUE, info = FALSE)
# order pathogens by median incidence across all regimens
med_inc <- tapply(df$incidence, df$pathogen, stats::median)
df$pathogen <- factor(df$pathogen, levels = names(sort(med_inc, decreasing = TRUE)))
# order regimens by median coverage (from wisca_draws)
draws <- attributes(wisca_model)$wisca_draws
if (!is.null(draws)) {
med_cov <- vapply(names(draws), function(g) stats::median(draws[[g]]), double(1))
reg_labels <- unique(df$regimen)
# match order: draws names -> display labels
draw_order <- names(sort(med_cov, decreasing = TRUE))
label_order <- vapply(draw_order, function(g) {
if (!is.null(sep)) gsub(sep, paste0(trimws(sep, which = "right"), "\n"), g, fixed = TRUE) else g
}, character(1))
label_order <- label_order[label_order %in% reg_labels]
df$regimen <- factor(df$regimen, levels = label_order)
}
out
# retrieve coverage + CI
coverage <- attributes(wisca_model)$long_numeric
if (!is.null(sep)) {
coverage$ab <- gsub(sep, paste0(trimws(sep, which = "right"), "\n"), coverage$ab, fixed = TRUE)
}
df$coverage <- coverage$coverage[match(df$regimen, coverage$ab)] * 100
df$lower_ci <- coverage$lower_ci[match(df$regimen, coverage$ab)] * 100
df$upper_ci <- coverage$upper_ci[match(df$regimen, coverage$ab)] * 100
ci_df <- df[!duplicated(df$regimen), c("regimen", "coverage", "lower_ci", "upper_ci"), drop = FALSE]
ggplot2::ggplot(df, ggplot2::aes(x = susceptibility, y = incidence, colour = pathogen)) +
ggplot2::geom_rect(
data = ci_df,
ggplot2::aes(xmin = lower_ci, xmax = upper_ci, ymin = -Inf, ymax = Inf),
fill = "grey50", alpha = 0.15, colour = NA,
inherit.aes = FALSE
) +
ggplot2::geom_vline(
data = ci_df,
ggplot2::aes(xintercept = coverage),
linewidth = 0.5,
linetype = 2,
colour = "grey50",
inherit.aes = FALSE
) +
ggplot2::geom_point(size = 0.5, alpha = 0.2, shape = 16) +
ggplot2::facet_wrap(~regimen) +
ggplot2::scale_y_continuous(
labels = function(x) paste0(x, "%"),
n.breaks = 5
) +
ggplot2::scale_x_continuous(
labels = function(x) paste0(x, "%"),
limits = c(0, 100)
) +
ggplot2::labs(
title = "WISCA",
subtitle = "Susceptibility vs. incidence weight",
x = translate_AMR("Susceptibility", language = get_AMR_locale()),
y = translate_AMR("Incidence weight (normalised)", language = get_AMR_locale()),
colour = translate_AMR("Pathogen", language = get_AMR_locale()),
caption = paste(attributes(wisca_model)$simulations, "Monte Carlo simulations")
) +
ggplot2::guides(
colour = ggplot2::guide_legend(
override.aes = list(alpha = 1, size = 3)
)
) +
ggplot2::theme(
legend.position = "right",
legend.key.spacing.y = ggplot2::unit(0.25, "lines"),
legend.text = ggplot2::element_text(face = "italic"),
plot.title = ggplot2::element_text(size = 12, face = "bold")
)
}
#' @method knit_print antibiogram

View File

@@ -195,11 +195,13 @@ add_custom_microorganisms <- function(x) {
if (!"fullname" %in% colnames(x)) {
x$fullname <- trimws2(paste(x$genus, x$species, x$subspecies))
}
if (!"domain" %in% colnames(x)) x$domain <- ""
if (!"kingdom" %in% colnames(x)) x$kingdom <- ""
if (!"phylum" %in% colnames(x)) x$phylum <- ""
if (!"class" %in% colnames(x)) x$class <- ""
if (!"order" %in% colnames(x)) x$order <- ""
if (!"family" %in% colnames(x)) x$family <- ""
x$domain[is.na(x$domain)] <- ""
x$kingdom[is.na(x$kingdom)] <- ""
x$phylum[is.na(x$phylum)] <- ""
x$class[is.na(x$class)] <- ""
@@ -217,6 +219,7 @@ add_custom_microorganisms <- function(x) {
# fill in taxonomy based on genus
genus_to_check <- gsub("^(.*)[^a-zA-Z].*", "\\1", x$genus, perl = TRUE)
x$domain[which(x$domain == "" & genus_to_check != "")] <- AMR_env$MO_lookup$domain[match(genus_to_check[which(x$domain == "" & genus_to_check != "")], AMR_env$MO_lookup$genus)]
x$kingdom[which(x$kingdom == "" & genus_to_check != "")] <- AMR_env$MO_lookup$kingdom[match(genus_to_check[which(x$kingdom == "" & genus_to_check != "")], AMR_env$MO_lookup$genus)]
x$phylum[which(x$phylum == "" & genus_to_check != "")] <- AMR_env$MO_lookup$phylum[match(genus_to_check[which(x$phylum == "" & genus_to_check != "")], AMR_env$MO_lookup$genus)]
x$class[which(x$class == "" & genus_to_check != "")] <- AMR_env$MO_lookup$class[match(genus_to_check[which(x$class == "" & genus_to_check != "")], AMR_env$MO_lookup$genus)]
@@ -229,9 +232,9 @@ add_custom_microorganisms <- function(x) {
x$prevalence[is.na(x$prevalence)] <- 1.25
x$status <- "accepted"
x$ref <- paste("Self-added,", format(Sys.Date(), "%Y"))
x$kingdom_index <- AMR_env$MO_lookup$kingdom_index[match(genus_to_check, AMR_env$MO_lookup$genus)]
# complete missing kingdom index, so mo_matching_score() will not return NA
x$kingdom_index[is.na(x$kingdom_index)] <- 1
x$domain_index <- AMR_env$MO_lookup$domain_index[match(genus_to_check, AMR_env$MO_lookup$genus)]
# complete missing domain index, so mo_matching_score() will not return NA
x$domain_index[is.na(x$domain_index)] <- 1
x$fullname_lower <- tolower(x$fullname)
x$full_first <- substr(x$fullname_lower, 1, 1)
x$species_first <- tolower(substr(x$species, 1, 1))

View File

@@ -77,13 +77,13 @@
#'
#' Synonyms (i.e. trade names) were derived from the PubChem Compound ID (column `cid`) and are consequently only available where a CID is available.
#' @inheritSection AMR Download Our Reference Data
#' @source
#' @references
#'
#' * `r TAXONOMY_VERSION$ATC_DDD$citation` Accessed from <`r TAXONOMY_VERSION$ATC_DDD$url`> on `r documentation_date(TAXONOMY_VERSION$ATC_DDD$accessed_date)`.
#'
#' * `r TAXONOMY_VERSION$LOINC$citation` Accessed from <`r TAXONOMY_VERSION$LOINC$url`> on `r documentation_date(TAXONOMY_VERSION$LOINC$accessed_date)`.
#'
#' * European Commission Public Health PHARMACEUTICALS - COMMUNITY REGISTER: <https://ec.europa.eu/health/documents/community-register/html/reg_hum_atc.htm>
#' * European Commission Public Health PHARMACEUTICALS - COMMUNITY REGISTER: <https://ec.europa.eu/health/documents/community-register/html/index_en.htm>
#' @inheritSection WHOCC WHOCC
#' @seealso [microorganisms], [intrinsic_resistant]
#' @examples
@@ -107,10 +107,11 @@
#' - `mo`\cr ID of microorganism as used by this package. ***This is a unique identifier.***
#' - `fullname`\cr Full name, like `"Escherichia coli"`. For the taxonomic ranks genus, species and subspecies, this is the 'pasted' text of genus, species, and subspecies. For all taxonomic ranks higher than genus, this is the name of the taxon. ***This is a unique identifier.***
#' - `status` \cr Status of the taxon, either `r vector_or(microorganisms$status, documentation = TRUE)`
#' - `kingdom`, `phylum`, `class`, `order`, `family`, `genus`, `species`, `subspecies`\cr Taxonomic rank of the microorganism. Note that for fungi, *phylum* is equal to their taxonomic *division*. Also, for fungi, *subkingdom* and *subdivision* were left out since they do not occur in the bacterial taxonomy.
#' - `domain`, `kingdom`, `phylum`, `class`, `order`, `family`, `genus`, `species`, `subspecies`\cr Taxonomic rank of the microorganism. Note that for fungi, *phylum* is used for their taxonomic *division*. Also, for fungi, *subkingdom* and *subdivision* were left out since they do not occur in the bacterial taxonomy. For all species outside the domains of Bacteria and Archaea, the `domain` and `kingdom` are identical.
#' - `rank`\cr Text of the taxonomic rank of the microorganism, such as `"species"` or `"genus"`
#' - `ref`\cr Author(s) and year of related scientific publication. This contains only the *first surname* and year of the *latest* authors, e.g. "Wallis *et al.* 2006 *emend.* Smith and Jones 2018" becomes "Smith *et al.*, 2018". This field is directly retrieved from the source specified in the column `source`. Moreover, accents were removed to comply with CRAN that only allows ASCII characters.
#' - `ref`\cr Abbreviated authority citation for the nomenclatural act that established the current name combination, following ICNP conventions. For species described in their current genus (*sp. nov.*), this is the original description author(s) and year. For species transferred to a different genus (*comb. nov.*), this is the reclassification author(s) and year. Emendations are excluded. For synonyms, this is the authority under which the synonym was originally published. This field is directly retrieved from the source specified in the column `source`. Diacritics were removed to comply with CRAN, that only allows ASCII characters.
#' - `oxygen_tolerance` \cr Oxygen tolerance, either `r vector_or(microorganisms$oxygen_tolerance, documentation = TRUE)`. These data were retrieved from BacDive (see *Source*). Items that contain "likely" are missing from BacDive and were extrapolated from other species within the same genus to guess the oxygen tolerance. Currently `r round(length(microorganisms$oxygen_tolerance[which(!is.na(microorganisms$oxygen_tolerance))]) / nrow(microorganisms[which(microorganisms$kingdom == "Bacteria"), ]) * 100, 1)`% of all `r format_included_data_number(nrow(microorganisms[which(microorganisms$kingdom == "Bacteria"), ]))` bacteria in the data set contain an oxygen tolerance.
#' - `morphology` \cr Morphology (cell shape), either `r vector_or(microorganisms$morphology, documentation = TRUE)`. These data were retrieved from BacDive (see *Source*). Genera that are clinically established as coccobacilli (the HACEK group and beyond, such as *Haemophilus* and *Acinetobacter*) are classified as such regardless of BacDive majority vote. Items that contain "likely" are missing from BacDive and were extrapolated from other species within the same genus. Currently `r round(length(microorganisms$morphology[which(!is.na(microorganisms$morphology))]) / nrow(microorganisms[which(microorganisms$kingdom == "Bacteria"), ]) * 100, 1)`% of all `r format_included_data_number(nrow(microorganisms[which(microorganisms$kingdom == "Bacteria"), ]))` bacteria in the data set contain a morphology.
#' - `source`\cr Either `r vector_or(microorganisms$source, documentation = TRUE)` (see *Source*)
#' - `lpsn`\cr Identifier ('Record number') of `r TAXONOMY_VERSION$LPSN$name`. This will be the first/highest LPSN identifier to keep one identifier per row. For example, *Acetobacter ascendens* has LPSN Record number 7864 and 11011. Only the first is available in the `microorganisms` data set. ***This is a unique identifier***, though available for only `r format_included_data_number(sum(!is.na(microorganisms$lpsn)))` records.
#' - `lpsn_parent`\cr LPSN identifier of the parent taxon
@@ -150,7 +151,7 @@
#'
#' The syntax used to transform the original data to a cleansed \R format, can be [found here](https://github.com/msberends/AMR/blob/main/data-raw/_reproduction_scripts/reproduction_of_microorganisms.R).
#' @inheritSection AMR Download Our Reference Data
#' @source
#' @references
#' Taxonomic entries were imported in this order of importance:
#' 1. `r TAXONOMY_VERSION$LPSN$name`:\cr\cr
#' `r TAXONOMY_VERSION$LPSN$citation` Accessed from <`r TAXONOMY_VERSION$LPSN$url`> on `r documentation_date(TAXONOMY_VERSION$LPSN$accessed_date)`.
@@ -339,6 +340,7 @@
#' This data set is internally used by:
#' * [not_intrinsic_resistant()] (an [antimicrobial selector][antimicrobial_selectors])
#' * [mo_is_intrinsic_resistant()]
#' * [wisca()] to model \eqn{\beta(1, 9999)} for resistant bug-drug combinations, per \doi{10.1093/jac/dkv397}
#' @inheritSection AMR Download Our Reference Data
#' @examples
#' intrinsic_resistant

View File

@@ -83,34 +83,31 @@ as.disk <- function(x, na.rm = FALSE) {
na_before <- length(x[is.na(x)])
# heavily based on cleaner::clean_double():
clean_double2 <- function(x, remove = "[^0-9.,-]", fixed = FALSE) {
# extract a plausible numeric disk zone value from character input
extract_disk_value <- function(x) {
x <- as.character(x)
# normalise decimal separators
x <- gsub(",", ".", x, fixed = TRUE)
# remove ending dot/comma
x <- gsub("[,.]$", "", x)
# only keep last dot/comma
reverse <- function(x) vapply(FUN.VALUE = character(1), lapply(strsplit(x, NULL), rev), paste, collapse = "")
x <- sub("{{dot}}", ".",
gsub(".", "",
reverse(sub(".", "}}tod{{",
reverse(x),
fixed = TRUE
)),
fixed = TRUE
),
fixed = TRUE
)
x_clean <- gsub(remove, "", x, ignore.case = TRUE, fixed = fixed)
# remove everything that is not a number or dot
as.double(gsub("[^0-9.]+", "", x_clean))
# strip known context: leading/trailing whitespace, SIR interpretations,
# comparison operators, semicolons, and surrounding whitespace
x <- trimws(x)
# remove trailing SIR interpretation (e.g., "42; S", "28 R")
x <- gsub("[;[:space:]]+[SIRsir]$", "", x)
# remove leading comparison operators (e.g., ">=20", "<=6")
x <- gsub("^[<>=]+\\s*", "", x)
x <- trimws(x)
# now the remainder must be a plausible standalone number
out <- rep(NA_real_, length(x))
is_numeric <- grepl("^[0-9]+\\.?[0-9]*$", x)
out[is_numeric] <- as.double(x[is_numeric])
out
}
# round up and make it an integer
x <- as.integer(ceiling(clean_double2(x)))
# round up and coerce to integer
x <- as.integer(ceiling(extract_disk_value(x)))
# valid disk diffusion zones: 0-50 mm
x[x < 0 | x > 50] <- NA_integer_
# disks can never be less than 0 mm or more than 50 mm
x[x < 0 | x > 99] <- NA_integer_
x[x > 50] <- 50L
na_after <- length(x[is.na(x)])
if (na_before != na_after) {

View File

@@ -134,7 +134,7 @@
#' @seealso [key_antimicrobials()]
#' @export
#' @return A [logical] vector
#' @source Methodology of these functions is strictly based on:
#' @references Methodology of these functions is strictly based on:
#'
#' - **M39 Analysis and Presentation of Cumulative Antimicrobial Susceptibility Test Data, 5th Edition**, 2022, *Clinical and Laboratory Standards Institute (CLSI)*. <https://clsi.org/standards/products/microbiology/documents/m39/>.
#'

View File

@@ -107,7 +107,7 @@ format_eucast_version_nr <- function(version, markdown = TRUE) {
#' @rdname interpretive_rules
#' @export
#' @return The input of `x`, possibly with edited values of antimicrobials. Or, if `verbose = TRUE`, a [data.frame] with all original and new values of the affected bug-drug combinations.
#' @source
#' @references
#' - EUCAST Expert Rules. Version 2.0, 2012.\cr
#' Leclercq et al. **EUCAST expert rules in antimicrobial susceptibility testing.** *Clin Microbiol Infect.* 2013;19(2):141-60; \doi{https://doi.org/10.1111/j.1469-0691.2011.03703.x}
#' - EUCAST Expert Rules, Intrinsic Resistance and Exceptional Phenotypes Tables. Version 3.1, 2016. [(link)](https://www.eucast.org/fileadmin/src/media/PDFs/EUCAST_files/Expert_Rules/Expert_rules_intrinsic_exceptional_V3.1.pdf)
@@ -485,14 +485,13 @@ interpretive_rules <- function(x,
if (any(c("all", "other") %in% rules)) {
if (isTRUE(info)) {
cat(paste0("\n", font_grey(strrep("-", 0.95 * getOption("width", 100))), "\n"))
cat(word_wrap(
paste0(
"Rules by the ",
font_bold(paste0("AMR package v", utils::packageDescription("AMR")$Version)),
" (", format(as.Date(utils::packageDescription("AMR")$Date), format = "%Y"),
"), see {.help [{.fun interpretive_rules}](AMR::interpretive_rules)}\n"
)
))
message_(
"Rules by the ",
font_bold(paste0("AMR package v", utils::packageDescription("AMR")$Version)),
" (", format(as.Date(utils::packageDescription("AMR")$Date), format = "%Y"),
"), see {.help [{.fun interpretive_rules}](AMR::interpretive_rules)}",
as_note = FALSE
)
cat("\n\n")
}
ab_enzyme <- subset(AMR::antimicrobials, name %like% "/")[, c("ab", "name"), drop = FALSE]
@@ -523,10 +522,11 @@ interpretive_rules <- function(x,
tolower(ab_enzyme$enzyme_name[i]), " ({.field ", font_bold(col_enzyme), "}) = R"
)
if (isTRUE(info)) {
cat(word_wrap(rule_current,
width = getOption("width") - 30,
message_(rule_current,
as_note = FALSE,
appendLF = FALSE,
extra_indent = 6
))
)
}
run_changes <- edit_sir(
x = x,
@@ -625,6 +625,7 @@ interpretive_rules <- function(x,
} else if (!is.null(list(...)$eucast_rules_df)) {
# deprecated parameter name kept for backward compatibility
interpretive_rules_df_total <- list(...)$eucast_rules_df
warning("Used interpretive_rules(x, eucast_rules_df = ...) - Do use newer argument interpretive_rules_df now.")
} else {
# internal data file, created in data-raw/_pre_commit_checks.R
interpretive_rules_df_total <- INTERPRETIVE_RULES_DF

View File

@@ -161,11 +161,11 @@ key_antimicrobials <- function(x = NULL,
if (is.null(col_mo)) {
warning_("in {.fun key_antimicrobials}: no column found for {.arg col_mo}, ignoring antibiotics set in {.arg gram_negative} and {.arg gram_positive}, and antimycotics set in {.arg antifungal}")
gramstain <- NA_character_
kingdom <- NA_character_
domain <- NA_character_
} else {
x.mo <- as.mo(x[, col_mo, drop = TRUE])
gramstain <- mo_gramstain(x.mo, language = NULL)
kingdom <- mo_kingdom(x.mo, language = NULL)
domain <- mo_domain(x.mo, language = NULL)
}
AMR_string <- function(x, values, name, filter, cols = cols) {
@@ -219,11 +219,11 @@ key_antimicrobials <- function(x = NULL,
cols = cols
)
key_ab[which(kingdom == "Fungi")] <- AMR_string(
key_ab[which(domain == "Fungi")] <- AMR_string(
x = x,
values = antifungal,
name = "antifungal",
filter = kingdom == "Fungi",
filter = domain == "Fungi",
cols = cols
)

77
R/mo.R
View File

@@ -29,7 +29,7 @@
#' Transform Arbitrary Input to Valid Microbial Taxonomy
#'
#' Use this function to get a valid microorganism code ([`mo`]) based on arbitrary user input. Determination is done using intelligent rules and the complete taxonomic tree of the kingdoms `r vector_and(unique(microorganisms$kingdom[which(!grepl("(unknown|Fungi)", microorganisms$kingdom))]), quotes = FALSE)`, and most microbial species from the kingdom Fungi (see *Source*). The input can be almost anything: a full name (like `"Staphylococcus aureus"`), an abbreviated name (such as `"S. aureus"`), an abbreviation known in the field (such as `"MRSA"`), or just a genus. See *Examples*.
#' Use this function to get a valid microorganism code ([`mo`]) based on arbitrary user input. Determination is done using intelligent rules and the complete taxonomic tree of the domains `r vector_and(unique(microorganisms$domain[which(!grepl("(unknown|Fungi)", microorganisms$domain))]), quotes = FALSE)`, and most microbial species from the domain Fungi (see *Source*). The input can be almost anything: a full name (like `"Staphylococcus aureus"`), an abbreviated name (such as `"S. aureus"`), an abbreviation known in the field (such as `"MRSA"`), or just a genus. See *Examples*.
#' @param x A [character] vector or a [data.frame] with one or two columns.
#' @param Becker A [logical] to indicate whether staphylococci should be categorised into coagulase-negative staphylococci ("CoNS") and coagulase-positive staphylococci ("CoPS") instead of their own species, according to Karsten Becker *et al.* (see *Source*). Please see *Details* for a full list of staphylococcal species that will be converted.
#'
@@ -37,14 +37,14 @@
#' @param Lancefield A [logical] to indicate whether a beta-haemolytic *Streptococcus* should be categorised into Lancefield groups instead of their own species, according to Rebecca C. Lancefield (see *Source*). These streptococci will be categorised in their first group, e.g. *Streptococcus dysgalactiae* will be group C, although officially it was also categorised into groups G and L. . Please see *Details* for a full list of streptococcal species that will be converted.
#'
#' This excludes enterococci at default (who are in group D), use `Lancefield = "all"` to also categorise all enterococci as group D.
#' @param minimum_matching_score A numeric value to set as the lower limit for the [MO matching score][mo_matching_score()]. When left blank, this will be determined automatically based on the character length of `x`, its [taxonomic kingdom][microorganisms] and [human pathogenicity][mo_matching_score()].
#' @param minimum_matching_score A numeric value to set as the lower limit for the [MO matching score][mo_matching_score()]. When left blank, this will be determined automatically based on the character length of `x`, its [taxonomic domain][microorganisms] and [human pathogenicity][mo_matching_score()].
#' @param keep_synonyms A [logical] to indicate if outdated, previously valid taxonomic names must be preserved and not be corrected to currently accepted names. Do note that the term "synonym" is in this case jargon from the field of microbial taxonomy - it is not in place to denote that e.g. "Streptococcus Group A" is a synonym of *S. pyogenes*. Though this is practically the case, taxonomically it is not as "Streptococcus Group A" is not even a valid taxonomic name.
#'
#' The default is `FALSE`, which will return a note if outdated taxonomic names were processed. The default can be set with the package option [`AMR_keep_synonyms`][AMR-options], i.e. `options(AMR_keep_synonyms = TRUE)` or `options(AMR_keep_synonyms = FALSE)`.
#' @param reference_df A [data.frame] to be used for extra reference when translating `x` to a valid [`mo`]. See [set_mo_source()] and [get_mo_source()] to automate the usage of your own codes (e.g. used in your analysis or organisation).
#' @param ignore_pattern A Perl-compatible [regular expression][base::regex] (case-insensitive) of which all matches in `x` must return `NA`. This can be convenient to exclude known non-relevant input and can also be set with the package option [`AMR_ignore_pattern`][AMR-options], e.g. `options(AMR_ignore_pattern = "(not reported|contaminated flora)")`.
#' @param cleaning_regex A Perl-compatible [regular expression][base::regex] (case-insensitive) to clean the input of `x`. Every matched part in `x` will be removed. At default, this is the outcome of [mo_cleaning_regex()], which removes texts between brackets and texts such as "species" and "serovar". The default can be set with the package option [`AMR_cleaning_regex`][AMR-options].
#' @param only_fungi A [logical] to indicate if only fungi must be found, making sure that e.g. misspellings always return records from the kingdom of Fungi. This can be set globally for [all microorganism functions][mo_property()] with the package option [`AMR_only_fungi`][AMR-options], i.e. `options(AMR_only_fungi = TRUE)`.
#' @param only_fungi A [logical] to indicate if only fungi must be found, making sure that e.g. misspellings always return records from the domain of Fungi. This can be set globally for [all microorganism functions][mo_property()] with the package option [`AMR_only_fungi`][AMR-options], i.e. `options(AMR_only_fungi = TRUE)`.
#' @param language Language to translate text like "no growth", which defaults to the system language (see [get_AMR_locale()]).
#' @param info A [logical] to indicate that info must be printed, e.g. a progress bar when more than 25 items are to be coerced, or a list with outdated taxonomic names. The default is `TRUE` only in interactive mode.
#' @param ... Other arguments passed on to functions.
@@ -64,7 +64,7 @@
#' | | | \---> subspecies, a 3-5 letter acronym
#' | | \----> species, a 3-6 letter acronym
#' | \----> genus, a 4-8 letter acronym
#' \----> kingdom: A (Archaea), AN (Animalia), B (Bacteria),
#' \----> domain: A (Archaea), AN (Animalia), B (Bacteria),
#' C (Chromista), F (Fungi), PL (Plantae),
#' P (Protozoa)
#' ```
@@ -77,7 +77,7 @@
#'
#' ### Coping with Uncertain Results
#'
#' Results of non-exact taxonomic input are based on their [matching score][mo_matching_score()]. The lowest allowed score can be set with the `minimum_matching_score` argument. At default this will be determined based on the character length of the input, the [taxonomic kingdom][microorganisms], and the [human pathogenicity][mo_matching_score()] of the taxonomic outcome. If values are matched with uncertainty, a message will be shown to suggest the user to inspect the results with [mo_uncertainties()], which returns a [data.frame] with all specifications.
#' Results of non-exact taxonomic input are based on their [matching score][mo_matching_score()]. The lowest allowed score can be set with the `minimum_matching_score` argument. At default this will be determined based on the character length of the input, the [taxonomic domain][microorganisms], and the [human pathogenicity][mo_matching_score()] of the taxonomic outcome. If values are matched with uncertainty, a message will be shown to suggest the user to inspect the results with [mo_uncertainties()], which returns a [data.frame] with all specifications.
#'
#' To increase the quality of matching, the `cleaning_regex` argument is used to clean the input. This must be a [regular expression][base::regex] that matches parts of the input that should be removed before the input is matched against the [available microbial taxonomy][microorganisms]. It will be matched Perl-compatible and case-insensitive. The default value of `cleaning_regex` is the outcome of the helper function [mo_cleaning_regex()].
#'
@@ -241,7 +241,7 @@ as.mo <- function(x,
out[is.na(out) & toupper(x) %in% AMR_env$MO_lookup$mo] <- toupper(x[is.na(out) & toupper(x) %in% AMR_env$MO_lookup$mo])
# From full name ----
out[is.na(out) & x_lower %in% AMR_env$MO_lookup$fullname_lower] <- AMR_env$MO_lookup$mo[match(x_lower[is.na(out) & x_lower %in% AMR_env$MO_lookup$fullname_lower], AMR_env$MO_lookup$fullname_lower)]
# one exception: "Fungi" matches the kingdom, but instead it should return the 'unknown' code for fungi
# one exception: "Fungi" matches the domain, but instead it should return the 'unknown' code for fungi
out[out == "F_[KNG]_FUNGI"] <- "F_FUNGUS"
# From known codes ----
ind <- is.na(out) & toupper(x) %in% AMR::microorganisms.codes$code
@@ -300,7 +300,7 @@ as.mo <- function(x,
MO_lookup_current <- AMR_env$MO_lookup
if (isTRUE(only_fungi)) {
MO_lookup_current <- MO_lookup_current[MO_lookup_current$kingdom == "Fungi", , drop = FALSE]
MO_lookup_current <- MO_lookup_current[MO_lookup_current$domain == "Fungi", , drop = FALSE]
}
# run it
@@ -322,6 +322,15 @@ as.mo <- function(x,
return(as.character(MO_lookup_current$mo[match(x_out, MO_lookup_current$fullname_lower)]))
}
# Issue #287: "X complex" is not a distinct taxon - strip " complex" and try "X"
if (grepl(" complex$", x_out, ignore.case = FALSE)) {
x_out <- sub(" complex$", "", x_out)
x_search_cleaned <- sub(" [Cc]omplex$", "", x_search_cleaned)
if (x_out %in% MO_lookup_current$fullname_lower) {
return(as.character(MO_lookup_current$mo[match(x_out, MO_lookup_current$fullname_lower)]))
}
}
# input must not be too short
if (nchar(x_out) < 3) {
return("UNKNOWN")
@@ -343,6 +352,18 @@ as.mo <- function(x,
(MO_lookup_current$species_first == substr(x_parts[2], 1, 1) |
MO_lookup_current$subspecies_first == substr(x_parts[2], 1, 1) |
MO_lookup_current$subspecies_first == substr(x_parts[3], 1, 1)))
# Issue #288: if the species (and subspecies) word(s) in the input exactly match
# exactly one candidate, use only that candidate and bypass the 0.55 cutoff.
# This prevents prevalent bacteria from outranking a rarer organism whose species
# epithet is an unambiguous exact match, e.g. "S. apiospermum" → Scedosporium.
sp_exact <- tolower(MO_lookup_current$species[filtr]) == x_parts[2]
if (length(x_parts) == 3) {
sp_exact <- sp_exact & tolower(MO_lookup_current$subspecies[filtr]) == x_parts[3]
}
if (sum(sp_exact) == 1) {
filtr <- filtr[sp_exact]
minimum_matching_score <- 0
}
} else {
filtr <- which(MO_lookup_current$full_first == substr(x_parts[1], 1, 1) |
MO_lookup_current$species_first == substr(x_parts[2], 1, 1) |
@@ -385,8 +406,8 @@ as.mo <- function(x,
minimum_matching_score_current <- min(0.6, min(10, nchar(x_search_cleaned)) * 0.08)
# correct back for prevalence
minimum_matching_score_current <- minimum_matching_score_current / MO_lookup_current$prevalence[match(mo_to_search, MO_lookup_current$fullname)]
# correct back for kingdom
minimum_matching_score_current <- minimum_matching_score_current / MO_lookup_current$kingdom_index[match(mo_to_search, MO_lookup_current$fullname)]
# correct back for domain
minimum_matching_score_current <- minimum_matching_score_current / MO_lookup_current$domain_index[match(mo_to_search, MO_lookup_current$fullname)]
minimum_matching_score_current <- pmax(minimum_matching_score_current, m)
if (length(x_parts) > 1 && all(m <= 0.55, na.rm = TRUE)) {
# if the highest score is 0.5, we have nothing serious - 0.5 is the lowest for pathogenic group 1
@@ -647,7 +668,7 @@ NA_mo_ <- set_clean_class(NA_character_,
pillar_shaft.mo <- function(x, ...) {
add_MO_lookup_to_AMR_env()
out <- trimws(format(x))
# grey out the kingdom (part until first "_")
# grey out the domain (part until first "_")
out[!is.na(x)] <- gsub("^([A-Z]+_)(.*)", paste0(pillar::style_subtle("\\1"), "\\2"), out[!is.na(x)], perl = TRUE)
# and grey out every _
out[!is.na(x)] <- gsub("_", pillar::style_subtle("_"), out[!is.na(x)])
@@ -673,9 +694,7 @@ pillar_shaft.mo <- function(x, ...) {
(!is.null(df) && !all(unlist(df[, which(mo_cols), drop = FALSE]) %in% all_mos))) {
# markup old mo codes
out[!x %in% all_mos] <- font_italic(
pillar::style_na(x[!x %in% all_mos],
collapse = NULL
),
pillar::style_na(x[!x %in% all_mos]),
collapse = NULL
)
# throw a warning with the affected column name(s)
@@ -685,7 +704,7 @@ pillar_shaft.mo <- function(x, ...) {
col <- "The data"
}
warning_(
col, " contains old MO codes (from a previous AMR package version). ",
col, " contains old MO codes (from another AMR package version). ",
"Please update your MO codes with {.help [{.fun as.mo}](AMR::as.mo)}.",
call = FALSE
)
@@ -1002,17 +1021,19 @@ print.mo_uncertainties <- function(x, n = 10, ...) {
message_(out2, as_note = FALSE)
}
other_matches <- paste0(
"Also matched: ",
vector_and(
paste0(
candidates_formatted,
font_blue(paste0(" (", scores_formatted, ")"), collapse = NULL)
),
quotes = FALSE, sort = FALSE
if (x[i, ]$candidates != "") {
other_matches <- paste0(
"Also matched: ",
vector_and(
paste0(
candidates_formatted,
font_blue(paste0(" (", scores_formatted, ")"), collapse = NULL)
),
quotes = FALSE, sort = FALSE
)
)
)
message_(other_matches, as_note = FALSE)
message_(other_matches, as_note = FALSE)
}
}
if (isTRUE(any_maxed_out)) {
@@ -1228,13 +1249,13 @@ replace_old_mo_codes <- function(x, property) {
solved_unique <- unlist(lapply(
strsplit(affected_unique, ""),
function(m) {
kingdom <- paste0("^", m[1])
domain <- paste0("^", m[1])
name <- m[3:length(m)]
name[name == "_"] <- " "
name <- tolower(paste0(name, ".*", collapse = ""))
name <- gsub(" .*", " ", name, fixed = TRUE)
name <- paste0("^", name)
results <- AMR_env$MO_lookup$mo[AMR_env$MO_lookup$kingdom %like_case% kingdom &
results <- AMR_env$MO_lookup$mo[AMR_env$MO_lookup$domain %like_case% domain &
AMR_env$MO_lookup$fullname_lower %like_case% name]
if (length(results) > 1) {
all_direct_matches <<- FALSE
@@ -1258,14 +1279,14 @@ replace_old_mo_codes <- function(x, property) {
warning_(
"in {.help [{.fun mo_", property, "}](AMR::mo_", property, ")}: the input contained ", n_matched,
" old MO code", ifelse(n_matched == 1, "", "s"),
" (", n_unique, "from a previous AMR package version). ",
" (", n_unique, "from another AMR package version). ",
"Please update your MO codes with {.help [{.fun as.mo}](AMR::as.mo)} to increase speed."
)
} else {
warning_(
"in {.help [{.fun as.mo}](AMR::as.mo)}: the input contained ", n_matched,
" old MO code", ifelse(n_matched == 1, "", "s"),
" (", n_unique, "from a previous AMR package version). ",
" (", n_unique, "from another AMR package version). ",
n_solved, " old MO code", ifelse(n_solved == 1, "", "s"),
ifelse(n_solved == 1, " was", " were"),
ifelse(all_direct_matches, " updated ", font_bold(" guessed ")),

View File

@@ -47,7 +47,7 @@
#' * \eqn{l_n} is the length of \eqn{n};
#' * \eqn{lev} is the [Levenshtein distance function](https://en.wikipedia.org/wiki/Levenshtein_distance) (counting any insertion as 1, and any deletion or substitution as 2) that is needed to change \eqn{x} into \eqn{n};
#' * \eqn{p_n} is the human pathogenic prevalence group of \eqn{n}, as described below;
#' * \eqn{k_n} is the taxonomic kingdom of \eqn{n}, set as Bacteria = 1, Fungi = 1.25, Protozoa = 1.5, Chromista = 1.75, Archaea = 2, others = 3.
#' * \eqn{k_n} is the taxonomic domain ('kingdom' until taxonomic reclassification of 2024) of \eqn{n}, set as Bacteria = 1, Fungi = 1.25, Protozoa = 1.5, Chromista = 1.75, Archaea = 2, others = 3.
#'
#' The grouping into human pathogenic prevalence \eqn{p} is based on recent work from Bartlett *et al.* (2022, \doi{10.1099/mic.0.001269}) who extensively studied medical-scientific literature to categorise all bacterial species into these groups:
#'
@@ -122,8 +122,8 @@ mo_matching_score <- function(x, n) {
# human pathogenic prevalence (1 to 3), see ?as.mo
p_n <- AMR_env$MO_lookup[match(n, AMR_env$MO_lookup$fullname), "prevalence", drop = TRUE]
# kingdom index (Bacteria = 1, Fungi = 2, Protozoa = 3, Archaea = 4, others = 5)
k_n <- AMR_env$MO_lookup[match(n, AMR_env$MO_lookup$fullname), "kingdom_index", drop = TRUE]
# domain index (Bacteria = 1, Fungi = 2, Protozoa = 3, Archaea = 4, others = 5)
k_n <- AMR_env$MO_lookup[match(n, AMR_env$MO_lookup$fullname), "domain_index", drop = TRUE]
# matching score:
(l_n - 0.5 * l_n.lev) / (l_n * p_n * k_n)

View File

@@ -42,21 +42,23 @@
#' - `mo_ref("Enterobacter aerogenes")` will return `"Tindall et al., 2017"` (with a note about the renaming)
#' - `mo_ref("Enterobacter aerogenes", keep_synonyms = TRUE)` will return `"Hormaeche et al., 1960"` (with a once-per-session warning that the name is outdated)
#'
#' The short name ([mo_shortname()]) returns the first character of the genus and the full species, such as `"E. coli"`, for species and subspecies. Exceptions are abbreviations of staphylococci (such as *"CoNS"*, Coagulase-Negative Staphylococci) and beta-haemolytic streptococci (such as *"GBS"*, Group B Streptococci). Please bear in mind that e.g. *E. coli* could mean *Escherichia coli* (kingdom of Bacteria) as well as *Entamoeba coli* (kingdom of Protozoa). Returning to the full name will be done using [as.mo()] internally, giving priority to bacteria and human pathogens, i.e. `"E. coli"` will be considered *Escherichia coli*. As a result, `mo_fullname(mo_shortname("Entamoeba coli"))` returns `"Escherichia coli"`.
#' [mo_ref()] returns the abbreviated authority of the nomenclatural act that created the queried name combination. When `keep_synonyms = FALSE` (default), this is the authority of the currently accepted name. When `keep_synonyms = TRUE`, this is the authority under which the queried (possibly outdated) name was published. Emendations (changes to the species description without a name change) are not reflected; only the combination or original description authority is returned.
#'
#' Since the top-level of the taxonomy is sometimes referred to as 'kingdom' and sometimes as 'domain', the functions [mo_kingdom()] and [mo_domain()] return the exact same results.
#' The short name ([mo_shortname()]) returns the first character of the genus and the full species, such as `"E. coli"`, for species and subspecies. Exceptions are abbreviations of staphylococci (such as *"CoNS"*, Coagulase-Negative Staphylococci) and beta-haemolytic streptococci (such as *"GBS"*, Group B Streptococci). Please bear in mind that e.g. *E. coli* could mean *Escherichia coli* (kingdom of Bacteria) as well as *Entamoeba coli* (kingdom of Protozoa). Returning to the full name will be done using [as.mo()] internally, giving priority to bacteria and human pathogens, i.e. `"E. coli"` will always be considered *Escherichia coli*. As a result, `mo_fullname(mo_shortname("Entamoeba coli"))` returns `"Escherichia coli"`.
#'
#' Following the formal introduction of the new kingdom rank into prokaryotic nomenclature by G"{o}ker and Oren (2024, \doi{10.1099/ijsem.0.006242}), [mo_kingdom()] and [mo_domain()] return different results for bacteria and archaea: [mo_kingdom()] returns the new formal kingdom (e.g. "Pseudomonadati", "Bacillati"), while [mo_domain()] returns the new domain (e.g. "Bacteria", "Archaea"). For non-prokaryotic organisms, both functions return identical results.
#'
#' Determination of human pathogenicity ([mo_pathogenicity()]) is strongly based on Bartlett *et al.* (2022, \doi{10.1099/mic.0.001269}). This function returns a [factor] with the levels *Pathogenic*, *Potentially pathogenic*, *Non-pathogenic*, and *Unknown*.
#'
#' Determination of the Gram stain ([mo_gramstain()]) will be based on the taxonomic kingdom and phylum. Originally, Cavalier-Smith defined the so-called subkingdoms Negibacteria and Posibacteria (2002, [PMID 11837318](https://pubmed.ncbi.nlm.nih.gov/11837318/)), and only considered these phyla as Posibacteria: Actinobacteria, Chloroflexi, Firmicutes, and Tenericutes. These phyla were later renamed to Actinomycetota, Chloroflexota, Bacillota, and Mycoplasmatota (2021, [PMID 34694987](https://pubmed.ncbi.nlm.nih.gov/34694987/)). Bacteria in these phyla are considered Gram-positive in this `AMR` package, except for members of the class Negativicutes (within phylum Bacillota) which are Gram-negative. All other bacteria are considered Gram-negative. Species outside the kingdom of Bacteria will return a value `NA`. Functions [mo_is_gram_negative()] and [mo_is_gram_positive()] always return `TRUE` or `FALSE` (or `NA` when the input is `NA` or the MO code is `UNKNOWN`), thus always return `FALSE` for species outside the taxonomic kingdom of Bacteria.
#' Determination of the Gram stain ([mo_gramstain()] is based on the taxonomic kingdom and phylum. Originally, Cavalier-Smith defined the so-called subkingdoms Negibacteria and Posibacteria (2002, [PMID 11837318](https://pubmed.ncbi.nlm.nih.gov/11837318/)), and only considered these phyla as Posibacteria: Actinobacteria, Chloroflexi, Firmicutes, and Tenericutes. These phyla were later renamed to Actinomycetota, Chloroflexota, Bacillota, and Mycoplasmatota (2021, [PMID 34694987](https://pubmed.ncbi.nlm.nih.gov/34694987/)). Bacteria in these phyla are considered Gram-positive in this `AMR` package, except for members of the class Negativicutes (within phylum Bacillota) which are Gram-negative. All other bacteria are considered Gram-negative. Species outside the kingdom of Bacteria will return a value `NA`. Functions [mo_is_gram_negative()] and [mo_is_gram_positive()] always return `TRUE` or `FALSE` (or `NA` when the input is `NA` or the MO code is `UNKNOWN`), thus always return `FALSE` for species outside the taxonomic kingdom of Bacteria.
#'
#' Determination of yeasts ([mo_is_yeast()]) will be based on the taxonomic kingdom and class. *Budding yeasts* are yeasts that reproduce asexually through a process called budding, where a new cell develops from a small protrusion on the parent cell. Taxonomically, these are members of the phylum Ascomycota, class Saccharomycetes (also called Hemiascomycetes) or Pichiomycetes. *True yeasts* quite specifically refers to yeasts in the underlying order Saccharomycetales (such as *Saccharomyces cerevisiae*). Thus, for all microorganisms that are member of the taxonomic class Saccharomycetes or Pichiomycetes, the function will return `TRUE`. It returns `FALSE` otherwise (or `NA` when the input is `NA` or the MO code is `UNKNOWN`).
#' Determination of yeasts ([mo_is_yeast()]) is based on the taxonomic kingdom and class. *Budding yeasts* are yeasts that reproduce asexually through a process called budding, where a new cell develops from a small protrusion on the parent cell. Taxonomically, these are members of the phylum Ascomycota, class Saccharomycetes (also called Hemiascomycetes) or Pichiomycetes. *True yeasts* quite specifically refers to yeasts in the underlying order Saccharomycetales (such as *Saccharomyces cerevisiae*). Thus, for all microorganisms that are member of the taxonomic class Saccharomycetes or Pichiomycetes, the function will return `TRUE`. It returns `FALSE` otherwise (or `NA` when the input is `NA` or the MO code is `UNKNOWN`).
#'
#' Determination of intrinsic resistance ([mo_is_intrinsic_resistant()]) will be based on the [intrinsic_resistant] data set, which is based on `r format_eucast_version_nr(names(EUCAST_VERSION_EXPECTED_PHENOTYPES[1]))`. The [mo_is_intrinsic_resistant()] function can be vectorised over both argument `x` (input for microorganisms) and `ab` (input for antimicrobials).
#' Determination of intrinsic resistance ([mo_is_intrinsic_resistant()]) is based on the [intrinsic_resistant] data set, which is based on `r format_eucast_version_nr(names(EUCAST_VERSION_EXPECTED_PHENOTYPES[1]))`. The [mo_is_intrinsic_resistant()] function can be vectorised over both argument `x` (input for microorganisms) and `ab` (input for antimicrobials).
#'
#' Determination of bacterial oxygen tolerance ([mo_oxygen_tolerance()]) will be based on BacDive, see *Source*. The function [mo_is_anaerobic()] only returns `TRUE` if the oxygen tolerance is `"anaerobe"`, indicting an obligate anaerobic species or genus. It always returns `FALSE` for species outside the taxonomic kingdom of Bacteria.
#' Determination of both bacterial oxygen tolerance ([mo_oxygen_tolerance()]) and morphology ([mo_morphology()]) are based on BacDive, see *Source*. The function [mo_is_anaerobic()] only returns `TRUE` if the oxygen tolerance is `"anaerobe"`, indicating an obligate anaerobic species or genus. It always returns `FALSE` for species outside the taxonomic kingdom of Bacteria.
#'
#' The function [mo_url()] will return the direct URL to the online database entry, which also shows the scientific reference of the concerned species. [This MycoBank URL](`r TAXONOMY_VERSION$MycoBank$url`) will be used for fungi wherever available , [this LPSN URL](`r TAXONOMY_VERSION$MycoBank$url`) for bacteria wherever available, and [this GBIF link](`r TAXONOMY_VERSION$GBIF$url`) otherwise.
#' The function [mo_url()] will return the direct URL to the online database entry, which also shows the scientific reference of the concerned species. [This MycoBank URL](`r TAXONOMY_VERSION$MycoBank$url`) is used for fungi wherever available , [this LPSN URL](`r TAXONOMY_VERSION$MycoBank$url`) for bacteria wherever available, and [this GBIF link](`r TAXONOMY_VERSION$GBIF$url`) otherwise.
#'
#' SNOMED codes ([mo_snomed()]) was last updated on `r documentation_date(TAXONOMY_VERSION$SNOMED$accessed_date)`. See *Source* and the [microorganisms] data set for more info.
#'
@@ -100,8 +102,10 @@
#'
#' # other properties ---------------------------------------------------------
#'
#' mo_pathogenicity("Klebsiella pneumoniae")
#' mo_morphology("Klebsiella pneumoniae")
#' mo_gramstain("Klebsiella pneumoniae")
#' mo_gramstain("Klebsiella pneumoniae", add_morphology = TRUE)
#' mo_pathogenicity("Klebsiella pneumoniae")
#' mo_snomed("Klebsiella pneumoniae")
#' mo_type("Klebsiella pneumoniae")
#' mo_rank("Klebsiella pneumoniae")
@@ -249,8 +253,8 @@ mo_shortname <- function(x, language = get_AMR_locale(), keep_synonyms = getOpti
}
# get first char of genus and complete species in English
genera <- mo_genus(x.mo, language = NULL, keep_synonyms = keep_synonyms)
shortnames <- paste0(substr(genera, 1, 1), ". ", replace_empty(mo_species(x.mo, language = NULL, keep_synonyms = keep_synonyms)))
genera <- mo_genus(x.mo, language = NULL, keep_synonyms = keep_synonyms, ...)
shortnames <- paste0(substr(genera, 1, 1), ". ", replace_empty(mo_species(x.mo, language = NULL, keep_synonyms = keep_synonyms, ...)))
# exceptions for where no species is known
shortnames[shortnames %like% ".[.] spp[.]"] <- genera[shortnames %like% ".[.] spp[.]"]
@@ -262,7 +266,7 @@ mo_shortname <- function(x, language = get_AMR_locale(), keep_synonyms = getOpti
# unknown species etc.
shortnames[shortnames %like% "unknown"] <- paste0("(", trimws2(gsub("[^a-zA-Z -]", "", shortnames[shortnames %like% "unknown"], perl = TRUE)), ")")
shortnames[mo_rank(x.mo) %in% c("kingdom", "phylum", "class", "order", "family")] <- mo_name(x.mo[mo_rank(x.mo) %in% c("kingdom", "phylum", "class", "order", "family")], language = NULL, keep_synonyms = keep_synonyms)
shortnames[mo_rank(x.mo, keep_synonyms = TRUE, ...) %in% c("domain", "kingdom", "phylum", "class", "order", "family")] <- mo_name(x.mo[mo_rank(x.mo, keep_synonyms = TRUE, ...) %in% c("domain", "kingdom", "phylum", "class", "order", "family")], language = NULL, keep_synonyms = keep_synonyms, ...)
shortnames[is.na(x.mo)] <- NA_character_
load_mo_uncertainties(metadata)
@@ -379,7 +383,18 @@ mo_kingdom <- function(x, language = get_AMR_locale(), keep_synonyms = getOption
language <- validate_language(language)
meet_criteria(keep_synonyms, allow_class = "logical", has_length = 1)
translate_into_language(mo_validate(x = x, property = "kingdom", language = language, keep_synonyms = keep_synonyms, ...), language = language, only_unknown = TRUE)
x.mo <- as.mo(x, language = language, keep_synonyms = keep_synonyms, ...)
for (new_kingdom in c("Archaea", "Bacteria")) {
if (any(mo_domain(x.mo) == new_kingdom, na.rm = TRUE) && message_not_thrown_before("mo_kingdom", new_kingdom, entire_session = TRUE)) {
message_(
"Since {.pkg AMR v3.1.0}, {.help [{.fun mo_kingdom}](AMR::mo_kingdom)} returns the taxonomic kingdom as defined by G\u00f6ker and Oren (2024), who formally introduced a new kingdom rank into prokaryotic nomenclature ({.href [DOI: 10.1099/ijsem.0.006242](https://doi.org/10.1099/ijsem.0.006242)}). ",
"{.strong The former kingdom of ", new_kingdom, "} was divided into four new kingdoms under the {.strong new domain of ", new_kingdom, "}. ",
"For the old behaviour, use {.help [{.fun mo_domain}](AMR::mo_domain)}. ",
"This note will be shown once per session."
)
}
}
translate_into_language(mo_validate(x = x.mo, property = "kingdom", language = language, keep_synonyms = keep_synonyms, ...), language = language, only_unknown = TRUE)
}
#' @rdname mo_property
@@ -389,7 +404,11 @@ mo_domain <- function(x, language = get_AMR_locale(), keep_synonyms = getOption(
# this tries to find the data and an 'mo' column
x <- find_mo_col(fn = "mo_domain")
}
mo_kingdom(x = x, language = language, keep_synonyms = keep_synonyms, ...)
meet_criteria(x, allow_NA = TRUE)
language <- validate_language(language)
meet_criteria(keep_synonyms, allow_class = "logical", has_length = 1)
translate_into_language(mo_validate(x = x, property = "domain", language = language, keep_synonyms = keep_synonyms, ...), language = language, only_unknown = TRUE)
}
#' @rdname mo_property
@@ -404,7 +423,8 @@ mo_type <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("A
meet_criteria(keep_synonyms, allow_class = "logical", has_length = 1)
x.mo <- as.mo(x, language = language, keep_synonyms = keep_synonyms, ...)
out <- mo_kingdom(x.mo, language = NULL, keep_synonyms = keep_synonyms)
out <- mo_domain(x.mo, language = NULL, keep_synonyms = keep_synonyms)
out <- gsub(" \\{.*\\}", "", out) # strip curly brackets
out[which(mo_is_yeast(x.mo, keep_synonyms = keep_synonyms))] <- "Yeasts"
translate_into_language(out, language = language, only_unknown = FALSE)
}
@@ -440,7 +460,7 @@ mo_pathogenicity <- function(x, language = get_AMR_locale(), keep_synonyms = get
metadata <- get_mo_uncertainties()
prev <- AMR_env$MO_lookup$prevalence[match(x.mo, AMR_env$MO_lookup$mo)]
kngd <- AMR_env$MO_lookup$kingdom[match(x.mo, AMR_env$MO_lookup$mo)]
kngd <- AMR_env$MO_lookup$domain[match(x.mo, AMR_env$MO_lookup$mo)]
rank <- AMR_env$MO_lookup$rank[match(x.mo, AMR_env$MO_lookup$mo)]
out <- factor(
@@ -460,8 +480,9 @@ mo_pathogenicity <- function(x, language = get_AMR_locale(), keep_synonyms = get
}
#' @rdname mo_property
#' @param add_morphology a [logical] to indicate whether the morphology (from [mo_morphology()]) should be added to the Gram stain result, e.g. `"Gram-negative rods"` instead of `"Gram-negative"`. The default is `FALSE`.
#' @export
mo_gramstain <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ...) {
mo_gramstain <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", FALSE), add_morphology = FALSE, ...) {
if (missing(x)) {
# this tries to find the data and an 'mo' column
x <- find_mo_col(fn = "mo_gramstain")
@@ -469,13 +490,14 @@ mo_gramstain <- function(x, language = get_AMR_locale(), keep_synonyms = getOpti
meet_criteria(x, allow_NA = TRUE)
language <- validate_language(language)
meet_criteria(keep_synonyms, allow_class = "logical", has_length = 1)
meet_criteria(add_morphology, allow_class = "logical", has_length = 1)
x.mo <- as.mo(x, language = language, keep_synonyms = keep_synonyms, ...)
metadata <- get_mo_uncertainties()
x <- rep(NA_character_, length(x))
# make all bacteria Gram negative
x[mo_kingdom(x.mo, language = NULL, keep_synonyms = keep_synonyms) == "Bacteria"] <- "Gram-negative"
x[mo_domain(x.mo, language = NULL, keep_synonyms = keep_synonyms) == "Bacteria"] <- "Gram-negative"
# overwrite these 4 phyla with Gram-positives
# Source: https://itis.gov/servlet/SingleRpt/SingleRpt?search_topic=TSN&search_value=956097 (Cavalier-Smith, 2002)
x[(mo_phylum(x.mo, language = NULL, keep_synonyms = keep_synonyms) %in% c(
@@ -494,6 +516,12 @@ mo_gramstain <- function(x, language = get_AMR_locale(), keep_synonyms = getOpti
# and of course our own ID for Gram-positives
| x.mo %in% c("B_GRAMP", "B_ANAER-POS")] <- "Gram-positive"
if (isTRUE(add_morphology)) {
morphs <- mo_morphology(x.mo, language = NULL)
morphs[is.na(x)] <- ""
x[!is.na(x)] <- paste(x[!is.na(x)], tolower(morphs[!is.na(x)]))
}
load_mo_uncertainties(metadata)
translate_into_language(x, language = language, only_unknown = FALSE)
}
@@ -552,12 +580,12 @@ mo_is_yeast <- function(x, language = get_AMR_locale(), keep_synonyms = getOptio
x.mo <- as.mo(x, language = language, keep_synonyms = keep_synonyms, ...)
metadata <- get_mo_uncertainties()
x.kingdom <- mo_kingdom(x.mo, language = NULL, keep_synonyms = keep_synonyms)
x.domain <- mo_domain(x.mo, language = NULL, keep_synonyms = keep_synonyms)
x.class <- mo_class(x.mo, language = NULL, keep_synonyms = keep_synonyms)
load_mo_uncertainties(metadata)
out <- x.mo == "F_YEAST" | (x.kingdom == "Fungi" & x.class %in% c("Saccharomycetes", "Pichiomycetes"))
out <- x.mo == "F_YEAST" | (x.domain == "Fungi" & x.class %in% c("Saccharomycetes", "Pichiomycetes"))
out[x.mo %in% c(NA_character_, "UNKNOWN")] <- NA
out
}
@@ -634,6 +662,21 @@ mo_is_anaerobic <- function(x, language = get_AMR_locale(), keep_synonyms = getO
out
}
#' @rdname mo_property
#' @export
mo_morphology <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ...) {
if (missing(x)) {
# this tries to find the data and an 'mo' column
x <- find_mo_col(fn = "mo_morphology")
}
meet_criteria(x, allow_NA = TRUE)
language <- validate_language(language)
meet_criteria(keep_synonyms, allow_class = "logical", has_length = 1)
out <- mo_validate(x = x, property = "morphology", language = language, keep_synonyms = keep_synonyms, ...)
gsub("^(\\w)", "\\U\\1", out, perl = TRUE)
}
#' @rdname mo_property
#' @export
mo_snomed <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ...) {
@@ -768,6 +811,7 @@ mo_taxonomy <- function(x, language = get_AMR_locale(), keep_synonyms = getOptio
metadata <- get_mo_uncertainties()
out <- list(
domain = mo_domain(x, language = language, keep_synonyms = keep_synonyms),
kingdom = mo_kingdom(x, language = language, keep_synonyms = keep_synonyms),
phylum = mo_phylum(x, language = language, keep_synonyms = keep_synonyms),
class = mo_class(x, language = language, keep_synonyms = keep_synonyms),
@@ -885,6 +929,7 @@ mo_info <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("A
status = mo_status(y, language = language, keep_synonyms = keep_synonyms),
synonyms = mo_synonyms(y, keep_synonyms = keep_synonyms),
gramstain = mo_gramstain(y, language = language, keep_synonyms = keep_synonyms),
morphology = mo_morphology(y, language = language, keep_synonyms = keep_synonyms),
oxygen_tolerance = mo_oxygen_tolerance(y, language = language, keep_synonyms = keep_synonyms),
url = unname(mo_url(y, open = FALSE, keep_synonyms = keep_synonyms)),
ref = mo_ref(y, keep_synonyms = keep_synonyms),
@@ -978,11 +1023,11 @@ mo_validate <- function(x, property, language, keep_synonyms = keep_synonyms, ..
dots <- list(...)
Becker <- dots$Becker
if (is.null(Becker) || property %in% c("kingdom", "phylum", "class", "order", "family", "genus")) {
if (is.null(Becker) || property %in% c("domain", "kingdom", "phylum", "class", "order", "family", "genus")) {
Becker <- FALSE
}
Lancefield <- dots$Lancefield
if (is.null(Lancefield) || property %in% c("kingdom", "phylum", "class", "order", "family", "genus")) {
if (is.null(Lancefield) || property %in% c("domain", "kingdom", "phylum", "class", "order", "family", "genus")) {
Lancefield <- FALSE
}
has_Becker_or_Lancefield <- Becker %in% c(TRUE, "all") || Lancefield %in% c(TRUE, "all")

View File

@@ -100,7 +100,7 @@
#' ```
#'
#' Using `only_all_tested` has no impact when only using one antibiotic as input.
#' @source **M39 Analysis and Presentation of Cumulative Antimicrobial Susceptibility Test Data, 5th Edition**, 2022, *Clinical and Laboratory Standards Institute (CLSI)*. <https://clsi.org/standards/products/microbiology/documents/m39/>.
#' @references **M39 Analysis and Presentation of Cumulative Antimicrobial Susceptibility Test Data, 5th Edition**, 2022, *Clinical and Laboratory Standards Institute (CLSI)*. <https://clsi.org/standards/products/microbiology/documents/m39/>.
#' @seealso [AMR::count()] to count resistant and susceptible isolates.
#' @return A [double] or, when `as_percent = TRUE`, a [character].
#' @rdname proportion

View File

@@ -175,7 +175,7 @@ VALID_SIR_LEVELS <- c("S", "SDD", "I", "R", "NI", "WT", "NWT", "NS")
#' @aliases sir
#' @export
#' @seealso [as.mic()], [as.disk()], [as.mo()]
#' @source
#' @references
#' For interpretations of minimum inhibitory concentration (MIC) values and disk diffusion diameters:
#'
#' - **CLSI M39: Analysis and Presentation of Cumulative Antimicrobial Susceptibility Test Data**, `r min(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "CLSI")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "CLSI")$guideline)))`, *Clinical and Laboratory Standards Institute* (CLSI). <https://clsi.org/standards/products/microbiology/documents/m39/>.
@@ -525,7 +525,7 @@ as.sir.default <- function(x,
} else if (!all(is.na(x)) && !identical(levels(x), VALID_SIR_LEVELS) && !all(x %in% c(VALID_SIR_LEVELS, NA))) {
if (all(x %unlike% "(S|I|R)", na.rm = TRUE) && !all(x %in% c(1, 2, 3, 4, 5), na.rm = TRUE)) {
# check if they are actually MICs or disks
if (all_valid_mics(x)) {
if (all_valid_mics(x) && !(all_valid_disks(x) && identical(x, floor(x)))) {
warning_("in {.help [{.fun as.sir}](AMR::as.sir)}: input values were guessed to be MIC values - preferably transform them with {.help [{.fun as.mic}](AMR::as.mic)} before running {.help [{.fun as.sir}](AMR::as.sir)}.")
return(as.sir(as.mic(x), ...))
} else if (all_valid_disks(x)) {
@@ -1654,7 +1654,7 @@ as_sir_method <- function(method_short,
mo_current_other <- structure("UNKNOWN", class = c("mo", "character"))
# formatted for notes
mo_formatted <- mo_current_name
if (!mo_current_rank %in% c("kingdom", "phylum", "class", "order")) {
if (!mo_current_rank %in% c("domain", "kingdom", "phylum", "class", "order")) {
mo_formatted <- font_italic(mo_formatted, collapse = NULL)
}
ab_formatted <- paste0(

Binary file not shown.

View File

@@ -31,7 +31,7 @@
#'
#' All antimicrobial drugs and their official names, ATC codes, ATC groups and defined daily dose (DDD) are included in this package, using the WHO Collaborating Centre for Drug Statistics Methodology.
#' @section WHOCC:
#' This package contains **all ~550 antibiotic, antimycotic and antiviral drugs** and their Anatomical Therapeutic Chemical (ATC) codes, ATC groups and Defined Daily Dose (DDD) from the World Health Organization Collaborating Centre for Drug Statistics Methodology (WHOCC, <https://atcddd.fhi.no>) and the Pharmaceuticals Community Register of the European Commission (<https://ec.europa.eu/health/documents/community-register/html/reg_hum_atc.htm>).
#' This package contains **all ~550 antibiotic, antimycotic and antiviral drugs** and their Anatomical Therapeutic Chemical (ATC) codes, ATC groups and Defined Daily Dose (DDD) from the World Health Organization Collaborating Centre for Drug Statistics Methodology (WHOCC, <https://atcddd.fhi.no>) and the Pharmaceuticals Community Register of the European Commission (<https://ec.europa.eu/health/documents/community-register/html/index_en.htm>).
#'
#' These have become the gold standard for international drug utilisation monitoring and research.
#'