diff --git a/DESCRIPTION b/DESCRIPTION index 645817dcf..b817b7a57 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -49,7 +49,6 @@ Suggests: ggplot2, knitr, openxlsx, - parallelly, pillar, progress, readxl, diff --git a/NEWS.md b/NEWS.md index c9ce4c764..d25480ca8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,8 +1,12 @@ # AMR 3.0.1.9053 +This will become release v3.1.0, intended for launch end of May. + ### New * Support for clinical breakpoints of 2026 of both CLSI and EUCAST, by adding all of their over 5,700 new clinical breakpoints to the `clinical_breakpoints` data set for usage in `as.sir()`. EUCAST 2026 is now the new default guideline for all MIC and disk diffusion interpretations. -* Integration with the **tidymodels** framework to allow seamless use of SIR, MIC and disk data in modelling pipelines via `recipes` +* Support for the [`future`](https://future.futureverse.org) package and its framework, as the previous implementation of parallel computing was slow + - **Breaking change**: `as.sir()` with `parallel = TRUE` now requires a non-sequential `future::plan()` to be active before the call — e.g., `future::plan(future::multisession)` — and throws an informative error if none is set. +* Integration with the *tidymodels* framework to allow seamless use of SIR, MIC and disk data in modelling pipelines via `recipes` - `step_mic_log2()` to transform `` columns with log2, and `step_sir_numeric()` to convert `` columns to numeric - New `tidyselect` helpers: - `all_sir()`, `all_sir_predictors()` @@ -21,7 +25,6 @@ * Two new `NA` objects, `NA_ab_` and `NA_mo_`, analogous to base R's `NA_character_` and `NA_integer_`, for use in pipelines that require typed missing values ### Fixes -* Fixed multiple bugs in the `parallel = TRUE` mode of `as.sir()` for data frames * Fixed a bug in `as.sir()` where values that were purely numeric (e.g., `"1"`) and matched the broad SIR-matching regex would be incorrectly stripped of all content by the Unicode letter filter * Fixed a bug in `as.mic()` where MIC values in scientific notation (e.g., `"1e-3"`) were incorrectly handled because the letter `e` was removed along with other Unicode letters; scientific notation `e` is now preserved * Fixed a bug in `as.ab()` where certain AB codes containing "PH" or "TH" (such as `ETH`, `MTH`, `PHE`, `PHN`, `STH`, `THA`, `THI1`) would incorrectly return `NA` when combined in a vector with any untranslatable value (#245) @@ -39,8 +42,7 @@ * Fixed `as.sir()` for data frames incorrectly treating metadata columns (e.g. `patient`, `ward`) as antibiotic columns when their names coincidentally matched an antibiotic code; column content is now validated against AMR data patterns before inclusion * Improved parallel computing in `as.sir()`: when the number of AB columns is smaller than the number of available cores, rows are now split into batches so all cores stay active (row-batch mode). Previously, a 6-column dataset on a 16-core machine would only use 6 cores; now all 16 are used, with each worker processing a smaller row slice (lower per-worker memory pressure) * Fixed false-positive `"as_wt_nwt is no longer used"` warnings that appeared during parallel `as.sir()` runs; `as_wt_nwt` is now excluded from the unused-argument check in `as_sir_method()` -* **Breaking change**: `as.sir()` with `parallel = TRUE` now requires a non-sequential `future::plan()` to be active before the call — e.g., `future::plan(future::multisession)` — and throws an informative error if none is set; previously `as.sir()` would silently set up and tear down a `multisession` plan itself, which was slow and caused version-mismatch issues with `load_all()` workflows -* Fixed `as.sir()` ignoring `info = FALSE` for columns with no breakpoints (e.g. cefoxitin against *E. coli*): an operator-precedence bug (`&&`/`||`) caused the "Interpreting MIC values" intro message to fire unconditionally when `nrow(breakpoints) == 0`, regardless of `info`; the progress bar title was also not gated by `info` +* Fixed `as.sir()` ignoring `info = FALSE` for columns with no breakpoints (e.g. cefoxitin against *E. coli*) ### Updates * `as.sir()` with `parallel = TRUE` now uses `future.apply::future_lapply()` instead of `parallel::mclapply()`/`parallel::parLapply()`, enabling transparent support for any `future` backend (including `mirai_multisession`) on all platforms; `future` and `future.apply` are now listed under `Suggests` @@ -59,7 +61,6 @@ * This results in more reliable behaviour compared to previous versions for capped MIC values * Removed the `"inverse"` option, which has now become redundant * `ab_group()` now returns values consist with the AMR selectors (#246) -* Added two new `NA` objects, `NA_ab_` and `NA_mo_`, analogous to base R's `NA_character_` and `NA_integer_`, for use in pipelines that require typed missing values # AMR 3.0.1 diff --git a/R/aa_helper_functions.R b/R/aa_helper_functions.R index bae37579f..d321816aa 100644 --- a/R/aa_helper_functions.R +++ b/R/aa_helper_functions.R @@ -1681,28 +1681,6 @@ readRDS_AMR <- function(file, refhook = NULL) { readRDS(con, refhook = refhook) } -get_n_cores <- function(max_cores = Inf) { - if (pkg_is_available("parallelly", min_version = "0.8.0", also_load = FALSE)) { - available_cores <- import_fn("availableCores", "parallelly") - n_cores <- min(available_cores(), na.rm = TRUE) - } else { - # `parallel` is part of base R since 2.14.0, but detectCores() is not very precise on exotic systems like Docker and quota-set Linux environments - n_cores <- parallel::detectCores()[1] - if (is.na(n_cores)) { - n_cores <- 1 - } - } - max_cores <- floor(max_cores) - if (max_cores == 0) { - n_cores <- 1 - } else if (max_cores < 0) { - n_cores <- max(1, n_cores - abs(max_cores)) - } else if (max_cores > 0) { - n_cores <- min(n_cores, max_cores) - } - n_cores -} - # Support `where()` if tidyselect not installed ---- if (!is.null(import_fn("where", "tidyselect", error_on_fail = FALSE))) { # tidyselect::where() exists, retrieve from their namespace to make `where()`s work across the package in default arguments diff --git a/R/sir.R b/R/sir.R index b922b5577..d95ebdf0f 100755 --- a/R/sir.R +++ b/R/sir.R @@ -718,7 +718,6 @@ as.sir.disk <- function(x, #' @rdname as.sir #' @param parallel A [logical] to indicate if parallel computing must be used, defaults to `FALSE`. Requires the [`future.apply`][future.apply::future_lapply()] package. **A non-sequential [future::plan()] must already be active before setting `parallel = TRUE`** — for example, `future::plan(future::multisession)`. An error is thrown if `parallel = TRUE` is used without a plan set by the user. Parallelism distributes columns (and optionally row batches) across workers; it is most beneficial when there are many antibiotic columns and a large number of rows. -#' @param max_cores Maximum number of workers to use when `parallel = TRUE`. Use a negative value to subtract that number from the available workers, e.g. a value of `-2` means at most `nbrOfWorkers() - 2` workers will be used. Defaults to `-1` (all but one worker). There will never be more workers used than there are antibiotic columns to analyse. #' @export as.sir.data.frame <- function(x, ..., @@ -738,7 +737,6 @@ as.sir.data.frame <- function(x, verbose = FALSE, info = interactive(), parallel = FALSE, - max_cores = -1, conserve_capped_values = NULL) { meet_criteria(x, allow_class = "data.frame") # will also check for dimensions > 0 meet_criteria(col_mo, allow_class = "character", is_in = colnames(x), allow_NULL = TRUE) @@ -757,7 +755,6 @@ as.sir.data.frame <- function(x, meet_criteria(verbose, allow_class = "logical", has_length = 1) meet_criteria(info, allow_class = "logical", has_length = 1) meet_criteria(parallel, allow_class = "logical", has_length = 1) - meet_criteria(max_cores, allow_class = c("numeric", "integer"), has_length = 1) x.bak <- x if (isTRUE(info) && message_not_thrown_before("as.sir", "sir_interpretation_history")) { @@ -912,41 +909,36 @@ as.sir.data.frame <- function(x, } # set up parallel computing + if (requireNamespace("future.apply", quietly = TRUE) && !inherits(future::plan(), "sequential")) { + if (isFALSE(parallel)) { + message_("Assuming {.code parallel = TRUE} since parallel computing has been set up using the {.pkg future} package before. Set {.help [{.fun plan}](future::plan)} to sequential to prevent this.") + } + parallel <- TRUE + } if (isTRUE(parallel)) { - if (!requireNamespace("future.apply", quietly = TRUE)) { - stop_( - "Setting {.arg parallel} to {.code TRUE} requires the {.pkg future.apply} package.\n", - "Install it with: ", highlight_code('install.packages("future.apply")'), "." - ) - } - if (inherits(future::plan(), "sequential")) { - stop_( - "Setting {.arg parallel} to {.code TRUE} requires a non-sequential {.help [future::plan](future::plan)} to be active.\n", - "Set a parallel plan before calling {.help [{.fun as.sir}](AMR::as.sir)}, for example:\n", - highlight_code("future::plan(future::multisession)"), "\n", - "Or on Linux/macOS for fork-based workers:\n", - highlight_code("future::plan(future::multicore)"), "\n", - "See {.help [future::plan](future::plan)} for all available strategies.", - call = FALSE - ) - } + stop_ifnot( + requireNamespace("future.apply", quietly = TRUE), + "Setting {.code parallel = TRUE} requires the {.pkg future.apply} package.\n", + "Install it with {.code install.packages(\"future.apply\")}." + ) + stop_if(inherits(future::plan(), "sequential"), + "Setting {.code parallel = TRUE} requires a non-sequential {.help [{.fun future::plan}](future::plan)} to be active.\n", + "For your system, you could first run: {.code library(future); ", + ifelse(.Platform$OS.type == "windows" || in_rstudio(), + "plan(multisession)", + "plan(multicore)" + ), + "}", + call = FALSE + ) + n_workers <- future::nbrOfWorkers() - n_cores <- if (max_cores < 0L) { - max(1L, n_workers + max_cores) - } else { - min(max_cores, n_workers) - } - n_cores <- min(n_cores, length(ab_cols)) + n_cores <- min(n_workers, length(ab_cols)) } else { n_workers <- 1L n_cores <- 1L } - if (isTRUE(info)) { - message_(as_note = FALSE) # empty line - message_("Processing columns:", as_note = FALSE) - } - # In parallel mode suppress per-column messages: workers print simultaneously and # their output would be interleaved on the console. is_parallel_run <- isTRUE(parallel) && n_cores > 1 && length(ab_cols) > 1 @@ -957,10 +949,10 @@ as.sir.data.frame <- function(x, # gets work. pieces_per_col = ceil(n_workers / n_cols) gives ~n_workers jobs # total; each job processes one column on one row slice, which also reduces # per-worker memory pressure (smaller breakpoints search space). - pieces_per_col <- if (is_parallel_run && length(ab_cols) < n_workers) { - ceiling(n_workers / length(ab_cols)) + if (is_parallel_run && length(ab_cols) < n_workers) { + pieces_per_col <- ceiling(n_workers / length(ab_cols)) } else { - 1L + pieces_per_col <- 1L } run_as_sir_column <- function(i, rows = NULL) { @@ -1049,7 +1041,7 @@ as.sir.data.frame <- function(x, ab <- ab_col ab_coerced <- suppressWarnings(as.ab(ab, info = FALSE)) show_message <- FALSE - if (!all(x[row_idx, ab, drop = TRUE] %in% c("S", "SDD", "I", "R", "NI", NA), na.rm = TRUE)) { + if (!all(x[row_idx, ab, drop = TRUE] %in% c(VALID_SIR_LEVELS, NA), na.rm = TRUE)) { show_message <- TRUE if (isTRUE(effective_info)) { message_("\u00a0\u00a0", .amr_env$bullet_icon, " Cleaning values in column ", paste0("{.field ", font_bold(ab), "}"), " (", @@ -1086,9 +1078,9 @@ as.sir.data.frame <- function(x, if (isTRUE(info)) { message_(as_note = FALSE) if (pieces_per_col > 1L) { - message_("Running in parallel mode using ", n_cores, " out of ", get_n_cores(Inf), " workers, on columns ", vector_and(paste0("{.field ", font_bold(ab_cols, collapse = NULL), "}"), quotes = FALSE, sort = FALSE), " (", pieces_per_col, " row slices per column)...", as_note = FALSE, appendLF = FALSE) + message_("Running in parallel mode using ", n_cores, " workers, on columns ", vector_and(paste0("{.field ", font_bold(ab_cols, collapse = NULL), "}"), quotes = FALSE, sort = FALSE), " (", pieces_per_col, " row slices per column)...", as_note = FALSE, appendLF = FALSE) } else { - message_("Running in parallel mode using ", n_cores, " out of ", get_n_cores(Inf), " workers, on columns ", vector_and(paste0("{.field ", font_bold(ab_cols, collapse = NULL), "}"), quotes = FALSE, sort = FALSE), "...", as_note = FALSE, appendLF = FALSE) + message_("Running in parallel mode using ", n_cores, " workers, on columns ", vector_and(paste0("{.field ", font_bold(ab_cols, collapse = NULL), "}"), quotes = FALSE, sort = FALSE), "...", as_note = FALSE, appendLF = FALSE) } } if (pieces_per_col > 1L) { @@ -1108,7 +1100,7 @@ as.sir.data.frame <- function(x, pieces <- flat[vapply(jobs, function(j) j$col == ci, logical(1L))] list( result = as.sir(do.call(c, lapply(pieces, function(p) as.character(p$result)))), - log = { + log = { logs <- Filter(Negate(is.null), lapply(pieces, function(p) p$log)) if (length(logs) > 0L) do.call(rbind_AMR, logs) else NULL } @@ -1125,12 +1117,16 @@ as.sir.data.frame <- function(x, } } else { # sequential mode (non-parallel) - if (isTRUE(info) && get_n_cores(Inf) > 1 && NROW(x) * NCOL(x) > 10000) { + if (isTRUE(info) && n_cores > 1 && NROW(x) * NCOL(x) > 10000) { + suggest <- ifelse(.Platform$OS.type == "windows" || in_rstudio(), + "plan(multisession)", + "plan(multicore)" + ) message_(as_note = FALSE) if (requireNamespace("future.apply", quietly = TRUE)) { - message_("Running in sequential mode. To speed up processing, set a parallel plan first (e.g., ", highlight_code("future::plan(future::multisession)"), ") and then set {.arg parallel} to {.code TRUE}.\n") + message_("Running in sequential mode. To speed up processing, set a parallel {.help [{.fun future::plan}](future::plan)} such as {.code ", suggest, "}.") } else { - message_("Running in sequential mode. To speed up processing, install the {.pkg future.apply} package, set a parallel plan first (e.g., ", highlight_code("future::plan(future::multisession)"), ") and then set {.arg parallel} to {.code TRUE}.\n") + message_("Running in sequential mode. To speed up processing, install the {.pkg future.apply} package and then set {.code parallel = TRUE}.\n") } } # this will contain a progress bar already @@ -2102,7 +2098,7 @@ sir_interpretation_history <- function(clean = FALSE) { #' @noRd print.sir_log <- function(x, ...) { if (NROW(x) == 0) { - message_("No results to print. First run {.help [{.fun as.sir}](AMR::as.sir)} on MIC values or disk diffusion zones (or on a {.cls data.frame} containing any of these) to print a {.val logbook} data set here.") + message_("No results to print. First run {.help [{.fun as.sir}](AMR::as.sir)} on MIC values or disk diffusion zones (or on a {.cls data.frame} containing any of these) to print a 'logbook' data set here.") return(invisible(NULL)) } class(x) <- class(x)[class(x) != "sir_log"] @@ -2344,7 +2340,7 @@ coerce_reference_data_columns <- function(x) { ref <- AMR::clinical_breakpoints for (col in names(ref)) { col_ref <- ref[[col]] - col_x <- x[[col]] + col_x <- x[[col]] if (identical(class(col_ref), class(col_x))) next if (col == "mo") { x[[col]] <- suppressMessages(as.mo(col_x)) diff --git a/index.md b/index.md index c512a8175..78a02c0a6 100644 --- a/index.md +++ b/index.md @@ -26,12 +26,9 @@

- amr-for-r.org

-

- doi.org/10.18637/jss.v104.i03

@@ -174,24 +171,26 @@ example_isolates %>% #> ℹ Using column mo as input for `mo_fullname()` #> ℹ Using column mo as input for `mo_is_gram_negative()` #> ℹ Using column mo as input for `mo_is_intrinsic_resistant()` -#> ℹ Determining intrinsic resistance based on 'EUCAST Expected Resistant -#> Phenotypes' v1.2 (2023). This note will be shown once per session. -#> ℹ For `aminoglycosides()` using columns GEN (gentamicin), TOB (tobramycin), AMK -#> (amikacin), and KAN (kanamycin) -#> ℹ For `carbapenems()` using columns IPM (imipenem) and MEM (meropenem) +#> ℹ Determining intrinsic resistance based on 'EUCAST Expected +#> Resistant Phenotypes' v1.2 (2023). This note will be shown +#> once per session. +#> ℹ For `aminoglycosides()` using columns GEN (gentamicin), TOB +#> (tobramycin), AMK (amikacin), and KAN (kanamycin) +#> ℹ For `carbapenems()` using columns IPM (imipenem) and MEM +#> (meropenem) #> # A tibble: 35 × 7 -#> bacteria GEN TOB AMK KAN IPM MEM -#> -#> 1 Pseudomonas aeruginosa I S NA R S NA -#> 2 Pseudomonas aeruginosa I S NA R S NA -#> 3 Pseudomonas aeruginosa I S NA R S NA -#> 4 Pseudomonas aeruginosa S S S R NA S -#> 5 Pseudomonas aeruginosa S S S R S S -#> 6 Pseudomonas aeruginosa S S S R S S -#> 7 Stenotrophomonas maltophilia R R R R R R -#> 8 Pseudomonas aeruginosa S S S R NA S -#> 9 Pseudomonas aeruginosa S S S R NA S -#> 10 Pseudomonas aeruginosa S S S R S S +#> bacteria GEN TOB AMK KAN IPM MEM +#> +#> 1 Pseudomonas aer… I S NA R S NA +#> 2 Pseudomonas aer… I S NA R S NA +#> 3 Pseudomonas aer… I S NA R S NA +#> 4 Pseudomonas aer… S S S R NA S +#> 5 Pseudomonas aer… S S S R S S +#> 6 Pseudomonas aer… S S S R S S +#> 7 Stenotrophomona… R R R R R R +#> 8 Pseudomonas aer… S S S R NA S +#> 9 Pseudomonas aer… S S S R NA S +#> 10 Pseudomonas aer… S S S R S S #> # ℹ 25 more rows ``` @@ -215,23 +214,24 @@ output format automatically (such as markdown, LaTeX, HTML, etc.). ``` r antibiogram(example_isolates, antimicrobials = c(aminoglycosides(), carbapenems())) -#> ℹ For `aminoglycosides()` using columns GEN (gentamicin), TOB (tobramycin), AMK -#> (amikacin), and KAN (kanamycin) -#> ℹ For `carbapenems()` using columns IPM (imipenem) and MEM (meropenem) +#> ℹ For `aminoglycosides()` using columns GEN (gentamicin), TOB +#> (tobramycin), AMK (amikacin), and KAN (kanamycin) +#> ℹ For `carbapenems()` using columns IPM (imipenem) and MEM +#> (meropenem) ``` -| Pathogen | Amikacin | Gentamicin | Imipenem | Kanamycin | Meropenem | Tobramycin | -|:---|:---|:---|:---|:---|:---|:---| -| CoNS | 0% (0-8%,N=43) | 86% (82-90%,N=309) | 52% (37-67%,N=48) | 0% (0-8%,N=43) | 52% (37-67%,N=48) | 22% (12-35%,N=55) | -| *E. coli* | 100% (98-100%,N=171) | 98% (96-99%,N=460) | 100% (99-100%,N=422) | NA | 100% (99-100%,N=418) | 97% (96-99%,N=462) | -| *E. faecalis* | 0% (0-9%,N=39) | 0% (0-9%,N=39) | 100% (91-100%,N=38) | 0% (0-9%,N=39) | NA | 0% (0-9%,N=39) | -| *K. pneumoniae* | NA | 90% (79-96%,N=58) | 100% (93-100%,N=51) | NA | 100% (93-100%,N=53) | 90% (79-96%,N=58) | -| *P. aeruginosa* | NA | 100% (88-100%,N=30) | NA | 0% (0-12%,N=30) | NA | 100% (88-100%,N=30) | -| *P. mirabilis* | NA | 94% (80-99%,N=34) | 94% (79-99%,N=32) | NA | NA | 94% (80-99%,N=34) | -| *S. aureus* | NA | 99% (97-100%,N=233) | NA | NA | NA | 98% (92-100%,N=86) | -| *S. epidermidis* | 0% (0-8%,N=44) | 79% (71-85%,N=163) | NA | 0% (0-8%,N=44) | NA | 51% (40-61%,N=89) | -| *S. hominis* | NA | 92% (84-97%,N=80) | NA | NA | NA | 85% (74-93%,N=62) | -| *S. pneumoniae* | 0% (0-3%,N=117) | 0% (0-3%,N=117) | NA | 0% (0-3%,N=117) | NA | 0% (0-3%,N=117) | +| Pathogen | Amikacin | Gentamicin | Imipenem | Kanamycin | Meropenem | Tobramycin | +|:-----------------|:---------------------|:--------------------|:---------------------|:----------------|:---------------------|:--------------------| +| CoNS | 0% (0-8%,N=43) | 86% (82-90%,N=309) | 52% (37-67%,N=48) | 0% (0-8%,N=43) | 52% (37-67%,N=48) | 22% (12-35%,N=55) | +| *E. coli* | 100% (98-100%,N=171) | 98% (96-99%,N=460) | 100% (99-100%,N=422) | NA | 100% (99-100%,N=418) | 97% (96-99%,N=462) | +| *E. faecalis* | 0% (0-9%,N=39) | 0% (0-9%,N=39) | 100% (91-100%,N=38) | 0% (0-9%,N=39) | NA | 0% (0-9%,N=39) | +| *K. pneumoniae* | NA | 90% (79-96%,N=58) | 100% (93-100%,N=51) | NA | 100% (93-100%,N=53) | 90% (79-96%,N=58) | +| *P. aeruginosa* | NA | 100% (88-100%,N=30) | NA | 0% (0-12%,N=30) | NA | 100% (88-100%,N=30) | +| *P. mirabilis* | NA | 94% (80-99%,N=34) | 94% (79-99%,N=32) | NA | NA | 94% (80-99%,N=34) | +| *S. aureus* | NA | 99% (97-100%,N=233) | NA | NA | NA | 98% (92-100%,N=86) | +| *S. epidermidis* | 0% (0-8%,N=44) | 79% (71-85%,N=163) | NA | 0% (0-8%,N=44) | NA | 51% (40-61%,N=89) | +| *S. hominis* | NA | 92% (84-97%,N=80) | NA | NA | NA | 85% (74-93%,N=62) | +| *S. pneumoniae* | 0% (0-3%,N=117) | 0% (0-3%,N=117) | NA | 0% (0-3%,N=117) | NA | 0% (0-3%,N=117) | In combination antibiograms, it is clear that combined antimicrobials yield higher empiric coverage: @@ -242,10 +242,10 @@ antibiogram(example_isolates, mo_transform = "gramstain") ``` -| Pathogen | Piperacillin/tazobactam | Piperacillin/tazobactam + Gentamicin | Piperacillin/tazobactam + Tobramycin | -|:---|:---|:---|:---| -| Gram-negative | 88% (85-91%,N=641) | 99% (97-99%,N=691) | 98% (97-99%,N=693) | -| Gram-positive | 86% (82-89%,N=345) | 98% (96-98%,N=1044) | 95% (93-97%,N=550) | +| Pathogen | Piperacillin/tazobactam | Piperacillin/tazobactam + Gentamicin | Piperacillin/tazobactam + Tobramycin | +|:--------------|:------------------------|:-------------------------------------|:-------------------------------------| +| Gram-negative | 88% (85-91%,N=641) | 99% (97-99%,N=691) | 98% (97-99%,N=693) | +| Gram-positive | 86% (82-89%,N=345) | 98% (96-98%,N=1044) | 95% (93-97%,N=550) | Like many other functions in this package, `antibiogram()` comes with support for 28 languages that are often detected automatically based on @@ -318,16 +318,18 @@ example_isolates %>% summarise(across(c(GEN, TOB), list(total_R = resistance, conf_int = function(x) sir_confidence_interval(x, collapse = "-")))) -#> ℹ `resistance()` assumes the EUCAST guideline and thus considers the 'I' -#> category susceptible. Set the `guideline` argument or the `AMR_guideline` -#> option to either "CLSI" or "EUCAST", see `?AMR-options`. +#> ℹ `resistance()` assumes the EUCAST guideline and thus +#> considers the 'I' category susceptible. Set the `guideline` +#> argument or the `AMR_guideline` option to either "CLSI" or +#> "EUCAST", see `?AMR-options`. #> ℹ This message will be shown once per session. #> # A tibble: 3 × 5 -#> ward GEN_total_R GEN_conf_int TOB_total_R TOB_conf_int -#> -#> 1 Clinical 0.229 0.205-0.254 0.315 0.284-0.347 -#> 2 ICU 0.290 0.253-0.33 0.400 0.353-0.449 -#> 3 Outpatient 0.2 0.131-0.285 0.368 0.254-0.493 +#> ward GEN_total_R GEN_conf_int TOB_total_R +#> +#> 1 Clinical 0.229 0.205-0.254 0.315 +#> 2 ICU 0.290 0.253-0.33 0.400 +#> 3 Outpatient 0.2 0.131-0.285 0.368 +#> # ℹ 1 more variable: TOB_conf_int ``` Or use [antimicrobial @@ -344,15 +346,16 @@ out <- example_isolates %>% # calculate AMR using resistance(), over all aminoglycosides and polymyxins: summarise(across(c(aminoglycosides(), polymyxins()), resistance)) -#> ℹ For `aminoglycosides()` using columns GEN (gentamicin), TOB (tobramycin), AMK -#> (amikacin), and KAN (kanamycin) +#> ℹ For `aminoglycosides()` using columns GEN (gentamicin), TOB +#> (tobramycin), AMK (amikacin), and KAN (kanamycin) #> ℹ For `polymyxins()` using column COL (colistin) #> Warning: There was 1 warning in `summarise()`. -#> ℹ In argument: `across(c(aminoglycosides(), polymyxins()), resistance)`. +#> ℹ In argument: `across(c(aminoglycosides(), polymyxins()), +#> resistance)`. #> ℹ In group 3: `ward = "Outpatient"`. #> Caused by warning: -#> ! Introducing NA: only 23 results available for KAN in group: ward = "Outpatient" -#> (whilst `minimum = 30`). +#> ! Introducing NA: only 23 results available for KAN in group: +#> ward = "Outpatient" (whilst `minimum = 30`). out #> # A tibble: 3 × 6 #> ward GEN TOB AMK KAN COL @@ -366,11 +369,12 @@ out # transform the antibiotic columns to names: out %>% set_ab_names() #> # A tibble: 3 × 6 -#> ward gentamicin tobramycin amikacin kanamycin colistin -#> -#> 1 Clinical 0.229 0.315 0.626 1 0.780 -#> 2 ICU 0.290 0.400 0.662 1 0.857 -#> 3 Outpatient 0.2 0.368 0.605 NA 0.889 +#> ward gentamicin tobramycin amikacin kanamycin +#> +#> 1 Clinical 0.229 0.315 0.626 1 +#> 2 ICU 0.290 0.400 0.662 1 +#> 3 Outpatient 0.2 0.368 0.605 NA +#> # ℹ 1 more variable: colistin ``` ``` r diff --git a/man/amr-tidymodels.Rd b/man/amr-tidymodels.Rd index 079fcb039..2352636ed 100644 --- a/man/amr-tidymodels.Rd +++ b/man/amr-tidymodels.Rd @@ -31,24 +31,22 @@ step_sir_numeric(recipe, ..., role = NA, trained = FALSE, columns = NULL, skip = FALSE, id = recipes::rand_id("sir_numeric")) } \arguments{ -\item{recipe}{A recipe object. The step will be added to the -sequence of operations for this recipe.} +\item{recipe}{A recipe object. The step will be added to the sequence of +operations for this recipe.} -\item{...}{One or more selector functions to choose variables -for this step. See \code{\link[recipes:selections]{selections()}} for more details.} +\item{...}{One or more selector functions to choose variables for this step. +See \code{\link[recipes:selections]{selections()}} for more details.} -\item{role}{Not used by this step since no new variables are -created.} +\item{role}{Not used by this step since no new variables are created.} -\item{trained}{A logical to indicate if the quantities for -preprocessing have been estimated.} +\item{trained}{A logical to indicate if the quantities for preprocessing have +been estimated.} -\item{skip}{A logical. Should the step be skipped when the -recipe is baked by \code{\link[recipes:bake]{bake()}}? While all operations are baked -when \code{\link[recipes:prep]{prep()}} is run, some operations may not be able to be -conducted on new data (e.g. processing the outcome variable(s)). -Care should be taken when using \code{skip = TRUE} as it may affect -the computations for subsequent operations.} +\item{skip}{A logical. Should the step be skipped when the recipe is baked by +\code{\link[recipes:bake]{bake()}}? While all operations are baked when \code{\link[recipes:prep]{prep()}} is run, some +operations may not be able to be conducted on new data (e.g. processing the +outcome variable(s)). Care should be taken when using \code{skip = TRUE} as it +may affect the computations for subsequent operations.} \item{id}{A character string that is unique to this step to identify it.} } diff --git a/man/antibiogram.Rd b/man/antibiogram.Rd index bf7f232ea..940a2fd51 100644 --- a/man/antibiogram.Rd +++ b/man/antibiogram.Rd @@ -72,7 +72,7 @@ retrieve_wisca_parameters(wisca_model, ...) \item{ab_transform}{A character to transform antimicrobial input - must be one of the column names of the \link{antimicrobials} data set (defaults to \code{"name"}): \code{"ab"}, \code{"cid"}, \code{"name"}, \code{"group"}, \code{"atc"}, \code{"atc_group1"}, \code{"atc_group2"}, \code{"abbreviations"}, \code{"synonyms"}, \code{"oral_ddd"}, \code{"oral_units"}, \code{"iv_ddd"}, \code{"iv_units"}, or \code{"loinc"}. Can also be \code{NULL} to not transform the input.} -\item{syndromic_group}{A column name of \code{x}, or values calculated to split rows of \code{x}, e.g. by using \code{\link[=ifelse]{ifelse()}} or \code{\link[dplyr:case_when]{case_when()}}. See \emph{Examples}.} +\item{syndromic_group}{A column name of \code{x}, or values calculated to split rows of \code{x}, e.g. by using \code{\link[=ifelse]{ifelse()}} or \code{\link[dplyr:case-and-replace-when]{case_when()}}. See \emph{Examples}.} \item{add_total_n}{\emph{(deprecated in favour of \code{formatting_type})} A \link{logical} to indicate whether \code{n_tested} available numbers per pathogen should be added to the table (default is \code{TRUE}). This will add the lowest and highest number of available isolates per antimicrobial (e.g, if for \emph{E. coli} 200 isolates are available for ciprofloxacin and 150 for amoxicillin, the returned number will be "150-200"). This option is unavailable when \code{wisca = TRUE}; in that case, use \code{\link[=retrieve_wisca_parameters]{retrieve_wisca_parameters()}} to get the parameters used for WISCA.} diff --git a/man/as.sir.Rd b/man/as.sir.Rd index 9ae4de47f..957357982 100644 --- a/man/as.sir.Rd +++ b/man/as.sir.Rd @@ -73,7 +73,7 @@ is_sir_eligible(x, threshold = 0.05) include_PKPD = getOption("AMR_include_PKPD", TRUE), breakpoint_type = getOption("AMR_breakpoint_type", "human"), host = NULL, language = get_AMR_locale(), verbose = FALSE, info = interactive(), - parallel = FALSE, max_cores = -1, conserve_capped_values = NULL) + parallel = FALSE, conserve_capped_values = NULL) sir_interpretation_history(clean = FALSE) } @@ -152,8 +152,6 @@ The default \code{"conservative"} setting ensures cautious handling of uncertain \item{parallel}{A \link{logical} to indicate if parallel computing must be used, defaults to \code{FALSE}. Requires the \code{\link[future.apply:future_lapply]{future.apply}} package. \strong{A non-sequential \code{\link[future:plan]{future::plan()}} must already be active before setting \code{parallel = TRUE}} — for example, \code{future::plan(future::multisession)}. An error is thrown if \code{parallel = TRUE} is used without a plan set by the user. Parallelism distributes columns (and optionally row batches) across workers; it is most beneficial when there are many antibiotic columns and a large number of rows.} -\item{max_cores}{Maximum number of workers to use when \code{parallel = TRUE}. Use a negative value to subtract that number from the available workers, e.g. a value of \code{-2} means at most \code{nbrOfWorkers() - 2} workers will be used. Defaults to \code{-1} (all but one worker). There will never be more workers used than there are antibiotic columns to analyse.} - \item{clean}{A \link{logical} to indicate whether previously stored results should be forgotten after returning the 'logbook' with results.} } \value{ diff --git a/man/custom_eucast_rules.Rd b/man/custom_eucast_rules.Rd index 1ab96ad2d..355578f88 100644 --- a/man/custom_eucast_rules.Rd +++ b/man/custom_eucast_rules.Rd @@ -19,7 +19,7 @@ Define custom EUCAST rules for your organisation or specific analysis and use th Some organisations have their own adoption of EUCAST rules. This function can be used to define custom EUCAST rules to be used in the \code{\link[=eucast_rules]{eucast_rules()}} function. \subsection{Basics}{ -If you are familiar with the \code{\link[dplyr:case_when]{case_when()}} function of the \code{dplyr} package, you will recognise the input method to set your own rules. Rules must be set using what \R considers to be the 'formula notation'. The rule itself is written \emph{before} the tilde (\code{~}) and the consequence of the rule is written \emph{after} the tilde: +If you are familiar with the \code{\link[dplyr:case-and-replace-when]{case_when()}} function of the \code{dplyr} package, you will recognise the input method to set your own rules. Rules must be set using what \R considers to be the 'formula notation'. The rule itself is written \emph{before} the tilde (\code{~}) and the consequence of the rule is written \emph{after} the tilde: \if{html}{\out{
}}\preformatted{x <- custom_eucast_rules(TZP == "S" ~ aminopenicillins == "S", TZP == "R" ~ aminopenicillins == "R") diff --git a/man/custom_mdro_guideline.Rd b/man/custom_mdro_guideline.Rd index 85300ac7d..83fea9f46 100644 --- a/man/custom_mdro_guideline.Rd +++ b/man/custom_mdro_guideline.Rd @@ -26,7 +26,7 @@ Define custom a MDRO guideline for your organisation or specific analysis and us Using a custom MDRO guideline is of importance if you have custom rules to determine MDROs in your hospital, e.g., rules that are dependent on ward, state of contact isolation or other variables in your data. \subsection{Basics}{ -If you are familiar with the \code{\link[dplyr:case_when]{case_when()}} function of the \code{dplyr} package, you will recognise the input method to set your own rules. Rules must be set using what \R considers to be the 'formula notation'. The rule itself is written \emph{before} the tilde (\code{~}) and the consequence of the rule is written \emph{after} the tilde: +If you are familiar with the \code{\link[dplyr:case-and-replace-when]{case_when()}} function of the \code{dplyr} package, you will recognise the input method to set your own rules. Rules must be set using what \R considers to be the 'formula notation'. The rule itself is written \emph{before} the tilde (\code{~}) and the consequence of the rule is written \emph{after} the tilde: \if{html}{\out{
}}\preformatted{custom <- custom_mdro_guideline(CIP == "R" & age > 60 ~ "Elderly Type A", ERY == "R" & age > 60 ~ "Elderly Type B") diff --git a/man/g.test.Rd b/man/g.test.Rd index dc82a0dee..39a42bc1f 100644 --- a/man/g.test.Rd +++ b/man/g.test.Rd @@ -45,8 +45,9 @@ A list with class \code{"htest"} containing the following \item{residuals}{the Pearson residuals, \code{(observed - expected) / sqrt(expected)}.} \item{stdres}{standardized residuals, - \code{(observed - expected) / sqrt(V)}, where \code{V} is the residual cell variance (Agresti, 2007, - section 2.4.5 for the case where \code{x} is a matrix, \code{n * p * (1 - p)} otherwise).} + \code{(observed - expected) / sqrt(V)}, where \code{V} is the + residual cell variance (Agresti, 2007, section 2.4.5 + for the case where \code{x} is a matrix, \code{n * p * (1 - p)} otherwise).} } \description{ \code{\link[=g.test]{g.test()}} performs chi-squared contingency table tests and goodness-of-fit tests, just like \code{\link[=chisq.test]{chisq.test()}} but is more reliable (1). A \emph{G}-test can be used to see whether the number of observations in each category fits a theoretical expectation (called a \strong{\emph{G}-test of goodness-of-fit}), or to see whether the proportions of one variable are different for different values of the other variable (called a \strong{\emph{G}-test of independence}). diff --git a/man/pca.Rd b/man/pca.Rd index 163c65e2f..5e88c1efc 100644 --- a/man/pca.Rd +++ b/man/pca.Rd @@ -32,7 +32,7 @@ pca(x, ..., retx = TRUE, center = TRUE, scale. = TRUE, tol = NULL, standard deviations are less than or equal to \code{tol} times the standard deviation of the first component.) With the default null setting, no components are omitted (unless \code{rank.} is specified - less than \code{min(dim(x))}.). Other settings for tol could be + less than \code{min(dim(x))}.). Other settings for \code{tol} could be \code{tol = 0} or \code{tol = sqrt(.Machine$double.eps)}, which would omit essentially constant components.} diff --git a/tests/testthat/test-sir.R b/tests/testthat/test-sir.R index 2e877c8d2..598217c0e 100644 --- a/tests/testthat/test-sir.R +++ b/tests/testthat/test-sir.R @@ -408,13 +408,13 @@ test_that("test-sir.R", { # Issue #278: re-running as.sir() on already- data must preserve columns df_already_sir <- data.frame( - mo = "B_ESCHR_COLI", + mo = "B_ESCHR_COLI", AMC = as.mic(c("1", "2", "4")), GEN = sample(c("S", "I", "R"), 3, replace = TRUE), stringsAsFactors = FALSE ) - first_pass <- suppressMessages(as.sir(df_already_sir, col_mo = "mo", info = FALSE)) - second_pass <- suppressMessages(as.sir(first_pass, col_mo = "mo", info = FALSE)) + first_pass <- suppressMessages(as.sir(df_already_sir, col_mo = "mo", info = FALSE)) + second_pass <- suppressMessages(as.sir(first_pass, col_mo = "mo", info = FALSE)) expect_equal(ncol(first_pass), ncol(second_pass)) expect_true(is.sir(second_pass[["AMC"]])) expect_true(is.sir(second_pass[["GEN"]])) @@ -424,15 +424,15 @@ test_that("test-sir.R", { # Issue #278: metadata columns whose names coincidentally match antibiotic # codes (e.g. 'patient' -> OXY, 'ward' -> PRU) must not be processed df_meta <- data.frame( - mo = "B_ESCHR_COLI", + mo = "B_ESCHR_COLI", patient = paste0("Pt_", 1:20), - ward = rep(c("ICU", "Surgery", "Outpatient", "ED"), 5), - AMC = as.mic(rep(c("1", "2", "4", "8"), 5)), + ward = rep(c("ICU", "Surgery", "Outpatient", "ED"), 5), + AMC = as.mic(rep(c("1", "2", "4", "8"), 5)), stringsAsFactors = FALSE ) df_meta_sir <- suppressMessages(as.sir(df_meta, col_mo = "mo", info = FALSE)) expect_true("patient" %in% colnames(df_meta_sir)) - expect_true("ward" %in% colnames(df_meta_sir)) + expect_true("ward" %in% colnames(df_meta_sir)) expect_false(is.sir(df_meta_sir[["patient"]])) expect_false(is.sir(df_meta_sir[["ward"]])) expect_true(is.sir(df_meta_sir[["AMC"]])) @@ -444,7 +444,7 @@ test_that("test-sir.R", { set.seed(42) n_par <- 200 df_par <- data.frame( - mo = "B_ESCHR_COLI", + mo = "B_ESCHR_COLI", AMC = as.mic(sample(c("0.25", "0.5", "1", "2", "4", "8", "16", "32"), n_par, TRUE)), GEN = as.mic(sample(c("0.5", "1", "2", "4", "8", "16", "32", "64"), n_par, TRUE)), CIP = as.mic(sample(c("0.001", "0.002", "0.004", "0.008", "0.016", "0.032"), n_par, TRUE)), @@ -506,14 +506,16 @@ test_that("test-sir.R", { # verify identical output to sequential for a dataset with 2 AB columns so # pieces_per_col = ceiling(max_cores / 2) >= 2 and row batching activates df_wide <- data.frame( - mo = "B_ESCHR_COLI", + mo = "B_ESCHR_COLI", AMC = as.mic(sample(c("1", "2", "4", "8"), n_par, TRUE)), GEN = as.mic(sample(c("1", "2", "4", "8"), n_par, TRUE)), stringsAsFactors = FALSE ) sir_wide_seq <- suppressMessages(as.sir(df_wide, col_mo = "mo", info = FALSE)) - sir_wide_par <- suppressMessages(as.sir(df_wide, col_mo = "mo", info = FALSE, - parallel = TRUE, max_cores = 8L)) + sir_wide_par <- suppressMessages(as.sir(df_wide, + col_mo = "mo", info = FALSE, + parallel = TRUE, max_cores = 8L + )) expect_identical(sir_wide_seq[["AMC"]], sir_wide_par[["AMC"]]) expect_identical(sir_wide_seq[["GEN"]], sir_wide_par[["GEN"]]) @@ -536,9 +538,9 @@ test_that("custom reference_data: non-EUCAST/CLSI guideline produces R", { # coerce_reference_data_columns() will coerce mo/ab to the right class. my_bp <- clinical_breakpoints[clinical_breakpoints$method == "MIC" & clinical_breakpoints$type == "human", ][1, ] - my_bp$guideline <- "MyLab 2025" - my_bp$mo <- "B_ACHRMB_XYLS" # plain character — coerced to - my_bp$ab <- "MEM" # plain character — coerced to + my_bp$guideline <- "MyLab 2025" + my_bp$mo <- "B_ACHRMB_XYLS" # plain character — coerced to + my_bp$ab <- "MEM" # plain character — coerced to my_bp$breakpoint_S <- 8 my_bp$breakpoint_R <- 32 @@ -556,26 +558,30 @@ test_that("custom reference_data: non-EUCAST/CLSI guideline produces R", { # guideline explicitly set: same result when it matches the data expect_equal(as.character(suppressMessages( - as.sir(as.mic(64), mo = "B_ACHRMB_XYLS", ab = "MEM", - guideline = "MyLab 2025", reference_data = my_bp) + as.sir(as.mic(64), + mo = "B_ACHRMB_XYLS", ab = "MEM", + guideline = "MyLab 2025", reference_data = my_bp + ) )), "R") }) test_that("custom reference_data: host = NA acts as host-agnostic fallback", { my_bp <- clinical_breakpoints[clinical_breakpoints$method == "MIC" & clinical_breakpoints$type == "human", ][1, ] - my_bp$guideline <- "MyLab 2025" - my_bp$mo <- "B_ACHRMB_XYLS" - my_bp$ab <- "MEM" - my_bp$type <- "animal" - my_bp$host <- NA # logical NA — coerced to character by coerce_reference_data_columns() + my_bp$guideline <- "MyLab 2025" + my_bp$mo <- "B_ACHRMB_XYLS" + my_bp$ab <- "MEM" + my_bp$type <- "animal" + my_bp$host <- NA # logical NA — coerced to character by coerce_reference_data_columns() my_bp$breakpoint_S <- 8 my_bp$breakpoint_R <- 32 # NA host should match when no species-specific row exists result <- suppressMessages( - as.sir(as.mic(64), mo = "B_ACHRMB_XYLS", ab = "MEM", - host = "dogs", breakpoint_type = "animal", reference_data = my_bp) + as.sir(as.mic(64), + mo = "B_ACHRMB_XYLS", ab = "MEM", + host = "dogs", breakpoint_type = "animal", reference_data = my_bp + ) ) expect_equal(as.character(result), "R") }) diff --git a/tools/benchmark_parallel.png b/tools/benchmark_parallel.png new file mode 100644 index 000000000..a73c821d0 Binary files /dev/null and b/tools/benchmark_parallel.png differ