mirror of
https://github.com/msberends/AMR.git
synced 2025-12-22 02:20:20 +01:00
(v3.0.1.9005) re-add tidymodels implementation
This commit is contained in:
@@ -966,8 +966,13 @@ get_current_data <- function(arg_name, call) {
|
||||
# an element `.data` will be in the environment when using dplyr::select()
|
||||
return(env$`.data`)
|
||||
} else if (valid_df(env$training)) {
|
||||
# an element `training` will be in the environment when using some tidymodels functions such as `prep()`
|
||||
return(env$training)
|
||||
if (!is.null(env$x) && valid_df(env$x$template)) {
|
||||
# an element `x$template` will be in the environment when using some tidymodels functions such as `prep()`
|
||||
return(env$x$template)
|
||||
} else {
|
||||
# this is a fallback for some tidymodels functions such as `prep()`
|
||||
return(env$training)
|
||||
}
|
||||
} else if (valid_df(env$data)) {
|
||||
# an element `data` will be in the environment when using older dplyr versions, or some tidymodels functions such as `fit()`
|
||||
return(env$data)
|
||||
|
||||
@@ -163,7 +163,7 @@
|
||||
#' antimicrobials = c("TZP", "TZP+TOB", "TZP+GEN"))
|
||||
#' ```
|
||||
#'
|
||||
#' WISCA uses a sophisticated Bayesian decision model to combine both local and pooled antimicrobial resistance data. This approach not only evaluates local patterns but can also draw on multi-centre datasets to improve regimen accuracy, even in low-incidence infections like paediatric bloodstream infections (BSIs).
|
||||
#' WISCA uses a sophisticated Bayesian decision model to combine both local and pooled antimicrobial resistance data. This approach not only evaluates local patterns but can also draw on multi-centre data sets to improve regimen accuracy, even in low-incidence infections like paediatric bloodstream infections (BSIs).
|
||||
#'
|
||||
#' ### Grouped tibbles
|
||||
#'
|
||||
|
||||
@@ -99,7 +99,8 @@ atc_online_property <- function(atc_code,
|
||||
read_html <- import_fn("read_html", "xml2")
|
||||
|
||||
if (!all(atc_code %in% unlist(AMR::antimicrobials$atc))) {
|
||||
atc_code <- as.character(ab_atc(atc_code, only_first = TRUE))
|
||||
missing <- atc_code %unlike% "[A-Z][0-9][0-9][A-Z][A-Z][0-9][0-9]"
|
||||
atc_code[missing] <- as.character(ab_atc(atc_code[missing], only_first = TRUE))
|
||||
}
|
||||
|
||||
if (!has_internet()) {
|
||||
|
||||
24
R/data.R
24
R/data.R
@@ -282,7 +282,7 @@
|
||||
|
||||
#' Data Set with Clinical Breakpoints for SIR Interpretation
|
||||
#'
|
||||
#' @description Data set containing clinical breakpoints to interpret MIC and disk diffusion to SIR values, according to international guidelines. This dataset contain breakpoints for humans, `r length(unique(clinical_breakpoints$host[!clinical_breakpoints$host %in% clinical_breakpoints$type]))` different animal groups, and ECOFFs.
|
||||
#' @description Data set containing clinical breakpoints to interpret MIC and disk diffusion to SIR values, according to international guidelines. This data set contains breakpoints for humans, `r length(unique(clinical_breakpoints$host[!clinical_breakpoints$host %in% clinical_breakpoints$type]))` different animal groups, and ECOFFs.
|
||||
#'
|
||||
#' These breakpoints are currently implemented:
|
||||
#' - For **clinical microbiology**: EUCAST `r min(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "EUCAST" & type == "human")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "EUCAST" & type == "human")$guideline)))` and CLSI `r min(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "CLSI" & type == "human")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "CLSI" & type == "human")$guideline)))`;
|
||||
@@ -362,14 +362,14 @@
|
||||
#' dosage
|
||||
"dosage"
|
||||
|
||||
# TODO #' Data Set with `r format(nrow(esbl_isolates), big.mark = " ")` ESBL Isolates
|
||||
# TODO #'
|
||||
# TODO #' A data set containing `r format(nrow(esbl_isolates), big.mark = " ")` microbial isolates with MIC values of common antibiotics and a binary `esbl` column for extended-spectrum beta-lactamase (ESBL) production. This data set contains randomised fictitious data but reflects reality and can be used to practise AMR-related machine learning, e.g., classification modelling with [tidymodels](https://amr-for-r.org/articles/AMR_with_tidymodels.html).
|
||||
# TODO #' @format A [tibble][tibble::tibble] with `r format(nrow(esbl_isolates), big.mark = " ")` observations and `r ncol(esbl_isolates)` variables:
|
||||
# TODO #' - `esbl`\cr Logical indicator if the isolate is ESBL-producing
|
||||
# TODO #' - `genus`\cr Genus of the microorganism
|
||||
# TODO #' - `AMC:COL`\cr MIC values for 17 antimicrobial agents, transformed to class [`mic`] (see [as.mic()])
|
||||
# TODO #' @details See our [tidymodels integration][amr-tidymodels] for an example using this data set.
|
||||
# TODO #' @examples
|
||||
# TODO #' esbl_isolates
|
||||
# TODO "esbl_isolates"
|
||||
#' Data Set with `r format(nrow(esbl_isolates), big.mark = " ")` ESBL Isolates
|
||||
#'
|
||||
#' A data set containing `r format(nrow(esbl_isolates), big.mark = " ")` microbial isolates with MIC values of common antibiotics and a binary `esbl` column for extended-spectrum beta-lactamase (ESBL) production. This data set contains randomised fictitious data but reflects reality and can be used to practise AMR-related machine learning, e.g., classification modelling with [tidymodels](https://amr-for-r.org/articles/AMR_with_tidymodels.html).
|
||||
#' @format A [tibble][tibble::tibble] with `r format(nrow(esbl_isolates), big.mark = " ")` observations and `r ncol(esbl_isolates)` variables:
|
||||
#' - `esbl`\cr Logical indicator if the isolate is ESBL-producing
|
||||
#' - `genus`\cr Genus of the microorganism
|
||||
#' - `AMC:COL`\cr MIC values for 17 antimicrobial agents, transformed to class [`mic`] (see [as.mic()])
|
||||
#' @details See our [tidymodels integration][amr-tidymodels] for an example using this data set.
|
||||
#' @examples
|
||||
#' esbl_isolates
|
||||
"esbl_isolates"
|
||||
|
||||
@@ -1,20 +1,21 @@
|
||||
#' AMR Extensions for Tidymodels
|
||||
#'
|
||||
#' This family of functions allows using AMR-specific data types such as `<mic>` and `<sir>` inside `tidymodels` pipelines.
|
||||
#' This family of functions allows using AMR-specific data types such as `<sir>` and `<mic>` inside `tidymodels` pipelines.
|
||||
#' @inheritParams recipes::step_center
|
||||
#' @details
|
||||
#' You can read more in our online [AMR with tidymodels introduction](https://amr-for-r.org/articles/AMR_with_tidymodels.html).
|
||||
#'
|
||||
#' Tidyselect helpers include:
|
||||
#' - [all_mic()] and [all_mic_predictors()] to select `<mic>` columns
|
||||
#' - [all_sir()] and [all_sir_predictors()] to select `<sir>` columns
|
||||
#' - [all_sir()] and [all_sir_predictors()] to select [`<sir>`][as.sir()] columns
|
||||
#' - [all_mic()] and [all_mic_predictors()] to select [`<mic>`][as.mic()] columns
|
||||
#' - [all_disk()] and [all_disk_predictors()] to select [`<disk>`][as.disk()] columns
|
||||
#'
|
||||
#' Pre-processing pipeline steps include:
|
||||
#' - [step_mic_log2()] to convert MIC columns to numeric (via `as.numeric()`) and apply a log2 transform, to be used with [all_mic_predictors()]
|
||||
#' - [step_sir_numeric()] to convert SIR columns to numeric (via `as.numeric()`), to be used with [all_sir_predictors()]: `"S"` = 1, `"I"`/`"SDD"` = 2, `"R"` = 3. All other values are rendered `NA`. Keep this in mind for further processing, especially if the model does not allow for `NA` values.
|
||||
#' - [step_mic_log2()] to convert MIC columns to numeric (via `as.numeric()`) and apply a log2 transform, to be used with [all_mic_predictors()]
|
||||
#'
|
||||
#' These steps integrate with `recipes::recipe()` and work like standard preprocessing steps. They are useful for preparing data for modelling, especially with classification models.
|
||||
#' @seealso [recipes::recipe()], [as.mic()], [as.sir()]
|
||||
#' @seealso [recipes::recipe()], [as.sir()], [as.mic()], [as.disk()]
|
||||
#' @name amr-tidymodels
|
||||
#' @keywords internal
|
||||
#' @export
|
||||
@@ -66,35 +67,55 @@
|
||||
#' bind_cols(out_testing)
|
||||
#'
|
||||
#' # Evaluate predictions using standard classification metrics
|
||||
#' our_metrics <- metric_set(accuracy, kap, ppv, npv)
|
||||
#' our_metrics <- metric_set(accuracy,
|
||||
#' recall,
|
||||
#' precision,
|
||||
#' sensitivity,
|
||||
#' specificity,
|
||||
#' ppv,
|
||||
#' npv)
|
||||
#' metrics <- our_metrics(predictions, truth = esbl, estimate = .pred_class)
|
||||
#'
|
||||
#' # Show performance
|
||||
#' metrics
|
||||
#' }
|
||||
all_mic <- function() {
|
||||
x <- tidymodels_amr_select(levels(NA_mic_))
|
||||
names(x)
|
||||
}
|
||||
|
||||
#' @rdname amr-tidymodels
|
||||
#' @export
|
||||
all_mic_predictors <- function() {
|
||||
x <- tidymodels_amr_select(levels(NA_mic_))
|
||||
intersect(x, recipes::has_role("predictor"))
|
||||
}
|
||||
|
||||
#' @rdname amr-tidymodels
|
||||
#' @export
|
||||
all_sir <- function() {
|
||||
x <- tidymodels_amr_select(levels(NA_sir_))
|
||||
x <- tidymodels_amr_select(class = "sir")
|
||||
names(x)
|
||||
}
|
||||
|
||||
#' @rdname amr-tidymodels
|
||||
#' @export
|
||||
all_sir_predictors <- function() {
|
||||
x <- tidymodels_amr_select(levels(NA_sir_))
|
||||
x <- tidymodels_amr_select(class = "sir")
|
||||
intersect(x, recipes::has_role("predictor"))
|
||||
}
|
||||
|
||||
#' @rdname amr-tidymodels
|
||||
#' @export
|
||||
all_mic <- function() {
|
||||
x <- tidymodels_amr_select(class = "mic")
|
||||
names(x)
|
||||
}
|
||||
|
||||
#' @rdname amr-tidymodels
|
||||
#' @export
|
||||
all_mic_predictors <- function() {
|
||||
x <- tidymodels_amr_select(class = "mic")
|
||||
intersect(x, recipes::has_role("predictor"))
|
||||
}
|
||||
|
||||
#' @rdname amr-tidymodels
|
||||
#' @export
|
||||
all_disk <- function() {
|
||||
x <- tidymodels_amr_select(class = "disk")
|
||||
names(x)
|
||||
}
|
||||
|
||||
#' @rdname amr-tidymodels
|
||||
#' @export
|
||||
all_disk_predictors <- function() {
|
||||
x <- tidymodels_amr_select(class = "disk")
|
||||
intersect(x, recipes::has_role("predictor"))
|
||||
}
|
||||
|
||||
@@ -160,7 +181,6 @@ bake.step_mic_log2 <- function(object, new_data, ...) {
|
||||
print.step_mic_log2 <- function(x, width = max(20, options()$width - 35), ...) {
|
||||
title <- "Log2 transformation of MIC columns"
|
||||
recipes::print_step(x$columns, x$terms, x$trained, title, width)
|
||||
invisible(x)
|
||||
}
|
||||
|
||||
#' @rawNamespace if(getRversion() >= "3.0.0") S3method(recipes::tidy, step_mic_log2)
|
||||
@@ -236,7 +256,6 @@ bake.step_sir_numeric <- function(object, new_data, ...) {
|
||||
print.step_sir_numeric <- function(x, width = max(20, options()$width - 35), ...) {
|
||||
title <- "Numeric transformation of SIR columns"
|
||||
recipes::print_step(x$columns, x$terms, x$trained, title, width)
|
||||
invisible(x)
|
||||
}
|
||||
|
||||
#' @rawNamespace if(getRversion() >= "3.0.0") S3method(recipes::tidy, step_sir_numeric)
|
||||
@@ -250,13 +269,13 @@ tidy.step_sir_numeric <- function(x, ...) {
|
||||
res
|
||||
}
|
||||
|
||||
tidymodels_amr_select <- function(check_vector) {
|
||||
tidymodels_amr_select <- function(class) {
|
||||
df <- get_current_data()
|
||||
ind <- which(
|
||||
vapply(
|
||||
FUN.VALUE = logical(1),
|
||||
df,
|
||||
function(x) all(x %in% c(check_vector, NA), na.rm = TRUE) & any(x %in% check_vector),
|
||||
function(x) inherits(x, class),
|
||||
USE.NAMES = TRUE
|
||||
),
|
||||
useNames = TRUE
|
||||
Reference in New Issue
Block a user