(v3.0.0.9003) eucast_rules fix, new tidymodels integration

2026-02-09 13:52:54 +01:00 · 2025-06-13 14:03:21 +02:00
parent 3742e9e994
commit 72db2b2562
22 changed files with 760 additions and 107 deletions
--- a/man/amr-tidymodels.Rd
+++ b/man/amr-tidymodels.Rd
@@ -0,0 +1,122 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/tidymodels.R
+\name{amr-tidymodels}
+\alias{amr-tidymodels}
+\alias{all_mic}
+\alias{all_mic_predictors}
+\alias{all_sir}
+\alias{all_sir_predictors}
+\alias{step_mic_log2}
+\alias{step_sir_numeric}
+\title{AMR Extensions for Tidymodels}
+\usage{
+all_mic()
+
+all_mic_predictors()
+
+all_sir()
+
+all_sir_predictors()
+
+step_mic_log2(recipe, ..., role = NA, trained = FALSE, columns = NULL,
+  skip = FALSE, id = recipes::rand_id("mic_log2"))
+
+step_sir_numeric(recipe, ..., role = NA, trained = FALSE, columns = NULL,
+  skip = FALSE, id = recipes::rand_id("sir_numeric"))
+}
+\arguments{
+\item{recipe}{A recipe object. The step will be added to the sequence of
+operations for this recipe.}
+
+\item{...}{One or more selector functions to choose variables for this step.
+See \code{\link[recipes:selections]{selections()}} for more details.}
+
+\item{role}{Not used by this step since no new variables are created.}
+
+\item{trained}{A logical to indicate if the quantities for preprocessing have
+been estimated.}
+
+\item{skip}{A logical. Should the step be skipped when the recipe is baked by
+\code{\link[recipes:bake]{bake()}}? While all operations are baked when \code{\link[recipes:prep]{prep()}} is run, some
+operations may not be able to be conducted on new data (e.g. processing the
+outcome variable(s)). Care should be taken when using \code{skip = TRUE} as it
+may affect the computations for subsequent operations.}
+
+\item{id}{A character string that is unique to this step to identify it.}
+}
+\description{
+This family of functions allows using AMR-specific data types such as \verb{<mic>} and \verb{<sir>} inside \code{tidymodels} pipelines.
+}
+\details{
+You can read more in our online \href{https://amr-for-r.org/articles/AMR_with_tidymodels.html}{AMR with tidymodels introduction}.
+
+Tidyselect helpers include:
+\itemize{
+\item \code{\link[=all_mic]{all_mic()}} and \code{\link[=all_mic_predictors]{all_mic_predictors()}} to select \verb{<mic>} columns
+\item \code{\link[=all_sir]{all_sir()}} and \code{\link[=all_sir_predictors]{all_sir_predictors()}} to select \verb{<sir>} columns
+}
+
+Pre-processing pipeline steps include:
+\itemize{
+\item \code{\link[=step_mic_log2]{step_mic_log2()}} to convert MIC columns to numeric (via \code{as.numeric()}) and apply a log2 transform, to be used with \code{\link[=all_mic_predictors]{all_mic_predictors()}}
+\item \code{\link[=step_sir_numeric]{step_sir_numeric()}} to convert SIR columns to numeric (via \code{as.numeric()}), to be used with \code{\link[=all_sir_predictors]{all_sir_predictors()}}. The values are mapped as \code{"S"} = 1, \code{"I"}/\code{"SDD"} = 2, \code{"R"} = 3; all other values become \code{NA}. Keep this in mind for further processing, especially if the model does not allow for \code{NA} values.
+}
+
+These steps integrate with \code{recipes::recipe()} and work like standard preprocessing steps. They are useful for preparing data for modelling, especially with classification models.
+}
+\examples{
+library(tidymodels)
+
+# The approach below formed the basis for this paper: DOI 10.3389/fmicb.2025.1582703
+# Presence of ESBL genes was predicted based on raw MIC values.
+
+
+# example data set in the AMR package
+esbl_isolates
+
+# Prepare a binary outcome and convert to ordered factor
+data <- esbl_isolates \%>\%
+  mutate(esbl = factor(esbl, levels = c(FALSE, TRUE), ordered = TRUE))
+
+# Split into training and testing sets
+split <- initial_split(data)
+training_data <- training(split)
+testing_data <- testing(split)
+
+# Create and prep a recipe with MIC log2 transformation
+mic_recipe <- recipe(esbl ~ ., data = training_data) \%>\%
+  # Optionally remove non-predictive variables
+  remove_role(genus, old_role = "predictor") \%>\%
+  # Apply the log2 transformation to all MIC predictors
+  step_mic_log2(all_mic_predictors()) \%>\%
+  prep()
+
+# View prepped recipe
+mic_recipe
+
+# Apply the recipe to training and testing data
+out_training <- bake(mic_recipe, new_data = NULL)
+out_testing <- bake(mic_recipe, new_data = testing_data)
+
+# Fit a logistic regression model
+fitted <- logistic_reg(mode = "classification") \%>\%
+  set_engine("glm") \%>\%
+  fit(esbl ~ ., data = out_training)
+
+# Generate predictions on the test set
+predictions <- predict(fitted, out_testing) \%>\%
+  bind_cols(out_testing)
+
+# Evaluate predictions using standard classification metrics
+our_metrics <- metric_set(accuracy, kap, ppv, npv)
+metrics <- our_metrics(predictions, truth = esbl, estimate = .pred_class)
+
+# Show performance:
+# - negative predictive value (NPV) of ~98\%
+# - positive predictive value (PPV) of ~94\%
+metrics
+}
+\seealso{
+\code{\link[recipes:recipe]{recipes::recipe()}}, \code{\link[=as.mic]{as.mic()}}, \code{\link[=as.sir]{as.sir()}}
+}
+\keyword{internal}