% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tidymodels.R
\name{amr-tidymodels}
\alias{amr-tidymodels}
\alias{all_mic}
\alias{all_mic_predictors}
\alias{all_sir}
\alias{all_sir_predictors}
\alias{step_mic_log2}
\alias{step_sir_numeric}
\title{AMR Extensions for Tidymodels}
\usage{
all_mic()

all_mic_predictors()

all_sir()

all_sir_predictors()

step_mic_log2(recipe, ..., role = NA, trained = FALSE, columns = NULL, skip = FALSE, id = recipes::rand_id("mic_log2"))

step_sir_numeric(recipe, ..., role = NA, trained = FALSE, columns = NULL, skip = FALSE, id = recipes::rand_id("sir_numeric"))
}
\arguments{
\item{recipe}{A recipe object. The step will be added to the sequence of operations for this recipe.}

\item{...}{One or more selector functions to choose variables for this step. See \code{\link[recipes:selections]{selections()}} for more details.}

\item{role}{Not used by this step since no new variables are created.}

\item{trained}{A logical to indicate if the quantities for preprocessing have been estimated.}

\item{skip}{A logical. Should the step be skipped when the recipe is baked by \code{\link[recipes:bake]{bake()}}? While all operations are baked when \code{\link[recipes:prep]{prep()}} is run, some operations may not be able to be conducted on new data (e.g. processing the outcome variable(s)). Care should be taken when using \code{skip = TRUE} as it may affect the computations for subsequent operations.}

\item{id}{A character string that is unique to this step to identify it.}
}
\description{
This family of functions allows using AMR-specific data types such as \verb{<mic>} and \verb{<sir>} inside \code{tidymodels} pipelines.
}
\details{
You can read more in our online \href{https://amr-for-r.org/articles/AMR_with_tidymodels.html}{AMR with tidymodels introduction}.
Tidyselect helpers include:
\itemize{
\item \code{\link[=all_mic]{all_mic()}} and \code{\link[=all_mic_predictors]{all_mic_predictors()}} to select \verb{<mic>} columns
\item \code{\link[=all_sir]{all_sir()}} and \code{\link[=all_sir_predictors]{all_sir_predictors()}} to select \verb{<sir>} columns
}

Pre-processing pipeline steps include:
\itemize{
\item \code{\link[=step_mic_log2]{step_mic_log2()}} to convert MIC columns to numeric (via \code{as.numeric()}) and apply a log2 transform, to be used with \code{\link[=all_mic_predictors]{all_mic_predictors()}}
\item \code{\link[=step_sir_numeric]{step_sir_numeric()}} to convert SIR columns to numeric (via \code{as.numeric()}), to be used with \code{\link[=all_sir_predictors]{all_sir_predictors()}}: \code{"S"} = 1, \code{"I"}/\code{"SDD"} = 2, \code{"R"} = 3. All other values are rendered \code{NA}. Keep this in mind for further processing, especially if the model does not allow for \code{NA} values.
}

These steps integrate with \code{recipes::recipe()} and work like standard preprocessing steps. They are useful for preparing data for modelling, especially with classification models.
}
\examples{
library(tidymodels)

# The below approach formed the basis for this paper: DOI 10.3389/fmicb.2025.1582703
# Presence of ESBL genes was predicted based on raw MIC values.
# example data set in the AMR package
esbl_isolates

# Prepare a binary outcome and convert to ordered factor
data <- esbl_isolates \%>\%
  mutate(esbl = factor(esbl, levels = c(FALSE, TRUE), ordered = TRUE))

# Split into training and testing sets
split <- initial_split(data)
training_data <- training(split)
testing_data <- testing(split)

# Create and prep a recipe with MIC log2 transformation
mic_recipe <- recipe(esbl ~ ., data = training_data) \%>\%
  # Optionally remove non-predictive variables
  remove_role(genus, old_role = "predictor") \%>\%
  # Apply the log2 transformation to all MIC predictors
  step_mic_log2(all_mic_predictors()) \%>\%
  prep()

# View prepped recipe
mic_recipe

# Apply the recipe to training and testing data
out_training <- bake(mic_recipe, new_data = NULL)
out_testing <- bake(mic_recipe, new_data = testing_data)

# Fit a logistic regression model
fitted <- logistic_reg(mode = "classification") \%>\%
  set_engine("glm") \%>\%
  fit(esbl ~ ., data = out_training)

# Generate predictions on the test set
predictions <- predict(fitted, out_testing) \%>\%
  bind_cols(out_testing)

# Evaluate predictions using standard classification metrics
our_metrics <- metric_set(accuracy, kap, ppv, npv)
metrics <- our_metrics(predictions, truth = esbl, estimate = .pred_class)

# Show performance:
# - negative predictive value (NPV) of ~98\%
# - positive predictive value (PPV) of ~94\%
metrics
}
\seealso{
\code{\link[recipes:recipe]{recipes::recipe()}}, \code{\link[=as.mic]{as.mic()}}, \code{\link[=as.sir]{as.sir()}}
}
\keyword{internal}