mirror of
https://github.com/msberends/AMR.git
synced 2025-07-01 19:18:35 +02:00
123 lines
4.6 KiB
R
123 lines
4.6 KiB
R
% Generated by roxygen2: do not edit by hand
|
|
% Please edit documentation in R/tidymodels.R
|
|
\name{amr-tidymodels}
|
|
\alias{amr-tidymodels}
|
|
\alias{all_mic}
|
|
\alias{all_mic_predictors}
|
|
\alias{all_sir}
|
|
\alias{all_sir_predictors}
|
|
\alias{step_mic_log2}
|
|
\alias{step_sir_numeric}
|
|
\title{AMR Extensions for Tidymodels}
|
|
\usage{
|
|
all_mic()
|
|
|
|
all_mic_predictors()
|
|
|
|
all_sir()
|
|
|
|
all_sir_predictors()
|
|
|
|
step_mic_log2(recipe, ..., role = NA, trained = FALSE, columns = NULL,
|
|
skip = FALSE, id = recipes::rand_id("mic_log2"))
|
|
|
|
step_sir_numeric(recipe, ..., role = NA, trained = FALSE, columns = NULL,
|
|
skip = FALSE, id = recipes::rand_id("sir_numeric"))
|
|
}
|
|
\arguments{
|
|
\item{recipe}{A recipe object. The step will be added to the sequence of
|
|
operations for this recipe.}
|
|
|
|
\item{...}{One or more selector functions to choose variables for this step.
|
|
See \code{\link[recipes:selections]{selections()}} for more details.}
|
|
|
|
\item{role}{Not used by this step since no new variables are created.}
|
|
|
|
\item{trained}{A logical to indicate if the quantities for preprocessing have
|
|
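been estimated.}

\item{columns}{A character vector with the names of the columns selected for this step. Defaults to \code{NULL}; it is filled in when the recipe is prepped with \code{\link[recipes:prep]{prep()}} and normally does not need to be set by the user.}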
|
|
|
|
\item{skip}{A logical. Should the step be skipped when the recipe is baked by
|
|
\code{\link[recipes:bake]{bake()}}? While all operations are baked when \code{\link[recipes:prep]{prep()}} is run, some
|
|
operations may not be able to be conducted on new data (e.g. processing the
|
|
outcome variable(s)). Care should be taken when using \code{skip = TRUE} as it
|
|
may affect the computations for subsequent operations.}
|
|
|
|
\item{id}{A character string that is unique to this step to identify it.}
|
|
}
|
|
\description{
|
|
This family of functions allows AMR-specific data types such as \verb{<mic>} and \verb{<sir>} to be used inside \code{tidymodels} pipelines.
|
|
}
|
|
\details{
|
|
You can read more in our online \href{https://amr-for-r.org/articles/AMR_with_tidymodels.html}{AMR with tidymodels introduction}.
|
|
|
|
Tidyselect helpers include:
|
|
\itemize{
|
|
\item \code{\link[=all_mic]{all_mic()}} and \code{\link[=all_mic_predictors]{all_mic_predictors()}} to select \verb{<mic>} columns
|
|
\item \code{\link[=all_sir]{all_sir()}} and \code{\link[=all_sir_predictors]{all_sir_predictors()}} to select \verb{<sir>} columns
|
|
}
|
|
|
|
Pre-processing pipeline steps include:
|
|
\itemize{
|
|
\item \code{\link[=step_mic_log2]{step_mic_log2()}} to convert MIC columns to numeric (via \code{as.numeric()}) and apply a log2 transform, to be used with \code{\link[=all_mic_predictors]{all_mic_predictors()}}
|
|
\item \code{\link[=step_sir_numeric]{step_sir_numeric()}} to convert SIR columns to numeric (via \code{as.numeric()}), to be used with \code{\link[=all_sir_predictors]{all_sir_predictors()}}. The values are mapped as follows: \code{"S"} = 1, \code{"I"}/\code{"SDD"} = 2, \code{"R"} = 3. All other values are rendered \code{NA}. Keep this in mind for further processing, especially if the model does not allow for \code{NA} values.
|
|
}
|
|
|
|
These steps integrate with \code{recipes::recipe()} and work like standard preprocessing steps. They are useful for preparing data for modelling, especially with classification models.
|
|
}
|
|
\examples{
|
|
library(tidymodels)
|
|
|
|
# The approach below formed the basis for this paper: DOI 10.3389/fmicb.2025.1582703
|
|
# Presence of ESBL genes was predicted based on raw MIC values.
|
|
|
|
|
|
# example data set in the AMR package
|
|
esbl_isolates
|
|
|
|
# Prepare a binary outcome and convert it to an ordered factor
|
|
data <- esbl_isolates \%>\%
|
|
mutate(esbl = factor(esbl, levels = c(FALSE, TRUE), ordered = TRUE))
|
|
|
|
# Split into training and testing sets
|
|
split <- initial_split(data)
|
|
training_data <- training(split)
|
|
testing_data <- testing(split)
|
|
|
|
# Create and prep a recipe with MIC log2 transformation
|
|
mic_recipe <- recipe(esbl ~ ., data = training_data) \%>\%
|
|
# Optionally remove non-predictive variables
|
|
remove_role(genus, old_role = "predictor") \%>\%
|
|
# Apply the log2 transformation to all MIC predictors
|
|
step_mic_log2(all_mic_predictors()) \%>\%
|
|
prep()
|
|
|
|
# View prepped recipe
|
|
mic_recipe
|
|
|
|
# Apply the recipe to training and testing data
|
|
out_training <- bake(mic_recipe, new_data = NULL)
|
|
out_testing <- bake(mic_recipe, new_data = testing_data)
|
|
|
|
# Fit a logistic regression model
|
|
fitted <- logistic_reg(mode = "classification") \%>\%
|
|
set_engine("glm") \%>\%
|
|
fit(esbl ~ ., data = out_training)
|
|
|
|
# Generate predictions on the test set
|
|
predictions <- predict(fitted, out_testing) \%>\%
|
|
bind_cols(out_testing)
|
|
|
|
# Evaluate predictions using standard classification metrics
|
|
our_metrics <- metric_set(accuracy, kap, ppv, npv)
|
|
metrics <- our_metrics(predictions, truth = esbl, estimate = .pred_class)
|
|
|
|
# Show performance:
|
|
# - negative predictive value (NPV) of ~98\%
|
|
# - positive predictive value (PPV) of ~94\%
|
|
metrics
|
|
}
|
|
\seealso{
|
|
\code{\link[recipes:recipe]{recipes::recipe()}}, \code{\link[=as.mic]{as.mic()}}, \code{\link[=as.sir]{as.sir()}}
|
|
}
|
|
\keyword{internal}
|