mirror of
https://github.com/msberends/AMR.git
synced 2025-09-03 07:04:08 +02:00
(v3.0.0.9003) eucast_rules fix, new tidymodels integration
This commit is contained in:
122
man/amr-tidymodels.Rd
Normal file
122
man/amr-tidymodels.Rd
Normal file
@@ -0,0 +1,122 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/tidymodels.R
|
||||
\name{amr-tidymodels}
|
||||
\alias{amr-tidymodels}
|
||||
\alias{all_mic}
|
||||
\alias{all_mic_predictors}
|
||||
\alias{all_sir}
|
||||
\alias{all_sir_predictors}
|
||||
\alias{step_mic_log2}
|
||||
\alias{step_sir_numeric}
|
||||
\title{AMR Extensions for Tidymodels}
|
||||
\usage{
|
||||
all_mic()
|
||||
|
||||
all_mic_predictors()
|
||||
|
||||
all_sir()
|
||||
|
||||
all_sir_predictors()
|
||||
|
||||
step_mic_log2(recipe, ..., role = NA, trained = FALSE, columns = NULL,
|
||||
skip = FALSE, id = recipes::rand_id("mic_log2"))
|
||||
|
||||
step_sir_numeric(recipe, ..., role = NA, trained = FALSE, columns = NULL,
|
||||
skip = FALSE, id = recipes::rand_id("sir_numeric"))
|
||||
}
|
||||
\arguments{
|
||||
\item{recipe}{A recipe object. The step will be added to the sequence of
|
||||
operations for this recipe.}
|
||||
|
||||
\item{...}{One or more selector functions to choose variables for this step.
|
||||
See \code{\link[recipes:selections]{selections()}} for more details.}
|
||||
|
||||
\item{role}{Not used by this step since no new variables are created.}
|
||||
|
||||
\item{trained}{A logical to indicate if the quantities for preprocessing have
|
||||
been estimated.}
|
||||
|
||||
\item{skip}{A logical. Should the step be skipped when the recipe is baked by
|
||||
\code{\link[recipes:bake]{bake()}}? While all operations are baked when \code{\link[recipes:prep]{prep()}} is run, some
|
||||
operations may not be able to be conducted on new data (e.g. processing the
|
||||
outcome variable(s)). Care should be taken when using \code{skip = TRUE} as it
|
||||
may affect the computations for subsequent operations.}
|
||||
|
||||
\item{id}{A character string that is unique to this step to identify it.}
|
||||
}
|
||||
\description{
|
||||
This family of functions allows using AMR-specific data types such as \verb{<mic>} and \verb{<sir>} inside \code{tidymodels} pipelines.
|
||||
}
|
||||
\details{
|
||||
You can read more in our online \href{https://amr-for-r.org/articles/AMR_with_tidymodels.html}{AMR with tidymodels introduction}.
|
||||
|
||||
Tidyselect helpers include:
|
||||
\itemize{
|
||||
\item \code{\link[=all_mic]{all_mic()}} and \code{\link[=all_mic_predictors]{all_mic_predictors()}} to select \verb{<mic>} columns
|
||||
\item \code{\link[=all_sir]{all_sir()}} and \code{\link[=all_sir_predictors]{all_sir_predictors()}} to select \verb{<sir>} columns
|
||||
}
|
||||
|
||||
Pre-processing pipeline steps include:
|
||||
\itemize{
|
||||
\item \code{\link[=step_mic_log2]{step_mic_log2()}} to convert MIC columns to numeric (via \code{as.numeric()}) and apply a log2 transform, to be used with \code{\link[=all_mic_predictors]{all_mic_predictors()}}
|
||||
\item \code{\link[=step_sir_numeric]{step_sir_numeric()}} to convert SIR columns to numeric (via \code{as.numeric()}), to be used with \code{\link[=all_sir_predictors]{all_sir_predictors()}}: \code{"S"} = 1, \code{"I"}/\code{"SDD"} = 2, \code{"R"} = 3. All other values are rendered \code{NA}. Keep this in mind for further processing, especially if the model does not allow for \code{NA} values.
|
||||
}
|
||||
|
||||
These steps integrate with \code{recipes::recipe()} and work like standard preprocessing steps. They are useful for preparing data for modelling, especially with classification models.
|
||||
}
|
||||
\examples{
|
||||
library(tidymodels)
|
||||
|
||||
# The below approach formed the basis for this paper: DOI 10.3389/fmicb.2025.1582703
|
||||
# Presence of ESBL genes was predicted based on raw MIC values.
|
||||
|
||||
|
||||
# example data set in the AMR package
|
||||
esbl_isolates
|
||||
|
||||
# Prepare a binary outcome and convert to ordered factor
|
||||
data <- esbl_isolates \%>\%
|
||||
mutate(esbl = factor(esbl, levels = c(FALSE, TRUE), ordered = TRUE))
|
||||
|
||||
# Split into training and testing sets
|
||||
split <- initial_split(data)
|
||||
training_data <- training(split)
|
||||
testing_data <- testing(split)
|
||||
|
||||
# Create and prep a recipe with MIC log2 transformation
|
||||
mic_recipe <- recipe(esbl ~ ., data = training_data) \%>\%
|
||||
# Optionally remove non-predictive variables
|
||||
remove_role(genus, old_role = "predictor") \%>\%
|
||||
# Apply the log2 transformation to all MIC predictors
|
||||
step_mic_log2(all_mic_predictors()) \%>\%
|
||||
prep()
|
||||
|
||||
# View prepped recipe
|
||||
mic_recipe
|
||||
|
||||
# Apply the recipe to training and testing data
|
||||
out_training <- bake(mic_recipe, new_data = NULL)
|
||||
out_testing <- bake(mic_recipe, new_data = testing_data)
|
||||
|
||||
# Fit a logistic regression model
|
||||
fitted <- logistic_reg(mode = "classification") \%>\%
|
||||
set_engine("glm") \%>\%
|
||||
fit(esbl ~ ., data = out_training)
|
||||
|
||||
# Generate predictions on the test set
|
||||
predictions <- predict(fitted, out_testing) \%>\%
|
||||
bind_cols(out_testing)
|
||||
|
||||
# Evaluate predictions using standard classification metrics
|
||||
our_metrics <- metric_set(accuracy, kap, ppv, npv)
|
||||
metrics <- our_metrics(predictions, truth = esbl, estimate = .pred_class)
|
||||
|
||||
# Show performance:
|
||||
# - negative predictive value (NPV) of ~98\%
|
||||
# - positive predictive value (PPV) of ~94\%
|
||||
metrics
|
||||
}
|
||||
\seealso{
|
||||
\code{\link[recipes:recipe]{recipes::recipe()}}, \code{\link[=as.mic]{as.mic()}}, \code{\link[=as.sir]{as.sir()}}
|
||||
}
|
||||
\keyword{internal}
|
Reference in New Issue
Block a user