mirror of
https://github.com/msberends/AMR.git
synced 2025-07-19 23:53:15 +02:00
(v3.0.0.9011) allow names for age_groups()
This commit is contained in:
@ -1,5 +1,5 @@
|
||||
Package: AMR
|
||||
Version: 3.0.0.9010
|
||||
Version: 3.0.0.9011
|
||||
Date: 2025-07-17
|
||||
Title: Antimicrobial Resistance Data Analysis
|
||||
Description: Functions to simplify and standardise antimicrobial resistance (AMR)
|
||||
|
3
NEWS.md
3
NEWS.md
@ -1,4 +1,4 @@
|
||||
# AMR 3.0.0.9010
|
||||
# AMR 3.0.0.9011
|
||||
|
||||
This is primarily a bugfix release, though we added one nice feature too.
|
||||
|
||||
@ -17,6 +17,7 @@ This is primarily a bugfix release, though we added one nice feature too.
|
||||
* Fixed a bug in `ggplot_sir()` when using `combine_SI = FALSE` (#213)
|
||||
* Fixed all plotting to contain a separate colour for SDD (susceptible dose-dependent) (#223)
|
||||
* Fixed some specific Dutch translations for antimicrobials
|
||||
* Added `names` to `age_groups()` so that custom names can be given (#215)
|
||||
* Added note to `as.sir()` to make it explicit when higher-level taxonomic breakpoints are used (#218)
|
||||
* Updated `random_mic()` and `random_disk()` to set skewedness of the distribution and allow multiple microorganisms
|
||||
|
||||
|
@ -519,7 +519,7 @@ word_wrap <- function(...,
|
||||
)
|
||||
msg <- paste0(parts, collapse = "`")
|
||||
}
|
||||
msg <- gsub("`(.+?)`", font_grey_bg("\\1"), msg)
|
||||
msg <- gsub("`(.+?)`", font_grey_bg("`\\1`"), msg)
|
||||
|
||||
# clean introduced whitespace in between fullstops
|
||||
msg <- gsub("[.] +[.]", "..", msg)
|
||||
|
12
R/age.R
12
R/age.R
@ -128,9 +128,10 @@ age <- function(x, reference = Sys.Date(), exact = FALSE, na.rm = FALSE, ...) {
|
||||
|
||||
#' Split Ages into Age Groups
|
||||
#'
|
||||
#' Split ages into age groups defined by the `split` argument. This allows for easier demographic (antimicrobial resistance) analysis.
|
||||
#' Split ages into age groups defined by the `split` argument. This allows for easier demographic (antimicrobial resistance) analysis. The function returns an ordered [factor].
|
||||
#' @param x Age, e.g. calculated with [age()].
|
||||
#' @param split_at Values to split `x` at - the default is age groups 0-11, 12-24, 25-54, 55-74 and 75+. See *Details*.
|
||||
#' @param names Optional names to be given to the various age groups.
|
||||
#' @param na.rm A [logical] to indicate whether missing values should be removed.
|
||||
#' @details To split ages, the input for the `split_at` argument can be:
|
||||
#'
|
||||
@ -152,6 +153,7 @@ age <- function(x, reference = Sys.Date(), exact = FALSE, na.rm = FALSE, ...) {
|
||||
#'
|
||||
#' # split into 0-19, 20-49 and 50+
|
||||
#' age_groups(ages, c(20, 50))
|
||||
#' age_groups(ages, c(20, 50), names = c("Under 20 years", "20 to 50 years", "Over 50 years"))
|
||||
#'
|
||||
#' # split into groups of ten years
|
||||
#' age_groups(ages, 1:10 * 10)
|
||||
@ -181,9 +183,10 @@ age <- function(x, reference = Sys.Date(), exact = FALSE, na.rm = FALSE, ...) {
|
||||
#' )
|
||||
#' }
|
||||
#' }
|
||||
age_groups <- function(x, split_at = c(12, 25, 55, 75), na.rm = FALSE) {
|
||||
age_groups <- function(x, split_at = c(0, 12, 25, 55, 75), names = NULL, na.rm = FALSE) {
|
||||
meet_criteria(x, allow_class = c("numeric", "integer"), is_positive_or_zero = TRUE, is_finite = TRUE)
|
||||
meet_criteria(split_at, allow_class = c("numeric", "integer", "character"), is_positive_or_zero = TRUE, is_finite = TRUE)
|
||||
meet_criteria(names, allow_class = "character", allow_NULL = TRUE)
|
||||
meet_criteria(na.rm, allow_class = "logical", has_length = 1)
|
||||
|
||||
if (any(x < 0, na.rm = TRUE)) {
|
||||
@ -224,6 +227,11 @@ age_groups <- function(x, split_at = c(12, 25, 55, 75), na.rm = FALSE) {
|
||||
|
||||
agegroups <- factor(lbls[y], levels = lbls, ordered = TRUE)
|
||||
|
||||
if (!is.null(names)) {
|
||||
stop_ifnot(length(names) == length(levels(agegroups)), "`names` must have the same length as the number of age groups (", length(levels(agegroups)), ").")
|
||||
levels(agegroups) <- names
|
||||
}
|
||||
|
||||
if (isTRUE(na.rm)) {
|
||||
agegroups <- agegroups[!is.na(agegroups)]
|
||||
}
|
||||
|
@ -206,7 +206,7 @@ ggplot_sir <- function(data,
|
||||
meet_criteria(minimum, allow_class = c("numeric", "integer"), has_length = 1, is_positive_or_zero = TRUE, is_finite = TRUE)
|
||||
language <- validate_language(language)
|
||||
meet_criteria(nrow, allow_class = c("numeric", "integer"), has_length = 1, allow_NULL = TRUE, is_positive = TRUE, is_finite = TRUE)
|
||||
meet_criteria(colours, allow_class = c("character", "logical"))
|
||||
meet_criteria(colours, allow_class = c("character", "logical"), allow_NULL = TRUE)
|
||||
meet_criteria(datalabels, allow_class = "logical", has_length = 1)
|
||||
meet_criteria(datalabels.size, allow_class = c("numeric", "integer"), has_length = 1, is_positive = TRUE, is_finite = TRUE)
|
||||
meet_criteria(datalabels.colour, allow_class = "character", has_length = 1)
|
||||
@ -246,7 +246,7 @@ ggplot_sir <- function(data,
|
||||
) +
|
||||
theme_sir()
|
||||
|
||||
if (fill == "interpretation") {
|
||||
if (fill == "interpretation" && !is.null(colours) && !isFALSE(colours)) {
|
||||
p <- suppressWarnings(p + scale_sir_colours(aesthetics = "fill", colours = colours))
|
||||
}
|
||||
|
||||
|
@ -19,56 +19,59 @@
|
||||
#' @keywords internal
|
||||
#' @export
|
||||
#' @examples
|
||||
#' library(tidymodels)
|
||||
#' if (require("tidymodels")) {
|
||||
#'
|
||||
#' # The below approach formed the basis for this paper: DOI 10.3389/fmicb.2025.1582703
|
||||
#' # Presence of ESBL genes was predicted based on raw MIC values.
|
||||
#' # The below approach formed the basis for this paper: DOI 10.3389/fmicb.2025.1582703
|
||||
#' # Presence of ESBL genes was predicted based on raw MIC values.
|
||||
#'
|
||||
#'
|
||||
#' # example data set in the AMR package
|
||||
#' esbl_isolates
|
||||
#' # example data set in the AMR package
|
||||
#' esbl_isolates
|
||||
#'
|
||||
#' # Prepare a binary outcome and convert to ordered factor
|
||||
#' data <- esbl_isolates %>%
|
||||
#' mutate(esbl = factor(esbl, levels = c(FALSE, TRUE), ordered = TRUE))
|
||||
#' # Prepare a binary outcome and convert to ordered factor
|
||||
#' data <- esbl_isolates %>%
|
||||
#' mutate(esbl = factor(esbl, levels = c(FALSE, TRUE), ordered = TRUE))
|
||||
#'
|
||||
#' # Split into training and testing sets
|
||||
#' split <- initial_split(data)
|
||||
#' training_data <- training(split)
|
||||
#' testing_data <- testing(split)
|
||||
#' # Split into training and testing sets
|
||||
#' split <- initial_split(data)
|
||||
#' training_data <- training(split)
|
||||
#' testing_data <- testing(split)
|
||||
#'
|
||||
#' # Create and prep a recipe with MIC log2 transformation
|
||||
#' mic_recipe <- recipe(esbl ~ ., data = training_data) %>%
|
||||
#' # Optionally remove non-predictive variables
|
||||
#' remove_role(genus, old_role = "predictor") %>%
|
||||
#' # Apply the log2 transformation to all MIC predictors
|
||||
#' step_mic_log2(all_mic_predictors()) %>%
|
||||
#' prep()
|
||||
#' # Create and prep a recipe with MIC log2 transformation
|
||||
#' mic_recipe <- recipe(esbl ~ ., data = training_data) %>%
|
||||
#'
|
||||
#' # View prepped recipe
|
||||
#' mic_recipe
|
||||
#' # Optionally remove non-predictive variables
|
||||
#' remove_role(genus, old_role = "predictor") %>%
|
||||
#'
|
||||
#' # Apply the recipe to training and testing data
|
||||
#' out_training <- bake(mic_recipe, new_data = NULL)
|
||||
#' out_testing <- bake(mic_recipe, new_data = testing_data)
|
||||
#' # Apply the log2 transformation to all MIC predictors
|
||||
#' step_mic_log2(all_mic_predictors()) %>%
|
||||
#'
|
||||
#' # Fit a logistic regression model
|
||||
#' fitted <- logistic_reg(mode = "classification") %>%
|
||||
#' set_engine("glm") %>%
|
||||
#' fit(esbl ~ ., data = out_training)
|
||||
#' # And apply the preparation steps
|
||||
#' prep()
|
||||
#'
|
||||
#' # Generate predictions on the test set
|
||||
#' predictions <- predict(fitted, out_testing) %>%
|
||||
#' bind_cols(out_testing)
|
||||
#' # View prepped recipe
|
||||
#' mic_recipe
|
||||
#'
|
||||
#' # Evaluate predictions using standard classification metrics
|
||||
#' our_metrics <- metric_set(accuracy, kap, ppv, npv)
|
||||
#' metrics <- our_metrics(predictions, truth = esbl, estimate = .pred_class)
|
||||
#' # Apply the recipe to training and testing data
|
||||
#' out_training <- bake(mic_recipe, new_data = NULL)
|
||||
#' out_testing <- bake(mic_recipe, new_data = testing_data)
|
||||
#'
|
||||
#' # Show performance:
|
||||
#' # - negative predictive value (NPV) of ~98%
|
||||
#' # - positive predictive value (PPV) of ~94%
|
||||
#' metrics
|
||||
#' # Fit a logistic regression model
|
||||
#' fitted <- logistic_reg(mode = "classification") %>%
|
||||
#' set_engine("glm") %>%
|
||||
#' fit(esbl ~ ., data = out_training)
|
||||
#'
|
||||
#' # Generate predictions on the test set
|
||||
#' predictions <- predict(fitted, out_testing) %>%
|
||||
#' bind_cols(out_testing)
|
||||
#'
|
||||
#' # Evaluate predictions using standard classification metrics
|
||||
#' our_metrics <- metric_set(accuracy, kap, ppv, npv)
|
||||
#' metrics <- our_metrics(predictions, truth = esbl, estimate = .pred_class)
|
||||
#'
|
||||
#' # Show performance
|
||||
#' metrics
|
||||
#' }
|
||||
all_mic <- function() {
|
||||
x <- tidymodels_amr_select(levels(NA_mic_))
|
||||
names(x)
|
||||
|
@ -4,20 +4,23 @@
|
||||
\alias{age_groups}
|
||||
\title{Split Ages into Age Groups}
|
||||
\usage{
|
||||
age_groups(x, split_at = c(12, 25, 55, 75), na.rm = FALSE)
|
||||
age_groups(x, split_at = c(0, 12, 25, 55, 75), names = NULL,
|
||||
na.rm = FALSE)
|
||||
}
|
||||
\arguments{
|
||||
\item{x}{Age, e.g. calculated with \code{\link[=age]{age()}}.}
|
||||
|
||||
\item{split_at}{Values to split \code{x} at - the default is age groups 0-11, 12-24, 25-54, 55-74 and 75+. See \emph{Details}.}
|
||||
|
||||
\item{names}{Optional names to be given to the various age groups.}
|
||||
|
||||
\item{na.rm}{A \link{logical} to indicate whether missing values should be removed.}
|
||||
}
|
||||
\value{
|
||||
Ordered \link{factor}
|
||||
}
|
||||
\description{
|
||||
Split ages into age groups defined by the \code{split} argument. This allows for easier demographic (antimicrobial resistance) analysis.
|
||||
Split ages into age groups defined by the \code{split} argument. This allows for easier demographic (antimicrobial resistance) analysis. The function returns an ordered \link{factor}.
|
||||
}
|
||||
\details{
|
||||
To split ages, the input for the \code{split_at} argument can be:
|
||||
@ -41,6 +44,7 @@ age_groups(ages, 50)
|
||||
|
||||
# split into 0-19, 20-49 and 50+
|
||||
age_groups(ages, c(20, 50))
|
||||
age_groups(ages, c(20, 50), names = c("Under 20 years", "20 to 50 years", "Over 50 years"))
|
||||
|
||||
# split into groups of ten years
|
||||
age_groups(ages, 1:10 * 10)
|
||||
|
@ -65,56 +65,59 @@ Pre-processing pipeline steps include:
|
||||
These steps integrate with \code{recipes::recipe()} and work like standard preprocessing steps. They are useful for preparing data for modelling, especially with classification models.
|
||||
}
|
||||
\examples{
|
||||
library(tidymodels)
|
||||
if (require("tidymodels")) {
|
||||
|
||||
# The below approach formed the basis for this paper: DOI 10.3389/fmicb.2025.1582703
|
||||
# Presence of ESBL genes was predicted based on raw MIC values.
|
||||
# The below approach formed the basis for this paper: DOI 10.3389/fmicb.2025.1582703
|
||||
# Presence of ESBL genes was predicted based on raw MIC values.
|
||||
|
||||
|
||||
# example data set in the AMR package
|
||||
esbl_isolates
|
||||
# example data set in the AMR package
|
||||
esbl_isolates
|
||||
|
||||
# Prepare a binary outcome and convert to ordered factor
|
||||
data <- esbl_isolates \%>\%
|
||||
mutate(esbl = factor(esbl, levels = c(FALSE, TRUE), ordered = TRUE))
|
||||
# Prepare a binary outcome and convert to ordered factor
|
||||
data <- esbl_isolates \%>\%
|
||||
mutate(esbl = factor(esbl, levels = c(FALSE, TRUE), ordered = TRUE))
|
||||
|
||||
# Split into training and testing sets
|
||||
split <- initial_split(data)
|
||||
training_data <- training(split)
|
||||
testing_data <- testing(split)
|
||||
# Split into training and testing sets
|
||||
split <- initial_split(data)
|
||||
training_data <- training(split)
|
||||
testing_data <- testing(split)
|
||||
|
||||
# Create and prep a recipe with MIC log2 transformation
|
||||
mic_recipe <- recipe(esbl ~ ., data = training_data) \%>\%
|
||||
# Optionally remove non-predictive variables
|
||||
remove_role(genus, old_role = "predictor") \%>\%
|
||||
# Apply the log2 transformation to all MIC predictors
|
||||
step_mic_log2(all_mic_predictors()) \%>\%
|
||||
prep()
|
||||
# Create and prep a recipe with MIC log2 transformation
|
||||
mic_recipe <- recipe(esbl ~ ., data = training_data) \%>\%
|
||||
|
||||
# View prepped recipe
|
||||
mic_recipe
|
||||
# Optionally remove non-predictive variables
|
||||
remove_role(genus, old_role = "predictor") \%>\%
|
||||
|
||||
# Apply the recipe to training and testing data
|
||||
out_training <- bake(mic_recipe, new_data = NULL)
|
||||
out_testing <- bake(mic_recipe, new_data = testing_data)
|
||||
# Apply the log2 transformation to all MIC predictors
|
||||
step_mic_log2(all_mic_predictors()) \%>\%
|
||||
|
||||
# Fit a logistic regression model
|
||||
fitted <- logistic_reg(mode = "classification") \%>\%
|
||||
set_engine("glm") \%>\%
|
||||
fit(esbl ~ ., data = out_training)
|
||||
# And apply the preparation steps
|
||||
prep()
|
||||
|
||||
# Generate predictions on the test set
|
||||
predictions <- predict(fitted, out_testing) \%>\%
|
||||
bind_cols(out_testing)
|
||||
# View prepped recipe
|
||||
mic_recipe
|
||||
|
||||
# Evaluate predictions using standard classification metrics
|
||||
our_metrics <- metric_set(accuracy, kap, ppv, npv)
|
||||
metrics <- our_metrics(predictions, truth = esbl, estimate = .pred_class)
|
||||
# Apply the recipe to training and testing data
|
||||
out_training <- bake(mic_recipe, new_data = NULL)
|
||||
out_testing <- bake(mic_recipe, new_data = testing_data)
|
||||
|
||||
# Show performance:
|
||||
# - negative predictive value (NPV) of ~98\%
|
||||
# - positive predictive value (PPV) of ~94\%
|
||||
metrics
|
||||
# Fit a logistic regression model
|
||||
fitted <- logistic_reg(mode = "classification") \%>\%
|
||||
set_engine("glm") \%>\%
|
||||
fit(esbl ~ ., data = out_training)
|
||||
|
||||
# Generate predictions on the test set
|
||||
predictions <- predict(fitted, out_testing) \%>\%
|
||||
bind_cols(out_testing)
|
||||
|
||||
# Evaluate predictions using standard classification metrics
|
||||
our_metrics <- metric_set(accuracy, kap, ppv, npv)
|
||||
metrics <- our_metrics(predictions, truth = esbl, estimate = .pred_class)
|
||||
|
||||
# Show performance
|
||||
metrics
|
||||
}
|
||||
}
|
||||
\seealso{
|
||||
\code{\link[recipes:recipe]{recipes::recipe()}}, \code{\link[=as.mic]{as.mic()}}, \code{\link[=as.sir]{as.sir()}}
|
||||
|
Reference in New Issue
Block a user