diff --git a/DESCRIPTION b/DESCRIPTION index 341e2a3d3..2c0f56fc2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: AMR -Version: 3.0.0.9010 +Version: 3.0.0.9011 Date: 2025-07-17 Title: Antimicrobial Resistance Data Analysis Description: Functions to simplify and standardise antimicrobial resistance (AMR) diff --git a/NEWS.md b/NEWS.md index bb441313b..17d94d557 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# AMR 3.0.0.9010 +# AMR 3.0.0.9011 This is primarily a bugfix release, though we added one nice feature too. @@ -17,6 +17,7 @@ This is primarily a bugfix release, though we added one nice feature too. * Fixed a bug in `ggplot_sir()` when using `combine_SI = FALSE` (#213) * Fixed all plotting to contain a separate colour for SDD (susceptible dose-dependent) (#223) * Fixed some specific Dutch translations for antimicrobials +* Added `names` to `age_groups()` so that custom names can be given (#215) * Added note to `as.sir()` to make it explicit when higher-level taxonomic breakpoints are used (#218) * Updated `random_mic()` and `random_disk()` to set skewedness of the distribution and allow multiple microorganisms diff --git a/R/aa_helper_functions.R b/R/aa_helper_functions.R index 6057a0465..8cf625d49 100644 --- a/R/aa_helper_functions.R +++ b/R/aa_helper_functions.R @@ -519,7 +519,7 @@ word_wrap <- function(..., ) msg <- paste0(parts, collapse = "`") } - msg <- gsub("`(.+?)`", font_grey_bg("\\1"), msg) + msg <- gsub("`(.+?)`", font_grey_bg("`\\1`"), msg) # clean introduced whitespace in between fullstops msg <- gsub("[.] +[.]", "..", msg) diff --git a/R/age.R b/R/age.R index 903fc380d..9fb436466 100755 --- a/R/age.R +++ b/R/age.R @@ -128,9 +128,10 @@ age <- function(x, reference = Sys.Date(), exact = FALSE, na.rm = FALSE, ...) { #' Split Ages into Age Groups #' -#' Split ages into age groups defined by the `split` argument. This allows for easier demographic (antimicrobial resistance) analysis. +#' Split ages into age groups defined by the `split` argument. This allows for easier demographic (antimicrobial resistance) analysis. The function returns an ordered [factor]. #' @param x Age, e.g. calculated with [age()]. #' @param split_at Values to split `x` at - the default is age groups 0-11, 12-24, 25-54, 55-74 and 75+. See *Details*. +#' @param names Optional names to be given to the various age groups. #' @param na.rm A [logical] to indicate whether missing values should be removed. #' @details To split ages, the input for the `split_at` argument can be: #' @@ -152,6 +153,7 @@ age <- function(x, reference = Sys.Date(), exact = FALSE, na.rm = FALSE, ...) { #' #' # split into 0-19, 20-49 and 50+ #' age_groups(ages, c(20, 50)) +#' age_groups(ages, c(20, 50), names = c("Under 20 years", "20 to 50 years", "Over 50 years")) #' #' # split into groups of ten years #' age_groups(ages, 1:10 * 10) @@ -181,9 +183,10 @@ age <- function(x, reference = Sys.Date(), exact = FALSE, na.rm = FALSE, ...) { #' ) #' } #' } -age_groups <- function(x, split_at = c(12, 25, 55, 75), na.rm = FALSE) { +age_groups <- function(x, split_at = c(0, 12, 25, 55, 75), names = NULL, na.rm = FALSE) { meet_criteria(x, allow_class = c("numeric", "integer"), is_positive_or_zero = TRUE, is_finite = TRUE) meet_criteria(split_at, allow_class = c("numeric", "integer", "character"), is_positive_or_zero = TRUE, is_finite = TRUE) + meet_criteria(names, allow_class = "character", allow_NULL = TRUE) meet_criteria(na.rm, allow_class = "logical", has_length = 1) if (any(x < 0, na.rm = TRUE)) { @@ -224,6 +227,11 @@ age_groups <- function(x, split_at = c(12, 25, 55, 75), na.rm = FALSE) { agegroups <- factor(lbls[y], levels = lbls, ordered = TRUE) + if (!is.null(names)) { + stop_ifnot(length(names) == length(levels(agegroups)), "`names` must have the same length as the number of age groups (", length(levels(agegroups)), ").") + levels(agegroups) <- names + } + if (isTRUE(na.rm)) { agegroups <- agegroups[!is.na(agegroups)] } diff --git a/R/ggplot_sir.R b/R/ggplot_sir.R index e4fdafe7c..fc3b64082 100755 --- a/R/ggplot_sir.R +++ b/R/ggplot_sir.R @@ -206,7 +206,7 @@ ggplot_sir <- function(data, meet_criteria(minimum, allow_class = c("numeric", "integer"), has_length = 1, is_positive_or_zero = TRUE, is_finite = TRUE) language <- validate_language(language) meet_criteria(nrow, allow_class = c("numeric", "integer"), has_length = 1, allow_NULL = TRUE, is_positive = TRUE, is_finite = TRUE) - meet_criteria(colours, allow_class = c("character", "logical")) + meet_criteria(colours, allow_class = c("character", "logical"), allow_NULL = TRUE) meet_criteria(datalabels, allow_class = "logical", has_length = 1) meet_criteria(datalabels.size, allow_class = c("numeric", "integer"), has_length = 1, is_positive = TRUE, is_finite = TRUE) meet_criteria(datalabels.colour, allow_class = "character", has_length = 1) @@ -246,7 +246,7 @@ ggplot_sir <- function(data, ) + theme_sir() - if (fill == "interpretation") { + if (fill == "interpretation" && !is.null(colours) && !isFALSE(colours)) { p <- suppressWarnings(p + scale_sir_colours(aesthetics = "fill", colours = colours)) } diff --git a/R/tidymodels.R b/R/tidymodels.R index a5722e568..100d5078c 100644 --- a/R/tidymodels.R +++ b/R/tidymodels.R @@ -19,56 +19,59 @@ #' @keywords internal #' @export #' @examples -#' library(tidymodels) +#' if (require("tidymodels")) { #' -#' # The below approach formed the basis for this paper: DOI 10.3389/fmicb.2025.1582703 -#' # Presence of ESBL genes was predicted based on raw MIC values. +#' # The below approach formed the basis for this paper: DOI 10.3389/fmicb.2025.1582703 +#' # Presence of ESBL genes was predicted based on raw MIC values. #' #' -#' # example data set in the AMR package -#' esbl_isolates +#' # example data set in the AMR package +#' esbl_isolates #' -#' # Prepare a binary outcome and convert to ordered factor -#' data <- esbl_isolates %>% -#' mutate(esbl = factor(esbl, levels = c(FALSE, TRUE), ordered = TRUE)) +#' # Prepare a binary outcome and convert to ordered factor +#' data <- esbl_isolates %>% +#' mutate(esbl = factor(esbl, levels = c(FALSE, TRUE), ordered = TRUE)) #' -#' # Split into training and testing sets -#' split <- initial_split(data) -#' training_data <- training(split) -#' testing_data <- testing(split) +#' # Split into training and testing sets +#' split <- initial_split(data) +#' training_data <- training(split) +#' testing_data <- testing(split) #' -#' # Create and prep a recipe with MIC log2 transformation -#' mic_recipe <- recipe(esbl ~ ., data = training_data) %>% -#' # Optionally remove non-predictive variables -#' remove_role(genus, old_role = "predictor") %>% -#' # Apply the log2 transformation to all MIC predictors -#' step_mic_log2(all_mic_predictors()) %>% -#' prep() +#' # Create and prep a recipe with MIC log2 transformation +#' mic_recipe <- recipe(esbl ~ ., data = training_data) %>% #' -#' # View prepped recipe -#' mic_recipe +#' # Optionally remove non-predictive variables +#' remove_role(genus, old_role = "predictor") %>% #' -#' # Apply the recipe to training and testing data -#' out_training <- bake(mic_recipe, new_data = NULL) -#' out_testing <- bake(mic_recipe, new_data = testing_data) +#' # Apply the log2 transformation to all MIC predictors +#' step_mic_log2(all_mic_predictors()) %>% #' -#' # Fit a logistic regression model -#' fitted <- logistic_reg(mode = "classification") %>% -#' set_engine("glm") %>% -#' fit(esbl ~ ., data = out_training) +#' # And apply the preparation steps +#' prep() #' -#' # Generate predictions on the test set -#' predictions <- predict(fitted, out_testing) %>% -#' bind_cols(out_testing) +#' # View prepped recipe +#' mic_recipe #' -#' # Evaluate predictions using standard classification metrics -#' our_metrics <- metric_set(accuracy, kap, ppv, npv) -#' metrics <- our_metrics(predictions, truth = esbl, estimate = .pred_class) +#' # Apply the recipe to training and testing data +#' out_training <- bake(mic_recipe, new_data = NULL) +#' out_testing <- bake(mic_recipe, new_data = testing_data) #' -#' # Show performance: -#' # - negative predictive value (NPV) of ~98% -#' # - positive predictive value (PPV) of ~94% -#' metrics +#' # Fit a logistic regression model +#' fitted <- logistic_reg(mode = "classification") %>% +#' set_engine("glm") %>% +#' fit(esbl ~ ., data = out_training) +#' +#' # Generate predictions on the test set +#' predictions <- predict(fitted, out_testing) %>% +#' bind_cols(out_testing) +#' +#' # Evaluate predictions using standard classification metrics +#' our_metrics <- metric_set(accuracy, kap, ppv, npv) +#' metrics <- our_metrics(predictions, truth = esbl, estimate = .pred_class) +#' +#' # Show performance +#' metrics +#' } all_mic <- function() { x <- tidymodels_amr_select(levels(NA_mic_)) names(x) diff --git a/man/age_groups.Rd b/man/age_groups.Rd index cd93ecb4b..fa0b8abbf 100644 --- a/man/age_groups.Rd +++ b/man/age_groups.Rd @@ -4,20 +4,23 @@ \alias{age_groups} \title{Split Ages into Age Groups} \usage{ -age_groups(x, split_at = c(12, 25, 55, 75), na.rm = FALSE) +age_groups(x, split_at = c(0, 12, 25, 55, 75), names = NULL, + na.rm = FALSE) } \arguments{ \item{x}{Age, e.g. calculated with \code{\link[=age]{age()}}.} \item{split_at}{Values to split \code{x} at - the default is age groups 0-11, 12-24, 25-54, 55-74 and 75+. See \emph{Details}.} +\item{names}{Optional names to be given to the various age groups.} + \item{na.rm}{A \link{logical} to indicate whether missing values should be removed.} } \value{ Ordered \link{factor} } \description{ -Split ages into age groups defined by the \code{split} argument. This allows for easier demographic (antimicrobial resistance) analysis. +Split ages into age groups defined by the \code{split} argument. This allows for easier demographic (antimicrobial resistance) analysis. The function returns an ordered \link{factor}. } \details{ To split ages, the input for the \code{split_at} argument can be: @@ -41,6 +44,7 @@ age_groups(ages, 50) # split into 0-19, 20-49 and 50+ age_groups(ages, c(20, 50)) +age_groups(ages, c(20, 50), names = c("Under 20 years", "20 to 50 years", "Over 50 years")) # split into groups of ten years age_groups(ages, 1:10 * 10) diff --git a/man/amr-tidymodels.Rd b/man/amr-tidymodels.Rd index b79239ae5..da6f7b2c3 100644 --- a/man/amr-tidymodels.Rd +++ b/man/amr-tidymodels.Rd @@ -65,56 +65,59 @@ Pre-processing pipeline steps include: These steps integrate with \code{recipes::recipe()} and work like standard preprocessing steps. They are useful for preparing data for modelling, especially with classification models. } \examples{ -library(tidymodels) +if (require("tidymodels")) { -# The below approach formed the basis for this paper: DOI 10.3389/fmicb.2025.1582703 -# Presence of ESBL genes was predicted based on raw MIC values. + # The below approach formed the basis for this paper: DOI 10.3389/fmicb.2025.1582703 + # Presence of ESBL genes was predicted based on raw MIC values. -# example data set in the AMR package -esbl_isolates + # example data set in the AMR package + esbl_isolates -# Prepare a binary outcome and convert to ordered factor -data <- esbl_isolates \%>\% - mutate(esbl = factor(esbl, levels = c(FALSE, TRUE), ordered = TRUE)) + # Prepare a binary outcome and convert to ordered factor + data <- esbl_isolates \%>\% + mutate(esbl = factor(esbl, levels = c(FALSE, TRUE), ordered = TRUE)) -# Split into training and testing sets -split <- initial_split(data) -training_data <- training(split) -testing_data <- testing(split) + # Split into training and testing sets + split <- initial_split(data) + training_data <- training(split) + testing_data <- testing(split) -# Create and prep a recipe with MIC log2 transformation -mic_recipe <- recipe(esbl ~ ., data = training_data) \%>\% - # Optionally remove non-predictive variables - remove_role(genus, old_role = "predictor") \%>\% - # Apply the log2 transformation to all MIC predictors - step_mic_log2(all_mic_predictors()) \%>\% - prep() + # Create and prep a recipe with MIC log2 transformation + mic_recipe <- recipe(esbl ~ ., data = training_data) \%>\% -# View prepped recipe -mic_recipe + # Optionally remove non-predictive variables + remove_role(genus, old_role = "predictor") \%>\% -# Apply the recipe to training and testing data -out_training <- bake(mic_recipe, new_data = NULL) -out_testing <- bake(mic_recipe, new_data = testing_data) + # Apply the log2 transformation to all MIC predictors + step_mic_log2(all_mic_predictors()) \%>\% -# Fit a logistic regression model -fitted <- logistic_reg(mode = "classification") \%>\% - set_engine("glm") \%>\% - fit(esbl ~ ., data = out_training) + # And apply the preparation steps + prep() -# Generate predictions on the test set -predictions <- predict(fitted, out_testing) \%>\% - bind_cols(out_testing) + # View prepped recipe + mic_recipe -# Evaluate predictions using standard classification metrics -our_metrics <- metric_set(accuracy, kap, ppv, npv) -metrics <- our_metrics(predictions, truth = esbl, estimate = .pred_class) + # Apply the recipe to training and testing data + out_training <- bake(mic_recipe, new_data = NULL) + out_testing <- bake(mic_recipe, new_data = testing_data) -# Show performance: -# - negative predictive value (NPV) of ~98\% -# - positive predictive value (PPV) of ~94\% -metrics + # Fit a logistic regression model + fitted <- logistic_reg(mode = "classification") \%>\% + set_engine("glm") \%>\% + fit(esbl ~ ., data = out_training) + + # Generate predictions on the test set + predictions <- predict(fitted, out_testing) \%>\% + bind_cols(out_testing) + + # Evaluate predictions using standard classification metrics + our_metrics <- metric_set(accuracy, kap, ppv, npv) + metrics <- our_metrics(predictions, truth = esbl, estimate = .pred_class) + + # Show performance + metrics +} } \seealso{ \code{\link[recipes:recipe]{recipes::recipe()}}, \code{\link[=as.mic]{as.mic()}}, \code{\link[=as.sir]{as.sir()}}