mirror of
				https://github.com/msberends/AMR.git
				synced 2025-10-31 04:08:15 +01:00 
			
		
		
		
	(v3.0.0.9011) allow names for age_groups()
				
					
				
			This commit is contained in:
		| @@ -1,5 +1,5 @@ | |||||||
| Package: AMR | Package: AMR | ||||||
| Version: 3.0.0.9010 | Version: 3.0.0.9011 | ||||||
| Date: 2025-07-17 | Date: 2025-07-17 | ||||||
| Title: Antimicrobial Resistance Data Analysis | Title: Antimicrobial Resistance Data Analysis | ||||||
| Description: Functions to simplify and standardise antimicrobial resistance (AMR) | Description: Functions to simplify and standardise antimicrobial resistance (AMR) | ||||||
|   | |||||||
							
								
								
									
										3
									
								
								NEWS.md
									
									
									
									
									
								
							
							
						
						
									
										3
									
								
								NEWS.md
									
									
									
									
									
								
							| @@ -1,4 +1,4 @@ | |||||||
| # AMR 3.0.0.9010 | # AMR 3.0.0.9011 | ||||||
|  |  | ||||||
| This is primarily a bugfix release, though we added one nice feature too. | This is primarily a bugfix release, though we added one nice feature too. | ||||||
|  |  | ||||||
| @@ -17,6 +17,7 @@ This is primarily a bugfix release, though we added one nice feature too. | |||||||
| * Fixed a bug in `ggplot_sir()` when using `combine_SI = FALSE` (#213) | * Fixed a bug in `ggplot_sir()` when using `combine_SI = FALSE` (#213) | ||||||
| * Fixed all plotting to contain a separate colour for SDD (susceptible dose-dependent) (#223) | * Fixed all plotting to contain a separate colour for SDD (susceptible dose-dependent) (#223) | ||||||
| * Fixed some specific Dutch translations for antimicrobials | * Fixed some specific Dutch translations for antimicrobials | ||||||
|  | * Added `names` to `age_groups()` so that custom names can be given (#215) | ||||||
| * Added note to `as.sir()` to make it explicit when higher-level taxonomic breakpoints are used (#218) | * Added note to `as.sir()` to make it explicit when higher-level taxonomic breakpoints are used (#218) | ||||||
| * Updated `random_mic()` and `random_disk()` to set skewedness of the distribution and allow multiple microorganisms | * Updated `random_mic()` and `random_disk()` to set skewedness of the distribution and allow multiple microorganisms | ||||||
|  |  | ||||||
|   | |||||||
| @@ -519,7 +519,7 @@ word_wrap <- function(..., | |||||||
|     ) |     ) | ||||||
|     msg <- paste0(parts, collapse = "`") |     msg <- paste0(parts, collapse = "`") | ||||||
|   } |   } | ||||||
|   msg <- gsub("`(.+?)`", font_grey_bg("\\1"), msg) |   msg <- gsub("`(.+?)`", font_grey_bg("`\\1`"), msg) | ||||||
|  |  | ||||||
|   # clean introduced whitespace in between fullstops |   # clean introduced whitespace in between fullstops | ||||||
|   msg <- gsub("[.] +[.]", "..", msg) |   msg <- gsub("[.] +[.]", "..", msg) | ||||||
|   | |||||||
							
								
								
									
										12
									
								
								R/age.R
									
									
									
									
									
								
							
							
						
						
									
										12
									
								
								R/age.R
									
									
									
									
									
								
							| @@ -128,9 +128,10 @@ age <- function(x, reference = Sys.Date(), exact = FALSE, na.rm = FALSE, ...) { | |||||||
|  |  | ||||||
| #' Split Ages into Age Groups | #' Split Ages into Age Groups | ||||||
| #' | #' | ||||||
| #' Split ages into age groups defined by the `split` argument. This allows for easier demographic (antimicrobial resistance) analysis. | #' Split ages into age groups defined by the `split` argument. This allows for easier demographic (antimicrobial resistance) analysis. The function returns an ordered [factor]. | ||||||
| #' @param x Age, e.g. calculated with [age()]. | #' @param x Age, e.g. calculated with [age()]. | ||||||
| #' @param split_at Values to split `x` at - the default is age groups 0-11, 12-24, 25-54, 55-74 and 75+. See *Details*. | #' @param split_at Values to split `x` at - the default is age groups 0-11, 12-24, 25-54, 55-74 and 75+. See *Details*. | ||||||
|  | #' @param names Optional names to be given to the various age groups. | ||||||
| #' @param na.rm A [logical] to indicate whether missing values should be removed. | #' @param na.rm A [logical] to indicate whether missing values should be removed. | ||||||
| #' @details To split ages, the input for the `split_at` argument can be: | #' @details To split ages, the input for the `split_at` argument can be: | ||||||
| #' | #' | ||||||
| @@ -152,6 +153,7 @@ age <- function(x, reference = Sys.Date(), exact = FALSE, na.rm = FALSE, ...) { | |||||||
| #' | #' | ||||||
| #' # split into 0-19, 20-49 and 50+ | #' # split into 0-19, 20-49 and 50+ | ||||||
| #' age_groups(ages, c(20, 50)) | #' age_groups(ages, c(20, 50)) | ||||||
|  | #' age_groups(ages, c(20, 50), names = c("Under 20 years", "20 to 50 years", "Over 50 years")) | ||||||
| #' | #' | ||||||
| #' # split into groups of ten years | #' # split into groups of ten years | ||||||
| #' age_groups(ages, 1:10 * 10) | #' age_groups(ages, 1:10 * 10) | ||||||
| @@ -181,9 +183,10 @@ age <- function(x, reference = Sys.Date(), exact = FALSE, na.rm = FALSE, ...) { | |||||||
| #'     ) | #'     ) | ||||||
| #' } | #' } | ||||||
| #' } | #' } | ||||||
| age_groups <- function(x, split_at = c(12, 25, 55, 75), na.rm = FALSE) { | age_groups <- function(x, split_at = c(0, 12, 25, 55, 75), names = NULL, na.rm = FALSE) { | ||||||
|   meet_criteria(x, allow_class = c("numeric", "integer"), is_positive_or_zero = TRUE, is_finite = TRUE) |   meet_criteria(x, allow_class = c("numeric", "integer"), is_positive_or_zero = TRUE, is_finite = TRUE) | ||||||
|   meet_criteria(split_at, allow_class = c("numeric", "integer", "character"), is_positive_or_zero = TRUE, is_finite = TRUE) |   meet_criteria(split_at, allow_class = c("numeric", "integer", "character"), is_positive_or_zero = TRUE, is_finite = TRUE) | ||||||
|  |   meet_criteria(names, allow_class = "character", allow_NULL = TRUE) | ||||||
|   meet_criteria(na.rm, allow_class = "logical", has_length = 1) |   meet_criteria(na.rm, allow_class = "logical", has_length = 1) | ||||||
|  |  | ||||||
|   if (any(x < 0, na.rm = TRUE)) { |   if (any(x < 0, na.rm = TRUE)) { | ||||||
| @@ -224,6 +227,11 @@ age_groups <- function(x, split_at = c(12, 25, 55, 75), na.rm = FALSE) { | |||||||
|  |  | ||||||
|   agegroups <- factor(lbls[y], levels = lbls, ordered = TRUE) |   agegroups <- factor(lbls[y], levels = lbls, ordered = TRUE) | ||||||
|  |  | ||||||
|  |   if (!is.null(names)) { | ||||||
|  |     stop_ifnot(length(names) == length(levels(agegroups)), "`names` must have the same length as the number of age groups (", length(levels(agegroups)), ").") | ||||||
|  |     levels(agegroups) <- names | ||||||
|  |   } | ||||||
|  |  | ||||||
|   if (isTRUE(na.rm)) { |   if (isTRUE(na.rm)) { | ||||||
|     agegroups <- agegroups[!is.na(agegroups)] |     agegroups <- agegroups[!is.na(agegroups)] | ||||||
|   } |   } | ||||||
|   | |||||||
| @@ -206,7 +206,7 @@ ggplot_sir <- function(data, | |||||||
|   meet_criteria(minimum, allow_class = c("numeric", "integer"), has_length = 1, is_positive_or_zero = TRUE, is_finite = TRUE) |   meet_criteria(minimum, allow_class = c("numeric", "integer"), has_length = 1, is_positive_or_zero = TRUE, is_finite = TRUE) | ||||||
|   language <- validate_language(language) |   language <- validate_language(language) | ||||||
|   meet_criteria(nrow, allow_class = c("numeric", "integer"), has_length = 1, allow_NULL = TRUE, is_positive = TRUE, is_finite = TRUE) |   meet_criteria(nrow, allow_class = c("numeric", "integer"), has_length = 1, allow_NULL = TRUE, is_positive = TRUE, is_finite = TRUE) | ||||||
|   meet_criteria(colours, allow_class = c("character", "logical")) |   meet_criteria(colours, allow_class = c("character", "logical"), allow_NULL = TRUE) | ||||||
|   meet_criteria(datalabels, allow_class = "logical", has_length = 1) |   meet_criteria(datalabels, allow_class = "logical", has_length = 1) | ||||||
|   meet_criteria(datalabels.size, allow_class = c("numeric", "integer"), has_length = 1, is_positive = TRUE, is_finite = TRUE) |   meet_criteria(datalabels.size, allow_class = c("numeric", "integer"), has_length = 1, is_positive = TRUE, is_finite = TRUE) | ||||||
|   meet_criteria(datalabels.colour, allow_class = "character", has_length = 1) |   meet_criteria(datalabels.colour, allow_class = "character", has_length = 1) | ||||||
| @@ -246,7 +246,7 @@ ggplot_sir <- function(data, | |||||||
|     ) + |     ) + | ||||||
|     theme_sir() |     theme_sir() | ||||||
|  |  | ||||||
|   if (fill == "interpretation") { |   if (fill == "interpretation" && !is.null(colours) && !isFALSE(colours)) { | ||||||
|     p <- suppressWarnings(p + scale_sir_colours(aesthetics = "fill", colours = colours)) |     p <- suppressWarnings(p + scale_sir_colours(aesthetics = "fill", colours = colours)) | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -19,56 +19,59 @@ | |||||||
| #' @keywords internal | #' @keywords internal | ||||||
| #' @export | #' @export | ||||||
| #' @examples | #' @examples | ||||||
| #' library(tidymodels) | #' if (require("tidymodels")) { | ||||||
| #' | #' | ||||||
| #' # The below approach formed the basis for this paper: DOI 10.3389/fmicb.2025.1582703 | #'   # The below approach formed the basis for this paper: DOI 10.3389/fmicb.2025.1582703 | ||||||
| #' # Presence of ESBL genes was predicted based on raw MIC values. | #'   # Presence of ESBL genes was predicted based on raw MIC values. | ||||||
| #' | #' | ||||||
| #' | #' | ||||||
| #' # example data set in the AMR package | #'   # example data set in the AMR package | ||||||
| #' esbl_isolates | #'   esbl_isolates | ||||||
| #' | #' | ||||||
| #' # Prepare a binary outcome and convert to ordered factor | #'   # Prepare a binary outcome and convert to ordered factor | ||||||
| #' data <- esbl_isolates %>% | #'   data <- esbl_isolates %>% | ||||||
| #'   mutate(esbl = factor(esbl, levels = c(FALSE, TRUE), ordered = TRUE)) | #'     mutate(esbl = factor(esbl, levels = c(FALSE, TRUE), ordered = TRUE)) | ||||||
| #' | #' | ||||||
| #' # Split into training and testing sets | #'   # Split into training and testing sets | ||||||
| #' split <- initial_split(data) | #'   split <- initial_split(data) | ||||||
| #' training_data <- training(split) | #'   training_data <- training(split) | ||||||
| #' testing_data <- testing(split) | #'   testing_data <- testing(split) | ||||||
| #' | #' | ||||||
| #' # Create and prep a recipe with MIC log2 transformation | #'   # Create and prep a recipe with MIC log2 transformation | ||||||
| #' mic_recipe <- recipe(esbl ~ ., data = training_data) %>% | #'   mic_recipe <- recipe(esbl ~ ., data = training_data) %>% | ||||||
| #'   # Optionally remove non-predictive variables |  | ||||||
| #'   remove_role(genus, old_role = "predictor") %>% |  | ||||||
| #'   # Apply the log2 transformation to all MIC predictors |  | ||||||
| #'   step_mic_log2(all_mic_predictors()) %>% |  | ||||||
| #'   prep() |  | ||||||
| #' | #' | ||||||
| #' # View prepped recipe | #'     # Optionally remove non-predictive variables | ||||||
| #' mic_recipe | #'     remove_role(genus, old_role = "predictor") %>% | ||||||
| #' | #' | ||||||
| #' # Apply the recipe to training and testing data | #'     # Apply the log2 transformation to all MIC predictors | ||||||
| #' out_training <- bake(mic_recipe, new_data = NULL) | #'     step_mic_log2(all_mic_predictors()) %>% | ||||||
| #' out_testing <- bake(mic_recipe, new_data = testing_data) |  | ||||||
| #' | #' | ||||||
| #' # Fit a logistic regression model | #'     # And apply the preparation steps | ||||||
| #' fitted <- logistic_reg(mode = "classification") %>% | #'     prep() | ||||||
| #'   set_engine("glm") %>% |  | ||||||
| #'   fit(esbl ~ ., data = out_training) |  | ||||||
| #' | #' | ||||||
| #' # Generate predictions on the test set | #'   # View prepped recipe | ||||||
| #' predictions <- predict(fitted, out_testing) %>% | #'   mic_recipe | ||||||
| #'   bind_cols(out_testing) |  | ||||||
| #' | #' | ||||||
| #' # Evaluate predictions using standard classification metrics | #'   # Apply the recipe to training and testing data | ||||||
| #' our_metrics <- metric_set(accuracy, kap, ppv, npv) | #'   out_training <- bake(mic_recipe, new_data = NULL) | ||||||
| #' metrics <- our_metrics(predictions, truth = esbl, estimate = .pred_class) | #'   out_testing <- bake(mic_recipe, new_data = testing_data) | ||||||
| #' | #' | ||||||
| #' # Show performance: | #'   # Fit a logistic regression model | ||||||
| #' # - negative predictive value (NPV) of ~98% | #'   fitted <- logistic_reg(mode = "classification") %>% | ||||||
| #' # - positive predictive value (PPV) of ~94% | #'     set_engine("glm") %>% | ||||||
| #' metrics | #'     fit(esbl ~ ., data = out_training) | ||||||
|  | #' | ||||||
|  | #'   # Generate predictions on the test set | ||||||
|  | #'   predictions <- predict(fitted, out_testing) %>% | ||||||
|  | #'     bind_cols(out_testing) | ||||||
|  | #' | ||||||
|  | #'   # Evaluate predictions using standard classification metrics | ||||||
|  | #'   our_metrics <- metric_set(accuracy, kap, ppv, npv) | ||||||
|  | #'   metrics <- our_metrics(predictions, truth = esbl, estimate = .pred_class) | ||||||
|  | #' | ||||||
|  | #'   # Show performance | ||||||
|  | #'   metrics | ||||||
|  | #' } | ||||||
| all_mic <- function() { | all_mic <- function() { | ||||||
|   x <- tidymodels_amr_select(levels(NA_mic_)) |   x <- tidymodels_amr_select(levels(NA_mic_)) | ||||||
|   names(x) |   names(x) | ||||||
|   | |||||||
| @@ -4,20 +4,23 @@ | |||||||
| \alias{age_groups} | \alias{age_groups} | ||||||
| \title{Split Ages into Age Groups} | \title{Split Ages into Age Groups} | ||||||
| \usage{ | \usage{ | ||||||
| age_groups(x, split_at = c(12, 25, 55, 75), na.rm = FALSE) | age_groups(x, split_at = c(0, 12, 25, 55, 75), names = NULL, | ||||||
|  |   na.rm = FALSE) | ||||||
| } | } | ||||||
| \arguments{ | \arguments{ | ||||||
| \item{x}{Age, e.g. calculated with \code{\link[=age]{age()}}.} | \item{x}{Age, e.g. calculated with \code{\link[=age]{age()}}.} | ||||||
|  |  | ||||||
| \item{split_at}{Values to split \code{x} at - the default is age groups 0-11, 12-24, 25-54, 55-74 and 75+. See \emph{Details}.} | \item{split_at}{Values to split \code{x} at - the default is age groups 0-11, 12-24, 25-54, 55-74 and 75+. See \emph{Details}.} | ||||||
|  |  | ||||||
|  | \item{names}{Optional names to be given to the various age groups.} | ||||||
|  |  | ||||||
| \item{na.rm}{A \link{logical} to indicate whether missing values should be removed.} | \item{na.rm}{A \link{logical} to indicate whether missing values should be removed.} | ||||||
| } | } | ||||||
| \value{ | \value{ | ||||||
| Ordered \link{factor} | Ordered \link{factor} | ||||||
| } | } | ||||||
| \description{ | \description{ | ||||||
| Split ages into age groups defined by the \code{split} argument. This allows for easier demographic (antimicrobial resistance) analysis. | Split ages into age groups defined by the \code{split} argument. This allows for easier demographic (antimicrobial resistance) analysis. The function returns an ordered \link{factor}. | ||||||
| } | } | ||||||
| \details{ | \details{ | ||||||
| To split ages, the input for the \code{split_at} argument can be: | To split ages, the input for the \code{split_at} argument can be: | ||||||
| @@ -41,6 +44,7 @@ age_groups(ages, 50) | |||||||
|  |  | ||||||
| # split into 0-19, 20-49 and 50+ | # split into 0-19, 20-49 and 50+ | ||||||
| age_groups(ages, c(20, 50)) | age_groups(ages, c(20, 50)) | ||||||
|  | age_groups(ages, c(20, 50), names = c("Under 20 years", "20 to 50 years", "Over 50 years")) | ||||||
|  |  | ||||||
| # split into groups of ten years | # split into groups of ten years | ||||||
| age_groups(ages, 1:10 * 10) | age_groups(ages, 1:10 * 10) | ||||||
|   | |||||||
| @@ -65,56 +65,59 @@ Pre-processing pipeline steps include: | |||||||
| These steps integrate with \code{recipes::recipe()} and work like standard preprocessing steps. They are useful for preparing data for modelling, especially with classification models. | These steps integrate with \code{recipes::recipe()} and work like standard preprocessing steps. They are useful for preparing data for modelling, especially with classification models. | ||||||
| } | } | ||||||
| \examples{ | \examples{ | ||||||
| library(tidymodels) | if (require("tidymodels")) { | ||||||
|  |  | ||||||
| # The below approach formed the basis for this paper: DOI 10.3389/fmicb.2025.1582703 |   # The below approach formed the basis for this paper: DOI 10.3389/fmicb.2025.1582703 | ||||||
| # Presence of ESBL genes was predicted based on raw MIC values. |   # Presence of ESBL genes was predicted based on raw MIC values. | ||||||
|  |  | ||||||
|  |  | ||||||
| # example data set in the AMR package |   # example data set in the AMR package | ||||||
| esbl_isolates |   esbl_isolates | ||||||
|  |  | ||||||
| # Prepare a binary outcome and convert to ordered factor |   # Prepare a binary outcome and convert to ordered factor | ||||||
| data <- esbl_isolates \%>\% |   data <- esbl_isolates \%>\% | ||||||
|   mutate(esbl = factor(esbl, levels = c(FALSE, TRUE), ordered = TRUE)) |     mutate(esbl = factor(esbl, levels = c(FALSE, TRUE), ordered = TRUE)) | ||||||
|  |  | ||||||
| # Split into training and testing sets |   # Split into training and testing sets | ||||||
| split <- initial_split(data) |   split <- initial_split(data) | ||||||
| training_data <- training(split) |   training_data <- training(split) | ||||||
| testing_data <- testing(split) |   testing_data <- testing(split) | ||||||
|  |  | ||||||
| # Create and prep a recipe with MIC log2 transformation |   # Create and prep a recipe with MIC log2 transformation | ||||||
| mic_recipe <- recipe(esbl ~ ., data = training_data) \%>\% |   mic_recipe <- recipe(esbl ~ ., data = training_data) \%>\% | ||||||
|   # Optionally remove non-predictive variables |  | ||||||
|   remove_role(genus, old_role = "predictor") \%>\% |  | ||||||
|   # Apply the log2 transformation to all MIC predictors |  | ||||||
|   step_mic_log2(all_mic_predictors()) \%>\% |  | ||||||
|   prep() |  | ||||||
|  |  | ||||||
| # View prepped recipe |     # Optionally remove non-predictive variables | ||||||
| mic_recipe |     remove_role(genus, old_role = "predictor") \%>\% | ||||||
|  |  | ||||||
| # Apply the recipe to training and testing data |     # Apply the log2 transformation to all MIC predictors | ||||||
| out_training <- bake(mic_recipe, new_data = NULL) |     step_mic_log2(all_mic_predictors()) \%>\% | ||||||
| out_testing <- bake(mic_recipe, new_data = testing_data) |  | ||||||
|  |  | ||||||
| # Fit a logistic regression model |     # And apply the preparation steps | ||||||
| fitted <- logistic_reg(mode = "classification") \%>\% |     prep() | ||||||
|   set_engine("glm") \%>\% |  | ||||||
|   fit(esbl ~ ., data = out_training) |  | ||||||
|  |  | ||||||
| # Generate predictions on the test set |   # View prepped recipe | ||||||
| predictions <- predict(fitted, out_testing) \%>\% |   mic_recipe | ||||||
|   bind_cols(out_testing) |  | ||||||
|  |  | ||||||
| # Evaluate predictions using standard classification metrics |   # Apply the recipe to training and testing data | ||||||
| our_metrics <- metric_set(accuracy, kap, ppv, npv) |   out_training <- bake(mic_recipe, new_data = NULL) | ||||||
| metrics <- our_metrics(predictions, truth = esbl, estimate = .pred_class) |   out_testing <- bake(mic_recipe, new_data = testing_data) | ||||||
|  |  | ||||||
| # Show performance: |   # Fit a logistic regression model | ||||||
| # - negative predictive value (NPV) of ~98\% |   fitted <- logistic_reg(mode = "classification") \%>\% | ||||||
| # - positive predictive value (PPV) of ~94\% |     set_engine("glm") \%>\% | ||||||
| metrics |     fit(esbl ~ ., data = out_training) | ||||||
|  |  | ||||||
|  |   # Generate predictions on the test set | ||||||
|  |   predictions <- predict(fitted, out_testing) \%>\% | ||||||
|  |     bind_cols(out_testing) | ||||||
|  |  | ||||||
|  |   # Evaluate predictions using standard classification metrics | ||||||
|  |   our_metrics <- metric_set(accuracy, kap, ppv, npv) | ||||||
|  |   metrics <- our_metrics(predictions, truth = esbl, estimate = .pred_class) | ||||||
|  |  | ||||||
|  |   # Show performance | ||||||
|  |   metrics | ||||||
|  | } | ||||||
| } | } | ||||||
| \seealso{ | \seealso{ | ||||||
| \code{\link[recipes:recipe]{recipes::recipe()}}, \code{\link[=as.mic]{as.mic()}}, \code{\link[=as.sir]{as.sir()}} | \code{\link[recipes:recipe]{recipes::recipe()}}, \code{\link[=as.mic]{as.mic()}}, \code{\link[=as.sir]{as.sir()}} | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user