mirror of
				https://github.com/msberends/AMR.git
				synced 2025-10-30 23:28:15 +01:00 
			
		
		
		
	(v3.0.0.9011) allow names for age_groups()
				
					
				
			This commit is contained in:
		| @@ -1,5 +1,5 @@ | ||||
| Package: AMR | ||||
| Version: 3.0.0.9010 | ||||
| Version: 3.0.0.9011 | ||||
| Date: 2025-07-17 | ||||
| Title: Antimicrobial Resistance Data Analysis | ||||
| Description: Functions to simplify and standardise antimicrobial resistance (AMR) | ||||
|   | ||||
							
								
								
									
										3
									
								
								NEWS.md
									
									
									
									
									
								
							
							
						
						
									
										3
									
								
								NEWS.md
									
									
									
									
									
								
							| @@ -1,4 +1,4 @@ | ||||
| # AMR 3.0.0.9010 | ||||
| # AMR 3.0.0.9011 | ||||
|  | ||||
| This is primarily a bugfix release, though we added one nice feature too. | ||||
|  | ||||
| @@ -17,6 +17,7 @@ This is primarily a bugfix release, though we added one nice feature too. | ||||
| * Fixed a bug in `ggplot_sir()` when using `combine_SI = FALSE` (#213) | ||||
| * Fixed all plotting to contain a separate colour for SDD (susceptible dose-dependent) (#223) | ||||
| * Fixed some specific Dutch translations for antimicrobials | ||||
| * Added `names` to `age_groups()` so that custom names can be given (#215) | ||||
| * Added note to `as.sir()` to make it explicit when higher-level taxonomic breakpoints are used (#218) | ||||
| * Updated `random_mic()` and `random_disk()` to set skewedness of the distribution and allow multiple microorganisms | ||||
|  | ||||
|   | ||||
| @@ -519,7 +519,7 @@ word_wrap <- function(..., | ||||
|     ) | ||||
|     msg <- paste0(parts, collapse = "`") | ||||
|   } | ||||
|   msg <- gsub("`(.+?)`", font_grey_bg("\\1"), msg) | ||||
|   msg <- gsub("`(.+?)`", font_grey_bg("`\\1`"), msg) | ||||
|  | ||||
|   # clean introduced whitespace in between fullstops | ||||
|   msg <- gsub("[.] +[.]", "..", msg) | ||||
|   | ||||
							
								
								
									
										12
									
								
								R/age.R
									
									
									
									
									
								
							
							
						
						
									
										12
									
								
								R/age.R
									
									
									
									
									
								
							| @@ -128,9 +128,10 @@ age <- function(x, reference = Sys.Date(), exact = FALSE, na.rm = FALSE, ...) { | ||||
|  | ||||
| #' Split Ages into Age Groups | ||||
| #' | ||||
| #' Split ages into age groups defined by the `split` argument. This allows for easier demographic (antimicrobial resistance) analysis. | ||||
| #' Split ages into age groups defined by the `split` argument. This allows for easier demographic (antimicrobial resistance) analysis. The function returns an ordered [factor]. | ||||
| #' @param x Age, e.g. calculated with [age()]. | ||||
| #' @param split_at Values to split `x` at - the default is age groups 0-11, 12-24, 25-54, 55-74 and 75+. See *Details*. | ||||
| #' @param names Optional names to be given to the various age groups. | ||||
| #' @param na.rm A [logical] to indicate whether missing values should be removed. | ||||
| #' @details To split ages, the input for the `split_at` argument can be: | ||||
| #' | ||||
| @@ -152,6 +153,7 @@ age <- function(x, reference = Sys.Date(), exact = FALSE, na.rm = FALSE, ...) { | ||||
| #' | ||||
| #' # split into 0-19, 20-49 and 50+ | ||||
| #' age_groups(ages, c(20, 50)) | ||||
| #' age_groups(ages, c(20, 50), names = c("Under 20 years", "20 to 50 years", "Over 50 years")) | ||||
| #' | ||||
| #' # split into groups of ten years | ||||
| #' age_groups(ages, 1:10 * 10) | ||||
| @@ -181,9 +183,10 @@ age <- function(x, reference = Sys.Date(), exact = FALSE, na.rm = FALSE, ...) { | ||||
| #'     ) | ||||
| #' } | ||||
| #' } | ||||
| age_groups <- function(x, split_at = c(12, 25, 55, 75), na.rm = FALSE) { | ||||
| age_groups <- function(x, split_at = c(0, 12, 25, 55, 75), names = NULL, na.rm = FALSE) { | ||||
|   meet_criteria(x, allow_class = c("numeric", "integer"), is_positive_or_zero = TRUE, is_finite = TRUE) | ||||
|   meet_criteria(split_at, allow_class = c("numeric", "integer", "character"), is_positive_or_zero = TRUE, is_finite = TRUE) | ||||
|   meet_criteria(names, allow_class = "character", allow_NULL = TRUE) | ||||
|   meet_criteria(na.rm, allow_class = "logical", has_length = 1) | ||||
|  | ||||
|   if (any(x < 0, na.rm = TRUE)) { | ||||
| @@ -224,6 +227,11 @@ age_groups <- function(x, split_at = c(12, 25, 55, 75), na.rm = FALSE) { | ||||
|  | ||||
|   agegroups <- factor(lbls[y], levels = lbls, ordered = TRUE) | ||||
|  | ||||
|   if (!is.null(names)) { | ||||
|     stop_ifnot(length(names) == length(levels(agegroups)), "`names` must have the same length as the number of age groups (", length(levels(agegroups)), ").") | ||||
|     levels(agegroups) <- names | ||||
|   } | ||||
|  | ||||
|   if (isTRUE(na.rm)) { | ||||
|     agegroups <- agegroups[!is.na(agegroups)] | ||||
|   } | ||||
|   | ||||
| @@ -206,7 +206,7 @@ ggplot_sir <- function(data, | ||||
|   meet_criteria(minimum, allow_class = c("numeric", "integer"), has_length = 1, is_positive_or_zero = TRUE, is_finite = TRUE) | ||||
|   language <- validate_language(language) | ||||
|   meet_criteria(nrow, allow_class = c("numeric", "integer"), has_length = 1, allow_NULL = TRUE, is_positive = TRUE, is_finite = TRUE) | ||||
|   meet_criteria(colours, allow_class = c("character", "logical")) | ||||
|   meet_criteria(colours, allow_class = c("character", "logical"), allow_NULL = TRUE) | ||||
|   meet_criteria(datalabels, allow_class = "logical", has_length = 1) | ||||
|   meet_criteria(datalabels.size, allow_class = c("numeric", "integer"), has_length = 1, is_positive = TRUE, is_finite = TRUE) | ||||
|   meet_criteria(datalabels.colour, allow_class = "character", has_length = 1) | ||||
| @@ -246,7 +246,7 @@ ggplot_sir <- function(data, | ||||
|     ) + | ||||
|     theme_sir() | ||||
|  | ||||
|   if (fill == "interpretation") { | ||||
|   if (fill == "interpretation" && !is.null(colours) && !isFALSE(colours)) { | ||||
|     p <- suppressWarnings(p + scale_sir_colours(aesthetics = "fill", colours = colours)) | ||||
|   } | ||||
|  | ||||
|   | ||||
| @@ -19,56 +19,59 @@ | ||||
| #' @keywords internal | ||||
| #' @export | ||||
| #' @examples | ||||
| #' library(tidymodels) | ||||
| #' if (require("tidymodels")) { | ||||
| #' | ||||
| #' # The below approach formed the basis for this paper: DOI 10.3389/fmicb.2025.1582703 | ||||
| #' # Presence of ESBL genes was predicted based on raw MIC values. | ||||
| #'   # The below approach formed the basis for this paper: DOI 10.3389/fmicb.2025.1582703 | ||||
| #'   # Presence of ESBL genes was predicted based on raw MIC values. | ||||
| #' | ||||
| #' | ||||
| #' # example data set in the AMR package | ||||
| #' esbl_isolates | ||||
| #'   # example data set in the AMR package | ||||
| #'   esbl_isolates | ||||
| #' | ||||
| #' # Prepare a binary outcome and convert to ordered factor | ||||
| #' data <- esbl_isolates %>% | ||||
| #'   mutate(esbl = factor(esbl, levels = c(FALSE, TRUE), ordered = TRUE)) | ||||
| #'   # Prepare a binary outcome and convert to ordered factor | ||||
| #'   data <- esbl_isolates %>% | ||||
| #'     mutate(esbl = factor(esbl, levels = c(FALSE, TRUE), ordered = TRUE)) | ||||
| #' | ||||
| #' # Split into training and testing sets | ||||
| #' split <- initial_split(data) | ||||
| #' training_data <- training(split) | ||||
| #' testing_data <- testing(split) | ||||
| #'   # Split into training and testing sets | ||||
| #'   split <- initial_split(data) | ||||
| #'   training_data <- training(split) | ||||
| #'   testing_data <- testing(split) | ||||
| #' | ||||
| #' # Create and prep a recipe with MIC log2 transformation | ||||
| #' mic_recipe <- recipe(esbl ~ ., data = training_data) %>% | ||||
| #'   # Optionally remove non-predictive variables | ||||
| #'   remove_role(genus, old_role = "predictor") %>% | ||||
| #'   # Apply the log2 transformation to all MIC predictors | ||||
| #'   step_mic_log2(all_mic_predictors()) %>% | ||||
| #'   prep() | ||||
| #'   # Create and prep a recipe with MIC log2 transformation | ||||
| #'   mic_recipe <- recipe(esbl ~ ., data = training_data) %>% | ||||
| #' | ||||
| #' # View prepped recipe | ||||
| #' mic_recipe | ||||
| #'     # Optionally remove non-predictive variables | ||||
| #'     remove_role(genus, old_role = "predictor") %>% | ||||
| #' | ||||
| #' # Apply the recipe to training and testing data | ||||
| #' out_training <- bake(mic_recipe, new_data = NULL) | ||||
| #' out_testing <- bake(mic_recipe, new_data = testing_data) | ||||
| #'     # Apply the log2 transformation to all MIC predictors | ||||
| #'     step_mic_log2(all_mic_predictors()) %>% | ||||
| #' | ||||
| #' # Fit a logistic regression model | ||||
| #' fitted <- logistic_reg(mode = "classification") %>% | ||||
| #'   set_engine("glm") %>% | ||||
| #'   fit(esbl ~ ., data = out_training) | ||||
| #'     # And apply the preparation steps | ||||
| #'     prep() | ||||
| #' | ||||
| #' # Generate predictions on the test set | ||||
| #' predictions <- predict(fitted, out_testing) %>% | ||||
| #'   bind_cols(out_testing) | ||||
| #'   # View prepped recipe | ||||
| #'   mic_recipe | ||||
| #' | ||||
| #' # Evaluate predictions using standard classification metrics | ||||
| #' our_metrics <- metric_set(accuracy, kap, ppv, npv) | ||||
| #' metrics <- our_metrics(predictions, truth = esbl, estimate = .pred_class) | ||||
| #'   # Apply the recipe to training and testing data | ||||
| #'   out_training <- bake(mic_recipe, new_data = NULL) | ||||
| #'   out_testing <- bake(mic_recipe, new_data = testing_data) | ||||
| #' | ||||
| #' # Show performance: | ||||
| #' # - negative predictive value (NPV) of ~98% | ||||
| #' # - positive predictive value (PPV) of ~94% | ||||
| #' metrics | ||||
| #'   # Fit a logistic regression model | ||||
| #'   fitted <- logistic_reg(mode = "classification") %>% | ||||
| #'     set_engine("glm") %>% | ||||
| #'     fit(esbl ~ ., data = out_training) | ||||
| #' | ||||
| #'   # Generate predictions on the test set | ||||
| #'   predictions <- predict(fitted, out_testing) %>% | ||||
| #'     bind_cols(out_testing) | ||||
| #' | ||||
| #'   # Evaluate predictions using standard classification metrics | ||||
| #'   our_metrics <- metric_set(accuracy, kap, ppv, npv) | ||||
| #'   metrics <- our_metrics(predictions, truth = esbl, estimate = .pred_class) | ||||
| #' | ||||
| #'   # Show performance | ||||
| #'   metrics | ||||
| #' } | ||||
| all_mic <- function() { | ||||
|   x <- tidymodels_amr_select(levels(NA_mic_)) | ||||
|   names(x) | ||||
|   | ||||
| @@ -4,20 +4,23 @@ | ||||
| \alias{age_groups} | ||||
| \title{Split Ages into Age Groups} | ||||
| \usage{ | ||||
| age_groups(x, split_at = c(12, 25, 55, 75), na.rm = FALSE) | ||||
| age_groups(x, split_at = c(0, 12, 25, 55, 75), names = NULL, | ||||
|   na.rm = FALSE) | ||||
| } | ||||
| \arguments{ | ||||
| \item{x}{Age, e.g. calculated with \code{\link[=age]{age()}}.} | ||||
|  | ||||
| \item{split_at}{Values to split \code{x} at - the default is age groups 0-11, 12-24, 25-54, 55-74 and 75+. See \emph{Details}.} | ||||
|  | ||||
| \item{names}{Optional names to be given to the various age groups.} | ||||
|  | ||||
| \item{na.rm}{A \link{logical} to indicate whether missing values should be removed.} | ||||
| } | ||||
| \value{ | ||||
| Ordered \link{factor} | ||||
| } | ||||
| \description{ | ||||
| Split ages into age groups defined by the \code{split} argument. This allows for easier demographic (antimicrobial resistance) analysis. | ||||
| Split ages into age groups defined by the \code{split} argument. This allows for easier demographic (antimicrobial resistance) analysis. The function returns an ordered \link{factor}. | ||||
| } | ||||
| \details{ | ||||
| To split ages, the input for the \code{split_at} argument can be: | ||||
| @@ -41,6 +44,7 @@ age_groups(ages, 50) | ||||
|  | ||||
| # split into 0-19, 20-49 and 50+ | ||||
| age_groups(ages, c(20, 50)) | ||||
| age_groups(ages, c(20, 50), names = c("Under 20 years", "20 to 50 years", "Over 50 years")) | ||||
|  | ||||
| # split into groups of ten years | ||||
| age_groups(ages, 1:10 * 10) | ||||
|   | ||||
| @@ -65,56 +65,59 @@ Pre-processing pipeline steps include: | ||||
| These steps integrate with \code{recipes::recipe()} and work like standard preprocessing steps. They are useful for preparing data for modelling, especially with classification models. | ||||
| } | ||||
| \examples{ | ||||
| library(tidymodels) | ||||
| if (require("tidymodels")) { | ||||
|  | ||||
| # The below approach formed the basis for this paper: DOI 10.3389/fmicb.2025.1582703 | ||||
| # Presence of ESBL genes was predicted based on raw MIC values. | ||||
|   # The below approach formed the basis for this paper: DOI 10.3389/fmicb.2025.1582703 | ||||
|   # Presence of ESBL genes was predicted based on raw MIC values. | ||||
|  | ||||
|  | ||||
| # example data set in the AMR package | ||||
| esbl_isolates | ||||
|   # example data set in the AMR package | ||||
|   esbl_isolates | ||||
|  | ||||
| # Prepare a binary outcome and convert to ordered factor | ||||
| data <- esbl_isolates \%>\% | ||||
|   mutate(esbl = factor(esbl, levels = c(FALSE, TRUE), ordered = TRUE)) | ||||
|   # Prepare a binary outcome and convert to ordered factor | ||||
|   data <- esbl_isolates \%>\% | ||||
|     mutate(esbl = factor(esbl, levels = c(FALSE, TRUE), ordered = TRUE)) | ||||
|  | ||||
| # Split into training and testing sets | ||||
| split <- initial_split(data) | ||||
| training_data <- training(split) | ||||
| testing_data <- testing(split) | ||||
|   # Split into training and testing sets | ||||
|   split <- initial_split(data) | ||||
|   training_data <- training(split) | ||||
|   testing_data <- testing(split) | ||||
|  | ||||
| # Create and prep a recipe with MIC log2 transformation | ||||
| mic_recipe <- recipe(esbl ~ ., data = training_data) \%>\% | ||||
|   # Optionally remove non-predictive variables | ||||
|   remove_role(genus, old_role = "predictor") \%>\% | ||||
|   # Apply the log2 transformation to all MIC predictors | ||||
|   step_mic_log2(all_mic_predictors()) \%>\% | ||||
|   prep() | ||||
|   # Create and prep a recipe with MIC log2 transformation | ||||
|   mic_recipe <- recipe(esbl ~ ., data = training_data) \%>\% | ||||
|  | ||||
| # View prepped recipe | ||||
| mic_recipe | ||||
|     # Optionally remove non-predictive variables | ||||
|     remove_role(genus, old_role = "predictor") \%>\% | ||||
|  | ||||
| # Apply the recipe to training and testing data | ||||
| out_training <- bake(mic_recipe, new_data = NULL) | ||||
| out_testing <- bake(mic_recipe, new_data = testing_data) | ||||
|     # Apply the log2 transformation to all MIC predictors | ||||
|     step_mic_log2(all_mic_predictors()) \%>\% | ||||
|  | ||||
| # Fit a logistic regression model | ||||
| fitted <- logistic_reg(mode = "classification") \%>\% | ||||
|   set_engine("glm") \%>\% | ||||
|   fit(esbl ~ ., data = out_training) | ||||
|     # And apply the preparation steps | ||||
|     prep() | ||||
|  | ||||
| # Generate predictions on the test set | ||||
| predictions <- predict(fitted, out_testing) \%>\% | ||||
|   bind_cols(out_testing) | ||||
|   # View prepped recipe | ||||
|   mic_recipe | ||||
|  | ||||
| # Evaluate predictions using standard classification metrics | ||||
| our_metrics <- metric_set(accuracy, kap, ppv, npv) | ||||
| metrics <- our_metrics(predictions, truth = esbl, estimate = .pred_class) | ||||
|   # Apply the recipe to training and testing data | ||||
|   out_training <- bake(mic_recipe, new_data = NULL) | ||||
|   out_testing <- bake(mic_recipe, new_data = testing_data) | ||||
|  | ||||
| # Show performance: | ||||
| # - negative predictive value (NPV) of ~98\% | ||||
| # - positive predictive value (PPV) of ~94\% | ||||
| metrics | ||||
|   # Fit a logistic regression model | ||||
|   fitted <- logistic_reg(mode = "classification") \%>\% | ||||
|     set_engine("glm") \%>\% | ||||
|     fit(esbl ~ ., data = out_training) | ||||
|  | ||||
|   # Generate predictions on the test set | ||||
|   predictions <- predict(fitted, out_testing) \%>\% | ||||
|     bind_cols(out_testing) | ||||
|  | ||||
|   # Evaluate predictions using standard classification metrics | ||||
|   our_metrics <- metric_set(accuracy, kap, ppv, npv) | ||||
|   metrics <- our_metrics(predictions, truth = esbl, estimate = .pred_class) | ||||
|  | ||||
|   # Show performance | ||||
|   metrics | ||||
| } | ||||
| } | ||||
| \seealso{ | ||||
| \code{\link[recipes:recipe]{recipes::recipe()}}, \code{\link[=as.mic]{as.mic()}}, \code{\link[=as.sir]{as.sir()}} | ||||
|   | ||||
		Reference in New Issue
	
	Block a user