mirror of
				https://github.com/msberends/AMR.git
				synced 2025-10-31 09:28:17 +01:00 
			
		
		
		
	(v3.0.0.9029) fix for vignette and envir data
This commit is contained in:
		| @@ -1,5 +1,5 @@ | |||||||
| Package: AMR | Package: AMR | ||||||
| Version: 3.0.0.9028 | Version: 3.0.0.9029 | ||||||
| Date: 2025-09-10 | Date: 2025-09-10 | ||||||
| Title: Antimicrobial Resistance Data Analysis | Title: Antimicrobial Resistance Data Analysis | ||||||
| Description: Functions to simplify and standardise antimicrobial resistance (AMR) | Description: Functions to simplify and standardise antimicrobial resistance (AMR) | ||||||
|   | |||||||
							
								
								
									
										2
									
								
								NEWS.md
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								NEWS.md
									
									
									
									
									
								
							| @@ -1,4 +1,4 @@ | |||||||
| # AMR 3.0.0.9028 | # AMR 3.0.0.9029 | ||||||
|  |  | ||||||
| This is a bugfix release following the release of v3.0.0 in June 2025. | This is a bugfix release following the release of v3.0.0 in June 2025. | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										2
									
								
								R/ab.R
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								R/ab.R
									
									
									
									
									
								
							| @@ -203,7 +203,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, language = get_AMR_locale(), | |||||||
|     progress <- progress_ticker(n = sum(!already_known), n_min = 25, print = info) # start if n >= 25 |     progress <- progress_ticker(n = sum(!already_known), n_min = 25, print = info) # start if n >= 25 | ||||||
|     on.exit(close(progress)) |     on.exit(close(progress)) | ||||||
|     if (any(x_new[!already_known & !is.na(x_new)] %in% unlist(AMR_env$AV_lookup$generalised_all, use.names = FALSE), na.rm = TRUE)) { |     if (any(x_new[!already_known & !is.na(x_new)] %in% unlist(AMR_env$AV_lookup$generalised_all, use.names = FALSE), na.rm = TRUE)) { | ||||||
|       warning_("in `as.ab()`: some input seem to resemble antiviral drugs - use `as.av()` or e.g. `av_name()` for these, not `as.ab()` or e.g. `ab_name()`.") |       warning_("in `as.ab()`: some input seems to resemble antiviral drugs - use `as.av()` or e.g. `av_name()` for these, not `as.ab()` or e.g. `ab_name()`.") | ||||||
|     } |     } | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -70,14 +70,14 @@ test_that("test-misc.R", { | |||||||
|   } |   } | ||||||
|  |  | ||||||
|   df <- example_isolates[, check_df("x")] |   df <- example_isolates[, check_df("x")] | ||||||
|   expect_true(is_right, info = "the environmental data cannot be found for base/x (1)") |   expect_true(is_right, info = "the environmental data cannot be found for base `x`") | ||||||
|  |  | ||||||
|   if (getRversion() < "4.0.0") { |   # should work on R >=3.6.3 or so | ||||||
|  |   df <- example_isolates[c(1:3), check_df("x")] | ||||||
|  |   if (!is_right) { | ||||||
|  |     # otherwise, this is needed for older versions | ||||||
|     df <- example_isolates[c(1:3), check_df("xx")] |     df <- example_isolates[c(1:3), check_df("xx")] | ||||||
|     expect_true(is_right, info = "the environmental data cannot be found for base/xx") |     expect_true(is_right, info = "the environmental data cannot be found for base `x` or `xx`")  | ||||||
|   } else { |  | ||||||
|     df <- example_isolates[c(1:3), check_df("x")] |  | ||||||
|     expect_true(is_right, info = "the environmental data cannot be found for base/x (2)") |  | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   if (AMR:::pkg_is_available("dplyr", min_version = "1.0.0", also_load = TRUE)) { |   if (AMR:::pkg_is_available("dplyr", min_version = "1.0.0", also_load = TRUE)) { | ||||||
|   | |||||||
| @@ -225,148 +225,12 @@ This approach and idea formed the basis for the publication [DOI: 10.3389/fmicb. | |||||||
| >  | >  | ||||||
| > The new AMR package version will contain new tidymodels selectors such as `step_mic_log2()`. | > The new AMR package version will contain new tidymodels selectors such as `step_mic_log2()`. | ||||||
|  |  | ||||||
| <!-- | <!-- TODO for AMR v3.1.0: add info from here: https://github.com/msberends/AMR/blob/2461631bcefa78ebdb37bdfad359be74cdd9165a/vignettes/AMR_with_tidymodels.Rmd#L212-L291 --> | ||||||
|  |  | ||||||
| ### **Objective** |  | ||||||
|  |  | ||||||
| Our goal is to: |  | ||||||
|  |  | ||||||
| 1. Use raw MIC values to predict whether a bacterial isolate produces ESBL. |  | ||||||
| 2. Apply AMR-aware preprocessing in a `tidymodels` recipe. |  | ||||||
| 3. Train a classification model and evaluate its predictive performance. |  | ||||||
|  |  | ||||||
| ### **Data Preparation** |  | ||||||
|  |  | ||||||
| We use the `esbl_isolates` dataset that comes with the AMR package. |  | ||||||
|  |  | ||||||
| ```{r} |  | ||||||
| # Load required libraries |  | ||||||
| library(AMR) |  | ||||||
| library(tidymodels) |  | ||||||
|  |  | ||||||
| # View the esbl_isolates data set |  | ||||||
| esbl_isolates |  | ||||||
|  |  | ||||||
| # Prepare a binary outcome and convert to ordered factor |  | ||||||
| data <- esbl_isolates %>% |  | ||||||
|   mutate(esbl = factor(esbl, levels = c(FALSE, TRUE), ordered = TRUE)) |  | ||||||
| ``` |  | ||||||
|  |  | ||||||
| **Explanation:** |  | ||||||
|  |  | ||||||
| - `esbl_isolates`: Contains MIC test results and ESBL status for each isolate. |  | ||||||
| - `mutate(esbl = ...)`: Converts the target column to an ordered factor for classification. |  | ||||||
|  |  | ||||||
| ### **Defining the Workflow** |  | ||||||
|  |  | ||||||
| #### 1. Preprocessing with a Recipe |  | ||||||
|  |  | ||||||
| We use our `step_mic_log2()` function to log2-transform MIC values, ensuring that MICs are numeric and properly scaled. All MIC predictors can easily and agnostically selected using the new `all_mic_predictors()`: |  | ||||||
|  |  | ||||||
| ```{r} |  | ||||||
| # Split into training and testing sets |  | ||||||
| set.seed(123) |  | ||||||
| split <- initial_split(data) |  | ||||||
| training_data <- training(split) |  | ||||||
| testing_data <- testing(split) |  | ||||||
|  |  | ||||||
| # Define the recipe |  | ||||||
| mic_recipe <- recipe(esbl ~ ., data = training_data) %>% |  | ||||||
|   remove_role(genus, old_role = "predictor") %>%  # Remove non-informative variable |  | ||||||
|   step_mic_log2(all_mic_predictors()) #%>%         # Log2 transform all MIC predictors |  | ||||||
|  # prep() |  | ||||||
|  |  | ||||||
| mic_recipe |  | ||||||
| ``` |  | ||||||
|  |  | ||||||
| **Explanation:** |  | ||||||
|  |  | ||||||
| - `remove_role()`: Removes irrelevant variables like genus. |  | ||||||
| - `step_mic_log2()`: Applies `log2(as.numeric(...))` to all MIC predictors in one go. |  | ||||||
| - `prep()`: Finalises the recipe based on training data. |  | ||||||
|  |  | ||||||
| #### 2. Specifying the Model |  | ||||||
|  |  | ||||||
| We use a simple logistic regression to model ESBL presence, though recent models such as xgboost ([link to `parsnip` manual](https://parsnip.tidymodels.org/reference/details_boost_tree_xgboost.html)) could be much more precise. |  | ||||||
|  |  | ||||||
| ```{r} |  | ||||||
| # Define the model |  | ||||||
| model <- logistic_reg(mode = "classification") %>% |  | ||||||
|   set_engine("glm") |  | ||||||
|  |  | ||||||
| model |  | ||||||
| ``` |  | ||||||
|  |  | ||||||
| **Explanation:** |  | ||||||
|  |  | ||||||
| - `logistic_reg()`: Specifies a binary classification model. |  | ||||||
| - `set_engine("glm")`: Uses the base R GLM engine. |  | ||||||
|  |  | ||||||
| #### 3. Building the Workflow |  | ||||||
|  |  | ||||||
| ```{r} |  | ||||||
| # Create workflow |  | ||||||
| workflow_model <- workflow() %>% |  | ||||||
|   add_recipe(mic_recipe) %>% |  | ||||||
|   add_model(model) |  | ||||||
|  |  | ||||||
| workflow_model |  | ||||||
| ``` |  | ||||||
|  |  | ||||||
| ### **Training and Evaluating the Model** |  | ||||||
|  |  | ||||||
| ```{r} |  | ||||||
| # Fit the model |  | ||||||
| fitted <- fit(workflow_model, training_data) |  | ||||||
|  |  | ||||||
| # Generate predictions |  | ||||||
| predictions <- predict(fitted, testing_data) %>% |  | ||||||
|   bind_cols(testing_data) |  | ||||||
|  |  | ||||||
| # Evaluate model performance |  | ||||||
| our_metrics <- metric_set(accuracy, kap, ppv, npv) |  | ||||||
| metrics <- our_metrics(predictions, truth = esbl, estimate = .pred_class) |  | ||||||
|  |  | ||||||
| metrics |  | ||||||
| ``` |  | ||||||
|  |  | ||||||
| **Explanation:** |  | ||||||
|  |  | ||||||
| - `fit()`: Trains the model on the processed training data. |  | ||||||
| - `predict()`: Produces predictions for unseen test data. |  | ||||||
| - `metric_set()`: Allows evaluating multiple classification metrics. |  | ||||||
|  |  | ||||||
| It appears we can predict ESBL gene presence with a positive predictive value (PPV) of `r round(metrics$.estimate[3], 3) * 100`% and a negative predictive value (NPV) of `r round(metrics$.estimate[4], 3) * 100` using a simplistic logistic regression model. |  | ||||||
|  |  | ||||||
| ### **Visualising Predictions** |  | ||||||
|  |  | ||||||
| We can visualise predictions by comparing predicted and actual ESBL status. |  | ||||||
|  |  | ||||||
| ```{r} |  | ||||||
| library(ggplot2) |  | ||||||
|  |  | ||||||
| ggplot(predictions, aes(x = esbl, fill = .pred_class)) + |  | ||||||
|   geom_bar(position = "stack") + |  | ||||||
|   labs(title = "Predicted vs Actual ESBL Status", |  | ||||||
|        x = "Actual ESBL", |  | ||||||
|        y = "Count") + |  | ||||||
|   theme_minimal() |  | ||||||
| ``` |  | ||||||
|  |  | ||||||
| <!-- |  | ||||||
|  |  | ||||||
| ### **Conclusion** |  | ||||||
|  |  | ||||||
| In this example, we showcased how the new `AMR`-specific recipe steps simplify working with `<mic>` columns in `tidymodels`. The `step_mic_log2()` transformation converts ordered MICs to log2-transformed numerics, improving compatibility with classification models. |  | ||||||
|  |  | ||||||
| This pipeline enables realistic, reproducible, and interpretable modelling of antimicrobial resistance data. |  | ||||||
|  |  | ||||||
| --> |  | ||||||
|  |  | ||||||
| --- | --- | ||||||
|  |  | ||||||
|  |  | ||||||
| ## Example 3: Predicting AMR Over Time | ## Example 2: Predicting AMR Over Time | ||||||
|  |  | ||||||
| In this third example, we aim to predict antimicrobial resistance (AMR) trends over time using `tidymodels`. We will model resistance to three antibiotics (amoxicillin `AMX`, amoxicillin-clavulanic acid `AMC`, and ciprofloxacin `CIP`), based on historical data grouped by year and hospital ward. | In this second example, we aim to predict antimicrobial resistance (AMR) trends over time using `tidymodels`. We will model resistance to three antibiotics (amoxicillin `AMX`, amoxicillin-clavulanic acid `AMC`, and ciprofloxacin `CIP`), based on historical data grouped by year and hospital ward. | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user