Add add_if_missing parameter to control NA handling in interpretive rules (#264)

2026-06-22 15:36:18 +02:00 · 2026-04-21 21:53:43 +02:00
parent fb8758f36b
commit 8ff5d4472a
46 changed files with 1232 additions and 1016 deletions
--- a/vignettes/AMR.Rmd
+++ b/vignettes/AMR.Rmd
@@ -268,7 +268,8 @@ To create a traditional antibiogram, simply state which antibiotics should be us

 ```{r trad}
 antibiogram(example_isolates,
-            antibiotics = c(aminoglycosides(), carbapenems()))
+  antibiotics = c(aminoglycosides(), carbapenems())
+)
 ```

 Notice that the `antibiogram()` function automatically prints in the right format when using Quarto or R Markdown (such as this page), and even applies italics for taxonomic names (by using `italicise_taxonomy()` internally).
@@ -277,10 +278,11 @@ It also uses the language of your OS if this is either `r AMR:::vector_or(vapply

 ```{r trad2}
 antibiogram(example_isolates,
-            mo_transform = "gramstain",
-            antibiotics = aminoglycosides(),
-            ab_transform = "name",
-            language = "es")
+  mo_transform = "gramstain",
+  antibiotics = aminoglycosides(),
+  ab_transform = "name",
+  language = "es"
+)
 ```

 ### Combined Antibiogram
@@ -289,8 +291,9 @@ To create a combined antibiogram, use antibiotic codes or names with a plus `+`

 ```{r comb}
 combined_ab <- antibiogram(example_isolates,
-                           antibiotics = c("TZP", "TZP+TOB", "TZP+GEN"),
-                           ab_transform = NULL)
+  antibiotics = c("TZP", "TZP+TOB", "TZP+GEN"),
+  ab_transform = NULL
+)
 combined_ab
 ```

@@ -300,8 +303,9 @@ To create a syndromic antibiogram, the `syndromic_group` argument must be used.

 ```{r synd}
 antibiogram(example_isolates,
-            antibiotics = c(aminoglycosides(), carbapenems()),
-            syndromic_group = "ward")
+  antibiotics = c(aminoglycosides(), carbapenems()),
+  syndromic_group = "ward"
+)
 ```

 ### Weighted-Incidence Syndromic Combination Antibiogram (WISCA)
@@ -310,8 +314,10 @@ To create a **Weighted-Incidence Syndromic Combination Antibiogram (WISCA)**, si

 ```{r wisca}
 example_isolates %>%
-  wisca(antibiotics = c("TZP", "TZP+TOB", "TZP+GEN"),
-        minimum = 10) # Recommended threshold: ≥30
+  wisca(
+    antibiotics = c("TZP", "TZP+TOB", "TZP+GEN"),
+    minimum = 10
+  ) # Recommended threshold: ≥30
 ```

 WISCA uses a **Bayesian decision model** to integrate data from multiple pathogens, improving empirical therapy guidance, especially for low-incidence infections. It is **pathogen-agnostic**, meaning results are syndrome-based rather than stratified by microorganism.
@@ -323,8 +329,10 @@ For **patient- or syndrome-specific WISCA**, run the function on a grouped `tibb
 ```{r wisca_grouped}
 example_isolates %>%
  top_n_microorganisms(n = 10) %>%
-  group_by(age_group = age_groups(age, c(25, 50, 75)),
-           gender) %>%
+  group_by(
+    age_group = age_groups(age, c(25, 50, 75)),
+    gender
+  ) %>%
  wisca(antibiotics = c("TZP", "TZP+TOB", "TZP+GEN"))
 ```

@@ -379,17 +387,21 @@ We can visualise MIC distributions and their SIR interpretations using `ggplot2`

 ```{r mic_plot}
 # add a group
-my_data$group <- rep(c("A", "B", "C", "D"), each = 25) 
+my_data$group <- rep(c("A", "B", "C", "D"), each = 25)

-ggplot(my_data,
-       aes(x = group, y = MIC, colour = SIR)) +
+ggplot(
+  my_data,
+  aes(x = group, y = MIC, colour = SIR)
+) +
  geom_jitter(width = 0.2, size = 2) +
  geom_boxplot(fill = NA, colour = "grey40") +
  scale_y_mic() +
  scale_colour_sir() +
-  labs(title = "MIC Distribution and SIR Interpretation",
-       x = "Sample Groups",
-       y = "MIC (mg/L)")
+  labs(
+    title = "MIC Distribution and SIR Interpretation",
+    x = "Sample Groups",
+    y = "MIC (mg/L)"
+  )
 ```

 This plot provides an intuitive way to assess susceptibility patterns across different groups while incorporating clinical breakpoints.  
--- a/vignettes/AMR_with_tidymodels.Rmd
+++ b/vignettes/AMR_with_tidymodels.Rmd
@@ -53,8 +53,8 @@ We begin by loading the required libraries and preparing the `example_isolates`

 ```{r lib packages, message = FALSE, warning = FALSE, results = 'asis'}
 # Load required libraries
-library(AMR)          # For AMR data analysis
-library(tidymodels)   # For machine learning workflows, and data manipulation (dplyr, tidyr, ...)
+library(AMR) # For AMR data analysis
+library(tidymodels) # For machine learning workflows, and data manipulation (dplyr, tidyr, ...)
 ```

 Prepare the data:
@@ -68,13 +68,19 @@ data <- example_isolates %>%
  # select AB results dynamically
  select(mo, aminoglycosides(), betalactams()) %>%
  # replace NAs with NI (not-interpretable)
-   mutate(across(where(is.sir),
-                 ~replace_na(.x, "NI")),
-          # make factors of SIR columns
-          across(where(is.sir),
-                 as.integer),
-          # get Gramstain of microorganisms
-          mo = as.factor(mo_gramstain(mo))) %>%
+  mutate(
+    across(
+      where(is.sir),
+      ~ replace_na(.x, "NI")
+    ),
+    # make factors of SIR columns
+    across(
+      where(is.sir),
+      as.integer
+    ),
+    # get Gramstain of microorganisms
+    mo = as.factor(mo_gramstain(mo))
+  ) %>%
  # drop NAs - the ones without a Gramstain (fungi, etc.)
  drop_na()
 ```
@@ -149,7 +155,7 @@ To train the model, we split the data into training and testing sets. Then, we f
 set.seed(123) # For reproducibility
 data_split <- initial_split(data, prop = 0.8) # 80% training, 20% testing
 training_data <- training(data_split) # Training set
-testing_data <- testing(data_split)   # Testing set
+testing_data <- testing(data_split) # Testing set

 # Fit the workflow to the training data
 fitted_workflow <- resistance_workflow %>%
@@ -168,7 +174,7 @@ Next, we evaluate the model on the testing data.
 ```{r}
 # Make predictions on the testing set
 predictions <- fitted_workflow %>%
-  predict(testing_data)                # Generate predictions
+  predict(testing_data) # Generate predictions
 probabilities <- fitted_workflow %>%
  predict(testing_data, type = "prob") # Generate probabilities

@@ -266,8 +272,8 @@ testing_data <- testing(split)

 # Define the recipe
 mic_recipe <- recipe(esbl ~ ., data = training_data) %>%
-  remove_role(genus, old_role = "predictor") %>%  # Remove non-informative variable
-  step_mic_log2(all_mic_predictors())             # Log2 transform all MIC predictors
+  remove_role(genus, old_role = "predictor") %>% # Remove non-informative variable
+  step_mic_log2(all_mic_predictors()) # Log2 transform all MIC predictors

 prep(mic_recipe)
 ```
@@ -341,9 +347,11 @@ library(ggplot2)

 ggplot(predictions, aes(x = esbl, fill = .pred_class)) +
  geom_bar(position = "stack") +
-  labs(title = "Predicted vs Actual ESBL Status",
-       x = "Actual ESBL",
-       y = "Count") +
+  labs(
+    title = "Predicted vs Actual ESBL Status",
+    x = "Actual ESBL",
+    y = "Count"
+  ) +
  theme_minimal()
 ```

@@ -351,18 +359,27 @@ And plot the certainties too - how certain were the actual predictions?

 ```{r}
 predictions %>%
-  mutate(certainty = ifelse(.pred_class == "FALSE",
-                            .pred_FALSE,
-                            .pred_TRUE),
-         correct = ifelse(esbl == .pred_class, "Right", "Wrong")) %>%
-  ggplot(aes(x = seq_len(nrow(predictions)),
-             y = certainty,
-             colour = correct)) +
-  scale_colour_manual(values = c(Right = "green3", Wrong = "red2"),
-                      name = "Correct?") +
+  mutate(
+    certainty = ifelse(.pred_class == "FALSE",
+      .pred_FALSE,
+      .pred_TRUE
+    ),
+    correct = ifelse(esbl == .pred_class, "Right", "Wrong")
+  ) %>%
+  ggplot(aes(
+    x = seq_len(nrow(predictions)),
+    y = certainty,
+    colour = correct
+  )) +
+  scale_colour_manual(
+    values = c(Right = "green3", Wrong = "red2"),
+    name = "Correct?"
+  ) +
  geom_point() +
-  scale_y_continuous(labels = function(x) paste0(x * 100, "%"),
-                     limits = c(0.5, 1)) +
+  scale_y_continuous(
+    labels = function(x) paste0(x * 100, "%"),
+    limits = c(0.5, 1)
+  ) +
  theme_minimal()
 ```

@@ -399,13 +416,18 @@ library(tidymodels)
 # Transform dataset
 data_time <- example_isolates %>%
  top_n_microorganisms(n = 10) %>% # Filter on the top #10 species
-  mutate(year = as.integer(format(date, "%Y")),  # Extract year from date
-         gramstain = mo_gramstain(mo)) %>% # Get taxonomic names
+  mutate(
+    year = as.integer(format(date, "%Y")), # Extract year from date
+    gramstain = mo_gramstain(mo)
+  ) %>% # Get taxonomic names
  group_by(year, gramstain) %>%
-  summarise(across(c(AMX, AMC, CIP), 
-                   function(x) resistance(x, minimum = 0),
-                   .names = "res_{.col}"), 
-            .groups = "drop") %>% 
+  summarise(
+    across(c(AMX, AMC, CIP),
+      function(x) resistance(x, minimum = 0),
+      .names = "res_{.col}"
+    ),
+    .groups = "drop"
+  ) %>%
  filter(!is.na(res_AMX) & !is.na(res_AMC) & !is.na(res_CIP)) # Drop missing values

 data_time
@@ -426,9 +448,9 @@ We now define the modelling workflow, which consists of a preprocessing step, a
 ```{r}
 # Define the recipe
 resistance_recipe_time <- recipe(res_AMX ~ year + gramstain, data = data_time) %>%
-  step_dummy(gramstain, one_hot = TRUE) %>%  # Convert categorical to numerical
-  step_normalize(year) %>%  # Normalise year for better model performance
-  step_nzv(all_predictors())  # Remove near-zero variance predictors
+  step_dummy(gramstain, one_hot = TRUE) %>% # Convert categorical to numerical
+  step_normalize(year) %>% # Normalise year for better model performance
+  step_nzv(all_predictors()) # Remove near-zero variance predictors

 resistance_recipe_time
 ```
@@ -514,9 +536,11 @@ library(ggplot2)
 ggplot(predictions_time, aes(x = year)) +
  geom_point(aes(y = res_AMX, color = "Actual")) +
  geom_line(aes(y = .pred, color = "Predicted")) +
-  labs(title = "Predicted vs Actual AMX Resistance Over Time",
-       x = "Year",
-       y = "Resistance Proportion") +
+  labs(
+    title = "Predicted vs Actual AMX Resistance Over Time",
+    x = "Year",
+    y = "Resistance Proportion"
+  ) +
  theme_minimal()
 ```

@@ -525,13 +549,17 @@ Additionally, we can visualise resistance trends in `ggplot2` and directly add l
 ```{r}
 ggplot(data_time, aes(x = year, y = res_AMX, color = gramstain)) +
  geom_line() +
-  labs(title = "AMX Resistance Trends",
-       x = "Year",
-       y = "Resistance Proportion") +
+  labs(
+    title = "AMX Resistance Trends",
+    x = "Year",
+    y = "Resistance Proportion"
+  ) +
  # add a linear model directly in ggplot2:
-  geom_smooth(method = "lm",
-              formula = y ~ x,
-              alpha = 0.25) +
+  geom_smooth(
+    method = "lm",
+    formula = y ~ x,
+    alpha = 0.25
+  ) +
  theme_minimal()
 ```

--- a/vignettes/EUCAST.Rmd
+++ b/vignettes/EUCAST.Rmd
@@ -80,7 +80,7 @@ data <- tibble::tibble(
  CAZ = "-", # Ceftazidime
  CXM = "-", # Cefuroxime
  PEN = "S", # Benzylpenicillin
-  FOX = "S"  # Cefoxitin
+  FOX = "S" # Cefoxitin
 )
 ```
 ```{r, eval = FALSE}
--- a/vignettes/WISCA.Rmd
+++ b/vignettes/WISCA.Rmd
@@ -147,31 +147,35 @@ data$syndrome <- ifelse(data$mo %like% "coli", "UTI", "No UTI")

 ```{r}
 wisca(data,
-      antimicrobials = c("AMC", "CIP", "GEN"))
+  antimicrobials = c("AMC", "CIP", "GEN")
+)
 ```

 ### Use combination regimens

 ```{r}
 wisca(data,
-      antimicrobials = c("AMC", "AMC + CIP", "AMC + GEN"))
+  antimicrobials = c("AMC", "AMC + CIP", "AMC + GEN")
+)
 ```

 ### Stratify by syndrome

 ```{r}
 wisca(data,
-      antimicrobials = c("AMC", "AMC + CIP", "AMC + GEN"),
-      syndromic_group = "syndrome")
+  antimicrobials = c("AMC", "AMC + CIP", "AMC + GEN"),
+  syndromic_group = "syndrome"
+)
 ```

 The `AMR` package is available in `r length(AMR:::LANGUAGES_SUPPORTED)` languages, which can all be used for the `wisca()` function too:

 ```{r}
 wisca(data,
-      antimicrobials = c("AMC", "AMC + CIP", "AMC + GEN"),
-      syndromic_group = gsub("UTI", "UCI", data$syndrome),
-      language = "Spanish")
+  antimicrobials = c("AMC", "AMC + CIP", "AMC + GEN"),
+  syndromic_group = gsub("UTI", "UCI", data$syndrome),
+  language = "Spanish"
+)
 ```