1
0
mirror of https://github.com/msberends/AMR.git synced 2026-04-28 12:23:54 +02:00

Add add_if_missing parameter to control NA handling in interpretive rules (#264)

This commit is contained in:
Matthijs Berends
2026-04-21 21:53:43 +02:00
committed by GitHub
parent fb8758f36b
commit 8ff5d4472a
46 changed files with 1232 additions and 1016 deletions

View File

@@ -268,7 +268,8 @@ To create a traditional antibiogram, simply state which antibiotics should be us
```{r trad}
antibiogram(example_isolates,
antibiotics = c(aminoglycosides(), carbapenems()))
antibiotics = c(aminoglycosides(), carbapenems())
)
```
Notice that the `antibiogram()` function automatically prints in the right format when using Quarto or R Markdown (such as this page), and even applies italics for taxonomic names (by using `italicise_taxonomy()` internally).
@@ -277,10 +278,11 @@ It also uses the language of your OS if this is either `r AMR:::vector_or(vapply
```{r trad2}
antibiogram(example_isolates,
mo_transform = "gramstain",
antibiotics = aminoglycosides(),
ab_transform = "name",
language = "es")
mo_transform = "gramstain",
antibiotics = aminoglycosides(),
ab_transform = "name",
language = "es"
)
```
### Combined Antibiogram
@@ -289,8 +291,9 @@ To create a combined antibiogram, use antibiotic codes or names with a plus `+`
```{r comb}
combined_ab <- antibiogram(example_isolates,
antibiotics = c("TZP", "TZP+TOB", "TZP+GEN"),
ab_transform = NULL)
antibiotics = c("TZP", "TZP+TOB", "TZP+GEN"),
ab_transform = NULL
)
combined_ab
```
@@ -300,8 +303,9 @@ To create a syndromic antibiogram, the `syndromic_group` argument must be used.
```{r synd}
antibiogram(example_isolates,
antibiotics = c(aminoglycosides(), carbapenems()),
syndromic_group = "ward")
antibiotics = c(aminoglycosides(), carbapenems()),
syndromic_group = "ward"
)
```
### Weighted-Incidence Syndromic Combination Antibiogram (WISCA)
@@ -310,8 +314,10 @@ To create a **Weighted-Incidence Syndromic Combination Antibiogram (WISCA)**, si
```{r wisca}
example_isolates %>%
wisca(antibiotics = c("TZP", "TZP+TOB", "TZP+GEN"),
minimum = 10) # Recommended threshold: ≥30
wisca(
antibiotics = c("TZP", "TZP+TOB", "TZP+GEN"),
minimum = 10
) # Recommended threshold: ≥30
```
WISCA uses a **Bayesian decision model** to integrate data from multiple pathogens, improving empirical therapy guidance, especially for low-incidence infections. It is **pathogen-agnostic**, meaning results are syndrome-based rather than stratified by microorganism.
@@ -323,8 +329,10 @@ For **patient- or syndrome-specific WISCA**, run the function on a grouped `tibb
```{r wisca_grouped}
example_isolates %>%
top_n_microorganisms(n = 10) %>%
group_by(age_group = age_groups(age, c(25, 50, 75)),
gender) %>%
group_by(
age_group = age_groups(age, c(25, 50, 75)),
gender
) %>%
wisca(antibiotics = c("TZP", "TZP+TOB", "TZP+GEN"))
```
@@ -379,17 +387,21 @@ We can visualise MIC distributions and their SIR interpretations using `ggplot2`
```{r mic_plot}
# add a group
my_data$group <- rep(c("A", "B", "C", "D"), each = 25)
my_data$group <- rep(c("A", "B", "C", "D"), each = 25)
ggplot(my_data,
aes(x = group, y = MIC, colour = SIR)) +
ggplot(
my_data,
aes(x = group, y = MIC, colour = SIR)
) +
geom_jitter(width = 0.2, size = 2) +
geom_boxplot(fill = NA, colour = "grey40") +
scale_y_mic() +
scale_colour_sir() +
labs(title = "MIC Distribution and SIR Interpretation",
x = "Sample Groups",
y = "MIC (mg/L)")
labs(
title = "MIC Distribution and SIR Interpretation",
x = "Sample Groups",
y = "MIC (mg/L)"
)
```
This plot provides an intuitive way to assess susceptibility patterns across different groups while incorporating clinical breakpoints.

View File

@@ -53,8 +53,8 @@ We begin by loading the required libraries and preparing the `example_isolates`
```{r lib packages, message = FALSE, warning = FALSE, results = 'asis'}
# Load required libraries
library(AMR) # For AMR data analysis
library(tidymodels) # For machine learning workflows, and data manipulation (dplyr, tidyr, ...)
library(AMR) # For AMR data analysis
library(tidymodels) # For machine learning workflows, and data manipulation (dplyr, tidyr, ...)
```
Prepare the data:
@@ -68,13 +68,19 @@ data <- example_isolates %>%
# select AB results dynamically
select(mo, aminoglycosides(), betalactams()) %>%
# replace NAs with NI (not-interpretable)
mutate(across(where(is.sir),
~replace_na(.x, "NI")),
# make factors of SIR columns
across(where(is.sir),
as.integer),
# get Gramstain of microorganisms
mo = as.factor(mo_gramstain(mo))) %>%
mutate(
across(
where(is.sir),
~ replace_na(.x, "NI")
),
# make factors of SIR columns
across(
where(is.sir),
as.integer
),
# get Gramstain of microorganisms
mo = as.factor(mo_gramstain(mo))
) %>%
# drop NAs - the ones without a Gramstain (fungi, etc.)
drop_na()
```
@@ -149,7 +155,7 @@ To train the model, we split the data into training and testing sets. Then, we f
set.seed(123) # For reproducibility
data_split <- initial_split(data, prop = 0.8) # 80% training, 20% testing
training_data <- training(data_split) # Training set
testing_data <- testing(data_split) # Testing set
testing_data <- testing(data_split) # Testing set
# Fit the workflow to the training data
fitted_workflow <- resistance_workflow %>%
@@ -168,7 +174,7 @@ Next, we evaluate the model on the testing data.
```{r}
# Make predictions on the testing set
predictions <- fitted_workflow %>%
predict(testing_data) # Generate predictions
predict(testing_data) # Generate predictions
probabilities <- fitted_workflow %>%
predict(testing_data, type = "prob") # Generate probabilities
@@ -266,8 +272,8 @@ testing_data <- testing(split)
# Define the recipe
mic_recipe <- recipe(esbl ~ ., data = training_data) %>%
remove_role(genus, old_role = "predictor") %>% # Remove non-informative variable
step_mic_log2(all_mic_predictors()) # Log2 transform all MIC predictors
remove_role(genus, old_role = "predictor") %>% # Remove non-informative variable
step_mic_log2(all_mic_predictors()) # Log2 transform all MIC predictors
prep(mic_recipe)
```
@@ -341,9 +347,11 @@ library(ggplot2)
ggplot(predictions, aes(x = esbl, fill = .pred_class)) +
geom_bar(position = "stack") +
labs(title = "Predicted vs Actual ESBL Status",
x = "Actual ESBL",
y = "Count") +
labs(
title = "Predicted vs Actual ESBL Status",
x = "Actual ESBL",
y = "Count"
) +
theme_minimal()
```
@@ -351,18 +359,27 @@ And plot the certainties too - how certain were the actual predictions?
```{r}
predictions %>%
mutate(certainty = ifelse(.pred_class == "FALSE",
.pred_FALSE,
.pred_TRUE),
correct = ifelse(esbl == .pred_class, "Right", "Wrong")) %>%
ggplot(aes(x = seq_len(nrow(predictions)),
y = certainty,
colour = correct)) +
scale_colour_manual(values = c(Right = "green3", Wrong = "red2"),
name = "Correct?") +
mutate(
certainty = ifelse(.pred_class == "FALSE",
.pred_FALSE,
.pred_TRUE
),
correct = ifelse(esbl == .pred_class, "Right", "Wrong")
) %>%
ggplot(aes(
x = seq_len(nrow(predictions)),
y = certainty,
colour = correct
)) +
scale_colour_manual(
values = c(Right = "green3", Wrong = "red2"),
name = "Correct?"
) +
geom_point() +
scale_y_continuous(labels = function(x) paste0(x * 100, "%"),
limits = c(0.5, 1)) +
scale_y_continuous(
labels = function(x) paste0(x * 100, "%"),
limits = c(0.5, 1)
) +
theme_minimal()
```
@@ -399,13 +416,18 @@ library(tidymodels)
# Transform dataset
data_time <- example_isolates %>%
top_n_microorganisms(n = 10) %>% # Filter on the top #10 species
mutate(year = as.integer(format(date, "%Y")), # Extract year from date
gramstain = mo_gramstain(mo)) %>% # Get taxonomic names
mutate(
year = as.integer(format(date, "%Y")), # Extract year from date
gramstain = mo_gramstain(mo)
) %>% # Get taxonomic names
group_by(year, gramstain) %>%
summarise(across(c(AMX, AMC, CIP),
function(x) resistance(x, minimum = 0),
.names = "res_{.col}"),
.groups = "drop") %>%
summarise(
across(c(AMX, AMC, CIP),
function(x) resistance(x, minimum = 0),
.names = "res_{.col}"
),
.groups = "drop"
) %>%
filter(!is.na(res_AMX) & !is.na(res_AMC) & !is.na(res_CIP)) # Drop missing values
data_time
@@ -426,9 +448,9 @@ We now define the modelling workflow, which consists of a preprocessing step, a
```{r}
# Define the recipe
resistance_recipe_time <- recipe(res_AMX ~ year + gramstain, data = data_time) %>%
step_dummy(gramstain, one_hot = TRUE) %>% # Convert categorical to numerical
step_normalize(year) %>% # Normalise year for better model performance
step_nzv(all_predictors()) # Remove near-zero variance predictors
step_dummy(gramstain, one_hot = TRUE) %>% # Convert categorical to numerical
step_normalize(year) %>% # Normalise year for better model performance
step_nzv(all_predictors()) # Remove near-zero variance predictors
resistance_recipe_time
```
@@ -514,9 +536,11 @@ library(ggplot2)
ggplot(predictions_time, aes(x = year)) +
geom_point(aes(y = res_AMX, color = "Actual")) +
geom_line(aes(y = .pred, color = "Predicted")) +
labs(title = "Predicted vs Actual AMX Resistance Over Time",
x = "Year",
y = "Resistance Proportion") +
labs(
title = "Predicted vs Actual AMX Resistance Over Time",
x = "Year",
y = "Resistance Proportion"
) +
theme_minimal()
```
@@ -525,13 +549,17 @@ Additionally, we can visualise resistance trends in `ggplot2` and directly add l
```{r}
ggplot(data_time, aes(x = year, y = res_AMX, color = gramstain)) +
geom_line() +
labs(title = "AMX Resistance Trends",
x = "Year",
y = "Resistance Proportion") +
labs(
title = "AMX Resistance Trends",
x = "Year",
y = "Resistance Proportion"
) +
# add a linear model directly in ggplot2:
geom_smooth(method = "lm",
formula = y ~ x,
alpha = 0.25) +
geom_smooth(
method = "lm",
formula = y ~ x,
alpha = 0.25
) +
theme_minimal()
```

View File

@@ -80,7 +80,7 @@ data <- tibble::tibble(
CAZ = "-", # Ceftazidime
CXM = "-", # Cefuroxime
PEN = "S", # Benzylenicillin
FOX = "S" # Cefoxitin
FOX = "S" # Cefoxitin
)
```
```{r, eval = FALSE}

View File

@@ -147,31 +147,35 @@ data$syndrome <- ifelse(data$mo %like% "coli", "UTI", "No UTI")
```{r}
wisca(data,
antimicrobials = c("AMC", "CIP", "GEN"))
antimicrobials = c("AMC", "CIP", "GEN")
)
```
### Use combination regimens
```{r}
wisca(data,
antimicrobials = c("AMC", "AMC + CIP", "AMC + GEN"))
antimicrobials = c("AMC", "AMC + CIP", "AMC + GEN")
)
```
### Stratify by syndrome
```{r}
wisca(data,
antimicrobials = c("AMC", "AMC + CIP", "AMC + GEN"),
syndromic_group = "syndrome")
antimicrobials = c("AMC", "AMC + CIP", "AMC + GEN"),
syndromic_group = "syndrome"
)
```
The `AMR` package is available in `r length(AMR:::LANGUAGES_SUPPORTED)` languages, which can all be used for the `wisca()` function too:
```{r}
wisca(data,
antimicrobials = c("AMC", "AMC + CIP", "AMC + GEN"),
syndromic_group = gsub("UTI", "UCI", data$syndrome),
language = "Spanish")
antimicrobials = c("AMC", "AMC + CIP", "AMC + GEN"),
syndromic_group = gsub("UTI", "UCI", data$syndrome),
language = "Spanish"
)
```