mirror of
https://github.com/msberends/AMR.git
synced 2026-04-28 12:23:54 +02:00
Add add_if_missing parameter to control NA handling in interpretive rules (#264)
This commit is contained in:
@@ -268,7 +268,8 @@ To create a traditional antibiogram, simply state which antibiotics should be us
|
||||
|
||||
```{r trad}
|
||||
antibiogram(example_isolates,
|
||||
antibiotics = c(aminoglycosides(), carbapenems()))
|
||||
antibiotics = c(aminoglycosides(), carbapenems())
|
||||
)
|
||||
```
|
||||
|
||||
Notice that the `antibiogram()` function automatically prints in the right format when using Quarto or R Markdown (such as this page), and even applies italics for taxonomic names (by using `italicise_taxonomy()` internally).
|
||||
@@ -277,10 +278,11 @@ It also uses the language of your OS if this is either `r AMR:::vector_or(vapply
|
||||
|
||||
```{r trad2}
|
||||
antibiogram(example_isolates,
|
||||
mo_transform = "gramstain",
|
||||
antibiotics = aminoglycosides(),
|
||||
ab_transform = "name",
|
||||
language = "es")
|
||||
mo_transform = "gramstain",
|
||||
antibiotics = aminoglycosides(),
|
||||
ab_transform = "name",
|
||||
language = "es"
|
||||
)
|
||||
```
|
||||
|
||||
### Combined Antibiogram
|
||||
@@ -289,8 +291,9 @@ To create a combined antibiogram, use antibiotic codes or names with a plus `+`
|
||||
|
||||
```{r comb}
|
||||
combined_ab <- antibiogram(example_isolates,
|
||||
antibiotics = c("TZP", "TZP+TOB", "TZP+GEN"),
|
||||
ab_transform = NULL)
|
||||
antibiotics = c("TZP", "TZP+TOB", "TZP+GEN"),
|
||||
ab_transform = NULL
|
||||
)
|
||||
combined_ab
|
||||
```
|
||||
|
||||
@@ -300,8 +303,9 @@ To create a syndromic antibiogram, the `syndromic_group` argument must be used.
|
||||
|
||||
```{r synd}
|
||||
antibiogram(example_isolates,
|
||||
antibiotics = c(aminoglycosides(), carbapenems()),
|
||||
syndromic_group = "ward")
|
||||
antibiotics = c(aminoglycosides(), carbapenems()),
|
||||
syndromic_group = "ward"
|
||||
)
|
||||
```
|
||||
|
||||
### Weighted-Incidence Syndromic Combination Antibiogram (WISCA)
|
||||
@@ -310,8 +314,10 @@ To create a **Weighted-Incidence Syndromic Combination Antibiogram (WISCA)**, si
|
||||
|
||||
```{r wisca}
|
||||
example_isolates %>%
|
||||
wisca(antibiotics = c("TZP", "TZP+TOB", "TZP+GEN"),
|
||||
minimum = 10) # Recommended threshold: ≥30
|
||||
wisca(
|
||||
antibiotics = c("TZP", "TZP+TOB", "TZP+GEN"),
|
||||
minimum = 10
|
||||
) # Recommended threshold: ≥30
|
||||
```
|
||||
|
||||
WISCA uses a **Bayesian decision model** to integrate data from multiple pathogens, improving empirical therapy guidance, especially for low-incidence infections. It is **pathogen-agnostic**, meaning results are syndrome-based rather than stratified by microorganism.
|
||||
@@ -323,8 +329,10 @@ For **patient- or syndrome-specific WISCA**, run the function on a grouped `tibb
|
||||
```{r wisca_grouped}
|
||||
example_isolates %>%
|
||||
top_n_microorganisms(n = 10) %>%
|
||||
group_by(age_group = age_groups(age, c(25, 50, 75)),
|
||||
gender) %>%
|
||||
group_by(
|
||||
age_group = age_groups(age, c(25, 50, 75)),
|
||||
gender
|
||||
) %>%
|
||||
wisca(antibiotics = c("TZP", "TZP+TOB", "TZP+GEN"))
|
||||
```
|
||||
|
||||
@@ -379,17 +387,21 @@ We can visualise MIC distributions and their SIR interpretations using `ggplot2`
|
||||
|
||||
```{r mic_plot}
|
||||
# add a group
|
||||
my_data$group <- rep(c("A", "B", "C", "D"), each = 25)
|
||||
my_data$group <- rep(c("A", "B", "C", "D"), each = 25)
|
||||
|
||||
ggplot(my_data,
|
||||
aes(x = group, y = MIC, colour = SIR)) +
|
||||
ggplot(
|
||||
my_data,
|
||||
aes(x = group, y = MIC, colour = SIR)
|
||||
) +
|
||||
geom_jitter(width = 0.2, size = 2) +
|
||||
geom_boxplot(fill = NA, colour = "grey40") +
|
||||
scale_y_mic() +
|
||||
scale_colour_sir() +
|
||||
labs(title = "MIC Distribution and SIR Interpretation",
|
||||
x = "Sample Groups",
|
||||
y = "MIC (mg/L)")
|
||||
labs(
|
||||
title = "MIC Distribution and SIR Interpretation",
|
||||
x = "Sample Groups",
|
||||
y = "MIC (mg/L)"
|
||||
)
|
||||
```
|
||||
|
||||
This plot provides an intuitive way to assess susceptibility patterns across different groups while incorporating clinical breakpoints.
|
||||
|
||||
@@ -53,8 +53,8 @@ We begin by loading the required libraries and preparing the `example_isolates`
|
||||
|
||||
```{r lib packages, message = FALSE, warning = FALSE, results = 'asis'}
|
||||
# Load required libraries
|
||||
library(AMR) # For AMR data analysis
|
||||
library(tidymodels) # For machine learning workflows, and data manipulation (dplyr, tidyr, ...)
|
||||
library(AMR) # For AMR data analysis
|
||||
library(tidymodels) # For machine learning workflows, and data manipulation (dplyr, tidyr, ...)
|
||||
```
|
||||
|
||||
Prepare the data:
|
||||
@@ -68,13 +68,19 @@ data <- example_isolates %>%
|
||||
# select AB results dynamically
|
||||
select(mo, aminoglycosides(), betalactams()) %>%
|
||||
# replace NAs with NI (not-interpretable)
|
||||
mutate(across(where(is.sir),
|
||||
~replace_na(.x, "NI")),
|
||||
# make factors of SIR columns
|
||||
across(where(is.sir),
|
||||
as.integer),
|
||||
# get Gramstain of microorganisms
|
||||
mo = as.factor(mo_gramstain(mo))) %>%
|
||||
mutate(
|
||||
across(
|
||||
where(is.sir),
|
||||
~ replace_na(.x, "NI")
|
||||
),
|
||||
# convert SIR columns to integers
|
||||
across(
|
||||
where(is.sir),
|
||||
as.integer
|
||||
),
|
||||
# get Gramstain of microorganisms
|
||||
mo = as.factor(mo_gramstain(mo))
|
||||
) %>%
|
||||
# drop NAs - the ones without a Gramstain (fungi, etc.)
|
||||
drop_na()
|
||||
```
|
||||
@@ -149,7 +155,7 @@ To train the model, we split the data into training and testing sets. Then, we f
|
||||
set.seed(123) # For reproducibility
|
||||
data_split <- initial_split(data, prop = 0.8) # 80% training, 20% testing
|
||||
training_data <- training(data_split) # Training set
|
||||
testing_data <- testing(data_split) # Testing set
|
||||
testing_data <- testing(data_split) # Testing set
|
||||
|
||||
# Fit the workflow to the training data
|
||||
fitted_workflow <- resistance_workflow %>%
|
||||
@@ -168,7 +174,7 @@ Next, we evaluate the model on the testing data.
|
||||
```{r}
|
||||
# Make predictions on the testing set
|
||||
predictions <- fitted_workflow %>%
|
||||
predict(testing_data) # Generate predictions
|
||||
predict(testing_data) # Generate predictions
|
||||
probabilities <- fitted_workflow %>%
|
||||
predict(testing_data, type = "prob") # Generate probabilities
|
||||
|
||||
@@ -266,8 +272,8 @@ testing_data <- testing(split)
|
||||
|
||||
# Define the recipe
|
||||
mic_recipe <- recipe(esbl ~ ., data = training_data) %>%
|
||||
remove_role(genus, old_role = "predictor") %>% # Remove non-informative variable
|
||||
step_mic_log2(all_mic_predictors()) # Log2 transform all MIC predictors
|
||||
remove_role(genus, old_role = "predictor") %>% # Remove non-informative variable
|
||||
step_mic_log2(all_mic_predictors()) # Log2 transform all MIC predictors
|
||||
|
||||
prep(mic_recipe)
|
||||
```
|
||||
@@ -341,9 +347,11 @@ library(ggplot2)
|
||||
|
||||
ggplot(predictions, aes(x = esbl, fill = .pred_class)) +
|
||||
geom_bar(position = "stack") +
|
||||
labs(title = "Predicted vs Actual ESBL Status",
|
||||
x = "Actual ESBL",
|
||||
y = "Count") +
|
||||
labs(
|
||||
title = "Predicted vs Actual ESBL Status",
|
||||
x = "Actual ESBL",
|
||||
y = "Count"
|
||||
) +
|
||||
theme_minimal()
|
||||
```
|
||||
|
||||
@@ -351,18 +359,27 @@ And plot the certainties too - how certain were the actual predictions?
|
||||
|
||||
```{r}
|
||||
predictions %>%
|
||||
mutate(certainty = ifelse(.pred_class == "FALSE",
|
||||
.pred_FALSE,
|
||||
.pred_TRUE),
|
||||
correct = ifelse(esbl == .pred_class, "Right", "Wrong")) %>%
|
||||
ggplot(aes(x = seq_len(nrow(predictions)),
|
||||
y = certainty,
|
||||
colour = correct)) +
|
||||
scale_colour_manual(values = c(Right = "green3", Wrong = "red2"),
|
||||
name = "Correct?") +
|
||||
mutate(
|
||||
certainty = ifelse(.pred_class == "FALSE",
|
||||
.pred_FALSE,
|
||||
.pred_TRUE
|
||||
),
|
||||
correct = ifelse(esbl == .pred_class, "Right", "Wrong")
|
||||
) %>%
|
||||
ggplot(aes(
|
||||
x = seq_len(nrow(predictions)),
|
||||
y = certainty,
|
||||
colour = correct
|
||||
)) +
|
||||
scale_colour_manual(
|
||||
values = c(Right = "green3", Wrong = "red2"),
|
||||
name = "Correct?"
|
||||
) +
|
||||
geom_point() +
|
||||
scale_y_continuous(labels = function(x) paste0(x * 100, "%"),
|
||||
limits = c(0.5, 1)) +
|
||||
scale_y_continuous(
|
||||
labels = function(x) paste0(x * 100, "%"),
|
||||
limits = c(0.5, 1)
|
||||
) +
|
||||
theme_minimal()
|
||||
```
|
||||
|
||||
@@ -399,13 +416,18 @@ library(tidymodels)
|
||||
# Transform dataset
|
||||
data_time <- example_isolates %>%
|
||||
top_n_microorganisms(n = 10) %>% # Filter on the top 10 species
|
||||
mutate(year = as.integer(format(date, "%Y")), # Extract year from date
|
||||
gramstain = mo_gramstain(mo)) %>% # Get taxonomic names
|
||||
mutate(
|
||||
year = as.integer(format(date, "%Y")), # Extract year from date
|
||||
gramstain = mo_gramstain(mo)
|
||||
) %>% # Get Gram stain of microorganisms
|
||||
group_by(year, gramstain) %>%
|
||||
summarise(across(c(AMX, AMC, CIP),
|
||||
function(x) resistance(x, minimum = 0),
|
||||
.names = "res_{.col}"),
|
||||
.groups = "drop") %>%
|
||||
summarise(
|
||||
across(c(AMX, AMC, CIP),
|
||||
function(x) resistance(x, minimum = 0),
|
||||
.names = "res_{.col}"
|
||||
),
|
||||
.groups = "drop"
|
||||
) %>%
|
||||
filter(!is.na(res_AMX) & !is.na(res_AMC) & !is.na(res_CIP)) # Drop missing values
|
||||
|
||||
data_time
|
||||
@@ -426,9 +448,9 @@ We now define the modelling workflow, which consists of a preprocessing step, a
|
||||
```{r}
|
||||
# Define the recipe
|
||||
resistance_recipe_time <- recipe(res_AMX ~ year + gramstain, data = data_time) %>%
|
||||
step_dummy(gramstain, one_hot = TRUE) %>% # Convert categorical to numerical
|
||||
step_normalize(year) %>% # Normalise year for better model performance
|
||||
step_nzv(all_predictors()) # Remove near-zero variance predictors
|
||||
step_dummy(gramstain, one_hot = TRUE) %>% # Convert categorical to numerical
|
||||
step_normalize(year) %>% # Normalise year for better model performance
|
||||
step_nzv(all_predictors()) # Remove near-zero variance predictors
|
||||
|
||||
resistance_recipe_time
|
||||
```
|
||||
@@ -514,9 +536,11 @@ library(ggplot2)
|
||||
ggplot(predictions_time, aes(x = year)) +
|
||||
geom_point(aes(y = res_AMX, color = "Actual")) +
|
||||
geom_line(aes(y = .pred, color = "Predicted")) +
|
||||
labs(title = "Predicted vs Actual AMX Resistance Over Time",
|
||||
x = "Year",
|
||||
y = "Resistance Proportion") +
|
||||
labs(
|
||||
title = "Predicted vs Actual AMX Resistance Over Time",
|
||||
x = "Year",
|
||||
y = "Resistance Proportion"
|
||||
) +
|
||||
theme_minimal()
|
||||
```
|
||||
|
||||
@@ -525,13 +549,17 @@ Additionally, we can visualise resistance trends in `ggplot2` and directly add l
|
||||
```{r}
|
||||
ggplot(data_time, aes(x = year, y = res_AMX, color = gramstain)) +
|
||||
geom_line() +
|
||||
labs(title = "AMX Resistance Trends",
|
||||
x = "Year",
|
||||
y = "Resistance Proportion") +
|
||||
labs(
|
||||
title = "AMX Resistance Trends",
|
||||
x = "Year",
|
||||
y = "Resistance Proportion"
|
||||
) +
|
||||
# add a linear model directly in ggplot2:
|
||||
geom_smooth(method = "lm",
|
||||
formula = y ~ x,
|
||||
alpha = 0.25) +
|
||||
geom_smooth(
|
||||
method = "lm",
|
||||
formula = y ~ x,
|
||||
alpha = 0.25
|
||||
) +
|
||||
theme_minimal()
|
||||
```
|
||||
|
||||
|
||||
@@ -80,7 +80,7 @@ data <- tibble::tibble(
|
||||
CAZ = "-", # Ceftazidime
|
||||
CXM = "-", # Cefuroxime
|
||||
PEN = "S", # Benzylpenicillin
|
||||
FOX = "S" # Cefoxitin
|
||||
FOX = "S" # Cefoxitin
|
||||
)
|
||||
```
|
||||
```{r, eval = FALSE}
|
||||
|
||||
@@ -147,31 +147,35 @@ data$syndrome <- ifelse(data$mo %like% "coli", "UTI", "No UTI")
|
||||
|
||||
```{r}
|
||||
wisca(data,
|
||||
antimicrobials = c("AMC", "CIP", "GEN"))
|
||||
antimicrobials = c("AMC", "CIP", "GEN")
|
||||
)
|
||||
```
|
||||
|
||||
### Use combination regimens
|
||||
|
||||
```{r}
|
||||
wisca(data,
|
||||
antimicrobials = c("AMC", "AMC + CIP", "AMC + GEN"))
|
||||
antimicrobials = c("AMC", "AMC + CIP", "AMC + GEN")
|
||||
)
|
||||
```
|
||||
|
||||
### Stratify by syndrome
|
||||
|
||||
```{r}
|
||||
wisca(data,
|
||||
antimicrobials = c("AMC", "AMC + CIP", "AMC + GEN"),
|
||||
syndromic_group = "syndrome")
|
||||
antimicrobials = c("AMC", "AMC + CIP", "AMC + GEN"),
|
||||
syndromic_group = "syndrome"
|
||||
)
|
||||
```
|
||||
|
||||
The `AMR` package is available in `r length(AMR:::LANGUAGES_SUPPORTED)` languages, which can all be used for the `wisca()` function too:
|
||||
|
||||
```{r}
|
||||
wisca(data,
|
||||
antimicrobials = c("AMC", "AMC + CIP", "AMC + GEN"),
|
||||
syndromic_group = gsub("UTI", "UCI", data$syndrome),
|
||||
language = "Spanish")
|
||||
antimicrobials = c("AMC", "AMC + CIP", "AMC + GEN"),
|
||||
syndromic_group = gsub("UTI", "UCI", data$syndrome),
|
||||
language = "Spanish"
|
||||
)
|
||||
```
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user