1
0
mirror of https://github.com/msberends/AMR.git synced 2025-12-20 00:20:34 +01:00

2 Commits

Author SHA1 Message Date
Nick Thomson
68442f3042 (v3.0.0.9012) Python wrapper fix 2025-07-17 19:43:07 +02:00
39ea5f6597 (v3.0.0.9011) allow names for age_groups() 2025-07-17 19:32:46 +02:00
9 changed files with 144 additions and 118 deletions

View File

@@ -1,5 +1,5 @@
Package: AMR Package: AMR
Version: 3.0.0.9010 Version: 3.0.0.9012
Date: 2025-07-17 Date: 2025-07-17
Title: Antimicrobial Resistance Data Analysis Title: Antimicrobial Resistance Data Analysis
Description: Functions to simplify and standardise antimicrobial resistance (AMR) Description: Functions to simplify and standardise antimicrobial resistance (AMR)

View File

@@ -1,4 +1,4 @@
# AMR 3.0.0.9010 # AMR 3.0.0.9012
This is primarily a bugfix release, though we added one nice feature too. This is primarily a bugfix release, though we added one nice feature too.
@@ -17,6 +17,7 @@ This is primarily a bugfix release, though we added one nice feature too.
* Fixed a bug in `ggplot_sir()` when using `combine_SI = FALSE` (#213) * Fixed a bug in `ggplot_sir()` when using `combine_SI = FALSE` (#213)
* Fixed all plotting to contain a separate colour for SDD (susceptible dose-dependent) (#223) * Fixed all plotting to contain a separate colour for SDD (susceptible dose-dependent) (#223)
* Fixed some specific Dutch translations for antimicrobials * Fixed some specific Dutch translations for antimicrobials
* Added `names` to `age_groups()` so that custom names can be given (#215)
* Added note to `as.sir()` to make it explicit when higher-level taxonomic breakpoints are used (#218) * Added note to `as.sir()` to make it explicit when higher-level taxonomic breakpoints are used (#218)
* Updated `random_mic()` and `random_disk()` to set skewedness of the distribution and allow multiple microorganisms * Updated `random_mic()` and `random_disk()` to set skewedness of the distribution and allow multiple microorganisms

View File

@@ -519,7 +519,7 @@ word_wrap <- function(...,
) )
msg <- paste0(parts, collapse = "`") msg <- paste0(parts, collapse = "`")
} }
msg <- gsub("`(.+?)`", font_grey_bg("\\1"), msg) msg <- gsub("`(.+?)`", font_grey_bg("`\\1`"), msg)
# clean introduced whitespace in between fullstops # clean introduced whitespace in between fullstops
msg <- gsub("[.] +[.]", "..", msg) msg <- gsub("[.] +[.]", "..", msg)

12
R/age.R
View File

@@ -128,9 +128,10 @@ age <- function(x, reference = Sys.Date(), exact = FALSE, na.rm = FALSE, ...) {
#' Split Ages into Age Groups #' Split Ages into Age Groups
#' #'
#' Split ages into age groups defined by the `split` argument. This allows for easier demographic (antimicrobial resistance) analysis. #' Split ages into age groups defined by the `split` argument. This allows for easier demographic (antimicrobial resistance) analysis. The function returns an ordered [factor].
#' @param x Age, e.g. calculated with [age()]. #' @param x Age, e.g. calculated with [age()].
#' @param split_at Values to split `x` at - the default is age groups 0-11, 12-24, 25-54, 55-74 and 75+. See *Details*. #' @param split_at Values to split `x` at - the default is age groups 0-11, 12-24, 25-54, 55-74 and 75+. See *Details*.
#' @param names Optional names to be given to the various age groups.
#' @param na.rm A [logical] to indicate whether missing values should be removed. #' @param na.rm A [logical] to indicate whether missing values should be removed.
#' @details To split ages, the input for the `split_at` argument can be: #' @details To split ages, the input for the `split_at` argument can be:
#' #'
@@ -152,6 +153,7 @@ age <- function(x, reference = Sys.Date(), exact = FALSE, na.rm = FALSE, ...) {
#' #'
#' # split into 0-19, 20-49 and 50+ #' # split into 0-19, 20-49 and 50+
#' age_groups(ages, c(20, 50)) #' age_groups(ages, c(20, 50))
#' age_groups(ages, c(20, 50), names = c("Under 20 years", "20 to 50 years", "Over 50 years"))
#' #'
#' # split into groups of ten years #' # split into groups of ten years
#' age_groups(ages, 1:10 * 10) #' age_groups(ages, 1:10 * 10)
@@ -181,9 +183,10 @@ age <- function(x, reference = Sys.Date(), exact = FALSE, na.rm = FALSE, ...) {
#' ) #' )
#' } #' }
#' } #' }
age_groups <- function(x, split_at = c(12, 25, 55, 75), na.rm = FALSE) { age_groups <- function(x, split_at = c(0, 12, 25, 55, 75), names = NULL, na.rm = FALSE) {
meet_criteria(x, allow_class = c("numeric", "integer"), is_positive_or_zero = TRUE, is_finite = TRUE) meet_criteria(x, allow_class = c("numeric", "integer"), is_positive_or_zero = TRUE, is_finite = TRUE)
meet_criteria(split_at, allow_class = c("numeric", "integer", "character"), is_positive_or_zero = TRUE, is_finite = TRUE) meet_criteria(split_at, allow_class = c("numeric", "integer", "character"), is_positive_or_zero = TRUE, is_finite = TRUE)
meet_criteria(names, allow_class = "character", allow_NULL = TRUE)
meet_criteria(na.rm, allow_class = "logical", has_length = 1) meet_criteria(na.rm, allow_class = "logical", has_length = 1)
if (any(x < 0, na.rm = TRUE)) { if (any(x < 0, na.rm = TRUE)) {
@@ -224,6 +227,11 @@ age_groups <- function(x, split_at = c(12, 25, 55, 75), na.rm = FALSE) {
agegroups <- factor(lbls[y], levels = lbls, ordered = TRUE) agegroups <- factor(lbls[y], levels = lbls, ordered = TRUE)
if (!is.null(names)) {
stop_ifnot(length(names) == length(levels(agegroups)), "`names` must have the same length as the number of age groups (", length(levels(agegroups)), ").")
levels(agegroups) <- names
}
if (isTRUE(na.rm)) { if (isTRUE(na.rm)) {
agegroups <- agegroups[!is.na(agegroups)] agegroups <- agegroups[!is.na(agegroups)]
} }

View File

@@ -206,7 +206,7 @@ ggplot_sir <- function(data,
meet_criteria(minimum, allow_class = c("numeric", "integer"), has_length = 1, is_positive_or_zero = TRUE, is_finite = TRUE) meet_criteria(minimum, allow_class = c("numeric", "integer"), has_length = 1, is_positive_or_zero = TRUE, is_finite = TRUE)
language <- validate_language(language) language <- validate_language(language)
meet_criteria(nrow, allow_class = c("numeric", "integer"), has_length = 1, allow_NULL = TRUE, is_positive = TRUE, is_finite = TRUE) meet_criteria(nrow, allow_class = c("numeric", "integer"), has_length = 1, allow_NULL = TRUE, is_positive = TRUE, is_finite = TRUE)
meet_criteria(colours, allow_class = c("character", "logical")) meet_criteria(colours, allow_class = c("character", "logical"), allow_NULL = TRUE)
meet_criteria(datalabels, allow_class = "logical", has_length = 1) meet_criteria(datalabels, allow_class = "logical", has_length = 1)
meet_criteria(datalabels.size, allow_class = c("numeric", "integer"), has_length = 1, is_positive = TRUE, is_finite = TRUE) meet_criteria(datalabels.size, allow_class = c("numeric", "integer"), has_length = 1, is_positive = TRUE, is_finite = TRUE)
meet_criteria(datalabels.colour, allow_class = "character", has_length = 1) meet_criteria(datalabels.colour, allow_class = "character", has_length = 1)
@@ -246,7 +246,7 @@ ggplot_sir <- function(data,
) + ) +
theme_sir() theme_sir()
if (fill == "interpretation") { if (fill == "interpretation" && !is.null(colours) && !isFALSE(colours)) {
p <- suppressWarnings(p + scale_sir_colours(aesthetics = "fill", colours = colours)) p <- suppressWarnings(p + scale_sir_colours(aesthetics = "fill", colours = colours))
} }

View File

@@ -19,56 +19,59 @@
#' @keywords internal #' @keywords internal
#' @export #' @export
#' @examples #' @examples
#' library(tidymodels) #' if (require("tidymodels")) {
#' #'
#' # The below approach formed the basis for this paper: DOI 10.3389/fmicb.2025.1582703 #' # The below approach formed the basis for this paper: DOI 10.3389/fmicb.2025.1582703
#' # Presence of ESBL genes was predicted based on raw MIC values. #' # Presence of ESBL genes was predicted based on raw MIC values.
#' #'
#' #'
#' # example data set in the AMR package #' # example data set in the AMR package
#' esbl_isolates #' esbl_isolates
#' #'
#' # Prepare a binary outcome and convert to ordered factor #' # Prepare a binary outcome and convert to ordered factor
#' data <- esbl_isolates %>% #' data <- esbl_isolates %>%
#' mutate(esbl = factor(esbl, levels = c(FALSE, TRUE), ordered = TRUE)) #' mutate(esbl = factor(esbl, levels = c(FALSE, TRUE), ordered = TRUE))
#' #'
#' # Split into training and testing sets #' # Split into training and testing sets
#' split <- initial_split(data) #' split <- initial_split(data)
#' training_data <- training(split) #' training_data <- training(split)
#' testing_data <- testing(split) #' testing_data <- testing(split)
#' #'
#' # Create and prep a recipe with MIC log2 transformation #' # Create and prep a recipe with MIC log2 transformation
#' mic_recipe <- recipe(esbl ~ ., data = training_data) %>% #' mic_recipe <- recipe(esbl ~ ., data = training_data) %>%
#' # Optionally remove non-predictive variables
#' remove_role(genus, old_role = "predictor") %>%
#' # Apply the log2 transformation to all MIC predictors
#' step_mic_log2(all_mic_predictors()) %>%
#' prep()
#' #'
#' # View prepped recipe #' # Optionally remove non-predictive variables
#' mic_recipe #' remove_role(genus, old_role = "predictor") %>%
#' #'
#' # Apply the recipe to training and testing data #' # Apply the log2 transformation to all MIC predictors
#' out_training <- bake(mic_recipe, new_data = NULL) #' step_mic_log2(all_mic_predictors()) %>%
#' out_testing <- bake(mic_recipe, new_data = testing_data)
#' #'
#' # Fit a logistic regression model #' # And apply the preparation steps
#' fitted <- logistic_reg(mode = "classification") %>% #' prep()
#' set_engine("glm") %>%
#' fit(esbl ~ ., data = out_training)
#' #'
#' # Generate predictions on the test set #' # View prepped recipe
#' predictions <- predict(fitted, out_testing) %>% #' mic_recipe
#' bind_cols(out_testing)
#' #'
#' # Evaluate predictions using standard classification metrics #' # Apply the recipe to training and testing data
#' our_metrics <- metric_set(accuracy, kap, ppv, npv) #' out_training <- bake(mic_recipe, new_data = NULL)
#' metrics <- our_metrics(predictions, truth = esbl, estimate = .pred_class) #' out_testing <- bake(mic_recipe, new_data = testing_data)
#' #'
#' # Show performance: #' # Fit a logistic regression model
#' # - negative predictive value (NPV) of ~98% #' fitted <- logistic_reg(mode = "classification") %>%
#' # - positive predictive value (PPV) of ~94% #' set_engine("glm") %>%
#' metrics #' fit(esbl ~ ., data = out_training)
#'
#' # Generate predictions on the test set
#' predictions <- predict(fitted, out_testing) %>%
#' bind_cols(out_testing)
#'
#' # Evaluate predictions using standard classification metrics
#' our_metrics <- metric_set(accuracy, kap, ppv, npv)
#' metrics <- our_metrics(predictions, truth = esbl, estimate = .pred_class)
#'
#' # Show performance
#' metrics
#' }
all_mic <- function() { all_mic <- function() {
x <- tidymodels_amr_select(levels(NA_mic_)) x <- tidymodels_amr_select(levels(NA_mic_))
names(x) names(x)

View File

@@ -56,7 +56,8 @@ os.makedirs(r_lib_path, exist_ok=True)
os.environ['R_LIBS_SITE'] = r_lib_path os.environ['R_LIBS_SITE'] = r_lib_path
from rpy2 import robjects from rpy2 import robjects
from rpy2.robjects import pandas2ri from rpy2.robjects.conversion import localconverter
from rpy2.robjects import default_converter, numpy2ri, pandas2ri
from rpy2.robjects.packages import importr, isinstalled from rpy2.robjects.packages import importr, isinstalled
# Import base and utils # Import base and utils
@@ -94,27 +95,26 @@ if r_amr_version != python_amr_version:
print(f"AMR: Setting up R environment and AMR datasets...", flush=True) print(f"AMR: Setting up R environment and AMR datasets...", flush=True)
# Activate the automatic conversion between R and pandas DataFrames # Activate the automatic conversion between R and pandas DataFrames
pandas2ri.activate() with localconverter(default_converter + numpy2ri.converter + pandas2ri.converter):
# example_isolates
example_isolates = robjects.r('''
df <- AMR::example_isolates
df[] <- lapply(df, function(x) {
if (inherits(x, c("Date", "POSIXt", "factor"))) {
as.character(x)
} else {
x
}
})
df <- df[, !sapply(df, is.list)]
df
''')
example_isolates['date'] = pd.to_datetime(example_isolates['date'])
# example_isolates # microorganisms
example_isolates = pandas2ri.rpy2py(robjects.r(''' microorganisms = robjects.r('AMR::microorganisms[, !sapply(AMR::microorganisms, is.list)]')
df <- AMR::example_isolates antimicrobials = robjects.r('AMR::antimicrobials[, !sapply(AMR::antimicrobials, is.list)]')
df[] <- lapply(df, function(x) { clinical_breakpoints = robjects.r('AMR::clinical_breakpoints[, !sapply(AMR::clinical_breakpoints, is.list)]')
if (inherits(x, c("Date", "POSIXt", "factor"))) {
as.character(x)
} else {
x
}
})
df <- df[, !sapply(df, is.list)]
df
'''))
example_isolates['date'] = pd.to_datetime(example_isolates['date'])
# microorganisms
microorganisms = pandas2ri.rpy2py(robjects.r('AMR::microorganisms[, !sapply(AMR::microorganisms, is.list)]'))
antimicrobials = pandas2ri.rpy2py(robjects.r('AMR::antimicrobials[, !sapply(AMR::antimicrobials, is.list)]'))
clinical_breakpoints = pandas2ri.rpy2py(robjects.r('AMR::clinical_breakpoints[, !sapply(AMR::clinical_breakpoints, is.list)]'))
base.options(warn = 0) base.options(warn = 0)
@@ -129,16 +129,15 @@ echo "from .datasets import clinical_breakpoints" >> $init_file
# Write header to the functions Python file, including the convert_to_python function # Write header to the functions Python file, including the convert_to_python function
cat <<EOL > "$functions_file" cat <<EOL > "$functions_file"
import functools
import rpy2.robjects as robjects import rpy2.robjects as robjects
from rpy2.robjects.packages import importr from rpy2.robjects.packages import importr
from rpy2.robjects.vectors import StrVector, FactorVector, IntVector, FloatVector, DataFrame from rpy2.robjects.vectors import StrVector, FactorVector, IntVector, FloatVector, DataFrame
from rpy2.robjects import pandas2ri from rpy2.robjects.conversion import localconverter
from rpy2.robjects import default_converter, numpy2ri, pandas2ri
import pandas as pd import pandas as pd
import numpy as np import numpy as np
# Activate automatic conversion between R data frames and pandas data frames
pandas2ri.activate()
# Import the AMR R package # Import the AMR R package
amr_r = importr('AMR') amr_r = importr('AMR')
@@ -156,10 +155,8 @@ def convert_to_python(r_output):
return list(r_output) # Convert to a Python list of integers or floats return list(r_output) # Convert to a Python list of integers or floats
# Check if it's a pandas-compatible R data frame # Check if it's a pandas-compatible R data frame
elif isinstance(r_output, pd.DataFrame): elif isinstance(r_output, (pd.DataFrame, DataFrame)):
return r_output # Return as pandas DataFrame (already converted by pandas2ri) return r_output # Return as pandas DataFrame (already converted by pandas2ri)
elif isinstance(r_output, DataFrame):
return pandas2ri.rpy2py(r_output) # Return as pandas DataFrame
# Check if the input is a NumPy array and has a string data type # Check if the input is a NumPy array and has a string data type
if isinstance(r_output, np.ndarray) and np.issubdtype(r_output.dtype, np.str_): if isinstance(r_output, np.ndarray) and np.issubdtype(r_output.dtype, np.str_):
@@ -167,6 +164,15 @@ def convert_to_python(r_output):
# Fall-back # Fall-back
return r_output return r_output
def r_to_python(r_func):
"""Decorator that runs an rpy2 function under a localconverter
and then applies convert_to_python to its output."""
@functools.wraps(r_func)
def wrapper(*args, **kwargs):
with localconverter(default_converter + numpy2ri.converter + pandas2ri.converter):
return convert_to_python(r_func(*args, **kwargs))
return wrapper
EOL EOL
# Directory where the .Rd files are stored (update path as needed) # Directory where the .Rd files are stored (update path as needed)
@@ -246,11 +252,12 @@ for rd_file in "$rd_dir"/*.Rd; do
gsub("FALSE", "False", func_args) gsub("FALSE", "False", func_args)
gsub("NULL", "None", func_args) gsub("NULL", "None", func_args)
# Write the Python function definition to the output file # Write the Python function definition to the output file, using decorator
print "def " func_name_py "(" func_args "):" >> "'"$functions_file"'" print "@r_to_python" >> "'"$functions_file"'"
print " \"\"\"Please see our website of the R package for the full manual: https://amr-for-r.org\"\"\"" >> "'"$functions_file"'" print "def " func_name_py "(" func_args "):" >> "'"$functions_file"'"
print " return convert_to_python(amr_r." func_name_py "(" func_args "))" >> "'"$functions_file"'" print " \"\"\"Please see our website of the R package for the full manual: https://amr-for-r.org\"\"\"" >> "'"$functions_file"'"
print " return amr_r." func_name_py "(" func_args ")" >> "'"$functions_file"'"
print "from .functions import " func_name_py >> "'"$init_file"'" print "from .functions import " func_name_py >> "'"$init_file"'"
} }
' "$rd_file" ' "$rd_file"

View File

@@ -4,20 +4,23 @@
\alias{age_groups} \alias{age_groups}
\title{Split Ages into Age Groups} \title{Split Ages into Age Groups}
\usage{ \usage{
age_groups(x, split_at = c(12, 25, 55, 75), na.rm = FALSE) age_groups(x, split_at = c(0, 12, 25, 55, 75), names = NULL,
na.rm = FALSE)
} }
\arguments{ \arguments{
\item{x}{Age, e.g. calculated with \code{\link[=age]{age()}}.} \item{x}{Age, e.g. calculated with \code{\link[=age]{age()}}.}
\item{split_at}{Values to split \code{x} at - the default is age groups 0-11, 12-24, 25-54, 55-74 and 75+. See \emph{Details}.} \item{split_at}{Values to split \code{x} at - the default is age groups 0-11, 12-24, 25-54, 55-74 and 75+. See \emph{Details}.}
\item{names}{Optional names to be given to the various age groups.}
\item{na.rm}{A \link{logical} to indicate whether missing values should be removed.} \item{na.rm}{A \link{logical} to indicate whether missing values should be removed.}
} }
\value{ \value{
Ordered \link{factor} Ordered \link{factor}
} }
\description{ \description{
Split ages into age groups defined by the \code{split} argument. This allows for easier demographic (antimicrobial resistance) analysis. Split ages into age groups defined by the \code{split} argument. This allows for easier demographic (antimicrobial resistance) analysis. The function returns an ordered \link{factor}.
} }
\details{ \details{
To split ages, the input for the \code{split_at} argument can be: To split ages, the input for the \code{split_at} argument can be:
@@ -41,6 +44,7 @@ age_groups(ages, 50)
# split into 0-19, 20-49 and 50+ # split into 0-19, 20-49 and 50+
age_groups(ages, c(20, 50)) age_groups(ages, c(20, 50))
age_groups(ages, c(20, 50), names = c("Under 20 years", "20 to 50 years", "Over 50 years"))
# split into groups of ten years # split into groups of ten years
age_groups(ages, 1:10 * 10) age_groups(ages, 1:10 * 10)

View File

@@ -65,56 +65,59 @@ Pre-processing pipeline steps include:
These steps integrate with \code{recipes::recipe()} and work like standard preprocessing steps. They are useful for preparing data for modelling, especially with classification models. These steps integrate with \code{recipes::recipe()} and work like standard preprocessing steps. They are useful for preparing data for modelling, especially with classification models.
} }
\examples{ \examples{
library(tidymodels) if (require("tidymodels")) {
# The below approach formed the basis for this paper: DOI 10.3389/fmicb.2025.1582703 # The below approach formed the basis for this paper: DOI 10.3389/fmicb.2025.1582703
# Presence of ESBL genes was predicted based on raw MIC values. # Presence of ESBL genes was predicted based on raw MIC values.
# example data set in the AMR package # example data set in the AMR package
esbl_isolates esbl_isolates
# Prepare a binary outcome and convert to ordered factor # Prepare a binary outcome and convert to ordered factor
data <- esbl_isolates \%>\% data <- esbl_isolates \%>\%
mutate(esbl = factor(esbl, levels = c(FALSE, TRUE), ordered = TRUE)) mutate(esbl = factor(esbl, levels = c(FALSE, TRUE), ordered = TRUE))
# Split into training and testing sets # Split into training and testing sets
split <- initial_split(data) split <- initial_split(data)
training_data <- training(split) training_data <- training(split)
testing_data <- testing(split) testing_data <- testing(split)
# Create and prep a recipe with MIC log2 transformation # Create and prep a recipe with MIC log2 transformation
mic_recipe <- recipe(esbl ~ ., data = training_data) \%>\% mic_recipe <- recipe(esbl ~ ., data = training_data) \%>\%
# Optionally remove non-predictive variables
remove_role(genus, old_role = "predictor") \%>\%
# Apply the log2 transformation to all MIC predictors
step_mic_log2(all_mic_predictors()) \%>\%
prep()
# View prepped recipe # Optionally remove non-predictive variables
mic_recipe remove_role(genus, old_role = "predictor") \%>\%
# Apply the recipe to training and testing data # Apply the log2 transformation to all MIC predictors
out_training <- bake(mic_recipe, new_data = NULL) step_mic_log2(all_mic_predictors()) \%>\%
out_testing <- bake(mic_recipe, new_data = testing_data)
# Fit a logistic regression model # And apply the preparation steps
fitted <- logistic_reg(mode = "classification") \%>\% prep()
set_engine("glm") \%>\%
fit(esbl ~ ., data = out_training)
# Generate predictions on the test set # View prepped recipe
predictions <- predict(fitted, out_testing) \%>\% mic_recipe
bind_cols(out_testing)
# Evaluate predictions using standard classification metrics # Apply the recipe to training and testing data
our_metrics <- metric_set(accuracy, kap, ppv, npv) out_training <- bake(mic_recipe, new_data = NULL)
metrics <- our_metrics(predictions, truth = esbl, estimate = .pred_class) out_testing <- bake(mic_recipe, new_data = testing_data)
# Show performance: # Fit a logistic regression model
# - negative predictive value (NPV) of ~98\% fitted <- logistic_reg(mode = "classification") \%>\%
# - positive predictive value (PPV) of ~94\% set_engine("glm") \%>\%
metrics fit(esbl ~ ., data = out_training)
# Generate predictions on the test set
predictions <- predict(fitted, out_testing) \%>\%
bind_cols(out_testing)
# Evaluate predictions using standard classification metrics
our_metrics <- metric_set(accuracy, kap, ppv, npv)
metrics <- our_metrics(predictions, truth = esbl, estimate = .pred_class)
# Show performance
metrics
}
} }
\seealso{ \seealso{
\code{\link[recipes:recipe]{recipes::recipe()}}, \code{\link[=as.mic]{as.mic()}}, \code{\link[=as.sir]{as.sir()}} \code{\link[recipes:recipe]{recipes::recipe()}}, \code{\link[=as.mic]{as.mic()}}, \code{\link[=as.sir]{as.sir()}}