(v3.0.0.9012) Python wrapper fix

(v3.0.0.9011) allow names for age_groups()
2025-12-18 14:20:29 +01:00 · 2025-07-17 19:43:07 +02:00 · 2025-07-17 19:32:46 +02:00
9 changed files with 144 additions and 118 deletions
--- a/2
+++ b/2
@@ -1,5 +1,5 @@
 Package: AMR
-Version: 3.0.0.9010
+Version: 3.0.0.9012
 Date: 2025-07-17
 Title: Antimicrobial Resistance Data Analysis
 Description: Functions to simplify and standardise antimicrobial resistance (AMR)
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,4 +1,4 @@
-# AMR 3.0.0.9010
+# AMR 3.0.0.9012

 This is primarily a bugfix release, though we added one nice feature too.

@@ -17,6 +17,7 @@ This is primarily a bugfix release, though we added one nice feature too.
 * Fixed a bug in `ggplot_sir()` when using `combine_SI = FALSE` (#213)
 * Fixed all plotting to contain a separate colour for SDD (susceptible dose-dependent) (#223)
 * Fixed some specific Dutch translations for antimicrobials
+* Added `names` to `age_groups()` so that custom names can be given (#215)
 * Added note to `as.sir()` to make it explicit when higher-level taxonomic breakpoints are used (#218)
 * Updated `random_mic()` and `random_disk()` to set skewedness of the distribution and allow multiple microorganisms

--- a/R/aa_helper_functions.R
+++ b/R/aa_helper_functions.R
@@ -519,7 +519,7 @@ word_wrap <- function(...,
    )
    msg <- paste0(parts, collapse = "`")
  }
-  msg <- gsub("`(.+?)`", font_grey_bg("\\1"), msg)
+  msg <- gsub("`(.+?)`", font_grey_bg("`\\1`"), msg)

  # clean introduced whitespace in between fullstops
  msg <- gsub("[.] +[.]", "..", msg)
--- a/R/age.R
+++ b/R/age.R
@@ -128,9 +128,10 @@ age <- function(x, reference = Sys.Date(), exact = FALSE, na.rm = FALSE, ...) {

 #' Split Ages into Age Groups
 #'
-#' Split ages into age groups defined by the `split` argument. This allows for easier demographic (antimicrobial resistance) analysis.
+#' Split ages into age groups defined by the `split` argument. This allows for easier demographic (antimicrobial resistance) analysis. The function returns an ordered [factor].
 #' @param x Age, e.g. calculated with [age()].
 #' @param split_at Values to split `x` at - the default is age groups 0-11, 12-24, 25-54, 55-74 and 75+. See *Details*.
+#' @param names Optional names to be given to the various age groups.
 #' @param na.rm A [logical] to indicate whether missing values should be removed.
 #' @details To split ages, the input for the `split_at` argument can be:
 #'
@@ -152,6 +153,7 @@ age <- function(x, reference = Sys.Date(), exact = FALSE, na.rm = FALSE, ...) {
 #'
 #' # split into 0-19, 20-49 and 50+
 #' age_groups(ages, c(20, 50))
+#' age_groups(ages, c(20, 50), names = c("Under 20 years", "20 to 50 years", "Over 50 years"))
 #'
 #' # split into groups of ten years
 #' age_groups(ages, 1:10 * 10)
@@ -181,9 +183,10 @@ age <- function(x, reference = Sys.Date(), exact = FALSE, na.rm = FALSE, ...) {
 #'     )
 #' }
 #' }
-age_groups <- function(x, split_at = c(12, 25, 55, 75), na.rm = FALSE) {
+age_groups <- function(x, split_at = c(0, 12, 25, 55, 75), names = NULL, na.rm = FALSE) {
  meet_criteria(x, allow_class = c("numeric", "integer"), is_positive_or_zero = TRUE, is_finite = TRUE)
  meet_criteria(split_at, allow_class = c("numeric", "integer", "character"), is_positive_or_zero = TRUE, is_finite = TRUE)
+  meet_criteria(names, allow_class = "character", allow_NULL = TRUE)
  meet_criteria(na.rm, allow_class = "logical", has_length = 1)

  if (any(x < 0, na.rm = TRUE)) {
@@ -224,6 +227,11 @@ age_groups <- function(x, split_at = c(12, 25, 55, 75), na.rm = FALSE) {

  agegroups <- factor(lbls[y], levels = lbls, ordered = TRUE)

+  if (!is.null(names)) {
+    stop_ifnot(length(names) == length(levels(agegroups)), "`names` must have the same length as the number of age groups (", length(levels(agegroups)), ").")
+    levels(agegroups) <- names
+  }
+
  if (isTRUE(na.rm)) {
    agegroups <- agegroups[!is.na(agegroups)]
  }
--- a/R/ggplot_sir.R
+++ b/R/ggplot_sir.R
@@ -206,7 +206,7 @@ ggplot_sir <- function(data,
  meet_criteria(minimum, allow_class = c("numeric", "integer"), has_length = 1, is_positive_or_zero = TRUE, is_finite = TRUE)
  language <- validate_language(language)
  meet_criteria(nrow, allow_class = c("numeric", "integer"), has_length = 1, allow_NULL = TRUE, is_positive = TRUE, is_finite = TRUE)
-  meet_criteria(colours, allow_class = c("character", "logical"))
+  meet_criteria(colours, allow_class = c("character", "logical"), allow_NULL = TRUE)
  meet_criteria(datalabels, allow_class = "logical", has_length = 1)
  meet_criteria(datalabels.size, allow_class = c("numeric", "integer"), has_length = 1, is_positive = TRUE, is_finite = TRUE)
  meet_criteria(datalabels.colour, allow_class = "character", has_length = 1)
@@ -246,7 +246,7 @@ ggplot_sir <- function(data,
    ) +
    theme_sir()

-  if (fill == "interpretation") {
+  if (fill == "interpretation" && !is.null(colours) && !isFALSE(colours)) {
    p <- suppressWarnings(p + scale_sir_colours(aesthetics = "fill", colours = colours))
  }

--- a/R/tidymodels.R
+++ b/R/tidymodels.R
@@ -19,56 +19,59 @@
 #' @keywords internal
 #' @export
 #' @examples
-#' library(tidymodels)
+#' if (require("tidymodels")) {
 #'
-#' # The below approach formed the basis for this paper: DOI 10.3389/fmicb.2025.1582703
-#' # Presence of ESBL genes was predicted based on raw MIC values.
+#'   # The below approach formed the basis for this paper: DOI 10.3389/fmicb.2025.1582703
+#'   # Presence of ESBL genes was predicted based on raw MIC values.
 #'
 #'
-#' # example data set in the AMR package
-#' esbl_isolates
+#'   # example data set in the AMR package
+#'   esbl_isolates
 #'
-#' # Prepare a binary outcome and convert to ordered factor
-#' data <- esbl_isolates %>%
-#'   mutate(esbl = factor(esbl, levels = c(FALSE, TRUE), ordered = TRUE))
+#'   # Prepare a binary outcome and convert to ordered factor
+#'   data <- esbl_isolates %>%
+#'     mutate(esbl = factor(esbl, levels = c(FALSE, TRUE), ordered = TRUE))
 #'
-#' # Split into training and testing sets
-#' split <- initial_split(data)
-#' training_data <- training(split)
-#' testing_data <- testing(split)
+#'   # Split into training and testing sets
+#'   split <- initial_split(data)
+#'   training_data <- training(split)
+#'   testing_data <- testing(split)
 #'
-#' # Create and prep a recipe with MIC log2 transformation
-#' mic_recipe <- recipe(esbl ~ ., data = training_data) %>%
-#'   # Optionally remove non-predictive variables
-#'   remove_role(genus, old_role = "predictor") %>%
-#'   # Apply the log2 transformation to all MIC predictors
-#'   step_mic_log2(all_mic_predictors()) %>%
-#'   prep()
+#'   # Create and prep a recipe with MIC log2 transformation
+#'   mic_recipe <- recipe(esbl ~ ., data = training_data) %>%
 #'
-#' # View prepped recipe
-#' mic_recipe
+#'     # Optionally remove non-predictive variables
+#'     remove_role(genus, old_role = "predictor") %>%
 #'
-#' # Apply the recipe to training and testing data
-#' out_training <- bake(mic_recipe, new_data = NULL)
-#' out_testing <- bake(mic_recipe, new_data = testing_data)
+#'     # Apply the log2 transformation to all MIC predictors
+#'     step_mic_log2(all_mic_predictors()) %>%
 #'
-#' # Fit a logistic regression model
-#' fitted <- logistic_reg(mode = "classification") %>%
-#'   set_engine("glm") %>%
-#'   fit(esbl ~ ., data = out_training)
+#'     # And apply the preparation steps
+#'     prep()
 #'
-#' # Generate predictions on the test set
-#' predictions <- predict(fitted, out_testing) %>%
-#'   bind_cols(out_testing)
+#'   # View prepped recipe
+#'   mic_recipe
 #'
-#' # Evaluate predictions using standard classification metrics
-#' our_metrics <- metric_set(accuracy, kap, ppv, npv)
-#' metrics <- our_metrics(predictions, truth = esbl, estimate = .pred_class)
+#'   # Apply the recipe to training and testing data
+#'   out_training <- bake(mic_recipe, new_data = NULL)
+#'   out_testing <- bake(mic_recipe, new_data = testing_data)
 #'
-#' # Show performance:
-#' # - negative predictive value (NPV) of ~98%
-#' # - positive predictive value (PPV) of ~94%
-#' metrics
+#'   # Fit a logistic regression model
+#'   fitted <- logistic_reg(mode = "classification") %>%
+#'     set_engine("glm") %>%
+#'     fit(esbl ~ ., data = out_training)
+#'
+#'   # Generate predictions on the test set
+#'   predictions <- predict(fitted, out_testing) %>%
+#'     bind_cols(out_testing)
+#'
+#'   # Evaluate predictions using standard classification metrics
+#'   our_metrics <- metric_set(accuracy, kap, ppv, npv)
+#'   metrics <- our_metrics(predictions, truth = esbl, estimate = .pred_class)
+#'
+#'   # Show performance
+#'   metrics
+#' }
 all_mic <- function() {
  x <- tidymodels_amr_select(levels(NA_mic_))
  names(x)
--- a/data-raw/_generate_python_wrapper.sh
+++ b/data-raw/_generate_python_wrapper.sh
@@ -56,7 +56,8 @@ os.makedirs(r_lib_path, exist_ok=True)
 os.environ['R_LIBS_SITE'] = r_lib_path

 from rpy2 import robjects
-from rpy2.robjects import pandas2ri
+from rpy2.robjects.conversion import localconverter
+from rpy2.robjects import default_converter, numpy2ri, pandas2ri
 from rpy2.robjects.packages import importr, isinstalled

 # Import base and utils
@@ -94,27 +95,26 @@ if r_amr_version != python_amr_version:
 print(f"AMR: Setting up R environment and AMR datasets...", flush=True)

 # Activate the automatic conversion between R and pandas DataFrames
-pandas2ri.activate()
+with localconverter(default_converter + numpy2ri.converter + pandas2ri.converter):
+    # example_isolates
+    example_isolates = robjects.r('''
+    df <- AMR::example_isolates
+    df[] <- lapply(df, function(x) {
+        if (inherits(x, c("Date", "POSIXt", "factor"))) {
+            as.character(x)
+        } else {
+            x
+        }
+    })
+    df <- df[, !sapply(df, is.list)]
+    df
+    ''')
+    example_isolates['date'] = pd.to_datetime(example_isolates['date'])

-# example_isolates
-example_isolates = pandas2ri.rpy2py(robjects.r('''
-df <- AMR::example_isolates
-df[] <- lapply(df, function(x) {
-    if (inherits(x, c("Date", "POSIXt", "factor"))) {
-        as.character(x)
-    } else {
-        x
-    }
-})
-df <- df[, !sapply(df, is.list)]
-df
-'''))
-example_isolates['date'] = pd.to_datetime(example_isolates['date'])
-
-# microorganisms
-microorganisms = pandas2ri.rpy2py(robjects.r('AMR::microorganisms[, !sapply(AMR::microorganisms, is.list)]'))
-antimicrobials = pandas2ri.rpy2py(robjects.r('AMR::antimicrobials[, !sapply(AMR::antimicrobials, is.list)]'))
-clinical_breakpoints = pandas2ri.rpy2py(robjects.r('AMR::clinical_breakpoints[, !sapply(AMR::clinical_breakpoints, is.list)]'))
+    # microorganisms
+    microorganisms = robjects.r('AMR::microorganisms[, !sapply(AMR::microorganisms, is.list)]')
+    antimicrobials = robjects.r('AMR::antimicrobials[, !sapply(AMR::antimicrobials, is.list)]')
+    clinical_breakpoints = robjects.r('AMR::clinical_breakpoints[, !sapply(AMR::clinical_breakpoints, is.list)]')

 base.options(warn = 0)

@@ -129,16 +129,15 @@ echo "from .datasets import clinical_breakpoints" >> $init_file

 # Write header to the functions Python file, including the convert_to_python function
 cat <<EOL > "$functions_file"
+import functools
 import rpy2.robjects as robjects
 from rpy2.robjects.packages import importr
 from rpy2.robjects.vectors import StrVector, FactorVector, IntVector, FloatVector, DataFrame
-from rpy2.robjects import pandas2ri
+from rpy2.robjects.conversion import localconverter
+from rpy2.robjects import default_converter, numpy2ri, pandas2ri
 import pandas as pd
 import numpy as np

-# Activate automatic conversion between R data frames and pandas data frames
-pandas2ri.activate()
-
 # Import the AMR R package
 amr_r = importr('AMR')

@@ -156,10 +155,8 @@ def convert_to_python(r_output):
        return list(r_output)  # Convert to a Python list of integers or floats
    
    # Check if it's a pandas-compatible R data frame
-    elif isinstance(r_output, pd.DataFrame):
+    elif isinstance(r_output, (pd.DataFrame, DataFrame)):
        return r_output  # Return as pandas DataFrame (already converted by pandas2ri)
-    elif isinstance(r_output, DataFrame):
-        return pandas2ri.rpy2py(r_output) # Return as pandas DataFrame

    # Check if the input is a NumPy array and has a string data type
    if isinstance(r_output, np.ndarray) and np.issubdtype(r_output.dtype, np.str_):
@@ -167,6 +164,15 @@ def convert_to_python(r_output):
    
    # Fall-back
    return r_output
+
+def r_to_python(r_func):
+    """Decorator that runs an rpy2 function under a localconverter
+    and then applies convert_to_python to its output."""
+    @functools.wraps(r_func)
+    def wrapper(*args, **kwargs):
+        with localconverter(default_converter + numpy2ri.converter + pandas2ri.converter):
+            return convert_to_python(r_func(*args, **kwargs))
+    return wrapper
 EOL

 # Directory where the .Rd files are stored (update path as needed)
@@ -246,11 +252,12 @@ for rd_file in "$rd_dir"/*.Rd; do
        gsub("FALSE", "False", func_args)
        gsub("NULL", "None", func_args)

-        # Write the Python function definition to the output file
-        print "def " func_name_py "(" func_args "):" >> "'"$functions_file"'"
-        print "    \"\"\"Please see our website of the R package for the full manual: https://amr-for-r.org\"\"\"" >> "'"$functions_file"'"
-        print "    return convert_to_python(amr_r." func_name_py "(" func_args "))" >> "'"$functions_file"'"
-        
+        # Write the Python function definition to the output file, using decorator
+        print "@r_to_python" >> "'"$functions_file"'"  
+        print "def " func_name_py "(" func_args "):" >> "'"$functions_file"'"  
+        print "    \"\"\"Please see our website of the R package for the full manual: https://amr-for-r.org\"\"\"" >> "'"$functions_file"'"  
+        print "    return amr_r." func_name_py "(" func_args ")" >> "'"$functions_file"'"  
+
        print "from .functions import " func_name_py >> "'"$init_file"'"
    }
    ' "$rd_file"
--- a/man/age_groups.Rd
+++ b/man/age_groups.Rd
@@ -4,20 +4,23 @@
 \alias{age_groups}
 \title{Split Ages into Age Groups}
 \usage{
-age_groups(x, split_at = c(12, 25, 55, 75), na.rm = FALSE)
+age_groups(x, split_at = c(0, 12, 25, 55, 75), names = NULL,
+  na.rm = FALSE)
 }
 \arguments{
 \item{x}{Age, e.g. calculated with \code{\link[=age]{age()}}.}

 \item{split_at}{Values to split \code{x} at - the default is age groups 0-11, 12-24, 25-54, 55-74 and 75+. See \emph{Details}.}

+\item{names}{Optional names to be given to the various age groups.}
+
 \item{na.rm}{A \link{logical} to indicate whether missing values should be removed.}
 }
 \value{
 Ordered \link{factor}
 }
 \description{
-Split ages into age groups defined by the \code{split} argument. This allows for easier demographic (antimicrobial resistance) analysis.
+Split ages into age groups defined by the \code{split} argument. This allows for easier demographic (antimicrobial resistance) analysis. The function returns an ordered \link{factor}.
 }
 \details{
 To split ages, the input for the \code{split_at} argument can be:
@@ -41,6 +44,7 @@ age_groups(ages, 50)

 # split into 0-19, 20-49 and 50+
 age_groups(ages, c(20, 50))
+age_groups(ages, c(20, 50), names = c("Under 20 years", "20 to 50 years", "Over 50 years"))

 # split into groups of ten years
 age_groups(ages, 1:10 * 10)
--- a/man/amr-tidymodels.Rd
+++ b/man/amr-tidymodels.Rd
@@ -65,56 +65,59 @@ Pre-processing pipeline steps include:
 These steps integrate with \code{recipes::recipe()} and work like standard preprocessing steps. They are useful for preparing data for modelling, especially with classification models.
 }
 \examples{
-library(tidymodels)
+if (require("tidymodels")) {

-# The below approach formed the basis for this paper: DOI 10.3389/fmicb.2025.1582703
-# Presence of ESBL genes was predicted based on raw MIC values.
+  # The below approach formed the basis for this paper: DOI 10.3389/fmicb.2025.1582703
+  # Presence of ESBL genes was predicted based on raw MIC values.


-# example data set in the AMR package
-esbl_isolates
+  # example data set in the AMR package
+  esbl_isolates

-# Prepare a binary outcome and convert to ordered factor
-data <- esbl_isolates \%>\%
-  mutate(esbl = factor(esbl, levels = c(FALSE, TRUE), ordered = TRUE))
+  # Prepare a binary outcome and convert to ordered factor
+  data <- esbl_isolates \%>\%
+    mutate(esbl = factor(esbl, levels = c(FALSE, TRUE), ordered = TRUE))

-# Split into training and testing sets
-split <- initial_split(data)
-training_data <- training(split)
-testing_data <- testing(split)
+  # Split into training and testing sets
+  split <- initial_split(data)
+  training_data <- training(split)
+  testing_data <- testing(split)

-# Create and prep a recipe with MIC log2 transformation
-mic_recipe <- recipe(esbl ~ ., data = training_data) \%>\%
-  # Optionally remove non-predictive variables
-  remove_role(genus, old_role = "predictor") \%>\%
-  # Apply the log2 transformation to all MIC predictors
-  step_mic_log2(all_mic_predictors()) \%>\%
-  prep()
+  # Create and prep a recipe with MIC log2 transformation
+  mic_recipe <- recipe(esbl ~ ., data = training_data) \%>\%

-# View prepped recipe
-mic_recipe
+    # Optionally remove non-predictive variables
+    remove_role(genus, old_role = "predictor") \%>\%

-# Apply the recipe to training and testing data
-out_training <- bake(mic_recipe, new_data = NULL)
-out_testing <- bake(mic_recipe, new_data = testing_data)
+    # Apply the log2 transformation to all MIC predictors
+    step_mic_log2(all_mic_predictors()) \%>\%

-# Fit a logistic regression model
-fitted <- logistic_reg(mode = "classification") \%>\%
-  set_engine("glm") \%>\%
-  fit(esbl ~ ., data = out_training)
+    # And apply the preparation steps
+    prep()

-# Generate predictions on the test set
-predictions <- predict(fitted, out_testing) \%>\%
-  bind_cols(out_testing)
+  # View prepped recipe
+  mic_recipe

-# Evaluate predictions using standard classification metrics
-our_metrics <- metric_set(accuracy, kap, ppv, npv)
-metrics <- our_metrics(predictions, truth = esbl, estimate = .pred_class)
+  # Apply the recipe to training and testing data
+  out_training <- bake(mic_recipe, new_data = NULL)
+  out_testing <- bake(mic_recipe, new_data = testing_data)

-# Show performance:
-# - negative predictive value (NPV) of ~98\%
-# - positive predictive value (PPV) of ~94\%
-metrics
+  # Fit a logistic regression model
+  fitted <- logistic_reg(mode = "classification") \%>\%
+    set_engine("glm") \%>\%
+    fit(esbl ~ ., data = out_training)
+
+  # Generate predictions on the test set
+  predictions <- predict(fitted, out_testing) \%>\%
+    bind_cols(out_testing)
+
+  # Evaluate predictions using standard classification metrics
+  our_metrics <- metric_set(accuracy, kap, ppv, npv)
+  metrics <- our_metrics(predictions, truth = esbl, estimate = .pred_class)
+
+  # Show performance
+  metrics
+}
 }
 \seealso{
 \code{\link[recipes:recipe]{recipes::recipe()}}, \code{\link[=as.mic]{as.mic()}}, \code{\link[=as.sir]{as.sir()}}
Author	SHA1	Message	Date
Nick Thomson	68442f3042	(v3.0.0.9012) Python wrapper fix	2025-07-17 19:43:07 +02:00
dr. M.S. (Matthijs) Berends	39ea5f6597	(v3.0.0.9011) allow names for `age_groups()`	2025-07-17 19:32:46 +02:00