(v3.0.0.9022) postpone new features - we like a clearly focussed bugfix release first

2025-12-16 06:30:21 +01:00 · 2025-09-03 15:39:44 +02:00
parent 0744c6feee
commit 3ba1b8a10a
14 changed files with 105 additions and 198 deletions
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -40,3 +40,4 @@
 ^CRAN-SUBMISSION$
 ^PythonPackage$
 ^README\.Rmd$
+\.no_include$
--- a/.github/workflows/check-old-tinytest.yaml
+++ b/.github/workflows/check-old-tinytest.yaml
@@ -49,13 +49,13 @@ jobs:
          # Test all old versions of R >= 3.0, we support them all!
          # For these old versions, dependencies and vignettes will not be checked.
          # For recent R versions, see check-recent.yaml (r-lib and tidyverse support the latest 5 major R releases).
-          - {os: ubuntu-latest, r: '3.6', allowfail: true}
-          # - {os: windows-latest, r: '3.5', allowfail: true} # always fails, horrible with UTF-8
-          - {os: ubuntu-latest, r: '3.4', allowfail: true}
-          - {os: ubuntu-latest, r: '3.3', allowfail: true}
-          - {os: ubuntu-latest, r: '3.2', allowfail: true}
-          - {os: ubuntu-latest, r: '3.1', allowfail: true}
-          - {os: ubuntu-latest, r: '3.0', allowfail: true}
+          - {os: ubuntu-latest, r: '3.6', allowfail: false}
+          # - {os: windows-latest, r: '3.5', allowfail: false} # always fails, horrible with UTF-8
+          # - {os: ubuntu-latest, r: '3.4', allowfail: false}  # R 3.1-3.4 now always fail with "Error in grep(warn_re, lines, invert = TRUE, value = TRUE): attempt to set index 46/46 in SET_STRING_ELT"
+          # - {os: ubuntu-latest, r: '3.3', allowfail: false}
+          # - {os: ubuntu-latest, r: '3.2', allowfail: false}
+          # - {os: ubuntu-latest, r: '3.1', allowfail: false}
+          - {os: ubuntu-latest, r: '3.0', allowfail: false}

    env:
      R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
--- a/.github/workflows/publish-to-pypi.yml
+++ b/.github/workflows/publish-to-pypi.yml
@@ -39,7 +39,7 @@ jobs:
    runs-on: ubuntu-latest
    
    env:
-      PYPI_PAT: ${{ secrets.PYPI_PAT }}
+      GH_REPO_SCOPE: ${{ secrets.GH_REPO_SCOPE }}
    
    steps:
      - name: Checkout code
@@ -78,6 +78,7 @@ jobs:
          cd PythonPackage/AMR
          python -m twine upload --repository-url https://test.pypi.org/legacy/ dist/*

+      # TODO - Support Miniconda and Anaconda too
      # - name: Set up Miniconda
      #   continue-on-error: true
      #   uses: conda-incubator/setup-miniconda@v2
@@ -117,7 +118,7 @@ jobs:
          rm -rf PythonPackage

          git init
-          git remote add origin https://$PYPI_PAT@github.com/msberends/AMR
+          git remote add origin https://$GH_REPO_SCOPE@github.com/msberends/AMR
          git checkout --orphan python-wrapper
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
@@ -125,4 +126,4 @@ jobs:
          git rm -rf . || true
          git add .
          git commit -m "Python wrapper update"
-          git push https://$PYPI_PAT@github.com/msberends/AMR.git python-wrapper --force
+          git push https://$GH_REPO_SCOPE@github.com/msberends/AMR.git python-wrapper --force
--- a/.github/workflows/renew-gpt-training-data.yml
+++ b/.github/workflows/renew-gpt-training-data.yml
@@ -39,7 +39,7 @@ jobs:
    runs-on: ubuntu-latest
    
    env:
-      PYPI_PAT: ${{ secrets.PYPI_PAT }}
+      GH_REPO_SCOPE: ${{ secrets.GH_REPO_SCOPE }}
    
    steps:
      - name: Checkout code
@@ -63,4 +63,4 @@ jobs:
          git config user.email "github-actions[bot]@users.noreply.github.com"
          git add latest_training_data.txt
          git commit -m "GPT training data update"
-          git push https://$PYPI_PAT@github.com/msberends/amr-for-r-assistant.git main --force
+          git push https://$GH_REPO_SCOPE@github.com/msberends/amr-for-r-assistant.git main --force
--- a/.github/workflows/todo-tracker.yml
+++ b/.github/workflows/todo-tracker.yml
@@ -0,0 +1,75 @@
+# ==================================================================== #
+# TITLE:                                                               #
+# AMR: An R Package for Working with Antimicrobial Resistance Data     #
+#                                                                      #
+# SOURCE CODE:                                                         #
+# https://github.com/msberends/AMR                                     #
+#                                                                      #
+# PLEASE CITE THIS SOFTWARE AS:                                        #
+# Berends MS, Luz CF, Friedrich AW, et al. (2022).                     #
+# AMR: An R Package for Working with Antimicrobial Resistance Data.    #
+# Journal of Statistical Software, 104(3), 1-31.                       #
+# https://doi.org/10.18637/jss.v104.i03                                #
+#                                                                      #
+# Developed at the University of Groningen and the University Medical  #
+# Center Groningen in The Netherlands, in collaboration with many      #
+# colleagues from around the world, see our website.                   #
+#                                                                      #
+# This R package is free software; you can freely use and distribute   #
+# it for both personal and commercial purposes under the terms of the  #
+# GNU General Public License version 2.0 (GNU GPL-2), as published by  #
+# the Free Software Foundation.                                        #
+# We created this package for both routine data analysis and academic  #
+# research and it was publicly released in the hope that it will be    #
+# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY.              #
+#                                                                      #
+# Visit our website for the full manual and a complete tutorial about  #
+# how to conduct AMR data analysis: https://amr-for-r.org              #
+# ==================================================================== #
+
+on:
+  push:
+    # only on main
+    branches: "main"
+
+name: Update TODO Tracker
+
+jobs:
+  update-todo:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Generate TODO list from R/
+        run: |
+          echo "## TODO Report" > todo.md
+          echo "" >> todo.md
+          echo '_This issue is automatically updated on each push to `main`._' >> todo.md  # single quotes keep the backticks literal (no command substitution)
+          echo "" >> todo.md
+          todos=$(find R/ -type f ! -name "sysdata.rda" -exec grep -nH "TODO" {} + || true)  # '|| true': grep exits 1 when nothing matches
+          if [ -z "$todos" ]; then
+            echo "✅ No TODOs found." >> todo.md
+          else
+            echo "$todos" | awk -F: '
+              {
+                file = $1
+                line = $2
+                text = substr($0, length($1) + length($2) + 3)  # everything after the "file:line:" prefix, colons included
+                if (file != last_file) {
+                  if (last_file != "") print ""
+                  print "### " file
+                  last_file = file
+                }
+                printf "L%s: %s\n", line, text
+              }
+            ' >> todo.md
+          fi
+
+      - name: Update GitHub issue
+        uses: peter-evans/create-or-update-comment@v4
+        with:
+          token: ${{ secrets.GH_REPO_SCOPE }}
+          issue-number: 231
+          body-path: todo.md  # v4 reads the body from 'body-path'; 'body-file' is not an input of this version
+          edit-mode: replace  # NOTE(review): only honoured together with 'comment-id'; without it every push adds a new comment - confirm intent
--- a/2
+++ b/2
@@ -1,5 +1,5 @@
 Package: AMR
-Version: 3.0.0.9021
+Version: 3.0.0.9022
 Date: 2025-09-03
 Title: Antimicrobial Resistance Data Analysis
 Description: Functions to simplify and standardise antimicrobial resistance (AMR)
--- a/14
+++ b/14
@@ -106,8 +106,6 @@ S3method(print,mo_uncertainties)
 S3method(print,pca)
 S3method(print,sir)
 S3method(print,sir_log)
-S3method(print,step_mic_log2)
-S3method(print,step_sir_numeric)
 S3method(quantile,mic)
 S3method(rep,ab)
 S3method(rep,av)
@@ -161,10 +159,6 @@ export(administrable_per_os)
 export(age)
 export(age_groups)
 export(all_antimicrobials)
-export(all_mic)
-export(all_mic_predictors)
-export(all_sir)
-export(all_sir_predictors)
 export(aminoglycosides)
 export(aminopenicillins)
 export(amr_class)
@@ -358,8 +352,6 @@ export(sir_df)
 export(sir_interpretation_history)
 export(sir_predict)
 export(skewness)
-export(step_mic_log2)
-export(step_sir_numeric)
 export(streptogramins)
 export(sulfonamides)
 export(susceptibility)
@@ -396,12 +388,6 @@ if(getRversion() >= "3.0.0") S3method(pillar::type_sum, av)
 if(getRversion() >= "3.0.0") S3method(pillar::type_sum, mic)
 if(getRversion() >= "3.0.0") S3method(pillar::type_sum, mo)
 if(getRversion() >= "3.0.0") S3method(pillar::type_sum, sir)
-if(getRversion() >= "3.0.0") S3method(recipes::bake, step_mic_log2)
-if(getRversion() >= "3.0.0") S3method(recipes::bake, step_sir_numeric)
-if(getRversion() >= "3.0.0") S3method(recipes::prep, step_mic_log2)
-if(getRversion() >= "3.0.0") S3method(recipes::prep, step_sir_numeric)
-if(getRversion() >= "3.0.0") S3method(recipes::tidy, step_mic_log2)
-if(getRversion() >= "3.0.0") S3method(recipes::tidy, step_sir_numeric)
 if(getRversion() >= "3.0.0") S3method(skimr::get_skimmers, disk)
 if(getRversion() >= "3.0.0") S3method(skimr::get_skimmers, mic)
 if(getRversion() >= "3.0.0") S3method(skimr::get_skimmers, mo)
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,11 +1,6 @@
-# AMR 3.0.0.9021
+# AMR 3.0.0.9022

-This is primarily a bugfix release, though we added one nice feature too.
-
-### New
-* Integration with the **tidymodels** framework to allow seamless use of MIC and SIR data in modelling pipelines via `recipes`
-  - `step_mic_log2()` to transform `<mic>` columns with log2, and `step_sir_numeric()` to convert `<sir>` columns to numeric
-  - New `tidyselect` helpers: `all_mic()`, `all_mic_predictors()`, `all_sir()`, `all_sir_predictors()`
+This is a bugfix release following the release of v3.0.0 in June 2025.

 ### Changed
 * Fixed a bug in `antibiogram()` for when no antimicrobials are set
@@ -16,7 +11,7 @@ This is primarily a bugfix release, though we added one nice feature too.
 * Fixed a bug in `as.sir()` to pick right breakpoint when `uti = FALSE` (#216)
 * Fixed a bug in `ggplot_sir()` when using `combine_SI = FALSE` (#213)
 * Fixed a bug the `antimicrobials` data set to remove statins (#229)
-* Fixed a bug in `mdro()` to make sure all genes specified in arguments are acknowledges
+* Fixed a bug in `mdro()` to make sure all genes specified in arguments are acknowledged
 * Fixed ATC J01CR05 to map to piperacillin/tazobactam rather than piperacillin/sulbactam (#230)
 * Fixed all plotting to contain a separate colour for SDD (susceptible dose-dependent) (#223)
 * Fixed some specific Dutch translations for antimicrobials
--- a/R/aa_globals.R
+++ b/R/aa_globals.R
@@ -233,6 +233,7 @@ globalVariables(c(
  "uti_index",
  "value",
  "varname",
+  "where",
  "x",
  "xvar",
  "y",
--- a/R/data.R
+++ b/R/data.R
@@ -362,14 +362,14 @@
 #' dosage
 "dosage"

-#' Data Set with `r format(nrow(esbl_isolates), big.mark = " ")` ESBL Isolates
-#'
-#' A data set containing `r format(nrow(esbl_isolates), big.mark = " ")` microbial isolates with MIC values of common antibiotics and a binary `esbl` column for extended-spectrum beta-lactamase (ESBL) production. This data set contains randomised fictitious data but reflects reality and can be used to practise AMR-related machine learning, e.g., classification modelling with [tidymodels](https://amr-for-r.org/articles/AMR_with_tidymodels.html).
-#' @format A [tibble][tibble::tibble] with `r format(nrow(esbl_isolates), big.mark = " ")` observations and `r ncol(esbl_isolates)` variables:
-#' - `esbl`\cr Logical indicator if the isolate is ESBL-producing
-#' - `genus`\cr Genus of the microorganism
-#' - `AMC:COL`\cr MIC values for 17 antimicrobial agents, transformed to class [`mic`] (see [as.mic()])
-#' @details See our [tidymodels integration][amr-tidymodels] for an example using this data set.
-#' @examples
-#' esbl_isolates
-"esbl_isolates"
+# TODO #' Data Set with `r format(nrow(esbl_isolates), big.mark = " ")` ESBL Isolates
+# TODO #'
+# TODO #' A data set containing `r format(nrow(esbl_isolates), big.mark = " ")` microbial isolates with MIC values of common antibiotics and a binary `esbl` column for extended-spectrum beta-lactamase (ESBL) production. This data set contains randomised fictitious data but reflects reality and can be used to practise AMR-related machine learning, e.g., classification modelling with [tidymodels](https://amr-for-r.org/articles/AMR_with_tidymodels.html).
+# TODO #' @format A [tibble][tibble::tibble] with `r format(nrow(esbl_isolates), big.mark = " ")` observations and `r ncol(esbl_isolates)` variables:
+# TODO #' - `esbl`\cr Logical indicator if the isolate is ESBL-producing
+# TODO #' - `genus`\cr Genus of the microorganism
+# TODO #' - `AMC:COL`\cr MIC values for 17 antimicrobial agents, transformed to class [`mic`] (see [as.mic()])
+# TODO #' @details See our [tidymodels integration][amr-tidymodels] for an example using this data set.
+# TODO #' @examples
+# TODO #' esbl_isolates
+# TODO "esbl_isolates"
--- a/R/tidymodels.R.no_include
+++ b/R/tidymodels.R.no_include
--- a/data/esbl_isolates.rda.no_include
+++ b/data/esbl_isolates.rda.no_include
--- a/man/amr-tidymodels.Rd
+++ b/man/amr-tidymodels.Rd
@@ -1,125 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/tidymodels.R
-\name{amr-tidymodels}
-\alias{amr-tidymodels}
-\alias{all_mic}
-\alias{all_mic_predictors}
-\alias{all_sir}
-\alias{all_sir_predictors}
-\alias{step_mic_log2}
-\alias{step_sir_numeric}
-\title{AMR Extensions for Tidymodels}
-\usage{
-all_mic()
-
-all_mic_predictors()
-
-all_sir()
-
-all_sir_predictors()
-
-step_mic_log2(recipe, ..., role = NA, trained = FALSE, columns = NULL,
-  skip = FALSE, id = recipes::rand_id("mic_log2"))
-
-step_sir_numeric(recipe, ..., role = NA, trained = FALSE, columns = NULL,
-  skip = FALSE, id = recipes::rand_id("sir_numeric"))
-}
-\arguments{
-\item{recipe}{A recipe object. The step will be added to the sequence of
-operations for this recipe.}
-
-\item{...}{One or more selector functions to choose variables for this step.
-See \code{\link[recipes:selections]{selections()}} for more details.}
-
-\item{role}{Not used by this step since no new variables are created.}
-
-\item{trained}{A logical to indicate if the quantities for preprocessing have
-been estimated.}
-
-\item{skip}{A logical. Should the step be skipped when the recipe is baked by
-\code{\link[recipes:bake]{bake()}}? While all operations are baked when \code{\link[recipes:prep]{prep()}} is run, some
-operations may not be able to be conducted on new data (e.g. processing the
-outcome variable(s)). Care should be taken when using \code{skip = TRUE} as it
-may affect the computations for subsequent operations.}
-
-\item{id}{A character string that is unique to this step to identify it.}
-}
-\description{
-This family of functions allows using AMR-specific data types such as \verb{<mic>} and \verb{<sir>} inside \code{tidymodels} pipelines.
-}
-\details{
-You can read more in our online \href{https://amr-for-r.org/articles/AMR_with_tidymodels.html}{AMR with tidymodels introduction}.
-
-Tidyselect helpers include:
-\itemize{
-\item \code{\link[=all_mic]{all_mic()}} and \code{\link[=all_mic_predictors]{all_mic_predictors()}} to select \verb{<mic>} columns
-\item \code{\link[=all_sir]{all_sir()}} and \code{\link[=all_sir_predictors]{all_sir_predictors()}} to select \verb{<sir>} columns
-}
-
-Pre-processing pipeline steps include:
-\itemize{
-\item \code{\link[=step_mic_log2]{step_mic_log2()}} to convert MIC columns to numeric (via \code{as.numeric()}) and apply a log2 transform, to be used with \code{\link[=all_mic_predictors]{all_mic_predictors()}}
-\item \code{\link[=step_sir_numeric]{step_sir_numeric()}} to convert SIR columns to numeric (via \code{as.numeric()}), to be used with \code{\link[=all_sir_predictors]{all_sir_predictors()}}: \code{"S"} = 1, \code{"I"}/\code{"SDD"} = 2, \code{"R"} = 3. All other values are rendered \code{NA}. Keep this in mind for further processing, especially if the model does not allow for \code{NA} values.
-}
-
-These steps integrate with \code{recipes::recipe()} and work like standard preprocessing steps. They are useful for preparing data for modelling, especially with classification models.
-}
-\examples{
-if (require("tidymodels")) {
-
-  # The below approach formed the basis for this paper: DOI 10.3389/fmicb.2025.1582703
-  # Presence of ESBL genes was predicted based on raw MIC values.
-
-
-  # example data set in the AMR package
-  esbl_isolates
-
-  # Prepare a binary outcome and convert to ordered factor
-  data <- esbl_isolates \%>\%
-    mutate(esbl = factor(esbl, levels = c(FALSE, TRUE), ordered = TRUE))
-
-  # Split into training and testing sets
-  split <- initial_split(data)
-  training_data <- training(split)
-  testing_data <- testing(split)
-
-  # Create and prep a recipe with MIC log2 transformation
-  mic_recipe <- recipe(esbl ~ ., data = training_data) \%>\%
-
-    # Optionally remove non-predictive variables
-    remove_role(genus, old_role = "predictor") \%>\%
-
-    # Apply the log2 transformation to all MIC predictors
-    step_mic_log2(all_mic_predictors()) \%>\%
-
-    # And apply the preparation steps
-    prep()
-
-  # View prepped recipe
-  mic_recipe
-
-  # Apply the recipe to training and testing data
-  out_training <- bake(mic_recipe, new_data = NULL)
-  out_testing <- bake(mic_recipe, new_data = testing_data)
-
-  # Fit a logistic regression model
-  fitted <- logistic_reg(mode = "classification") \%>\%
-    set_engine("glm") \%>\%
-    fit(esbl ~ ., data = out_training)
-
-  # Generate predictions on the test set
-  predictions <- predict(fitted, out_testing) \%>\%
-    bind_cols(out_testing)
-
-  # Evaluate predictions using standard classification metrics
-  our_metrics <- metric_set(accuracy, kap, ppv, npv)
-  metrics <- our_metrics(predictions, truth = esbl, estimate = .pred_class)
-
-  # Show performance
-  metrics
-}
-}
-\seealso{
-\code{\link[recipes:recipe]{recipes::recipe()}}, \code{\link[=as.mic]{as.mic()}}, \code{\link[=as.sir]{as.sir()}}
-}
-\keyword{internal}
--- a/man/esbl_isolates.Rd
+++ b/man/esbl_isolates.Rd
@@ -1,27 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/data.R
-\docType{data}
-\name{esbl_isolates}
-\alias{esbl_isolates}
-\title{Data Set with 500 ESBL Isolates}
-\format{
-A \link[tibble:tibble]{tibble} with 500 observations and 19 variables:
-\itemize{
-\item \code{esbl}\cr Logical indicator if the isolate is ESBL-producing
-\item \code{genus}\cr Genus of the microorganism
-\item \code{AMC:COL}\cr MIC values for 17 antimicrobial agents, transformed to class \code{\link{mic}} (see \code{\link[=as.mic]{as.mic()}})
-}
-}
-\usage{
-esbl_isolates
-}
-\description{
-A data set containing 500 microbial isolates with MIC values of common antibiotics and a binary \code{esbl} column for extended-spectrum beta-lactamase (ESBL) production. This data set contains randomised fictitious data but reflects reality and can be used to practise AMR-related machine learning, e.g., classification modelling with \href{https://amr-for-r.org/articles/AMR_with_tidymodels.html}{tidymodels}.
-}
-\details{
-See our \link[=amr-tidymodels]{tidymodels integration} for an example using this data set.
-}
-\examples{
-esbl_isolates
-}
-\keyword{datasets}