diff --git a/.Rbuildignore b/.Rbuildignore index 8f664fbb4..5536607fc 100755 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -40,3 +40,4 @@ ^CRAN-SUBMISSION$ ^PythonPackage$ ^README\.Rmd$ +\.no_include$ diff --git a/.github/workflows/check-old-tinytest.yaml b/.github/workflows/check-old-tinytest.yaml index e5ce21e3b..4ad26b4b3 100644 --- a/.github/workflows/check-old-tinytest.yaml +++ b/.github/workflows/check-old-tinytest.yaml @@ -49,13 +49,13 @@ jobs: # Test all old versions of R >= 3.0, we support them all! # For these old versions, dependencies and vignettes will not be checked. # For recent R versions, see check-recent.yaml (r-lib and tidyverse support the latest 5 major R releases). - - {os: ubuntu-latest, r: '3.6', allowfail: true} - # - {os: windows-latest, r: '3.5', allowfail: true} # always fails, horrible with UTF-8 - - {os: ubuntu-latest, r: '3.4', allowfail: true} - - {os: ubuntu-latest, r: '3.3', allowfail: true} - - {os: ubuntu-latest, r: '3.2', allowfail: true} - - {os: ubuntu-latest, r: '3.1', allowfail: true} - - {os: ubuntu-latest, r: '3.0', allowfail: true} + - {os: ubuntu-latest, r: '3.6', allowfail: false} + # - {os: windows-latest, r: '3.5', allowfail: false} # always fails, horrible with UTF-8 + # - {os: ubuntu-latest, r: '3.4', allowfail: false} # 3.1-3.4 now always fails with Error in grep(warn_re, lines, invert = TRUE, value = TRUE) attempt to set index 46/46 in SET_STRING_ELT + # - {os: ubuntu-latest, r: '3.3', allowfail: false} + # - {os: ubuntu-latest, r: '3.2', allowfail: false} + # - {os: ubuntu-latest, r: '3.1', allowfail: false} + - {os: ubuntu-latest, r: '3.0', allowfail: false} env: R_REMOTES_NO_ERRORS_FROM_WARNINGS: true diff --git a/.github/workflows/publish-to-pypi.yml b/.github/workflows/publish-to-pypi.yml index 77d4e1b80..b792dde65 100644 --- a/.github/workflows/publish-to-pypi.yml +++ b/.github/workflows/publish-to-pypi.yml @@ -39,7 +39,7 @@ jobs: runs-on: ubuntu-latest env: - PYPI_PAT: ${{ secrets.PYPI_PAT }} + GH_REPO_SCOPE: 
${{ secrets.GH_REPO_SCOPE }} steps: - name: Checkout code @@ -78,6 +78,7 @@ jobs: cd PythonPackage/AMR python -m twine upload --repository-url https://test.pypi.org/legacy/ dist/* + # TODO - Support Miniconda and Anaconda too # - name: Set up Miniconda # continue-on-error: true # uses: conda-incubator/setup-miniconda@v2 @@ -117,7 +118,7 @@ jobs: rm -rf PythonPackage git init - git remote add origin https://$PYPI_PAT@github.com/msberends/AMR + git remote add origin https://$GH_REPO_SCOPE@github.com/msberends/AMR git checkout --orphan python-wrapper git config user.name "github-actions[bot]" git config user.email "github-actions[bot]@users.noreply.github.com" @@ -125,4 +126,4 @@ jobs: git rm -rf . || true git add . git commit -m "Python wrapper update" - git push https://$PYPI_PAT@github.com/msberends/AMR.git python-wrapper --force + git push https://$GH_REPO_SCOPE@github.com/msberends/AMR.git python-wrapper --force diff --git a/.github/workflows/renew-gpt-training-data.yml b/.github/workflows/renew-gpt-training-data.yml index d599a1fc6..391f3bfa7 100644 --- a/.github/workflows/renew-gpt-training-data.yml +++ b/.github/workflows/renew-gpt-training-data.yml @@ -39,7 +39,7 @@ jobs: runs-on: ubuntu-latest env: - PYPI_PAT: ${{ secrets.PYPI_PAT }} + GH_REPO_SCOPE: ${{ secrets.GH_REPO_SCOPE }} steps: - name: Checkout code @@ -63,4 +63,4 @@ jobs: git config user.email "github-actions[bot]@users.noreply.github.com" git add latest_training_data.txt git commit -m "GPT training data update" - git push https://$PYPI_PAT@github.com/msberends/amr-for-r-assistant.git main --force + git push https://$GH_REPO_SCOPE@github.com/msberends/amr-for-r-assistant.git main --force diff --git a/.github/workflows/todo-tracker.yml b/.github/workflows/todo-tracker.yml new file mode 100644 index 000000000..0db1c8a63 --- /dev/null +++ b/.github/workflows/todo-tracker.yml @@ -0,0 +1,75 @@ +# ==================================================================== # +# TITLE: # +# AMR: An R Package for 
Working with Antimicrobial Resistance Data # +# # +# SOURCE CODE: # +# https://github.com/msberends/AMR # +# # +# PLEASE CITE THIS SOFTWARE AS: # +# Berends MS, Luz CF, Friedrich AW, et al. (2022). # +# AMR: An R Package for Working with Antimicrobial Resistance Data. # +# Journal of Statistical Software, 104(3), 1-31. # +# https://doi.org/10.18637/jss.v104.i03 # +# # +# Developed at the University of Groningen and the University Medical # +# Center Groningen in The Netherlands, in collaboration with many # +# colleagues from around the world, see our website. # +# # +# This R package is free software; you can freely use and distribute # +# it for both personal and commercial purposes under the terms of the # +# GNU General Public License version 2.0 (GNU GPL-2), as published by # +# the Free Software Foundation. # +# We created this package for both routine data analysis and academic # +# research and it was publicly released in the hope that it will be # +# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. # +# # +# Visit our website for the full manual and a complete tutorial about # +# how to conduct AMR data analysis: https://amr-for-r.org # +# ==================================================================== # + +on: + push: + # only on main + branches: "main" + +name: Update TODO Tracker + +jobs: + update-todo: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Generate TODO list from R/ + run: | + echo "## TODO Report" > todo.md + echo "" >> todo.md + echo '_This issue is automatically updated on each push to `main`._' >> todo.md # single quotes keep the Markdown backticks literal (no command substitution) + echo "" >> todo.md + todos=$(find R/ -type f ! -name "sysdata.rda" -exec grep -nH "TODO" {} + || true) + if [ -z "$todos" ]; then + echo "✅ No TODOs found."
>> todo.md + else + echo "$todos" | awk -F: ' + { + file = $1 + line = $2 + text = substr($0, index($0,$3)) + if (file != last_file) { + if (last_file != "") print "" + print "### " file + last_file = file + } + printf "L%s: %s\n", line, text + } + ' >> todo.md + fi + + - name: Update GitHub issue + uses: peter-evans/create-or-update-comment@v4 + with: + token: ${{ secrets.GH_REPO_SCOPE }} + issue-number: 231 + body-file: todo.md + edit-mode: replace diff --git a/DESCRIPTION b/DESCRIPTION index e9f1b79ef..cbec77bf0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: AMR -Version: 3.0.0.9021 +Version: 3.0.0.9022 Date: 2025-09-03 Title: Antimicrobial Resistance Data Analysis Description: Functions to simplify and standardise antimicrobial resistance (AMR) diff --git a/NAMESPACE b/NAMESPACE index 2b2cb9f33..0cc72d821 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -106,8 +106,6 @@ S3method(print,mo_uncertainties) S3method(print,pca) S3method(print,sir) S3method(print,sir_log) -S3method(print,step_mic_log2) -S3method(print,step_sir_numeric) S3method(quantile,mic) S3method(rep,ab) S3method(rep,av) @@ -161,10 +159,6 @@ export(administrable_per_os) export(age) export(age_groups) export(all_antimicrobials) -export(all_mic) -export(all_mic_predictors) -export(all_sir) -export(all_sir_predictors) export(aminoglycosides) export(aminopenicillins) export(amr_class) @@ -358,8 +352,6 @@ export(sir_df) export(sir_interpretation_history) export(sir_predict) export(skewness) -export(step_mic_log2) -export(step_sir_numeric) export(streptogramins) export(sulfonamides) export(susceptibility) @@ -396,12 +388,6 @@ if(getRversion() >= "3.0.0") S3method(pillar::type_sum, av) if(getRversion() >= "3.0.0") S3method(pillar::type_sum, mic) if(getRversion() >= "3.0.0") S3method(pillar::type_sum, mo) if(getRversion() >= "3.0.0") S3method(pillar::type_sum, sir) -if(getRversion() >= "3.0.0") S3method(recipes::bake, step_mic_log2) -if(getRversion() >= "3.0.0") S3method(recipes::bake, 
step_sir_numeric) -if(getRversion() >= "3.0.0") S3method(recipes::prep, step_mic_log2) -if(getRversion() >= "3.0.0") S3method(recipes::prep, step_sir_numeric) -if(getRversion() >= "3.0.0") S3method(recipes::tidy, step_mic_log2) -if(getRversion() >= "3.0.0") S3method(recipes::tidy, step_sir_numeric) if(getRversion() >= "3.0.0") S3method(skimr::get_skimmers, disk) if(getRversion() >= "3.0.0") S3method(skimr::get_skimmers, mic) if(getRversion() >= "3.0.0") S3method(skimr::get_skimmers, mo) diff --git a/NEWS.md b/NEWS.md index c510176f9..589a1bb4c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,11 +1,6 @@ -# AMR 3.0.0.9021 +# AMR 3.0.0.9022 -This is primarily a bugfix release, though we added one nice feature too. - -### New -* Integration with the **tidymodels** framework to allow seamless use of MIC and SIR data in modelling pipelines via `recipes` - - `step_mic_log2()` to transform `` columns with log2, and `step_sir_numeric()` to convert `` columns to numeric - - New `tidyselect` helpers: `all_mic()`, `all_mic_predictors()`, `all_sir()`, `all_sir_predictors()` +This is a bugfix release following the release of v3.0.0 in June 2025. ### Changed * Fixed a bug in `antibiogram()` for when no antimicrobials are set @@ -16,7 +11,7 @@ This is primarily a bugfix release, though we added one nice feature too. 
* Fixed a bug in `as.sir()` to pick right breakpoint when `uti = FALSE` (#216) * Fixed a bug in `ggplot_sir()` when using `combine_SI = FALSE` (#213) * Fixed a bug the `antimicrobials` data set to remove statins (#229) -* Fixed a bug in `mdro()` to make sure all genes specified in arguments are acknowledges +* Fixed a bug in `mdro()` to make sure all genes specified in arguments are acknowledged * Fixed ATC J01CR05 to map to piperacillin/tazobactam rather than piperacillin/sulbactam (#230) * Fixed all plotting to contain a separate colour for SDD (susceptible dose-dependent) (#223) * Fixed some specific Dutch translations for antimicrobials diff --git a/R/aa_globals.R b/R/aa_globals.R index 4f3c86e06..f1a477dbf 100755 --- a/R/aa_globals.R +++ b/R/aa_globals.R @@ -233,6 +233,7 @@ globalVariables(c( "uti_index", "value", "varname", + "where", "x", "xvar", "y", diff --git a/R/data.R b/R/data.R index bb6bfec3d..cb0be376a 100755 --- a/R/data.R +++ b/R/data.R @@ -362,14 +362,14 @@ #' dosage "dosage" -#' Data Set with `r format(nrow(esbl_isolates), big.mark = " ")` ESBL Isolates -#' -#' A data set containing `r format(nrow(esbl_isolates), big.mark = " ")` microbial isolates with MIC values of common antibiotics and a binary `esbl` column for extended-spectrum beta-lactamase (ESBL) production. This data set contains randomised fictitious data but reflects reality and can be used to practise AMR-related machine learning, e.g., classification modelling with [tidymodels](https://amr-for-r.org/articles/AMR_with_tidymodels.html). 
-#' @format A [tibble][tibble::tibble] with `r format(nrow(esbl_isolates), big.mark = " ")` observations and `r ncol(esbl_isolates)` variables: -#' - `esbl`\cr Logical indicator if the isolate is ESBL-producing -#' - `genus`\cr Genus of the microorganism -#' - `AMC:COL`\cr MIC values for 17 antimicrobial agents, transformed to class [`mic`] (see [as.mic()]) -#' @details See our [tidymodels integration][amr-tidymodels] for an example using this data set. -#' @examples -#' esbl_isolates -"esbl_isolates" +# TODO #' Data Set with `r format(nrow(esbl_isolates), big.mark = " ")` ESBL Isolates +# TODO #' +# TODO #' A data set containing `r format(nrow(esbl_isolates), big.mark = " ")` microbial isolates with MIC values of common antibiotics and a binary `esbl` column for extended-spectrum beta-lactamase (ESBL) production. This data set contains randomised fictitious data but reflects reality and can be used to practise AMR-related machine learning, e.g., classification modelling with [tidymodels](https://amr-for-r.org/articles/AMR_with_tidymodels.html). +# TODO #' @format A [tibble][tibble::tibble] with `r format(nrow(esbl_isolates), big.mark = " ")` observations and `r ncol(esbl_isolates)` variables: +# TODO #' - `esbl`\cr Logical indicator if the isolate is ESBL-producing +# TODO #' - `genus`\cr Genus of the microorganism +# TODO #' - `AMC:COL`\cr MIC values for 17 antimicrobial agents, transformed to class [`mic`] (see [as.mic()]) +# TODO #' @details See our [tidymodels integration][amr-tidymodels] for an example using this data set. 
+# TODO #' @examples +# TODO #' esbl_isolates +# TODO "esbl_isolates" diff --git a/R/tidymodels.R b/R/tidymodels.R.no_include similarity index 100% rename from R/tidymodels.R rename to R/tidymodels.R.no_include diff --git a/data/esbl_isolates.rda b/data/esbl_isolates.rda.no_include similarity index 100% rename from data/esbl_isolates.rda rename to data/esbl_isolates.rda.no_include diff --git a/man/amr-tidymodels.Rd b/man/amr-tidymodels.Rd deleted file mode 100644 index da6f7b2c3..000000000 --- a/man/amr-tidymodels.Rd +++ /dev/null @@ -1,125 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/tidymodels.R -\name{amr-tidymodels} -\alias{amr-tidymodels} -\alias{all_mic} -\alias{all_mic_predictors} -\alias{all_sir} -\alias{all_sir_predictors} -\alias{step_mic_log2} -\alias{step_sir_numeric} -\title{AMR Extensions for Tidymodels} -\usage{ -all_mic() - -all_mic_predictors() - -all_sir() - -all_sir_predictors() - -step_mic_log2(recipe, ..., role = NA, trained = FALSE, columns = NULL, - skip = FALSE, id = recipes::rand_id("mic_log2")) - -step_sir_numeric(recipe, ..., role = NA, trained = FALSE, columns = NULL, - skip = FALSE, id = recipes::rand_id("sir_numeric")) -} -\arguments{ -\item{recipe}{A recipe object. The step will be added to the sequence of -operations for this recipe.} - -\item{...}{One or more selector functions to choose variables for this step. -See \code{\link[recipes:selections]{selections()}} for more details.} - -\item{role}{Not used by this step since no new variables are created.} - -\item{trained}{A logical to indicate if the quantities for preprocessing have -been estimated.} - -\item{skip}{A logical. Should the step be skipped when the recipe is baked by -\code{\link[recipes:bake]{bake()}}? While all operations are baked when \code{\link[recipes:prep]{prep()}} is run, some -operations may not be able to be conducted on new data (e.g. processing the -outcome variable(s)). 
Care should be taken when using \code{skip = TRUE} as it -may affect the computations for subsequent operations.} - -\item{id}{A character string that is unique to this step to identify it.} -} -\description{ -This family of functions allows using AMR-specific data types such as \verb{} and \verb{} inside \code{tidymodels} pipelines. -} -\details{ -You can read more in our online \href{https://amr-for-r.org/articles/AMR_with_tidymodels.html}{AMR with tidymodels introduction}. - -Tidyselect helpers include: -\itemize{ -\item \code{\link[=all_mic]{all_mic()}} and \code{\link[=all_mic_predictors]{all_mic_predictors()}} to select \verb{} columns -\item \code{\link[=all_sir]{all_sir()}} and \code{\link[=all_sir_predictors]{all_sir_predictors()}} to select \verb{} columns -} - -Pre-processing pipeline steps include: -\itemize{ -\item \code{\link[=step_mic_log2]{step_mic_log2()}} to convert MIC columns to numeric (via \code{as.numeric()}) and apply a log2 transform, to be used with \code{\link[=all_mic_predictors]{all_mic_predictors()}} -\item \code{\link[=step_sir_numeric]{step_sir_numeric()}} to convert SIR columns to numeric (via \code{as.numeric()}), to be used with \code{\link[=all_sir_predictors]{all_sir_predictors()}}: \code{"S"} = 1, \code{"I"}/\code{"SDD"} = 2, \code{"R"} = 3. All other values are rendered \code{NA}. Keep this in mind for further processing, especially if the model does not allow for \code{NA} values. -} - -These steps integrate with \code{recipes::recipe()} and work like standard preprocessing steps. They are useful for preparing data for modelling, especially with classification models. -} -\examples{ -if (require("tidymodels")) { - - # The below approach formed the basis for this paper: DOI 10.3389/fmicb.2025.1582703 - # Presence of ESBL genes was predicted based on raw MIC values. 
- - - # example data set in the AMR package - esbl_isolates - - # Prepare a binary outcome and convert to ordered factor - data <- esbl_isolates \%>\% - mutate(esbl = factor(esbl, levels = c(FALSE, TRUE), ordered = TRUE)) - - # Split into training and testing sets - split <- initial_split(data) - training_data <- training(split) - testing_data <- testing(split) - - # Create and prep a recipe with MIC log2 transformation - mic_recipe <- recipe(esbl ~ ., data = training_data) \%>\% - - # Optionally remove non-predictive variables - remove_role(genus, old_role = "predictor") \%>\% - - # Apply the log2 transformation to all MIC predictors - step_mic_log2(all_mic_predictors()) \%>\% - - # And apply the preparation steps - prep() - - # View prepped recipe - mic_recipe - - # Apply the recipe to training and testing data - out_training <- bake(mic_recipe, new_data = NULL) - out_testing <- bake(mic_recipe, new_data = testing_data) - - # Fit a logistic regression model - fitted <- logistic_reg(mode = "classification") \%>\% - set_engine("glm") \%>\% - fit(esbl ~ ., data = out_training) - - # Generate predictions on the test set - predictions <- predict(fitted, out_testing) \%>\% - bind_cols(out_testing) - - # Evaluate predictions using standard classification metrics - our_metrics <- metric_set(accuracy, kap, ppv, npv) - metrics <- our_metrics(predictions, truth = esbl, estimate = .pred_class) - - # Show performance - metrics -} -} -\seealso{ -\code{\link[recipes:recipe]{recipes::recipe()}}, \code{\link[=as.mic]{as.mic()}}, \code{\link[=as.sir]{as.sir()}} -} -\keyword{internal} diff --git a/man/esbl_isolates.Rd b/man/esbl_isolates.Rd deleted file mode 100644 index a6433c08a..000000000 --- a/man/esbl_isolates.Rd +++ /dev/null @@ -1,27 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/data.R -\docType{data} -\name{esbl_isolates} -\alias{esbl_isolates} -\title{Data Set with 500 ESBL Isolates} -\format{ -A \link[tibble:tibble]{tibble} with 
500 observations and 19 variables: -\itemize{ -\item \code{esbl}\cr Logical indicator if the isolate is ESBL-producing -\item \code{genus}\cr Genus of the microorganism -\item \code{AMC:COL}\cr MIC values for 17 antimicrobial agents, transformed to class \code{\link{mic}} (see \code{\link[=as.mic]{as.mic()}}) -} -} -\usage{ -esbl_isolates -} -\description{ -A data set containing 500 microbial isolates with MIC values of common antibiotics and a binary \code{esbl} column for extended-spectrum beta-lactamase (ESBL) production. This data set contains randomised fictitious data but reflects reality and can be used to practise AMR-related machine learning, e.g., classification modelling with \href{https://amr-for-r.org/articles/AMR_with_tidymodels.html}{tidymodels}. -} -\details{ -See our \link[=amr-tidymodels]{tidymodels integration} for an example using this data set. -} -\examples{ -esbl_isolates -} -\keyword{datasets}