1
0
mirror of https://github.com/msberends/AMR.git synced 2025-04-19 08:33:49 +02:00

(v2.1.1.9182) fix AMR selectors for tidymodels, add unit tests

This commit is contained in:
dr. M.S. (Matthijs) Berends 2025-03-03 12:59:27 +01:00
parent b85890449d
commit 9a9468fa84
No known key found for this signature in database
16 changed files with 84 additions and 33 deletions

View File

@ -20,16 +20,12 @@ body:
id: version
attributes:
label: AMR Package Version
description: |
What version of the AMR package are you running? You can retrieve this by running in R:
```r
packageVersion("AMR")
```
description: Which version of the AMR package are you running? You can retrieve this by running `packageVersion("AMR")` in R. If you are not running any of these versions, then please update first and check whether the bug still persists.
multiple: false
options:
- Latest CRAN version (2.1.1)
- One of the latest GitHub versions (2.1.1.9xxx)
default: 0
validations:
required: true
- type: checkboxes

View File

@ -1,5 +1,5 @@
name: Feature or Optimisation Request
description: I have a suggestion.
description: I have an idea!
labels: "enhancement"
body:
- type: markdown
@ -7,7 +7,7 @@ body:
value: |
Thanks for taking the time to make a suggestion!
We'll be happy to implement on a short notice if this improves the AMR package. Do note that BY FAR most of the content of the current version is because of collaborators like you!
We'll be happy to implement it on short notice if this improves the AMR package. Do note that BY FAR most of the content of the current version is because of collaborators like you! So, many thanks in advance.
- type: textarea
id: description
attributes:

View File

@ -1,6 +1,6 @@
Package: AMR
Version: 2.1.1.9163
Date: 2025-02-27
Version: 2.1.1.9182
Date: 2025-03-03
Title: Antimicrobial Resistance Data Analysis
Description: Functions to simplify and standardise antimicrobial resistance (AMR)
data analysis and to work with microbial and antimicrobial properties by

View File

@ -1,4 +1,4 @@
# AMR 2.1.1.9163
# AMR 2.1.1.9182
*(this beta version will eventually become v3.0. We're happy to reach a new major milestone soon, which will be all about the new One Health support! Install this beta using [the instructions here](https://msberends.github.io/AMR/#latest-development-version).)*

View File

@ -1,6 +1,6 @@
Metadata-Version: 2.2
Name: AMR
Version: 2.1.1.9163
Version: 2.1.1.9182
Summary: A Python wrapper for the AMR R package
Home-page: https://github.com/msberends/AMR
Author: Matthijs Berends

Binary file not shown.

Binary file not shown.

View File

@ -2,7 +2,7 @@ from setuptools import setup, find_packages
setup(
name='AMR',
version='2.1.1.9163',
version='2.1.1.9182',
packages=find_packages(),
install_requires=[
'rpy2',

View File

@ -989,6 +989,8 @@ ascertain_sir_classes <- function(x, obj_name) {
}
get_current_data <- function(arg_name, call) {
# This function enables AMR selectors (e.g., AMR::carbapenems()) to work seamlessly across different environments, including dplyr, base R, data.table, and tidymodels.
# It identifies and extracts the appropriate data frame from the current execution context.
valid_df <- function(x) {
!is.null(x) && is.data.frame(x)
}
@ -1014,14 +1016,17 @@ get_current_data <- function(arg_name, call) {
}
}
# now go over all underlying environments looking for other dplyr, data.table and base R selection environments
# now go over all underlying environments looking for other dplyr, tidymodels, data.table and base R selection environments
with_generic <- vapply(FUN.VALUE = logical(1), frms, function(e) !is.null(e$`.Generic`))
for (env in frms[which(with_generic)]) {
if (valid_df(env$`.data`)) {
# an element `.data` will be in the environment when using dplyr::select()
return(env$`.data`)
} else if (valid_df(env$training)) {
# an element `training` will be in the environment when using some tidymodels functions such as `prep()`
return(env$training)
} else if (valid_df(env$data)) {
# an element `data` will be in the environment when using older dplyr versions, or tidymodels
# an element `data` will be in the environment when using older dplyr versions, or some tidymodels functions such as `fit()`
return(env$data)
} else if (valid_df(env$xx)) {
# an element `xx` will be in the environment for rows + cols in base R, e.g. `example_isolates[c(1:3), carbapenems()]`
@ -1038,7 +1043,7 @@ get_current_data <- function(arg_name, call) {
for (env in frms[which(with_tbl)]) {
if (!is.null(names(env)) && all(c(".tbl", ".vars", ".cols") %in% names(env), na.rm = TRUE)) {
# an element `.tbl` will be in the environment when using scoped dplyr variants, with or without `dplyr::vars()`
# (e.g. `dplyr::summarise_at()` or `dplyr::mutate_at()`)
# e.g. `dplyr::summarise_at(carbapenems(), ...)` or `dplyr::mutate_at(vars(carbapenems()), ...)`
return(env$`.tbl`)
}
}
@ -1206,7 +1211,13 @@ try_colour <- function(..., before, after, collapse = " ") {
}
}
is_dark <- function() {
if (is.null(AMR_env$is_dark_theme)) {
if (is.null(AMR_env$is_dark_theme) ||
is.null(AMR_env$current_theme) ||
(
!is.null(AMR_env$current_theme) &&
AMR_env$current_theme != tryCatch(getExportedValue("getThemeInfo", ns = asNamespace("rstudioapi"))()$editor, error = function(e) "")
)) {
AMR_env$current_theme <- tryCatch(getExportedValue("getThemeInfo", ns = asNamespace("rstudioapi"))()$editor, error = function(e) NULL)
AMR_env$is_dark_theme <- !has_colour() || tryCatch(isTRUE(getExportedValue("getThemeInfo", ns = asNamespace("rstudioapi"))()$dark), error = function(e) FALSE)
}
isTRUE(AMR_env$is_dark_theme)

View File

@ -121,7 +121,7 @@ as.disk <- function(x, na.rm = FALSE) {
cur_col <- get_current_column()
warning_("in `as.disk()`: ", na_after - na_before, " result",
ifelse(na_after - na_before > 1, "s", ""),
ifelse(is.null(cur_col), "", paste0(" in column '", cur_col, "'")),
ifelse(is.null(cur_col), "", paste0(" in index '", cur_col, "'")),
" truncated (",
round(((na_after - na_before) / length(x)) * 100),
"%) that were invalid disk zones: ",

View File

@ -250,7 +250,7 @@ as.mic <- function(x, na.rm = FALSE, keep_operators = "all") {
cur_col <- get_current_column()
warning_("in `as.mic()`: ", na_after - na_before, " result",
ifelse(na_after - na_before > 1, "s", ""),
ifelse(is.null(cur_col), "", paste0(" in column '", cur_col, "'")),
ifelse(is.null(cur_col), "", paste0(" in index '", cur_col, "'")),
" truncated (",
round(((na_after - na_before) / length(x)) * 100),
"%) that were invalid MICs: ",

View File

@ -539,7 +539,7 @@ as.sir.default <- function(x,
cur_col <- get_current_column()
warning_("in `as.sir()`: ", na_after - na_before, " result",
ifelse(na_after - na_before > 1, "s", ""),
ifelse(is.null(cur_col), "", paste0(" in column '", cur_col, "'")),
ifelse(is.null(cur_col), "", paste0(" in index '", cur_col, "'")),
" truncated (",
round(((na_after - na_before) / length(x)) * 100),
"%) that were invalid antimicrobial interpretations: ",

View File

@ -1,6 +1,6 @@
This knowledge base contains all context you must know about the AMR package for R. You are a GPT trained to be an assistant for the AMR package in R. You are an incredible R specialist, especially trained in this package and in the tidyverse.
First and foremost, you are trained on version 2.1.1.9163. Remember this whenever someone asks which AMR package version youre at.
First and foremost, you are trained on version 2.1.1.9182. Remember this whenever someone asks which AMR package version you're at.
Below are the contents of the file, the file, and all the files (documentation) in the package. Every file content is split using 100 hyphens.
----------------------------------------------------------------------------------------------------
@ -9083,8 +9083,8 @@ We begin by loading the required libraries and preparing the `example_isolates`
```{r}
# Load required libraries
library(tidymodels) # For machine learning workflows, and data manipulation (dplyr, tidyr, ...)
library(AMR) # For AMR data analysis
library(tidymodels) # For machine learning workflows, and data manipulation (dplyr, tidyr, ...)
# Select relevant columns for prediction
data <- example_isolates %>%
@ -9122,12 +9122,18 @@ resistance_recipe <- recipe(mo ~ ., data = data) %>%
resistance_recipe
```
For a recipe that includes at least one preprocessing operation, like we have with `step_corr()`, the necessary parameters can be estimated from a training set using `prep()`:
```{r}
prep(resistance_recipe)
```
**Explanation:**
- `recipe(mo ~ ., data = data)` will take the `mo` column as outcome and all other columns as predictors.
- `step_corr()` removes predictors (i.e., antibiotic columns) that have a higher correlation than 90%.
Notice how the recipe contains just the antibiotic selector functions - no need to define the columns specifically.
Notice how the recipe contains just the antibiotic selector functions - no need to define the columns specifically. In the preparation (retrieved with `prep()`) we can see that the columns or variables `r paste0("'", suppressMessages(prep(resistance_recipe))$steps[[1]]$removals, "'", collapse = " and ")` were removed as they correlate too much with existing, other variables.
#### 2. Specifying the Model
@ -9154,6 +9160,7 @@ We bundle the recipe and model together into a `workflow`, which organizes the e
resistance_workflow <- workflow() %>%
add_recipe(resistance_recipe) %>% # Add the preprocessing recipe
add_model(logistic_model) # Add the logistic regression model
resistance_workflow
```
### **Training and Evaluating the Model**

View File

@ -45,17 +45,47 @@ expect_equal(AMR:::trimws2(" test "), "test")
expect_equal(AMR:::trimws2(" test ", "l"), "test ")
expect_equal(AMR:::trimws2(" test ", "r"), " test")
# expect_warning(AMR:::generate_warning_abs_missing(c("AMP", "AMX")))
# expect_warning(AMR:::generate_warning_abs_missing(c("AMP", "AMX"), any = TRUE))
# expect_warning(AMR:::get_column_abx(example_isolates, hard_dependencies = "FUS"))
expect_message(AMR:::get_column_abx(example_isolates, soft_dependencies = "FUS"))
if (AMR:::pkg_is_available("dplyr", min_version = "1.0.0", also_load = TRUE)) {
# expect_warning(AMR:::get_column_abx(rename(example_isolates, thisone = AMX), amox = "thisone", tmp = "thisone", verbose = TRUE))
# expect_warning(AMR:::get_column_abx(rename(example_isolates, thisone = AMX), amox = "thisone", tmp = "thisone", verbose = FALSE))
}
# we rely on "grouped_tbl" being a class of grouped tibbles, so run a test that checks for this:
if (AMR:::pkg_is_available("dplyr", min_version = "1.0.0", also_load = TRUE)) {
expect_true(AMR:::is_null_or_grouped_tbl(example_isolates %>% group_by(ward)))
}
# test get_current_data() ----
is_right <- FALSE
# Test helper: records whether any active call frame holds a data frame under
# the name given in `check_element`.
#
# @param check_element Character name of the object to look up in every frame
#   that is on the call stack at the moment this helper is called.
# @param return_val Value handed back to the caller unchanged (default 0).
# @return `return_val`. The outcome of the search is not returned; it is
#   written to the `is_right` flag of the enclosing scope through `<<-`.
check_df <- function(check_element, return_val = 0) {
  # take the snapshot of the call stack before anything else, so that the
  # frames created by vapply() below never become part of the search
  active_frames <- sys.frames()
  # always start from a clean flag, so a previous hit cannot leak through
  is_right <<- FALSE
  holds_df <- vapply(
    FUN.VALUE = logical(1),
    active_frames,
    function(frame) !is.null(frame[[check_element]]) && is.data.frame(frame[[check_element]])
  )
  if (any(holds_df)) {
    is_right <<- TRUE
  }
  return_val
}
# Each call below puts check_df() in the position where an AMR selector would normally be used.
# check_df() returns 0 by default, so the selection result itself is irrelevant; what is tested
# is the `is_right` flag, which tells whether a data frame with the given name was present in
# one of the calling frames. The order matters: every check_df() call resets `is_right`.

# base R, columns only: a data frame named `x` must be present in a calling frame
df <- example_isolates[, check_df("x")]
expect_true(is_right, info = "the environmental data cannot be found for base/x")
# base R, rows and columns: same expectation for `x`
df <- example_isolates[c(1:3), check_df("x")]
expect_true(is_right, info = "the environmental data cannot be found for base/x")
if (AMR:::pkg_is_available("dplyr", min_version = "1.0.0", also_load = TRUE)) {
# negative control: a made-up name must not be found in any frame
df <- example_isolates %>% select(mo, check_df("data123"))
expect_false(is_right, info = "just a check if non-sense is not being gathered by get_current_data()")
# dplyr::select(): the data frame is expected under the name `.data`
df <- example_isolates %>% select(mo, check_df(".data"))
expect_true(is_right, info = "the environmental data cannot be found for dplyr/select()")
# scoped dplyr variant select_at(): the data frame is expected under the name `.tbl`
df <- example_isolates %>% select_at(check_df(".tbl"))
expect_true(is_right, info = "the environmental data cannot be found for dplyr/select_at()")
}
if (AMR:::pkg_is_available("tidymodels", also_load = TRUE)) {
# tidymodels: while prep() runs, the data frame is expected under the name `training`
resistance_recipe <- recipe(mo ~ ., data = example_isolates) %>%
step_corr(check_df("training")) %>%
prep()
expect_true(is_right, info = "the environmental data cannot be found for tidymodels/prep()")
}

View File

@ -42,8 +42,8 @@ We begin by loading the required libraries and preparing the `example_isolates`
```{r}
# Load required libraries
library(tidymodels) # For machine learning workflows, and data manipulation (dplyr, tidyr, ...)
library(AMR) # For AMR data analysis
library(tidymodels) # For machine learning workflows, and data manipulation (dplyr, tidyr, ...)
# Select relevant columns for prediction
data <- example_isolates %>%
@ -81,12 +81,18 @@ resistance_recipe <- recipe(mo ~ ., data = data) %>%
resistance_recipe
```
For a recipe that includes at least one preprocessing operation, like we have with `step_corr()`, the necessary parameters can be estimated from a training set using `prep()`:
```{r}
prep(resistance_recipe)
```
**Explanation:**
- `recipe(mo ~ ., data = data)` will take the `mo` column as outcome and all other columns as predictors.
- `step_corr()` removes predictors (i.e., antibiotic columns) that have a higher correlation than 90%.
Notice how the recipe contains just the antibiotic selector functions - no need to define the columns specifically.
Notice how the recipe contains just the antibiotic selector functions - no need to define the columns specifically. In the preparation (retrieved with `prep()`) we can see that the columns or variables `r paste0("'", suppressMessages(prep(resistance_recipe))$steps[[1]]$removals, "'", collapse = " and ")` were removed as they correlate too much with existing, other variables.
#### 2. Specifying the Model
@ -113,6 +119,7 @@ We bundle the recipe and model together into a `workflow`, which organizes the e
resistance_workflow <- workflow() %>%
add_recipe(resistance_recipe) %>% # Add the preprocessing recipe
add_model(logistic_model) # Add the logistic regression model
resistance_workflow
```
### **Training and Evaluating the Model**