mirror of
https://github.com/msberends/AMR.git
synced 2025-04-19 08:33:49 +02:00
(v2.1.1.9182) fix AMR selectors for tidymodels, add unit tests
This commit is contained in:
parent
b85890449d
commit
9a9468fa84
8
.github/ISSUE_TEMPLATE/1-bug-report.yml
vendored
8
.github/ISSUE_TEMPLATE/1-bug-report.yml
vendored
@ -20,16 +20,12 @@ body:
|
||||
id: version
|
||||
attributes:
|
||||
label: AMR Package Version
|
||||
description: |
|
||||
What version of the AMR package are you running? You can retrieve this by running in R:
|
||||
|
||||
```r
|
||||
packageVersion("AMR")
|
||||
```
|
||||
description: Which version of the AMR package are you running? You can retrieve this by running `packageVersion("AMR")` in R. If you are not running any of these versions, then please update first and check whether the bug still persists.
|
||||
multiple: false
|
||||
options:
|
||||
- Latest CRAN version (2.1.1)
|
||||
- One of the latest GitHub versions (2.1.1.9xxx)
|
||||
default: 0
|
||||
validations:
|
||||
required: true
|
||||
- type: checkboxes
|
||||
|
4
.github/ISSUE_TEMPLATE/2-feature-request.yml
vendored
4
.github/ISSUE_TEMPLATE/2-feature-request.yml
vendored
@ -1,5 +1,5 @@
|
||||
name: Feature or Optimisation Request
|
||||
description: I have a suggestion.
|
||||
description: I have an idea!
|
||||
labels: "enhancement"
|
||||
body:
|
||||
- type: markdown
|
||||
@ -7,7 +7,7 @@ body:
|
||||
value: |
|
||||
Thanks for taking the time to make a suggestion!
|
||||
|
||||
We'll be happy to implement on a short notice if this improves the AMR package. Do note that BY FAR most of the content of the current version is because of collaborators like you!
|
||||
We'll be happy to implement it at short notice if it improves the AMR package. Do note that by far most of the content of the current version exists thanks to collaborators like you! So, many thanks in advance.
|
||||
- type: textarea
|
||||
id: description
|
||||
attributes:
|
||||
|
@ -1,6 +1,6 @@
|
||||
Package: AMR
|
||||
Version: 2.1.1.9163
|
||||
Date: 2025-02-27
|
||||
Version: 2.1.1.9182
|
||||
Date: 2025-03-03
|
||||
Title: Antimicrobial Resistance Data Analysis
|
||||
Description: Functions to simplify and standardise antimicrobial resistance (AMR)
|
||||
data analysis and to work with microbial and antimicrobial properties by
|
||||
|
2
NEWS.md
2
NEWS.md
@ -1,4 +1,4 @@
|
||||
# AMR 2.1.1.9163
|
||||
# AMR 2.1.1.9182
|
||||
|
||||
*(this beta version will eventually become v3.0. We're happy to reach a new major milestone soon, which will be all about the new One Health support! Install this beta using [the instructions here](https://msberends.github.io/AMR/#latest-development-version).)*
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
Metadata-Version: 2.2
|
||||
Name: AMR
|
||||
Version: 2.1.1.9163
|
||||
Version: 2.1.1.9182
|
||||
Summary: A Python wrapper for the AMR R package
|
||||
Home-page: https://github.com/msberends/AMR
|
||||
Author: Matthijs Berends
|
||||
|
BIN
PythonPackage/AMR/dist/amr-2.1.1.9163.tar.gz
vendored
BIN
PythonPackage/AMR/dist/amr-2.1.1.9163.tar.gz
vendored
Binary file not shown.
Binary file not shown.
BIN
PythonPackage/AMR/dist/amr-2.1.1.9182.tar.gz
vendored
Normal file
BIN
PythonPackage/AMR/dist/amr-2.1.1.9182.tar.gz
vendored
Normal file
Binary file not shown.
@ -2,7 +2,7 @@ from setuptools import setup, find_packages
|
||||
|
||||
setup(
|
||||
name='AMR',
|
||||
version='2.1.1.9163',
|
||||
version='2.1.1.9182',
|
||||
packages=find_packages(),
|
||||
install_requires=[
|
||||
'rpy2',
|
||||
|
@ -989,6 +989,8 @@ ascertain_sir_classes <- function(x, obj_name) {
|
||||
}
|
||||
|
||||
get_current_data <- function(arg_name, call) {
|
||||
# This function enables AMR selectors (e.g., AMR::carbapenems()) to work seamlessly across different environments, including dplyr, base R, data.table, and tidymodels.
|
||||
# It identifies and extracts the appropriate data frame from the current execution context.
|
||||
valid_df <- function(x) {
|
||||
!is.null(x) && is.data.frame(x)
|
||||
}
|
||||
@ -1014,14 +1016,17 @@ get_current_data <- function(arg_name, call) {
|
||||
}
|
||||
}
|
||||
|
||||
# now go over all underlying environments looking for other dplyr, data.table and base R selection environments
|
||||
# now go over all underlying environments looking for other dplyr, tidymodels, data.table and base R selection environments
|
||||
with_generic <- vapply(FUN.VALUE = logical(1), frms, function(e) !is.null(e$`.Generic`))
|
||||
for (env in frms[which(with_generic)]) {
|
||||
if (valid_df(env$`.data`)) {
|
||||
# an element `.data` will be in the environment when using dplyr::select()
|
||||
return(env$`.data`)
|
||||
} else if (valid_df(env$training)) {
|
||||
# an element `training` will be in the environment when using some tidymodels functions such as `prep()`
|
||||
return(env$training)
|
||||
} else if (valid_df(env$data)) {
|
||||
# an element `data` will be in the environment when using older dplyr versions, or tidymodels
|
||||
# an element `data` will be in the environment when using older dplyr versions, or some tidymodels functions such as `fit()`
|
||||
return(env$data)
|
||||
} else if (valid_df(env$xx)) {
|
||||
# an element `xx` will be in the environment for rows + cols in base R, e.g. `example_isolates[c(1:3), carbapenems()]`
|
||||
@ -1038,7 +1043,7 @@ get_current_data <- function(arg_name, call) {
|
||||
for (env in frms[which(with_tbl)]) {
|
||||
if (!is.null(names(env)) && all(c(".tbl", ".vars", ".cols") %in% names(env), na.rm = TRUE)) {
|
||||
# an element `.tbl` will be in the environment when using scoped dplyr variants, with or without `dplyr::vars()`
|
||||
# (e.g. `dplyr::summarise_at()` or `dplyr::mutate_at()`)
|
||||
# e.g. `dplyr::summarise_at(carbapenems(), ...)` or `dplyr::mutate_at(vars(carbapenems()), ...)`
|
||||
return(env$`.tbl`)
|
||||
}
|
||||
}
|
||||
@ -1206,7 +1211,13 @@ try_colour <- function(..., before, after, collapse = " ") {
|
||||
}
|
||||
}
|
||||
is_dark <- function() {
|
||||
if (is.null(AMR_env$is_dark_theme)) {
|
||||
if (is.null(AMR_env$is_dark_theme) ||
|
||||
is.null(AMR_env$current_theme) ||
|
||||
(
|
||||
!is.null(AMR_env$current_theme) &&
|
||||
AMR_env$current_theme != tryCatch(getExportedValue("getThemeInfo", ns = asNamespace("rstudioapi"))()$editor, error = function(e) "")
|
||||
)) {
|
||||
AMR_env$current_theme <- tryCatch(getExportedValue("getThemeInfo", ns = asNamespace("rstudioapi"))()$editor, error = function(e) NULL)
|
||||
AMR_env$is_dark_theme <- !has_colour() || tryCatch(isTRUE(getExportedValue("getThemeInfo", ns = asNamespace("rstudioapi"))()$dark), error = function(e) FALSE)
|
||||
}
|
||||
isTRUE(AMR_env$is_dark_theme)
|
||||
|
2
R/disk.R
2
R/disk.R
@ -121,7 +121,7 @@ as.disk <- function(x, na.rm = FALSE) {
|
||||
cur_col <- get_current_column()
|
||||
warning_("in `as.disk()`: ", na_after - na_before, " result",
|
||||
ifelse(na_after - na_before > 1, "s", ""),
|
||||
ifelse(is.null(cur_col), "", paste0(" in column '", cur_col, "'")),
|
||||
ifelse(is.null(cur_col), "", paste0(" in index '", cur_col, "'")),
|
||||
" truncated (",
|
||||
round(((na_after - na_before) / length(x)) * 100),
|
||||
"%) that were invalid disk zones: ",
|
||||
|
2
R/mic.R
2
R/mic.R
@ -250,7 +250,7 @@ as.mic <- function(x, na.rm = FALSE, keep_operators = "all") {
|
||||
cur_col <- get_current_column()
|
||||
warning_("in `as.mic()`: ", na_after - na_before, " result",
|
||||
ifelse(na_after - na_before > 1, "s", ""),
|
||||
ifelse(is.null(cur_col), "", paste0(" in column '", cur_col, "'")),
|
||||
ifelse(is.null(cur_col), "", paste0(" in index '", cur_col, "'")),
|
||||
" truncated (",
|
||||
round(((na_after - na_before) / length(x)) * 100),
|
||||
"%) that were invalid MICs: ",
|
||||
|
2
R/sir.R
2
R/sir.R
@ -539,7 +539,7 @@ as.sir.default <- function(x,
|
||||
cur_col <- get_current_column()
|
||||
warning_("in `as.sir()`: ", na_after - na_before, " result",
|
||||
ifelse(na_after - na_before > 1, "s", ""),
|
||||
ifelse(is.null(cur_col), "", paste0(" in column '", cur_col, "'")),
|
||||
ifelse(is.null(cur_col), "", paste0(" in index '", cur_col, "'")),
|
||||
" truncated (",
|
||||
round(((na_after - na_before) / length(x)) * 100),
|
||||
"%) that were invalid antimicrobial interpretations: ",
|
||||
|
@ -1,6 +1,6 @@
|
||||
This knowledge base contains all context you must know about the AMR package for R. You are a GPT trained to be an assistant for the AMR package in R. You are an incredible R specialist, especially trained in this package and in the tidyverse.
|
||||
|
||||
First and foremost, you are trained on version 2.1.1.9163. Remember this whenever someone asks which AMR package version you’re at.
|
||||
First and foremost, you are trained on version 2.1.1.9182. Remember this whenever someone asks which AMR package version you’re at.
|
||||
|
||||
Below are the contents of the file, the file, and all the files (documentation) in the package. The contents of each file are separated by 100 hyphens.
|
||||
----------------------------------------------------------------------------------------------------
|
||||
@ -9083,8 +9083,8 @@ We begin by loading the required libraries and preparing the `example_isolates`
|
||||
|
||||
```{r}
|
||||
# Load required libraries
|
||||
library(tidymodels) # For machine learning workflows, and data manipulation (dplyr, tidyr, ...)
|
||||
library(AMR) # For AMR data analysis
|
||||
library(tidymodels) # For machine learning workflows, and data manipulation (dplyr, tidyr, ...)
|
||||
|
||||
# Select relevant columns for prediction
|
||||
data <- example_isolates %>%
|
||||
@ -9122,12 +9122,18 @@ resistance_recipe <- recipe(mo ~ ., data = data) %>%
|
||||
resistance_recipe
|
||||
```
|
||||
|
||||
For a recipe that includes at least one preprocessing operation, like we have with `step_corr()`, the necessary parameters can be estimated from a training set using `prep()`:
|
||||
|
||||
```{r}
|
||||
prep(resistance_recipe)
|
||||
```
|
||||
|
||||
**Explanation:**
|
||||
|
||||
- `recipe(mo ~ ., data = data)` will take the `mo` column as outcome and all other columns as predictors.
|
||||
- `step_corr()` removes predictors (i.e., antibiotic columns) that have a higher correlation than 90%.
|
||||
|
||||
Notice how the recipe contains just the antibiotic selector functions - no need to define the columns specifically.
|
||||
Notice how the recipe contains just the antibiotic selector functions - no need to define the columns specifically. In the prepared recipe (retrieved with `prep()`) we can see that the columns or variables `r paste0("'", suppressMessages(prep(resistance_recipe))$steps[[1]]$removals, "'", collapse = " and ")` were removed, as they correlate too strongly with other, existing variables.
|
||||
|
||||
#### 2. Specifying the Model
|
||||
|
||||
@ -9154,6 +9160,7 @@ We bundle the recipe and model together into a `workflow`, which organizes the e
|
||||
resistance_workflow <- workflow() %>%
|
||||
add_recipe(resistance_recipe) %>% # Add the preprocessing recipe
|
||||
add_model(logistic_model) # Add the logistic regression model
|
||||
resistance_workflow
|
||||
```
|
||||
|
||||
### **Training and Evaluating the Model**
|
@ -45,17 +45,47 @@ expect_equal(AMR:::trimws2(" test "), "test")
|
||||
expect_equal(AMR:::trimws2(" test ", "l"), "test ")
|
||||
expect_equal(AMR:::trimws2(" test ", "r"), " test")
|
||||
|
||||
# expect_warning(AMR:::generate_warning_abs_missing(c("AMP", "AMX")))
|
||||
# expect_warning(AMR:::generate_warning_abs_missing(c("AMP", "AMX"), any = TRUE))
|
||||
# expect_warning(AMR:::get_column_abx(example_isolates, hard_dependencies = "FUS"))
|
||||
expect_message(AMR:::get_column_abx(example_isolates, soft_dependencies = "FUS"))
|
||||
|
||||
if (AMR:::pkg_is_available("dplyr", min_version = "1.0.0", also_load = TRUE)) {
|
||||
# expect_warning(AMR:::get_column_abx(rename(example_isolates, thisone = AMX), amox = "thisone", tmp = "thisone", verbose = TRUE))
|
||||
# expect_warning(AMR:::get_column_abx(rename(example_isolates, thisone = AMX), amox = "thisone", tmp = "thisone", verbose = FALSE))
|
||||
}
|
||||
|
||||
# we rely on "grouped_tbl" being a class of grouped tibbles, so run a test that checks for this:
|
||||
if (AMR:::pkg_is_available("dplyr", min_version = "1.0.0", also_load = TRUE)) {
|
||||
expect_true(AMR:::is_null_or_grouped_tbl(example_isolates %>% group_by(ward)))
|
||||
}
|
||||
|
||||
|
||||
# test get_current_data() ----
|
||||
|
||||
is_right <- FALSE
|
||||
check_df <- function(check_element, return_val = 0) {
|
||||
is_right <<- FALSE
|
||||
for (env in sys.frames()) {
|
||||
if (!is.null(env[[check_element]]) && is.data.frame(env[[check_element]])) {
|
||||
is_right <<- TRUE
|
||||
}
|
||||
}
|
||||
return_val
|
||||
}
|
||||
|
||||
df <- example_isolates[, check_df("x")]
|
||||
expect_true(is_right, info = "the environmental data cannot be found for base/x")
|
||||
|
||||
df <- example_isolates[c(1:3), check_df("x")]
|
||||
expect_true(is_right, info = "the environmental data cannot be found for base/x")
|
||||
|
||||
if (AMR:::pkg_is_available("dplyr", min_version = "1.0.0", also_load = TRUE)) {
|
||||
df <- example_isolates %>% select(mo, check_df("data123"))
|
||||
expect_false(is_right, info = "just a check if non-sense is not being gathered by get_current_data()")
|
||||
|
||||
df <- example_isolates %>% select(mo, check_df(".data"))
|
||||
expect_true(is_right, info = "the environmental data cannot be found for dplyr/select()")
|
||||
|
||||
df <- example_isolates %>% select_at(check_df(".tbl"))
|
||||
expect_true(is_right, info = "the environmental data cannot be found for dplyr/select_at()")
|
||||
}
|
||||
|
||||
if (AMR:::pkg_is_available("tidymodels", also_load = TRUE)) {
|
||||
resistance_recipe <- recipe(mo ~ ., data = example_isolates) %>%
|
||||
step_corr(check_df("training")) %>%
|
||||
prep()
|
||||
expect_true(is_right, info = "the environmental data cannot be found for tidymodels/prep()")
|
||||
}
|
||||
|
@ -42,8 +42,8 @@ We begin by loading the required libraries and preparing the `example_isolates`
|
||||
|
||||
```{r}
|
||||
# Load required libraries
|
||||
library(tidymodels) # For machine learning workflows, and data manipulation (dplyr, tidyr, ...)
|
||||
library(AMR) # For AMR data analysis
|
||||
library(tidymodels) # For machine learning workflows, and data manipulation (dplyr, tidyr, ...)
|
||||
|
||||
# Select relevant columns for prediction
|
||||
data <- example_isolates %>%
|
||||
@ -81,12 +81,18 @@ resistance_recipe <- recipe(mo ~ ., data = data) %>%
|
||||
resistance_recipe
|
||||
```
|
||||
|
||||
For a recipe that includes at least one preprocessing operation, like we have with `step_corr()`, the necessary parameters can be estimated from a training set using `prep()`:
|
||||
|
||||
```{r}
|
||||
prep(resistance_recipe)
|
||||
```
|
||||
|
||||
**Explanation:**
|
||||
|
||||
- `recipe(mo ~ ., data = data)` will take the `mo` column as outcome and all other columns as predictors.
|
||||
- `step_corr()` removes predictors (i.e., antibiotic columns) that have a higher correlation than 90%.
|
||||
|
||||
Notice how the recipe contains just the antibiotic selector functions - no need to define the columns specifically.
|
||||
Notice how the recipe contains just the antibiotic selector functions - no need to define the columns specifically. In the prepared recipe (retrieved with `prep()`) we can see that the columns or variables `r paste0("'", suppressMessages(prep(resistance_recipe))$steps[[1]]$removals, "'", collapse = " and ")` were removed, as they correlate too strongly with other, existing variables.
|
||||
|
||||
#### 2. Specifying the Model
|
||||
|
||||
@ -113,6 +119,7 @@ We bundle the recipe and model together into a `workflow`, which organizes the e
|
||||
resistance_workflow <- workflow() %>%
|
||||
add_recipe(resistance_recipe) %>% # Add the preprocessing recipe
|
||||
add_model(logistic_model) # Add the logistic regression model
|
||||
resistance_workflow
|
||||
```
|
||||
|
||||
### **Training and Evaluating the Model**
|
||||
|
Loading…
x
Reference in New Issue
Block a user