pm update, unit test fix?

dr. M.S. (Matthijs) Berends 2023-02-08 13:48:06 +01:00
parent 4a54d59f70
commit 822e9de82c
13 changed files with 2118 additions and 720 deletions


@@ -1,6 +1,6 @@
Package: AMR
Version: 1.8.2.9109
Date: 2023-02-06
Version: 1.8.2.9110
Date: 2023-02-08
Title: Antimicrobial Resistance Data Analysis
Description: Functions to simplify and standardise antimicrobial resistance (AMR)
data analysis and to work with microbial and antimicrobial properties by


@@ -1,4 +1,4 @@
# AMR 1.8.2.9109
# AMR 1.8.2.9110
*(this beta version will eventually become v2.0! We're happy to reach a new major milestone soon!)*

R/aa_helper_pm_functions.R (1741 changed lines; Executable file → Normal file)

File diff suppressed because it is too large


@@ -1425,13 +1425,15 @@ case_when <- function(...) {
}
# dplyr implementations ----
# dplyr/tidyr implementations ----
# take {dplyr} functions if available, and the slower {poorman} functions otherwise
if (pkg_is_available("dplyr", also_load = FALSE)) {
# take {dplyr} and {tidyr} functions if available, and the slower {poorman} functions otherwise
if (pkg_is_available("dplyr", min_version = "1.0.0", also_load = FALSE)) {
`%>%` <- import_fn("%>%", "dplyr", error_on_fail = FALSE)
across <- import_fn("across", "dplyr", error_on_fail = FALSE)
anti_join <- import_fn("anti_join", "dplyr", error_on_fail = FALSE)
arrange <- import_fn("arrange", "dplyr", error_on_fail = FALSE)
bind_rows <- import_fn("bind_rows", "dplyr", error_on_fail = FALSE)
count <- import_fn("count", "dplyr", error_on_fail = FALSE)
desc <- import_fn("desc", "dplyr", error_on_fail = FALSE)
distinct <- import_fn("distinct", "dplyr", error_on_fail = FALSE)
@@ -1443,22 +1445,22 @@ if (pkg_is_available("dplyr", also_load = FALSE)) {
inner_join <- import_fn("inner_join", "dplyr", error_on_fail = FALSE)
lag <- import_fn("lag", "dplyr", error_on_fail = FALSE)
left_join <- import_fn("left_join", "dplyr", error_on_fail = FALSE)
mutate <- import_fn("mutate", "dplyr", error_on_fail = FALSE)
n_distinct <- import_fn("n_distinct", "dplyr", error_on_fail = FALSE)
pull <- import_fn("pull", "dplyr", error_on_fail = FALSE)
rename <- import_fn("rename", "dplyr", error_on_fail = FALSE)
right_join <- import_fn("right_join", "dplyr", error_on_fail = FALSE)
row_number <- import_fn("row_number", "dplyr", error_on_fail = FALSE)
select <- import_fn("select", "dplyr", error_on_fail = FALSE)
semi_join <- import_fn("semi_join", "dplyr", error_on_fail = FALSE)
summarise <- import_fn("summarise", "dplyr", error_on_fail = FALSE)
ungroup <- import_fn("ungroup", "dplyr", error_on_fail = FALSE)
mutate <- import_fn("mutate", "dplyr", error_on_fail = FALSE)
bind_rows <- import_fn("bind_rows", "dplyr", error_on_fail = FALSE)
where <- import_fn("where", "dplyr", error_on_fail = FALSE)
} else {
`%>%` <- `%pm>%`
across <- pm_across
anti_join <- pm_anti_join
arrange <- pm_arrange
bind_rows <- pm_bind_rows
count <- pm_count
desc <- pm_desc
distinct <- pm_distinct
@@ -1470,62 +1472,22 @@ if (pkg_is_available("dplyr", also_load = FALSE)) {
inner_join <- pm_inner_join
lag <- pm_lag
left_join <- pm_left_join
mutate <- pm_mutate
n_distinct <- pm_n_distinct
pull <- pm_pull
rename <- pm_rename
right_join <- pm_right_join
row_number <- pm_row_number
select <- pm_select
semi_join <- pm_semi_join
summarise <- pm_summarise
ungroup <- pm_ungroup
mutate <- function(.data, ...) {
# pm_mutate is buggy, use this simple alternative
dots <- list(...)
for (i in seq_len(length(dots))) {
.data[, names(dots)[i]] <- dots[[i]]
}
.data
}
bind_rows <- function(..., fill = NA) {
# this AMAZING code is from ChatGPT when I asked for a base R dplyr::bind_rows alternative
dfs <- list(...)
all_cols <- unique(unlist(lapply(dfs, colnames)))
mat_list <- lapply(dfs, function(x) {
mat <- matrix(NA, nrow = nrow(x), ncol = length(all_cols))
colnames(mat) <- all_cols
mat[, colnames(x)] <- as.matrix(x)
mat
})
mat <- do.call(rbind, mat_list)
as.data.frame(mat, stringsAsFactors = FALSE)
}
where <- function(fn) {
# adapted from https://github.com/nathaneastwood/poorman/blob/52eb6947e0b4430cd588976ed8820013eddf955f/R/where.R#L17-L32
if (!is.function(fn)) {
stop_("`", deparse(substitute(fn)), "()` is not a valid predicate function.")
}
df <- pm_select_env$.data
cols <- pm_select_env$get_colnames()
if (is.null(df)) {
df <- get_current_data("where", call = FALSE)
cols <- colnames(df)
}
preds <- unlist(lapply(
df,
function(x, fn) {
do.call("fn", list(x))
},
fn
))
if (!is.logical(preds)) stop_("`where()` must be used with functions that return `TRUE` or `FALSE`.")
data_cols <- cols
cols <- data_cols[preds]
which(data_cols %in% cols)
}
where <- pm_where
}
if (pkg_is_available("tidyr", min_version = "1.0.0", also_load = FALSE)) {
pivot_longer <- import_fn("pivot_longer", "tidyr", error_on_fail = FALSE)
} else {
pivot_longer <- pm_pivot_longer
}
# Faster data.table implementations ----
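The block above binds {dplyr}/{tidyr} verbs when those packages are installed at the required minimum versions, and otherwise falls back to the bundled (slower) pm_* poorman ports. A minimal sketch of that conditional-binding pattern, using hypothetical names rather than the package's actual code:

```r
# Sketch only (hypothetical names): bind one local symbol to the fast {dplyr}
# verb when available, otherwise to a crude base-R stand-in, so downstream
# code can call it unconditionally.
distinct_impl <- if (requireNamespace("dplyr", quietly = TRUE)) {
  getExportedValue("dplyr", "distinct")
} else {
  function(.data, ...) unique(.data)  # base R: unique rows of a data frame
}

# callers never need to know which backend was chosen
distinct_impl(data.frame(x = c(1, 1, 2)))
```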


@@ -49,7 +49,6 @@
#' @return (internally) a [character] vector of column names, with additional class `"ab_selector"`
#' @export
#' @inheritSection AMR Reference Data Publicly Available
#' @examples
#' # `example_isolates` is a data set available in the AMR package.
#' # See ?example_isolates.


@@ -85,10 +85,7 @@ bug_drug_combinations <- function(x,
}
# use dplyr and tidyr if they are available, they are much faster!
if (pkg_is_available("dplyr", min_version = "1.0.0", also_load = FALSE) &&
pkg_is_available("tidyr", min_version = "1.0.0", also_load = FALSE)) {
across <- import_fn("across", "dplyr")
pivot_longer <- import_fn("pivot_longer", "tidyr")
if (identical(pivot_longer, import_fn("pivot_longer", "tidyr", error_on_fail = FALSE))) {
out <- x %>%
ungroup() %>%
mutate(mo = FUN(ungroup(x)[, col_mo, drop = TRUE], ...)) %>%
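The rewritten condition above no longer imports across()/pivot_longer() locally; since pivot_longer is now bound package-wide (see the helper block earlier in this commit), the function asks with identical() whether the bound pivot_longer is the {tidyr} export before taking the fast path. A self-contained sketch of that identical() check, with assumed names:

```r
# Sketch only: bind a pivot_longer implementation once, then later test with
# identical() whether the fast {tidyr} implementation is the one in use.
pl <- if (requireNamespace("tidyr", quietly = TRUE)) {
  getExportedValue("tidyr", "pivot_longer")
} else {
  function(data, ...) stop("a slower fallback would be bound here")
}

fast_path <- requireNamespace("tidyr", quietly = TRUE) &&
  identical(pl, getExportedValue("tidyr", "pivot_longer"))
fast_path  # TRUE only when the {tidyr} implementation is bound
```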


@@ -926,7 +926,7 @@ eucast_rules <- function(x,
# Print overview ----------------------------------------------------------
if (isTRUE(info) || isTRUE(verbose)) {
verbose_info <- x.bak %>%
mutate(row = row_number()) %>%
mutate(row = seq_len(NROW(x.bak))) %>%
select(`.rowid`, row) %>%
right_join(verbose_info,
by = c(".rowid" = "rowid")

data-raw/antibiograms.Rmd (new file, 62 lines)

@@ -0,0 +1,62 @@
---
title: "Generating antibiograms with the AMR package"
author: "AMR package developers"
date: "`r Sys.Date()`"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE, message = FALSE)
library(AMR)
```
This is an example R Markdown file to show the use of `antibiogram()` of the AMR package.
For starters, this is what our `example_isolates` data set looks like:
```{r}
example_isolates
```
### Traditional Antibiogram
```{r trad}
print(
antibiogram(example_isolates,
antibiotics = c(aminoglycosides(), carbapenems()))
)
```
### Combined Antibiogram
```{r comb}
print(
antibiogram(example_isolates,
antibiotics = c("TZP", "TZP+TOB", "TZP+GEN"))
)
```
### Syndromic Antibiogram
```{r synd}
print(
antibiogram(example_isolates,
antibiotics = c(aminoglycosides(), carbapenems()),
syndromic_group = "ward")
)
```
### Weighted-Incidence Syndromic Combination Antibiogram (WISCA)
```{r wisca}
print(
antibiogram(example_isolates,
antibiotics = c("AMC", "AMC+CIP", "TZP", "TZP+TOB"),
mo_transform = "gramstain",
minimum = 10, # this should be >= 30, but now just as example
syndromic_group = ifelse(example_isolates$age >= 65 &
example_isolates$gender == "M",
"WISCA Group 1", "WISCA Group 2"))
)
```

data-raw/antibiograms.html (new file, 848 lines)

File diff suppressed because one or more lines are too long
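The HTML file above is presumably the rendered output of the new data-raw/antibiograms.Rmd. Assuming {rmarkdown} and pandoc are available, it can be regenerated from the repository root with a single call:

```r
# writes data-raw/antibiograms.html next to the .Rmd (assumes pandoc is on PATH)
rmarkdown::render("data-raw/antibiograms.Rmd")
```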


@@ -32,11 +32,11 @@
# Source file: data-raw/reproduction_of_poorman.R
# ------------------------------------------------
# poorman: a package to replace all dplyr functions with base R so we can lose dependency on dplyr.
# {poorman}: a package to replace all dplyr functions with base R so we can lose dependency on {dplyr}.
# These functions were downloaded from https://github.com/nathaneastwood/poorman,
# from this commit: https://github.com/nathaneastwood/poorman/tree/{commit}.
#
# All functions are prefixed with 'pm_' to make it obvious that they are dplyr substitutes.
# All functions are prefixed with 'pm_' to make it obvious that they are {dplyr} substitutes.
#
# All code below was released under MIT license, that permits 'free of charge, to any person obtaining a
# copy of the software and associated documentation files (the "Software"), to deal in the Software
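As this header explains, every vendored verb carries a pm_ prefix so its origin stays obvious. Purely as an illustration of what that substitution means (not code from the file), the following spellings of a simple column selection are interchangeable:

```r
# illustrative only: three ways to select the same two columns
mtcars[, c("mpg", "cyl"), drop = FALSE]  # plain base R
if (requireNamespace("dplyr", quietly = TRUE)) {
  dplyr::select(mtcars, mpg, cyl)        # {dplyr}, when installed
}
# pm_select(mtcars, mpg, cyl)            # the vendored port in R/aa_helper_pm_functions.R
```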


@@ -1,22 +1,28 @@
# get complete filenames of all R files in the GitHub repository of nathaneastwood/poorman
commit <- "52eb6947e0b4430cd588976ed8820013eddf955f"
library(magrittr)
commit <- "3cc0a9920b1eb559dd166f548561244189586b3a"
files <- xml2::read_html(paste0("https://github.com/nathaneastwood/poorman/tree/", commit, "/R")) %>%
rvest::html_nodes("a") %>%
rvest::html_attr("href")
files <- files[files %like% "/blob/.*R$"]
# get full URLs of all raw R files
files <- sort(paste0("https://raw.githubusercontent.com", gsub("blob/", "", files[files %like% "/R/.*.R$"])))
# remove files with only pkg specific code
files <- files[files %unlike% "(zzz|init)[.]R$"]
# also, there's a lot of functions we don't use
files <- files[files %unlike% "(slice|glimpse|recode|replace_na|coalesce)[.]R$"]
files <- files[files %unlike% "/(between|coalesce|cumulative|fill|glimpse|gluestick|group_cols|na_if|near|nest_by|poorman-package|print|recode|reconstruct|replace_na|replace_with|rownames|slice|union_all|unite|window_rank|with_groups)[.]R$"]
# add our prepend file, containing info about the source of the data
intro <- readLines("data-raw/poorman_prepend.R")
intro <- readLines("data-raw/poorman_prepend.R") %>%
# add commit to intro part
gsub("{commit}", commit, ., fixed = TRUE) %>%
# add date to intro part
gsub("{date}", trimws(format(Sys.Date(), "%e %B %Y")), ., fixed = TRUE)
# copyright info:
copyright <- paste0("# ", readLines("https://raw.githubusercontent.com/nathaneastwood/poorman/master/LICENSE"))
copyright <- paste0("# ", readLines(paste0("https://raw.githubusercontent.com/nathaneastwood/poorman/", commit, "/LICENSE")))
# read all contents to a character vector
contents <- character(0)
@@ -25,31 +31,35 @@ sapply(files, function(file) {
contents <<- c(contents, readLines(file))
invisible()
})
contents <- c(
intro,
copyright,
"",
contents
)
# remove lines starting with "#'" and NULL and write to file
contents <- contents[!grepl("^(#'|NULL|\"_PACKAGE)", contents)]
contents.bak <- contents
# grouped attributes same as dplyr
contents <- gsub("grouped_data", "grouped_df", contents, fixed = TRUE)
# now make it independent on UseMethod, since we will not export these functions
contents <- gsub('UseMethod[(]"(.*?)"[)]',
'if ("grouped_data" %in% class(.data)) {||| \\1.grouped_data(.data, ...)||| } else {||| \\1.default(.data, ...)||| }',
paste(contents, collapse = "|||"),
perl = TRUE
) %>%
# add commit to intro part
gsub("{commit}", commit, ., fixed = TRUE) %>%
# add date to intro part
gsub("{date}", format(Sys.Date(), "%e %B %Y"), ., fixed = TRUE) %>%
strsplit(split = "|||", fixed = TRUE) %>%
unlist() %>%
# add "pm_" as prefix to all functions
gsub("^([a-z_.]+) <- function", "pm_\\1 <- function", .)
has_usemethods <- gsub("^([a-z_]+).*", "\\1", contents[which(contents %like% "usemethod") - 1])
for (use in has_usemethods) {
relevant_row <- which(contents %like% paste0("^", use, " <- function")) + 1
function_call <- trimws(gsub(".*function(.*)\\{.*", "\\1", contents[relevant_row - 1]))
function_call1 <- trimws(gsub("[()]", "", strsplit(function_call, ",")[[1]][1]))
if (any(contents %like% paste0(use, ".grouped_df"))) {
# this function will have methods for data.frame and grouped_df
contents[relevant_row] <- paste0(" if (\"grouped_df\" %in% class(", function_call1, ")) ", use, ".grouped_df", function_call, " else ", use, ".data.frame", function_call)
} else {
# this function will only have data.frame as method
contents[relevant_row] <- paste0(" ", use, ".data.frame", function_call)
}
# add pm_ prefix
contents[relevant_row - 1] <- paste0("pm_", contents[relevant_row - 1])
}
# correct for NextMethod
contents <- gsub("NextMethod\\(\"(.*)\"\\)", "\\1.data.frame(...)", contents)
# correct for 'default' method
contents <- gsub(".default <-", ".data.frame <-", contents, fixed = TRUE)
contents <- gsub("pm_group_by_drop.data.frame", "pm_group_by_drop", contents, fixed = TRUE)
# now get all those pm_* functions to replace all untransformed function name calls as well
new_pm_names <- sort(gsub("pm_(.*?) <-.*", "\\1", contents[grepl("^pm_", contents)]))
for (i in seq_len(length(new_pm_names))) {
@@ -57,29 +67,39 @@ for (i in seq_len(length(new_pm_names))) {
# starting with a space or a straight bracket or an opening parenthesis, ending with nothing or a non-character or a closing parenthesis
contents <- gsub(paste0("( |\\[|\\()", new_pm_names[i], "($|[^a-z]|\\))"), paste0("\\1pm_", new_pm_names[i], "\\2"), contents)
}
# replace %>% with %pm>%
contents[which(contents %like% "^\\|\\|") - 1] <- paste0(contents[which(contents %like% "^\\|\\|") - 1], " ||")
contents[which(contents %like% "^\\|\\|")] <- gsub("^\\|\\|", "", contents[which(contents %like% "^\\|\\|")])
contents <- gsub("%>%", "%pm>%", contents, fixed = TRUE)
# fix for new lines, since n() also existed
contents <- gsub("\\pm_n", "\\n", contents, fixed = TRUE)
# prefix other functions also with "pm_"
contents <- gsub("^([a-z_]+)(\\$|)", "pm_\\1\\2", contents)
# prefix environments
contents <- gsub("eval_env", "pm_eval_env", contents, fixed = TRUE)
contents <- gsub("select_env", "pm_select_env", contents, fixed = TRUE)
contents <- gsub("context", "pm_context", contents, fixed = TRUE)
# prefix environmental objects and functions
contents <- gsub("(eval_env|select_env|select_context|context|dotdotdot|as_symbols|insert_dot|deparse_|groups_set|apply_grouped_function|split_into_groups|calculate_groups|has_groups|eval_select_pos|select_positions|eval_expr|eval_call|add_group_columns|find_used|is_nested|setup_|select_|group_)", "pm_\\1", contents)
# now some items are overprefixed
contents <- gsub("(pm_)+", "pm_", contents)
# special case for pm_distinct(), we need '.keep_all' to work
contents <- gsub("pm_distinct <- function(.data, ..., .keep_all = FALSE)", "pm_distinct <- function(.data, ...)", contents, fixed = TRUE)
# pm_pull does not correct for tibbles, misses the drop argument
contents[contents == ".data[, var]"] <- ".data[, var, drop = TRUE]"
contents <- gsub("pm_if (\"grouped_df", "if (\"grouped_df", contents, fixed = TRUE)
# remove comments and empty lines
contents <- gsub("#.*", "", contents)
contents <- contents[trimws(contents) != ""]
# fix for their relocate()
contents <- gsub("if (!missing(.before))", "if (!missing(.before) && !is.null(.before))", contents, fixed = TRUE)
contents <- gsub("if (!missing(.after))", "if (!missing(.after) && !is.null(.after))", contents, fixed = TRUE)
contents[which(contents %like% "reshape\\($") + 1] <- gsub("data", "as.data.frame(data, stringsAsFactors = FALSE)", contents[which(contents %like% "reshape\\($") + 1])
contents <- gsub('pm_relocate(.data = long, values_to, .after = -1)', 'pm_relocate(.data = long, "value", .after = -1)', contents, fixed = TRUE)
# who needs US spelling?
contents <- contents[!grepl("summarize", contents)]
contents <- contents[contents %unlike% "summarize"]
# add intro
contents <- c(
intro,
copyright,
"",
contents
)
writeLines(contents, "R/aa_helper_pm_functions.R")
# after this, comment out:
# pm_left_join() since we use a faster version
# pm_group_split() since we don't use it and it relies on R 3.5.0 for the use of ...length(), which is hard to support without C++ code
# note: pm_left_join() will be overwritten by aaa_helper_functions.R, which contains a faster implementation
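Most of this script is gsub() surgery on the downloaded sources: definitions and call sites get a pm_ prefix, UseMethod() dispatch is rewritten into explicit calls, and %>% becomes %pm>%. A toy, self-contained illustration of the prefixing step (simplified relative to the script's actual patterns):

```r
# toy example of the renaming idea applied by the script
src <- c(
  "filter <- function(.data, ...) UseMethod(\"filter\")",
  "result <- filter(mtcars, cyl == 4)"
)
# prefix function definitions ...
src <- gsub("^([a-z_.]+) <- function", "pm_\\1 <- function", src)
# ... and call sites of the renamed function
src <- gsub("(^|[^a-z_.])filter\\(", "\\1pm_filter(", src)
src
#> [1] "pm_filter <- function(.data, ...) UseMethod(\"filter\")"
#> [2] "result <- pm_filter(mtcars, cyl == 4)"
```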


@@ -68,7 +68,6 @@ import_functions <- c(
"read_html" = "xml2",
"rename" = "dplyr",
"right_join" = "dplyr",
"row_number" = "dplyr",
"select" = "dplyr",
"semi_join" = "dplyr",
"showQuestion" = "rstudioapi",


@@ -40,11 +40,9 @@ if (tryCatch(isTRUE(AMR:::import_fn("isJob", "rstudioapi")()), error = function(
if (AMR:::pkg_is_available("tinytest", also_load = TRUE)) {
library(AMR)
if (identical(AMR:::import_fn("select", "dplyr"), AMR:::select)) {
print("This test will rely on {dplyr} verbs")
message("This test will rely on {dplyr} verbs")
message("\n\n------------------------------------\nThis test will rely on {dplyr} verbs\n------------------------------------\n\n")
} else {
print("This test will rely on {poorman} verbs")
message("This test will rely on {poorman} verbs")
message("\n\n---------------------------------------------------------------------\nThis test will rely on {poorman} verbs (installed state dplyr: ", AMR:::pkg_is_available("dplyr", also_load = FALSE), ")\n---------------------------------------------------------------------\n\n")
}
# set language
set_AMR_locale("English")