pm update, unit test fix?

dr. M.S. (Matthijs) Berends 2023-02-08 13:48:06 +01:00
parent 4a54d59f70
commit 822e9de82c
13 changed files with 2118 additions and 720 deletions


@@ -1,6 +1,6 @@
Package: AMR
Version: 1.8.2.9109
Date: 2023-02-06
Version: 1.8.2.9110
Date: 2023-02-08
Title: Antimicrobial Resistance Data Analysis
Description: Functions to simplify and standardise antimicrobial resistance (AMR)
data analysis and to work with microbial and antimicrobial properties by


@@ -1,4 +1,4 @@
# AMR 1.8.2.9109
# AMR 1.8.2.9110
*(this beta version will eventually become v2.0! We're happy to reach a new major milestone soon!)*

R/aa_helper_pm_functions.R (1741 changed lines; Executable file → Normal file)

File diff suppressed because it is too large


@@ -1425,13 +1425,15 @@ case_when <- function(...) {
}
# dplyr implementations ----
# dplyr/tidyr implementations ----
# take {dplyr} functions if available, and the slower {poorman} functions otherwise
if (pkg_is_available("dplyr", also_load = FALSE)) {
# take {dplyr} and {tidyr} functions if available, and the slower {poorman} functions otherwise
if (pkg_is_available("dplyr", min_version = "1.0.0", also_load = FALSE)) {
`%>%` <- import_fn("%>%", "dplyr", error_on_fail = FALSE)
across <- import_fn("across", "dplyr", error_on_fail = FALSE)
anti_join <- import_fn("anti_join", "dplyr", error_on_fail = FALSE)
arrange <- import_fn("arrange", "dplyr", error_on_fail = FALSE)
bind_rows <- import_fn("bind_rows", "dplyr", error_on_fail = FALSE)
count <- import_fn("count", "dplyr", error_on_fail = FALSE)
desc <- import_fn("desc", "dplyr", error_on_fail = FALSE)
distinct <- import_fn("distinct", "dplyr", error_on_fail = FALSE)
@@ -1443,22 +1445,22 @@ if (pkg_is_available("dplyr", also_load = FALSE)) {
inner_join <- import_fn("inner_join", "dplyr", error_on_fail = FALSE)
lag <- import_fn("lag", "dplyr", error_on_fail = FALSE)
left_join <- import_fn("left_join", "dplyr", error_on_fail = FALSE)
mutate <- import_fn("mutate", "dplyr", error_on_fail = FALSE)
n_distinct <- import_fn("n_distinct", "dplyr", error_on_fail = FALSE)
pull <- import_fn("pull", "dplyr", error_on_fail = FALSE)
rename <- import_fn("rename", "dplyr", error_on_fail = FALSE)
right_join <- import_fn("right_join", "dplyr", error_on_fail = FALSE)
row_number <- import_fn("row_number", "dplyr", error_on_fail = FALSE)
select <- import_fn("select", "dplyr", error_on_fail = FALSE)
semi_join <- import_fn("semi_join", "dplyr", error_on_fail = FALSE)
summarise <- import_fn("summarise", "dplyr", error_on_fail = FALSE)
ungroup <- import_fn("ungroup", "dplyr", error_on_fail = FALSE)
mutate <- import_fn("mutate", "dplyr", error_on_fail = FALSE)
bind_rows <- import_fn("bind_rows", "dplyr", error_on_fail = FALSE)
where <- import_fn("where", "dplyr", error_on_fail = FALSE)
} else {
`%>%` <- `%pm>%`
across <- pm_across
anti_join <- pm_anti_join
arrange <- pm_arrange
bind_rows <- pm_bind_rows
count <- pm_count
desc <- pm_desc
distinct <- pm_distinct
@@ -1470,62 +1472,22 @@ if (pkg_is_available("dplyr", also_load = FALSE)) {
inner_join <- pm_inner_join
lag <- pm_lag
left_join <- pm_left_join
mutate <- pm_mutate
n_distinct <- pm_n_distinct
pull <- pm_pull
rename <- pm_rename
right_join <- pm_right_join
row_number <- pm_row_number
select <- pm_select
semi_join <- pm_semi_join
summarise <- pm_summarise
ungroup <- pm_ungroup
mutate <- function(.data, ...) {
# pm_mutate is buggy, use this simple alternative
dots <- list(...)
for (i in seq_len(length(dots))) {
.data[, names(dots)[i]] <- dots[[i]]
}
.data
}
bind_rows <- function(..., fill = NA) {
# this AMAZING code is from ChatGPT when I asked for a base R dplyr::bind_rows alternative
dfs <- list(...)
all_cols <- unique(unlist(lapply(dfs, colnames)))
mat_list <- lapply(dfs, function(x) {
mat <- matrix(NA, nrow = nrow(x), ncol = length(all_cols))
colnames(mat) <- all_cols
mat[, colnames(x)] <- as.matrix(x)
mat
})
mat <- do.call(rbind, mat_list)
as.data.frame(mat, stringsAsFactors = FALSE)
}
where <- function(fn) {
# adapted from https://github.com/nathaneastwood/poorman/blob/52eb6947e0b4430cd588976ed8820013eddf955f/R/where.R#L17-L32
if (!is.function(fn)) {
stop_("`", deparse(substitute(fn)), "()` is not a valid predicate function.")
}
df <- pm_select_env$.data
cols <- pm_select_env$get_colnames()
if (is.null(df)) {
df <- get_current_data("where", call = FALSE)
cols <- colnames(df)
}
preds <- unlist(lapply(
df,
function(x, fn) {
do.call("fn", list(x))
},
fn
))
if (!is.logical(preds)) stop_("`where()` must be used with functions that return `TRUE` or `FALSE`.")
data_cols <- cols
cols <- data_cols[preds]
which(data_cols %in% cols)
}
where <- pm_where
}
if (pkg_is_available("tidyr", min_version = "1.0.0", also_load = FALSE)) {
pivot_longer <- import_fn("pivot_longer", "tidyr", error_on_fail = FALSE)
} else {
pivot_longer <- pm_pivot_longer
}
# Faster data.table implementations ----
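The block above binds {dplyr}/{tidyr} verbs when those packages are installed at the required minimum versions, and otherwise falls back to the bundled (slower) pm_* poorman ports. A minimal sketch of that conditional-binding pattern, using hypothetical names rather than the package's actual code:

```r
# Sketch only (hypothetical names): bind one local symbol to the fast {dplyr}
# verb when available, otherwise to a crude base-R stand-in, so downstream
# code can call it unconditionally.
distinct_impl <- if (requireNamespace("dplyr", quietly = TRUE)) {
  getExportedValue("dplyr", "distinct")
} else {
  function(.data, ...) unique(.data)  # base R: unique rows of a data frame
}

# callers never need to know which backend was chosen
distinct_impl(data.frame(x = c(1, 1, 2)))
```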


@@ -49,7 +49,6 @@
#' @return (internally) a [character] vector of column names, with additional class `"ab_selector"`
#' @export
#' @inheritSection AMR Reference Data Publicly Available
#' @examples
#' # `example_isolates` is a data set available in the AMR package.
#' # See ?example_isolates.


@@ -85,10 +85,7 @@ bug_drug_combinations <- function(x,
}
# use dplyr and tidyr if they are available, they are much faster!
if (pkg_is_available("dplyr", min_version = "1.0.0", also_load = FALSE) &&
pkg_is_available("tidyr", min_version = "1.0.0", also_load = FALSE)) {
across <- import_fn("across", "dplyr")
pivot_longer <- import_fn("pivot_longer", "tidyr")
if (identical(pivot_longer, import_fn("pivot_longer", "tidyr", error_on_fail = FALSE))) {
out <- x %>%
ungroup() %>%
mutate(mo = FUN(ungroup(x)[, col_mo, drop = TRUE], ...)) %>%
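The rewritten condition above no longer imports across()/pivot_longer() locally; since pivot_longer is now bound package-wide (see the helper block earlier in this commit), the function asks with identical() whether the bound pivot_longer is the {tidyr} export before taking the fast path. A self-contained sketch of that identical() check, with assumed names:

```r
# Sketch only: bind a pivot_longer implementation once, then later test with
# identical() whether the fast {tidyr} implementation is the one in use.
pl <- if (requireNamespace("tidyr", quietly = TRUE)) {
  getExportedValue("tidyr", "pivot_longer")
} else {
  function(data, ...) stop("a slower fallback would be bound here")
}

fast_path <- requireNamespace("tidyr", quietly = TRUE) &&
  identical(pl, getExportedValue("tidyr", "pivot_longer"))
fast_path  # TRUE only when the {tidyr} implementation is bound
```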


@@ -926,7 +926,7 @@ eucast_rules <- function(x,
# Print overview ----------------------------------------------------------
if (isTRUE(info) || isTRUE(verbose)) {
verbose_info <- x.bak %>%
mutate(row = row_number()) %>%
mutate(row = seq_len(NROW(x.bak))) %>%
select(`.rowid`, row) %>%
right_join(verbose_info,
by = c(".rowid" = "rowid")

data-raw/antibiograms.Rmd (new file, 62 lines)

@@ -0,0 +1,62 @@
---
title: "Generating antibiograms with the AMR package"
author: "AMR package developers"
date: "`r Sys.Date()`"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE, message = FALSE)
library(AMR)
```
This is an example R Markdown file to show the use of `antibiogram()` of the AMR package.
For starters, this is what our `example_isolates` data set looks like:
```{r}
example_isolates
```
### Traditional Antibiogram
```{r trad}
print(
antibiogram(example_isolates,
antibiotics = c(aminoglycosides(), carbapenems()))
)
```
### Combined Antibiogram
```{r comb}
print(
antibiogram(example_isolates,
antibiotics = c("TZP", "TZP+TOB", "TZP+GEN"))
)
```
### Syndromic Antibiogram
```{r synd}
print(
antibiogram(example_isolates,
antibiotics = c(aminoglycosides(), carbapenems()),
syndromic_group = "ward")
)
```
### Weighted-Incidence Syndromic Combination Antibiogram (WISCA)
```{r wisca}
print(
antibiogram(example_isolates,
antibiotics = c("AMC", "AMC+CIP", "TZP", "TZP+TOB"),
mo_transform = "gramstain",
minimum = 10, # this should be >= 30, but now just as example
syndromic_group = ifelse(example_isolates$age >= 65 &
example_isolates$gender == "M",
"WISCA Group 1", "WISCA Group 2"))
)
```

data-raw/antibiograms.html (new file, 848 lines)

File diff suppressed because one or more lines are too long
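The HTML file above is presumably the rendered output of the new data-raw/antibiograms.Rmd. Assuming {rmarkdown} and pandoc are available, it can be regenerated from the repository root with a single call:

```r
# writes data-raw/antibiograms.html next to the .Rmd (assumes pandoc is on PATH)
rmarkdown::render("data-raw/antibiograms.Rmd")
```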


@@ -32,11 +32,11 @@
# Source file: data-raw/reproduction_of_poorman.R
# ------------------------------------------------
# poorman: a package to replace all dplyr functions with base R so we can lose dependency on dplyr.
# {poorman}: a package to replace all dplyr functions with base R so we can lose dependency on {dplyr}.
# These functions were downloaded from https://github.com/nathaneastwood/poorman,
# from this commit: https://github.com/nathaneastwood/poorman/tree/{commit}.
#
# All functions are prefixed with 'pm_' to make it obvious that they are dplyr substitutes.
# All functions are prefixed with 'pm_' to make it obvious that they are {dplyr} substitutes.
#
# All code below was released under MIT license, that permits 'free of charge, to any person obtaining a
# copy of the software and associated documentation files (the "Software"), to deal in the Software
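As this header explains, every vendored verb carries a pm_ prefix so its origin stays obvious. Purely as an illustration of what that substitution means (not code from the file), the following spellings of a simple column selection are interchangeable:

```r
# illustrative only: three ways to select the same two columns
mtcars[, c("mpg", "cyl"), drop = FALSE]  # plain base R
if (requireNamespace("dplyr", quietly = TRUE)) {
  dplyr::select(mtcars, mpg, cyl)        # {dplyr}, when installed
}
# pm_select(mtcars, mpg, cyl)            # the vendored port in R/aa_helper_pm_functions.R
```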


@@ -1,22 +1,28 @@
# get complete filenames of all R files in the GitHub repository of nathaneastwood/poorman
commit <- "52eb6947e0b4430cd588976ed8820013eddf955f"
library(magrittr)
commit <- "3cc0a9920b1eb559dd166f548561244189586b3a"
files <- xml2::read_html(paste0("https://github.com/nathaneastwood/poorman/tree/", commit, "/R")) %>%
rvest::html_nodes("a") %>%
rvest::html_attr("href")
files <- files[files %like% "/blob/.*R$"]
# get full URLs of all raw R files
files <- sort(paste0("https://raw.githubusercontent.com", gsub("blob/", "", files[files %like% "/R/.*.R$"])))
# remove files with only pkg specific code
files <- files[files %unlike% "(zzz|init)[.]R$"]
# also, there's a lot of functions we don't use
files <- files[files %unlike% "(slice|glimpse|recode|replace_na|coalesce)[.]R$"]
files <- files[files %unlike% "/(between|coalesce|cumulative|fill|glimpse|gluestick|group_cols|na_if|near|nest_by|poorman-package|print|recode|reconstruct|replace_na|replace_with|rownames|slice|union_all|unite|window_rank|with_groups)[.]R$"]
# add our prepend file, containing info about the source of the data
intro <- readLines("data-raw/poorman_prepend.R")
intro <- readLines("data-raw/poorman_prepend.R") %>%
# add commit to intro part
gsub("{commit}", commit, ., fixed = TRUE) %>%
# add date to intro part
gsub("{date}", trimws(format(Sys.Date(), "%e %B %Y")), ., fixed = TRUE)
# copyright info:
copyright <- paste0("# ", readLines("https://raw.githubusercontent.com/nathaneastwood/poorman/master/LICENSE"))
copyright <- paste0("# ", readLines(paste0("https://raw.githubusercontent.com/nathaneastwood/poorman/", commit, "/LICENSE")))
# read all contents to a character vector
contents <- character(0)
@@ -25,31 +31,35 @@ sapply(files, function(file) {
contents <<- c(contents, readLines(file))
invisible()
})
contents <- c(
intro,
copyright,
"",
contents
)
# remove lines starting with "#'" and NULL and write to file
contents <- contents[!grepl("^(#'|NULL|\"_PACKAGE)", contents)]
contents.bak <- contents
# grouped attributes same as dplyr
contents <- gsub("grouped_data", "grouped_df", contents, fixed = TRUE)
# now make it independent on UseMethod, since we will not export these functions
contents <- gsub('UseMethod[(]"(.*?)"[)]',
'if ("grouped_data" %in% class(.data)) {||| \\1.grouped_data(.data, ...)||| } else {||| \\1.default(.data, ...)||| }',
paste(contents, collapse = "|||"),
perl = TRUE
) %>%
# add commit to intro part
gsub("{commit}", commit, ., fixed = TRUE) %>%
# add date to intro part
gsub("{date}", format(Sys.Date(), "%e %B %Y"), ., fixed = TRUE) %>%
strsplit(split = "|||", fixed = TRUE) %>%
unlist() %>%
# add "pm_" as prefix to all functions
gsub("^([a-z_.]+) <- function", "pm_\\1 <- function", .)
has_usemethods <- gsub("^([a-z_]+).*", "\\1", contents[which(contents %like% "usemethod") - 1])
for (use in has_usemethods) {
relevant_row <- which(contents %like% paste0("^", use, " <- function")) + 1
function_call <- trimws(gsub(".*function(.*)\\{.*", "\\1", contents[relevant_row - 1]))
function_call1 <- trimws(gsub("[()]", "", strsplit(function_call, ",")[[1]][1]))
if (any(contents %like% paste0(use, ".grouped_df"))) {
# this function will have methods for data.frame and grouped_df
contents[relevant_row] <- paste0(" if (\"grouped_df\" %in% class(", function_call1, ")) ", use, ".grouped_df", function_call, " else ", use, ".data.frame", function_call)
} else {
# this function will only have data.frame as method
contents[relevant_row] <- paste0(" ", use, ".data.frame", function_call)
}
# add pm_ prefix
contents[relevant_row - 1] <- paste0("pm_", contents[relevant_row - 1])
}
# correct for NextMethod
contents <- gsub("NextMethod\\(\"(.*)\"\\)", "\\1.data.frame(...)", contents)
# correct for 'default' method
contents <- gsub(".default <-", ".data.frame <-", contents, fixed = TRUE)
contents <- gsub("pm_group_by_drop.data.frame", "pm_group_by_drop", contents, fixed = TRUE)
# now get all those pm_* functions to replace all untransformed function name calls as well
new_pm_names <- sort(gsub("pm_(.*?) <-.*", "\\1", contents[grepl("^pm_", contents)]))
for (i in seq_len(length(new_pm_names))) {
@@ -57,29 +67,39 @@ for (i in seq_len(length(new_pm_names))) {
# starting with a space or a straight bracket or an opening parenthesis, ending with nothing or a non-character or a closing parenthesis
contents <- gsub(paste0("( |\\[|\\()", new_pm_names[i], "($|[^a-z]|\\))"), paste0("\\1pm_", new_pm_names[i], "\\2"), contents)
}
# replace %>% with %pm>%
contents[which(contents %like% "^\\|\\|") - 1] <- paste0(contents[which(contents %like% "^\\|\\|") - 1], " ||")
contents[which(contents %like% "^\\|\\|")] <- gsub("^\\|\\|", "", contents[which(contents %like% "^\\|\\|")])
contents <- gsub("%>%", "%pm>%", contents, fixed = TRUE)
# fix for new lines, since n() also existed
contents <- gsub("\\pm_n", "\\n", contents, fixed = TRUE)
# prefix other functions also with "pm_"
contents <- gsub("^([a-z_]+)(\\$|)", "pm_\\1\\2", contents)
# prefix environments
contents <- gsub("eval_env", "pm_eval_env", contents, fixed = TRUE)
contents <- gsub("select_env", "pm_select_env", contents, fixed = TRUE)
contents <- gsub("context", "pm_context", contents, fixed = TRUE)
# prefix environmental objects and functions
contents <- gsub("(eval_env|select_env|select_context|context|dotdotdot|as_symbols|insert_dot|deparse_|groups_set|apply_grouped_function|split_into_groups|calculate_groups|has_groups|eval_select_pos|select_positions|eval_expr|eval_call|add_group_columns|find_used|is_nested|setup_|select_|group_)", "pm_\\1", contents)
# now some items are overprefixed
contents <- gsub("(pm_)+", "pm_", contents)
# special case for pm_distinct(), we need '.keep_all' to work
contents <- gsub("pm_distinct <- function(.data, ..., .keep_all = FALSE)", "pm_distinct <- function(.data, ...)", contents, fixed = TRUE)
# pm_pull does not correct for tibbles, misses the drop argument
contents[contents == ".data[, var]"] <- ".data[, var, drop = TRUE]"
contents <- gsub("pm_if (\"grouped_df", "if (\"grouped_df", contents, fixed = TRUE)
# remove comments and empty lines
contents <- gsub("#.*", "", contents)
contents <- contents[trimws(contents) != ""]
# fix for their relocate()
contents <- gsub("if (!missing(.before))", "if (!missing(.before) && !is.null(.before))", contents, fixed = TRUE)
contents <- gsub("if (!missing(.after))", "if (!missing(.after) && !is.null(.after))", contents, fixed = TRUE)
contents[which(contents %like% "reshape\\($") + 1] <- gsub("data", "as.data.frame(data, stringsAsFactors = FALSE)", contents[which(contents %like% "reshape\\($") + 1])
contents <- gsub('pm_relocate(.data = long, values_to, .after = -1)', 'pm_relocate(.data = long, "value", .after = -1)', contents, fixed = TRUE)
# who needs US spelling?
contents <- contents[!grepl("summarize", contents)]
contents <- contents[contents %unlike% "summarize"]
# add intro
contents <- c(
intro,
copyright,
"",
contents
)
writeLines(contents, "R/aa_helper_pm_functions.R")
# after this, comment out:
# pm_left_join() since we use a faster version
# pm_group_split() since we don't use it and it relies on R 3.5.0 for the use of ...length(), which is hard to support without C++ code
# note: pm_left_join() will be overwritten by aaa_helper_functions.R, which contains a faster implementation
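Most of this script is gsub() surgery on the downloaded sources: definitions and call sites get a pm_ prefix, UseMethod() dispatch is rewritten into explicit calls, and %>% becomes %pm>%. A toy, self-contained illustration of the prefixing step (simplified relative to the script's actual patterns):

```r
# toy example of the renaming idea applied by the script
src <- c(
  "filter <- function(.data, ...) UseMethod(\"filter\")",
  "result <- filter(mtcars, cyl == 4)"
)
# prefix function definitions ...
src <- gsub("^([a-z_.]+) <- function", "pm_\\1 <- function", src)
# ... and call sites of the renamed function
src <- gsub("(^|[^a-z_.])filter\\(", "\\1pm_filter(", src)
src
#> [1] "pm_filter <- function(.data, ...) UseMethod(\"filter\")"
#> [2] "result <- pm_filter(mtcars, cyl == 4)"
```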


@@ -68,7 +68,6 @@ import_functions <- c(
"read_html" = "xml2",
"rename" = "dplyr",
"right_join" = "dplyr",
"row_number" = "dplyr",
"select" = "dplyr",
"semi_join" = "dplyr",
"showQuestion" = "rstudioapi",


@@ -40,11 +40,9 @@ if (tryCatch(isTRUE(AMR:::import_fn("isJob", "rstudioapi")()), error = function(
if (AMR:::pkg_is_available("tinytest", also_load = TRUE)) {
library(AMR)
if (identical(AMR:::import_fn("select", "dplyr"), AMR:::select)) {
print("This test will rely on {dplyr} verbs")
message("This test will rely on {dplyr} verbs")
message("\n\n------------------------------------\nThis test will rely on {dplyr} verbs\n------------------------------------\n\n")
} else {
print("This test will rely on {poorman} verbs")
message("This test will rely on {poorman} verbs")
message("\n\n---------------------------------------------------------------------\nThis test will rely on {poorman} verbs (installed state dplyr: ", AMR:::pkg_is_available("dplyr", also_load = FALSE), ")\n---------------------------------------------------------------------\n\n")
}
# set language
set_AMR_locale("English")