From 1a0dc4bf46229ac1339122936ee003dfa649f43c Mon Sep 17 00:00:00 2001 From: "Matthijs S. Berends" Date: Thu, 9 Feb 2023 13:07:39 +0100 Subject: [PATCH] revert back to pre-antibiogram --- DESCRIPTION | 4 +- NAMESPACE | 4 - NEWS.md | 4 +- R/aa_globals.R | 14 +- ...lper_functions.R => aa_helper_functions.R} | 255 +-- R/aa_helper_pm_functions.R | 1707 ++++++----------- R/ab_property.R | 9 +- R/ab_selectors.R | 14 +- R/{antibiogram.R => antibiogram.R.bak} | 0 R/atc_online.R | 16 +- R/av_property.R | 2 +- R/bug_drug_combinations.R | 98 +- R/data.R | 30 +- R/disk.R | 6 +- R/eucast_rules.R | 90 +- R/first_isolate.R | 61 +- R/get_episode.R | 10 +- R/ggplot_sir.R | 16 +- R/join_microorganisms.R | 11 +- R/mean_amr_distance.R | 2 +- R/mic.R | 10 +- R/mo.R | 22 +- R/mo_property.R | 12 +- R/pca.R | 2 +- R/plot.R | 4 +- R/proportion.R | 10 +- R/random.R | 12 +- R/resistance_predict.R | 6 +- R/sir.R | 54 +- R/sir_calc.R | 24 +- R/translate.R | 5 +- R/zzz.R | 3 +- inst/tinytest/test-antibiogram.R | 132 -- inst/tinytest/test-first_isolate.R | 2 +- inst/tinytest/test-zzz.R | 26 +- man/ab_property.Rd | 3 +- man/antibiogram.Rd | 234 --- man/antibiotic_class_selectors.Rd | 2 +- man/as.sir.Rd | 4 +- man/bug_drug_combinations.Rd | 9 +- man/clinical_breakpoints.Rd | 2 +- man/eucast_rules.Rd | 2 +- man/example_isolates.Rd | 6 +- man/example_isolates_unclean.Rd | 4 +- man/first_isolate.Rd | 11 +- man/intrinsic_resistant.Rd | 2 +- man/key_antimicrobials.Rd | 2 +- man/mdro.Rd | 2 +- man/microorganisms.Rd | 4 +- man/microorganisms.codes.Rd | 4 +- man/mo_property.Rd | 2 +- man/proportion.Rd | 5 +- tests/tinytest.R | 5 - 53 files changed, 984 insertions(+), 1996 deletions(-) rename R/{aaa_helper_functions.R => aa_helper_functions.R} (93%) mode change 100644 => 100755 R/aa_helper_pm_functions.R rename R/{antibiogram.R => antibiogram.R.bak} (100%) delete mode 100644 inst/tinytest/test-antibiogram.R delete mode 100644 man/antibiogram.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 3fad7e07..f2d6a34c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR -Version: 1.8.2.9111 -Date: 2023-02-08 +Version: 1.8.2.9112 +Date: 2023-02-09 Title: Antimicrobial Resistance Data Analysis Description: Functions to simplify and standardise antimicrobial resistance (AMR) data analysis and to work with microbial and antimicrobial properties by diff --git a/NAMESPACE b/NAMESPACE index 361eeb3a..7e9bfd51 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -73,7 +73,6 @@ S3method(asin,mic) S3method(asinh,mic) S3method(atan,mic) S3method(atanh,mic) -S3method(barplot,antibiogram) S3method(barplot,disk) S3method(barplot,mic) S3method(barplot,rsi) @@ -124,14 +123,12 @@ S3method(mean_amr_distance,mic) S3method(mean_amr_distance,sir) S3method(median,mic) S3method(min,mic) -S3method(plot,antibiogram) S3method(plot,disk) S3method(plot,mic) S3method(plot,resistance_predict) S3method(plot,rsi) S3method(plot,sir) S3method(print,ab) -S3method(print,antibiogram) S3method(print,av) S3method(print,bug_drug_combinations) S3method(print,custom_eucast_rules) @@ -219,7 +216,6 @@ export(aminoglycosides) export(aminopenicillins) export(amr_distance_from_row) export(anti_join_microorganisms) -export(antibiogram) export(antifungals) export(antimicrobials_equal) export(antimycobacterials) diff --git a/NEWS.md b/NEWS.md index 72b803d9..98b87d22 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# AMR 1.8.2.9111 +# AMR 1.8.2.9112 *(this beta version will eventually become v2.0! We're happy to reach a new major milestone soon!)* @@ -28,8 +28,6 @@ The 'RSI functions' will be removed in a future version, but not before late 202 ### New antibiogram function -Klinker *et al.* (2021, DOI [10.1177/20499361211011373](https://doi.org/10.1177/20499361211011373)) and Barbieri *et al.* (2021, DOI [10.1186/s13756-021-00939-2](https://doi.org/10.1186/s13756-021-00939-2)). - With the new `antibiogram()` function, users can now generate traditional, combined, syndromic, and even weighted-incidence syndromic combination antibiograms (WISCA). With this, we follow the logic in the previously described work of Klinker *et al.* (2021, DOI [10.1177/20499361211011373](https://doi.org/10.1177/20499361211011373)) and Barbieri *et al.* (2021, DOI [10.1186/s13756-021-00939-2](https://doi.org/10.1186/s13756-021-00939-2)). The help page for `antibiogram()` extensively elaborates on use cases, and `antibiogram()` also supports printing in R Markdown and Quarto, with support for 16 languages. diff --git a/R/aa_globals.R b/R/aa_globals.R index ee47c959..e87f094f 100755 --- a/R/aa_globals.R +++ b/R/aa_globals.R @@ -94,9 +94,7 @@ TAXONOMY_VERSION <- list( ) globalVariables(c( - ".mo", ".rowid", - ".syndromic_group", "ab", "ab_txt", "affect_ab_name", @@ -107,9 +105,8 @@ globalVariables(c( "atc_group1", "atc_group2", "base_ab", - "ci_max", "ci_min", - "clinical_breakpoints", + "ci_max", "code", "cols", "count", @@ -133,15 +130,14 @@ globalVariables(c( "language", "lookup", "method", - "mic ", "mic", + "mic ", "microorganism", "microorganisms", "microorganisms.codes", "mo", "name", "new", - "numerator", "observations", "old", "old_name", @@ -153,15 +149,13 @@ globalVariables(c( "reference.rule_group", "reference.version", "rowid", + "sir", + "clinical_breakpoints", "rule_group", "rule_name", - "S", "se_max", "se_min", - "SI", - "sir", "species", - "syndromic_group", "total", "txt", "type", diff --git a/R/aaa_helper_functions.R b/R/aa_helper_functions.R similarity index 93% rename from R/aaa_helper_functions.R rename to R/aa_helper_functions.R index 9f1eeb5a..42c137e1 100755 --- a/R/aaa_helper_functions.R +++ b/R/aa_helper_functions.R @@ -63,6 +63,99 @@ pm_left_join <- function(x, y, by = NULL, suffix = c(".x", ".y")) { merged } +# support where() like tidyverse: +# adapted from https://github.com/nathaneastwood/poorman/blob/52eb6947e0b4430cd588976ed8820013eddf955f/R/where.R#L17-L32 +where <- function(fn) { + if (!is.function(fn)) { + stop(pm_deparse_var(fn), " is not a valid predicate function.") + } + preds <- unlist(lapply( + pm_select_env$.data, + function(x, fn) { + do.call("fn", list(x)) + }, + fn + )) + if (!is.logical(preds)) stop("`where()` must be used with functions that return `TRUE` or `FALSE`.") + data_cols <- pm_select_env$get_colnames() + cols <- data_cols[preds] + which(data_cols %in% cols) +} + +# copied and slightly rewritten from poorman under same license (2021-10-15) +quick_case_when <- function(...) { + fs <- list(...) + lapply(fs, function(x) { + if (!inherits(x, "formula")) { + stop("`case_when()` requires formula inputs.") + } + }) + n <- length(fs) + if (n == 0L) { + stop("No cases provided.") + } + + validate_case_when_length <- function(query, value, fs) { + lhs_lengths <- lengths(query) + rhs_lengths <- lengths(value) + all_lengths <- unique(c(lhs_lengths, rhs_lengths)) + if (length(all_lengths) <= 1L) { + return(all_lengths[[1L]]) + } + non_atomic_lengths <- all_lengths[all_lengths != 1L] + len <- non_atomic_lengths[[1L]] + if (length(non_atomic_lengths) == 1L) { + return(len) + } + inconsistent_lengths <- non_atomic_lengths[-1L] + lhs_problems <- lhs_lengths %in% inconsistent_lengths + rhs_problems <- rhs_lengths %in% inconsistent_lengths + problems <- lhs_problems | rhs_problems + if (any(problems)) { + stop("The following formulas must be length ", len, " or 1, not ", + paste(inconsistent_lengths, collapse = ", "), ".\n ", + paste(fs[problems], collapse = "\n "), + call. = FALSE + ) + } + } + + replace_with <- function(x, i, val, arg_name) { + if (is.null(val)) { + return(x) + } + i[is.na(i)] <- FALSE + if (length(val) == 1L) { + x[i] <- val + } else { + x[i] <- val[i] + } + x + } + + query <- vector("list", n) + value <- vector("list", n) + default_env <- parent.frame() + for (i in seq_len(n)) { + query[[i]] <- eval(fs[[i]][[2]], envir = default_env) + value[[i]] <- eval(fs[[i]][[3]], envir = default_env) + if (!is.logical(query[[i]])) { + stop(fs[[i]][[2]], " does not return a `logical` vector.") + } + } + m <- validate_case_when_length(query, value, fs) + out <- value[[1]][rep(NA_integer_, m)] + replaced <- rep(FALSE, m) + for (i in seq_len(n)) { + out <- replace_with( + out, query[[i]] & !replaced, value[[i]], + NULL + ) + replaced <- replaced | (query[[i]] & !is.na(query[[i]])) + } + out +} + # No export, no Rd addin_insert_in <- function() { import_fn("insertText", "rstudioapi")(" %in% ") @@ -259,7 +352,7 @@ is_valid_regex <- function(x) { } stop_ifnot_installed <- function(package) { - installed <- vapply(FUN.VALUE = logical(1), package, requireNamespace, lib.loc = base::.libPaths(), quietly = TRUE) + installed <- vapply(FUN.VALUE = logical(1), package, requireNamespace, quietly = TRUE) if (any(!installed) && any(package == "rstudioapi")) { stop("This function only works in RStudio when using R >= 3.2.", call. = FALSE) } else if (any(!installed)) { @@ -276,7 +369,7 @@ pkg_is_available <- function(pkg, also_load = TRUE, min_version = NULL) { if (also_load == TRUE) { out <- suppressWarnings(require(pkg, character.only = TRUE, warn.conflicts = FALSE)) } else { - out <- requireNamespace(pkg, lib.loc = base::.libPaths(), quietly = TRUE) + out <- requireNamespace(pkg, quietly = TRUE) } if (!is.null(min_version)) { out <- out && utils::packageVersion(pkg) >= min_version @@ -293,7 +386,7 @@ import_fn <- function(name, pkg, error_on_fail = TRUE) { getExportedValue(name = name, ns = asNamespace(pkg)), error = function(e) { if (isTRUE(error_on_fail)) { - stop_("function `", name, "()` is not an exported object from package '", pkg, + stop_("function ", name, "() is not an exported object from package '", pkg, "'. Please create an issue at ", font_url("https://github.com/msberends/AMR/issues"), ". Many thanks!", call = FALSE ) @@ -1179,7 +1272,7 @@ create_pillar_column <- function(x, ...) { new_pillar_shaft_simple(x, ...) } -as_original_data_class <- function(df, old_class = NULL, extra_class = NULL) { +as_original_data_class <- function(df, old_class = NULL) { if ("tbl_df" %in% old_class && pkg_is_available("tibble", also_load = FALSE)) { # this will then also remove groups fn <- import_fn("as_tibble", "tibble") @@ -1192,11 +1285,7 @@ as_original_data_class <- function(df, old_class = NULL, extra_class = NULL) { } else { fn <- function(x) base::as.data.frame(df, stringsAsFactors = FALSE) } - out <- fn(df) - if (!is.null(extra_class)) { - class(out) <- c(extra_class, class(out)) - } - out + fn(df) } # works exactly like round(), but rounds `round2(44.55, 1)` to 44.6 instead of 44.5 @@ -1336,7 +1425,7 @@ add_MO_lookup_to_AMR_env <- function() { } trimws2 <- function(..., whitespace = "[\u0009\u000A\u000B\u000C\u000D\u0020\u0085\u00A0\u1680\u180E\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u200B\u200C\u200D\u2028\u2029\u202F\u205F\u2060\u3000\uFEFF]") { - # this is even faster than trimws() itself which sets "[ \t\r\n]". + # this is even faster than trimws() itself which sets " \t\n\r". trimws(..., whitespace = whitespace) } @@ -1347,154 +1436,12 @@ readRDS2 <- function(file, refhook = NULL) { readRDS(con, refhook = refhook) } - -# dplyr implementations ---- - -# copied from https://github.com/nathaneastwood/poorman under same license (2021-10-15) -case_when <- function(...) { - fs <- list(...) - lapply(fs, function(x) { - if (!inherits(x, "formula")) { - stop("`case_when()` requires formula inputs.") - } - }) - n <- length(fs) - if (n == 0L) { - stop("No cases provided.") - } - - validate_case_when_length <- function(query, value, fs) { - lhs_lengths <- lengths(query) - rhs_lengths <- lengths(value) - all_lengths <- unique(c(lhs_lengths, rhs_lengths)) - if (length(all_lengths) <= 1L) { - return(all_lengths[[1L]]) - } - non_atomic_lengths <- all_lengths[all_lengths != 1L] - len <- non_atomic_lengths[[1L]] - if (length(non_atomic_lengths) == 1L) { - return(len) - } - inconsistent_lengths <- non_atomic_lengths[-1L] - lhs_problems <- lhs_lengths %in% inconsistent_lengths - rhs_problems <- rhs_lengths %in% inconsistent_lengths - problems <- lhs_problems | rhs_problems - if (any(problems)) { - stop("The following formulas must be length ", len, " or 1, not ", - paste(inconsistent_lengths, collapse = ", "), ".\n ", - paste(fs[problems], collapse = "\n "), - call. = FALSE - ) - } - } - - replace_with <- function(x, i, val, arg_name) { - if (is.null(val)) { - return(x) - } - i[is.na(i)] <- FALSE - if (length(val) == 1L) { - x[i] <- val - } else { - x[i] <- val[i] - } - x - } - - query <- vector("list", n) - value <- vector("list", n) - default_env <- parent.frame() - for (i in seq_len(n)) { - query[[i]] <- eval(fs[[i]][[2]], envir = default_env) - value[[i]] <- eval(fs[[i]][[3]], envir = default_env) - if (!is.logical(query[[i]])) { - stop(fs[[i]][[2]], " does not return a `logical` vector.") - } - } - m <- validate_case_when_length(query, value, fs) - out <- value[[1]][rep(NA_integer_, m)] - replaced <- rep(FALSE, m) - for (i in seq_len(n)) { - out <- replace_with( - out, query[[i]] & !replaced, value[[i]], - NULL - ) - replaced <- replaced | (query[[i]] & !is.na(query[[i]])) - } - out -} - - -# dplyr/tidyr implementations ---- - -# take {dplyr} and {tidyr} functions if available, and the slower {poorman} functions otherwise -if (pkg_is_available("dplyr", min_version = "1.0.0", also_load = FALSE)) { - `%>%` <- import_fn("%>%", "dplyr", error_on_fail = FALSE) - across <- import_fn("across", "dplyr", error_on_fail = FALSE) - anti_join <- import_fn("anti_join", "dplyr", error_on_fail = FALSE) - arrange <- import_fn("arrange", "dplyr", error_on_fail = FALSE) - bind_rows <- import_fn("bind_rows", "dplyr", error_on_fail = FALSE) - count <- import_fn("count", "dplyr", error_on_fail = FALSE) - desc <- import_fn("desc", "dplyr", error_on_fail = FALSE) - distinct <- import_fn("distinct", "dplyr", error_on_fail = FALSE) - everything <- import_fn("everything", "dplyr", error_on_fail = FALSE) - filter <- import_fn("filter", "dplyr", error_on_fail = FALSE) - full_join <- import_fn("full_join", "dplyr", error_on_fail = FALSE) - group_by <- import_fn("group_by", "dplyr", error_on_fail = FALSE) - group_vars <- import_fn("group_vars", "dplyr", error_on_fail = FALSE) - inner_join <- import_fn("inner_join", "dplyr", error_on_fail = FALSE) - lag <- import_fn("lag", "dplyr", error_on_fail = FALSE) - left_join <- import_fn("left_join", "dplyr", error_on_fail = FALSE) - mutate <- import_fn("mutate", "dplyr", error_on_fail = FALSE) - n_distinct <- import_fn("n_distinct", "dplyr", error_on_fail = FALSE) - pull <- import_fn("pull", "dplyr", error_on_fail = FALSE) - rename <- import_fn("rename", "dplyr", error_on_fail = FALSE) - right_join <- import_fn("right_join", "dplyr", error_on_fail = FALSE) - select <- import_fn("select", "dplyr", error_on_fail = FALSE) - semi_join <- import_fn("semi_join", "dplyr", error_on_fail = FALSE) - summarise <- import_fn("summarise", "dplyr", error_on_fail = FALSE) - ungroup <- import_fn("ungroup", "dplyr", error_on_fail = FALSE) - where <- import_fn("where", "dplyr", error_on_fail = FALSE) -} else { - `%>%` <- `%pm>%` - across <- pm_across - anti_join <- pm_anti_join - arrange <- pm_arrange - bind_rows <- pm_bind_rows - count <- pm_count - desc <- pm_desc - distinct <- pm_distinct - everything <- pm_everything - filter <- pm_filter - full_join <- pm_full_join - group_by <- pm_group_by - group_vars <- pm_group_vars - inner_join <- pm_inner_join - lag <- pm_lag - left_join <- pm_left_join - mutate <- pm_mutate - n_distinct <- pm_n_distinct - pull <- pm_pull - rename <- pm_rename - right_join <- pm_right_join - select <- pm_select - semi_join <- pm_semi_join - summarise <- pm_summarise - ungroup <- pm_ungroup - where <- pm_where -} -if (pkg_is_available("tidyr", min_version = "1.0.0", also_load = FALSE)) { - pivot_longer <- import_fn("pivot_longer", "tidyr", error_on_fail = FALSE) -} else { - pivot_longer <- pm_pivot_longer -} - # Faster data.table implementations ---- match <- function(x, table, ...) { chmatch <- import_fn("chmatch", "data.table", error_on_fail = FALSE) if (!is.null(chmatch) && is.character(x) && is.character(table)) { - # data.table::chmatch() is much faster than base::match() for character + # data.table::chmatch() is 35% faster than base::match() for character chmatch(x, table, ...) } else { base::match(x, table, ...) @@ -1503,7 +1450,7 @@ match <- function(x, table, ...) { `%in%` <- function(x, table) { chin <- import_fn("%chin%", "data.table", error_on_fail = FALSE) if (!is.null(chin) && is.character(x) && is.character(table)) { - # data.table::`%chin%`() is much faster than base::`%in%`() for character + # data.table::`%chin%`() is 20-50% faster than base::`%in%`() for character chin(x, table) } else { base::`%in%`(x, table) diff --git a/R/aa_helper_pm_functions.R b/R/aa_helper_pm_functions.R old mode 100644 new mode 100755 index e4b5f3c5..2ad3ea12 --- a/R/aa_helper_pm_functions.R +++ b/R/aa_helper_pm_functions.R @@ -32,11 +32,11 @@ # Source file: data-raw/reproduction_of_poorman.R # ------------------------------------------------ -# {poorman}: a package to replace all dplyr functions with base R so we can lose dependency on {dplyr}. +# poorman: a package to replace all dplyr functions with base R so we can lose dependency on dplyr. # These functions were downloaded from https://github.com/nathaneastwood/poorman, -# from this commit: https://github.com/nathaneastwood/poorman/tree/3cc0a9920b1eb559dd166f548561244189586b3a. +# from this commit: https://github.com/nathaneastwood/poorman/tree/52eb6947e0b4430cd588976ed8820013eddf955f. # -# All functions are prefixed with 'pm_' to make it obvious that they are {dplyr} substitutes. +# All functions are prefixed with 'pm_' to make it obvious that they are dplyr substitutes. # # All code below was released under MIT license, that permits 'free of charge, to any person obtaining a # copy of the software and associated documentation files (the "Software"), to deal in the Software @@ -44,306 +44,108 @@ # distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software # is furnished to do so', given that a copyright notice is given in the software. # -# Copyright notice on 8 February 2023, the day this code was downloaded, as found on -# https://github.com/nathaneastwood/poorman/blob/3cc0a9920b1eb559dd166f548561244189586b3a/LICENSE: +# Copyright notice on 19 September 2020, the day this code was downloaded, as found on +# https://github.com/nathaneastwood/poorman/blob/52eb6947e0b4430cd588976ed8820013eddf955f/LICENSE: # YEAR: 2020 # COPYRIGHT HOLDER: Nathan Eastwood -pm_across <- function(.cols = everything(), .fns = NULL, ..., .names = NULL) { - setup <- pm_setup_across(substitute(.cols), .fns, .names) - if (length(setup$names) == 1 && grepl("\\{\\.col\\}|\\{\\.fn\\}", setup$names)) { - ref <- setup$names - id <- 1 - fn_names <- unique(names(setup$funs)) - for (i in seq_along(setup$cols)) { - .col <- setup$cols[i] - for (j in seq_along(fn_names)) { - .fn <- fn_names[j] - setup$names[id] <- pm_gluestick(ref) - id <- id + 1 - } - } - } - cols <- setup$cols - n_cols <- length(cols) - if (n_cols == 0L) return(data.frame()) - funs <- setup$funs - data <- pm_context$get_columns(cols) - names <- setup$names - if (is.null(funs)) { - data <- data.frame(data) - if (is.null(names)) { - return(data) - } else { - return(stats::setNames(data, names)) - } - } - n_fns <- length(funs) - res <- vector(mode = "list", length = n_fns * n_cols) - k <- 1L - for (i in seq_len(n_cols)) { - pm_context$cur_column <- cols[[i]] - col <- data[[i]] - for (j in seq_len(n_fns)) { - res[[k]] <- funs[[j]](col, ...) - k <- k + 1L - } - } - if (is.null(names(res))) names(res) <- names - as.data.frame(res) -} -pm_if_any <- function(.cols, .fns = NULL, ..., .names = NULL) { - df <- do.call(across, list(.cols = substitute(.cols), .fns = .fns, ..., .names = .names)) - if (nrow(df) == 0L) return(FALSE) - pm_check_if_types(df) - Reduce(`|`, df) -} -pm_if_all <- function(.cols, .fns = NULL, ..., .names = NULL) { - df <- do.call(across, list(.cols = substitute(.cols), .fns = .fns, ..., .names = .names)) - if (nrow(df) == 0L) return(FALSE) - pm_check_if_types(df) - Reduce(`&`, df) -} -pm_check_if_types <- function(df) { - types <- vapply(df, class, NA_character_) - not_logical <- types != "logical" - if (any(not_logical)) { - stop( - "Cannot convert the following columns to :\n ", - paste0(colnames(df)[not_logical], " <", types, "> ", collapse = "\n ") - ) - } -} -pm_setup_across <- function(.cols, .fns, .names) { - cols <- pm_eval_select_pos(.data = pm_context$.data, .cols, .pm_group_pos = FALSE) - cols <- pm_context$get_colnames()[cols] - if (pm_context$is_grouped()) cols <- setdiff(cols, pm_group_vars(pm_context$.data)) - funs <- if (is.null(.fns)) NULL else if (!is.list(.fns)) list(.fns) else .fns - if (is.null(funs)) return(list(cols = cols, funs = funs, names = .names)) - f_nms <- names(funs) - if (is.null(f_nms) && !is.null(.fns)) names(funs) <- seq_along(funs) - if (any(nchar(f_nms) == 0L)) { - miss <- which(nchar(f_nms) == 0L) - names(funs)[miss] <- miss - f_nms <- names(funs) - } - funs <- lapply(funs, pm_as_function) - names <- if (!is.null(.names)) { - .names - } else { - if (length(funs) == 1L && is.null(f_nms)) { - cols - } else { - nms <- do.call(paste, c(rev(expand.grid(names(funs), cols)), sep = "_")) - if (length(nms) == 0L) nms <- NULL - nms - } - } - list(cols = cols, funs = funs, names = names) -} pm_arrange <- function(.data, ...) { - pm_arrange.data.frame(.data, ...) + pm_check_is_dataframe(.data) + if ("grouped_data" %in% class(.data)) { + pm_arrange.grouped_data(.data, ...) + } else { + pm_arrange.default(.data, ...) + } } -pm_arrange.data.frame <- function(.data, ..., .by_group = FALSE) { - dots <- pm_dotdotdot(...) - is_grouped <- pm_has_groups(.data) - if (isTRUE(.by_group)) dots <- c(pm_groups(.data), dots) - rows <- pm_arrange_rows(.data = .data, dots) - row_number <- attr(.data, "row.names") - out <- .data[rows, , drop = FALSE] - if (is.numeric(row_number)) { - row.names(out) <- row_number - } - if (is_grouped) { - attr(out, "pm_groups") <- pm_calculate_groups(out, pm_group_vars(out)) - } - out + +pm_arrange.default <- function(.data, ...) { + pm_context$setup(.data) + on.exit(pm_context$clean(), add = TRUE) + rows <- eval(substitute(order(...)), envir = pm_context$.data) + .data[rows, , drop = FALSE] } -pm_arrange_rows <- function(.data, dots) { - if (length(dots) == 0L) return(seq_len(nrow(.data))) - for (i in seq_along(dots)) { - tmp <- deparse(dots[[i]]) - if (startsWith(tmp, "desc(")) { - tmp <- gsub("^desc\\(", "-", tmp) - tmp <- gsub("\\)$", "", tmp) - } - dots[[i]] <- parse(text = tmp, keep.source = FALSE)[[1]] - } - used <- unname(do.call(c, lapply(dots, pm_find_used))) - used <- used[used %in% colnames(.data)] - for (i in seq_along(dots)) { - if (is.character(.data[[used[[i]]]])) { - .data[[used[[i]]]] <- factor(.data[[used[[i]]]]) - } - if (is.factor(.data[[used[[i]]]]) && - (startsWith(deparse(dots[[i]]), "desc(") || - startsWith(deparse(dots[[i]]), "-"))) { - dots[[i]] <- bquote(-xtfrm(.(as.name(used[[i]])))) - } - } - data <- do.call(pm_transmute, c(list(.data = pm_ungroup(.data)), dots)) - do.call(order, c(data, list(decreasing = FALSE, na.last = TRUE))) + +pm_arrange.grouped_data <- function(.data, ...) { + pm_apply_grouped_function("pm_arrange", .data, drop = TRUE, ...) } -pm_bind_cols <- function(...) { - lsts <- list(...) - lsts <- pm_squash(lsts) - lsts <- Filter(Negate(is.null), lsts) - if (length(lsts) == 0L) return(data.frame()) - lapply(lsts, function(x) pm_is_df_or_vector(x)) - lsts <- do.call(cbind, lsts) - if (!is.data.frame(lsts)) lsts <- as.data.frame(lsts) - lsts -} -pm_bind_rows <- function(..., .id = NULL) { - lsts <- list(...) - lsts <- pm_flatten(lsts) - lsts <- Filter(Negate(is.null), lsts) - lapply(lsts, function(x) pm_is_df_or_vector(x)) - lapply(lsts, function(x) if (is.atomic(x) && !pm_is_named(x)) stop("Vectors must be named.")) - if (!missing(.id)) { - lsts <- lapply(seq_along(lsts), function(i) { - nms <- names(lsts) - id_df <- data.frame(id = if (is.null(nms)) as.character(i) else nms[i], stringsAsFactors = FALSE) - colnames(id_df) <- .id - cbind(id_df, lsts[[i]]) - }) - } - nms <- unique(unlist(lapply(lsts, names))) - lsts <- lapply( - lsts, - function(x) { - if (!is.data.frame(x)) x <- data.frame(as.list(x), stringsAsFactors = FALSE) - for (i in nms[!nms %in% names(x)]) x[[i]] <- NA - x - } - ) - names(lsts) <- NULL - do.call(rbind, lsts) -} -pm_case_when <- function(...) { - fs <- list(...) - lapply(fs, function(x) if (!inherits(x, "formula")) stop("`case_when()` requires formula inputs.")) - n <- length(fs) - if (n == 0L) stop("No cases provided.") - query <- vector("list", n) - value <- vector("list", n) - default_env <- parent.frame() - for (i in seq_len(n)) { - query[[i]] <- eval(fs[[i]][[2]], envir = default_env) - value[[i]] <- eval(fs[[i]][[3]], envir = default_env) - if (!is.logical(query[[i]])) stop(fs[[i]][[2]], " does not return a `logical` vector.") - } - m <- pm_validate_case_when_length(query, value, fs) - out <- value[[1]][rep(NA_integer_, m)] - replaced <- rep(FALSE, m) - for (i in seq_len(n)) { - out <- pm_replace_with(out, query[[i]] & !replaced, value[[i]], NULL) - replaced <- replaced | (query[[i]] & !is.na(query[[i]])) - } - out -} -pm_validate_case_when_length <- function(query, value, fs) { - lhs_lengths <- lengths(query) - rhs_lengths <- lengths(value) - all_lengths <- unique(c(lhs_lengths, rhs_lengths)) - if (length(all_lengths) <= 1L) return(all_lengths[[1L]]) - non_atomic_lengths <- all_lengths[all_lengths != 1L] - len <- non_atomic_lengths[[1L]] - if (length(non_atomic_lengths) == 1L) return(len) - inconsistent_lengths <- non_atomic_lengths[-1L] - lhs_problems <- lhs_lengths %in% inconsistent_lengths - rhs_problems <- rhs_lengths %in% inconsistent_lengths - problems <- lhs_problems | rhs_problems - if (any(problems)) { - stop( - "The following formulas must be length ", len, " or 1, not ", - paste(inconsistent_lengths, collapse = ", "), ".\n ", - paste(fs[problems], collapse = "\n ") - ) +pm_between <- function(x, left, right) { + if (!is.null(attr(x, "class")) && !inherits(x, c("Date", "POSIXct"))) { + warning("`pm_between()` called on numeric vector with S3 class") } + if (!is.double(x)) x <- as.numeric(x) + x >= as.numeric(left) & x <= as.numeric(right) } pm_context <- new.env() + +# Data pm_context$setup <- function(.data) pm_context$.data <- .data pm_context$get_data <- function() pm_context$.data -pm_context$get_columns <- function(cols) pm_context$.data[, cols, drop = FALSE] -pm_context$cur_column <- NULL pm_context$get_nrow <- function() nrow(pm_context$.data) pm_context$get_colnames <- function() colnames(pm_context$.data) -pm_context$is_grouped <- function() pm_has_groups(pm_context$.data) -pm_context$as_env <- function() { - if (any(pm_is_nested(pm_context$.data))) { - lapply(as.list(pm_context$.data), function(x) if (is.data.frame(x[[1]])) x[[1]] else x) - } else { - pm_context$.data - } -} -pm_context$pm_group_env <- NULL -pm_context$clean <- function() { - rm(list = c(".data"), envir = pm_context) - if (!is.null(pm_context$cur_column)) rm(list = c("cur_column"), envir = pm_context) -} +pm_context$clean <- function() rm(list = c(".data"), envir = pm_context) + + pm_n <- function() { - pm_check_context("`n()`", pm_context$.data) + pm_check_group_pm_context("`pm_n()`") pm_context$get_nrow() } + pm_cur_data <- function() { - pm_check_context("`cur_data()`", pm_context$.data) + pm_check_group_pm_context("`pm_cur_data()`") data <- pm_context$get_data() - data[, !(colnames(data) %in% pm_group_vars(data)), drop = FALSE] -} -pm_cur_data_all <- function() { - pm_check_context("`cur_data_all()`", pm_context$.data) - pm_ungroup(pm_context$get_data()) + data[, !(colnames(data) %in% pm_get_groups(data)), drop = FALSE] } + pm_cur_group <- function() { - pm_check_context("`cur_group()`", pm_context$.data) + pm_check_group_pm_context("`pm_cur_group()`") data <- pm_context$get_data() - res <- data[1L, pm_group_vars(data), drop = FALSE] + res <- data[1L, pm_get_groups(data), drop = FALSE] rownames(res) <- NULL res } + pm_cur_group_id <- function() { - pm_check_context("`cur_group_id()`", pm_context$.data) + pm_check_group_pm_context("`pm_cur_group_id()`") data <- pm_context$get_data() - res <- data[1L, pm_group_vars(data), drop = FALSE] + res <- data[1L, pm_get_groups(data), drop = FALSE] details <- pm_get_group_details(data) - details[, ".pm_group_id"] <- seq_len(nrow(details)) - res <- suppressMessages(semi_join(details, res)) - res[, ".pm_group_id"] + details[, ".group_id"] <- seq_len(nrow(details)) + res <- suppressMessages(pm_semi_join(details, res)) + list(res[, ".group_id"]) } + pm_cur_group_rows <- function() { - pm_check_context("`cur_group_rows()`", pm_context$.data) + pm_check_group_pm_context("`pm_cur_group_rows()`") data <- pm_context$get_data() - res <- data[1L, pm_group_vars(data), drop = FALSE] - res <- suppressMessages(semi_join(pm_get_group_details(data), res)) + res <- data[1L, pm_get_groups(data), drop = FALSE] + res <- suppressMessages(pm_semi_join(pm_get_group_details(data), res)) unlist(res[, ".rows"]) } -pm_cur_column <- function() { - pm_check_context("`cur_column()`", pm_context$cur_column, "`across`") - pm_context$cur_column -} -pm_check_context <- function(fn, pm_context, name = NULL) { - if (is.null(pm_context)) { - stop(fn, " must only be used inside ", if (is.null(name)) "poorman verbs" else name) + +pm_check_group_pm_context <- function(fn) { + if (is.null(pm_context$.data)) { + stop(fn, " must only be used inside poorman verbs") } } pm_count <- function(x, ..., wt = NULL, sort = FALSE, name = NULL) { - pm_groups <- pm_group_vars(x) + pm_groups <- pm_get_groups(x) if (!missing(...)) x <- pm_group_by(x, ..., .add = TRUE) wt <- pm_deparse_var(wt) res <- do.call(pm_tally, list(x, wt, sort, name)) if (length(pm_groups) > 0L) res <- do.call(pm_group_by, list(res, as.name(pm_groups))) res } + pm_tally <- function(x, wt = NULL, sort = FALSE, name = NULL) { name <- pm_check_name(x, name) wt <- pm_deparse_var(wt) - res <- do.call(pm_summarise, stats::setNames(list(x, pm_tally_n(x, wt)), c(".data", name))) + res <- do.call(pm_summarise, pm_set_names(list(x, pm_tally_n(x, wt)), c(".data", name))) res <- pm_ungroup(res) - if (isTRUE(sort)) res <- do.call(pm_arrange, list(res, call("desc", as.name(name)))) + if (isTRUE(sort)) res <- do.call(pm_arrange, list(res, call("pm_desc", as.name(name)))) rownames(res) <- NULL res } + pm_add_count <- function(x, ..., wt = NULL, sort = FALSE, name = NULL) { name <- pm_check_name(x, name) row_names <- rownames(x) @@ -352,60 +154,73 @@ pm_add_count <- function(x, ..., wt = NULL, sort = FALSE, name = NULL) { res <- do.call(pm_add_tally, list(x, wt, sort, name)) res[row_names, ] } + pm_add_tally <- function(x, wt = NULL, sort = FALSE, name = NULL) { wt <- pm_deparse_var(wt) - n <- pm_tally_n(x, wt) + pm_n <- pm_tally_n(x, wt) name <- pm_check_name(x, name) - res <- do.call(pm_mutate, stats::setNames(list(x, n), c(".data", name))) + res <- do.call(pm_mutate, pm_set_names(list(x, pm_n), c(".data", name))) + if (isTRUE(sort)) { - do.call(pm_arrange, list(res, call("desc", as.name(name)))) + do.call(pm_arrange, list(res, call("pm_desc", as.name(name)))) } else { res } } + pm_tally_n <- function(x, wt) { - if (is.null(wt) && "n" %in% colnames(x)) { - message("Using `n` as weighting variable") - wt <- "n" + if (is.null(wt) && "pm_n" %in% colnames(x)) { + message("Using `pm_n` as weighting variable") + wt <- "pm_n" } pm_context$setup(.data = x) on.exit(pm_context$clean(), add = TRUE) if (is.null(wt)) { - call("n") + call("pm_n") } else { call("sum", as.name(wt), na.rm = TRUE) } } + pm_check_name <- function(df, name) { if (is.null(name)) { - if ("n" %in% colnames(df)) { + if ("pm_n" %in% colnames(df)) { stop( - "Column 'n' is already present in output\n", + "Column 'pm_n' is already present in output\n", "* Use `name = \"new_name\"` to pick a new name" ) } - return("n") + return("pm_n") } + if (!is.character(name) || length(name) != 1) { stop("`name` must be a single string") } + name } pm_desc <- function(x) -xtfrm(x) -pm_distinct <- function(.data, ..., .keep_all = FALSE) { - if ("grouped_df" %in% class(.data)) pm_distinct.grouped_df(.data, ..., .keep_all = FALSE) else pm_distinct.data.frame(.data, ..., .keep_all = FALSE) +pm_distinct <- function(.data, ...) { + pm_check_is_dataframe(.data) + if ("grouped_data" %in% class(.data)) { + pm_distinct.grouped_data(.data, ...) + } else { + pm_distinct.default(.data, ...) + } } -pm_distinct.data.frame <- function(.data, ..., .keep_all = FALSE) { - if (ncol(.data) == 0L) return(.data[1, ]) - cols <- pm_dotdotdot(...) + +pm_distinct.default <- function(.data, ..., .keep_all = FALSE) { + if (ncol(.data) == 0L) { + return(.data[1, ]) + } + cols <- pm_deparse_dots(...) col_names <- names(cols) col_len <- length(cols) if (is.null(col_names) && col_len > 0L) names(cols) <- cols if (col_len == 0L) { res <- .data } else { - mut <- pm_mutate_df(.data, ...) - res <- mut$data + res <- pm_mutate(.data, ...) col_names <- names(cols) res <- if (!is.null(col_names)) { zero_names <- nchar(col_names) == 0L @@ -413,9 +228,9 @@ pm_distinct.data.frame <- function(.data, ..., .keep_all = FALSE) { names(cols)[zero_names] <- cols[zero_names] col_names <- names(cols) } - suppressMessages(select(res, col_names)) + suppressMessages(pm_select(res, col_names)) } else { - suppressMessages(select(res, cols)) + suppressMessages(pm_select(res, cols)) } } res <- unique(res) @@ -423,43 +238,26 @@ pm_distinct.data.frame <- function(.data, ..., .keep_all = FALSE) { res <- cbind(res, .data[rownames(res), setdiff(colnames(.data), colnames(res)), drop = FALSE]) } common_cols <- c(intersect(colnames(.data), colnames(res)), setdiff(col_names, colnames(.data))) - if (is.numeric(attr(res, "row.names"))) { - row.names(res) <- seq_len(nrow(res)) - } if (length(common_cols) > 0L) res[, common_cols, drop = FALSE] else res } -pm_distinct.grouped_df <- function(.data, ..., .keep_all = FALSE) { + +pm_distinct.grouped_data <- function(.data, ..., .keep_all = FALSE) { pm_apply_grouped_function("pm_distinct", .data, drop = TRUE, ..., .keep_all = .keep_all) } -pm_dotdotdot <- function(..., .impute_names = FALSE) { - dots <- eval(substitute(alist(...))) - if (isTRUE(.impute_names)) { - pm_deparse_dots <- lapply(dots, deparse) - names_dots <- names(dots) - unnamed <- if (is.null(names_dots)) rep(TRUE, length(dots)) else nchar(names_dots) == 0L - names(dots)[unnamed] <- pm_deparse_dots[unnamed] - } - dots -} -pm_deparse_dots <- function(...) { - vapply(substitute(...()), deparse, NA_character_) -} -pm_deparse_var <- function(var, frame = if (is.null(pm_eval_env$env)) parent.frame() else pm_eval_env$env) { - sub_var <- eval(substitute(substitute(var)), frame) - if (is.symbol(sub_var)) var <- as.character(sub_var) - var -} pm_eval_env <- new.env() -pm_filter <- function(.data, ..., .preserve = FALSE) { - if ("grouped_df" %in% class(.data)) pm_filter.grouped_df(.data, ..., .preserve = FALSE) else pm_filter.data.frame(.data, ..., .preserve = FALSE) +pm_filter <- function(.data, ...) { + pm_check_is_dataframe(.data) + if ("grouped_data" %in% class(.data)) { + pm_filter.grouped_data(.data, ...) + } else { + pm_filter.default(.data, ...) + } } -pm_filter.data.frame <- function(.data, ..., .preserve = FALSE) { + +pm_filter.default <- function(.data, ...) { conditions <- pm_dotdotdot(...) - if (length(conditions) == 0L) return(.data) - pm_check_filter(conditions) cond_class <- vapply(conditions, typeof, NA_character_) - cond_class <- cond_class[!cond_class %in% c("language", "logical")] - if (length(cond_class) > 0L) stop("Conditions must be logical vectors") + if (any(cond_class != "language")) stop("Conditions must be logical vectors") pm_context$setup(.data) on.exit(pm_context$clean(), add = TRUE) pm_eval_env$env <- parent.frame() @@ -472,125 +270,108 @@ pm_filter.data.frame <- function(.data, ..., .preserve = FALSE) { rows <- Reduce("&", rows) .data[rows & !is.na(rows), ] } -pm_filter.grouped_df <- function(.data, ..., .preserve = FALSE) { + +pm_filter.grouped_data <- function(.data, ...) { rows <- rownames(.data) res <- pm_apply_grouped_function("pm_filter", .data, drop = TRUE, ...) - res <- res[rows[rows %in% rownames(res)], ] - pm_groups <- pm_group_vars(.data) - pre_filtered_groups <- pm_group_data(.data) - post_filtered_groups <- pm_calculate_groups(res, pm_groups) - if (!(!.preserve && isTRUE(attr(pre_filtered_groups, ".drop")))) { - filtered_groups <- anti_join(pre_filtered_groups, post_filtered_groups, by = pm_groups) - filtered_groups <- filtered_groups[, pm_groups, drop = FALSE] - filtered_groups[[".rows"]] <- rep(list(integer()), length.out = nrow(filtered_groups)) - post_filtered_groups <- bind_rows(post_filtered_groups, filtered_groups) - ordered <- do.call(pm_arrange_rows, list(post_filtered_groups, pm_as_symbols(pm_groups))) - post_filtered_groups <- post_filtered_groups[ordered, ] - } - attr(res, "pm_groups") <- post_filtered_groups - res + res[rows[rows %in% rownames(res)], ] } -pm_check_filter <- function(conditions) { - named <- pm_have_name(conditions) - for (i in which(named)) { - if (!is.logical(conditions[[i]])) { - stop( - sprintf("Problem with `pm_filter()` input `..%s`.\n", i), - sprintf("Input `..%s` is named.\n", i), - "This usually means that you've used `=` instead of `==`.\n", - sprintf("Did you mean `%s == %s`?", names(conditions)[[i]], conditions[[i]]) - ) - } - } -} -pm_gluestick <- function(fmt, src = parent.frame(), open = "{", close = "}", eval = TRUE) { - nchar_open <- nchar(open) - nchar_close <- nchar(close) - stopifnot(exprs = { - is.character(fmt) - length(fmt) == 1L - is.character(open) - length(open) == 1L - nchar_open > 0L - is.character(close) - length(close) == 1 - nchar_close > 0 - }) - open <- gsub("(.)", "\\\\\\1", open) - close <- gsub("(.)", "\\\\\\1", close) - re <- paste0(open, ".*?", close) - matches <- gregexpr(re, fmt) - exprs <- regmatches(fmt, matches)[[1]] - exprs <- substr(exprs, nchar_open + 1L, nchar(exprs) - nchar_close) - fmt_sprintf <- gsub(re, "%s", fmt) - fmt_sprintf <- gsub("%(?!s)", "%%", fmt_sprintf, perl = TRUE) - args <- if (eval) { - lapply(exprs, function(expr) eval(parse(text = expr), envir = src)) - } else { - unname(mget(exprs, envir = as.environment(src))) - } - do.call(sprintf, c(list(fmt_sprintf), args)) -} -pm_group_by <- function(.data, ..., .add = FALSE, .drop = pm_group_by_drop_default(.data)) { - pm_group_by.data.frame(.data, ..., .add = FALSE, .drop = pm_group_by_drop_default(.data)) -} -pm_group_by.data.frame <- function(.data, ..., .add = FALSE, .drop = pm_group_by_drop_default(.data)) { - vars <- pm_dotdotdot(..., .impute_names = TRUE) - if (all(vapply(vars, is.null, FALSE))) { - res <- pm_groups_set(.data, NULL) - class(res) <- class(res)[!(class(res) %in% "grouped_df")] - return(res) - } - new_cols <- pm_add_group_columns(.data, vars) - res <- new_cols$data - pm_groups <- new_cols$pm_groups - if (isTRUE(.add)) pm_groups <- union(pm_group_vars(.data), pm_groups) - unknown <- !(pm_groups %in% colnames(res)) +pm_group_by <- function(.data, ..., .add = FALSE) { + pm_check_is_dataframe(.data) + pre_groups <- pm_get_groups(.data) + pm_groups <- pm_deparse_dots(...) + if (isTRUE(.add)) pm_groups <- unique(c(pre_groups, pm_groups)) + unknown <- !(pm_groups %in% colnames(.data)) if (any(unknown)) stop("Invalid pm_groups: ", pm_groups[unknown]) - if (length(pm_groups) > 0L) { - res <- pm_groups_set(res, pm_groups, .drop) - class(res) <- union("grouped_df", class(res)) + class(.data) <- c("grouped_data", class(.data)) + pm_set_groups(.data, pm_groups) +} + +pm_ungroup <- function(x, ...) { + pm_check_is_dataframe(x) + rm_groups <- pm_deparse_dots(...) + pm_groups <- pm_get_groups(x) + if (length(rm_groups) == 0L) rm_groups <- pm_groups + x <- pm_set_groups(x, pm_groups[!(pm_groups %in% rm_groups)]) + if (length(attr(x, "pm_groups")) == 0L) { + attr(x, "pm_groups") <- NULL + class(x) <- class(x)[!(class(x) %in% "grouped_data")] + } + x +} + +pm_set_groups <- function(x, pm_groups) { + attr(x, "pm_groups") <- if (is.null(pm_groups) || length(pm_groups) == 0L) { + NULL + } else { + pm_group_data_worker(x, pm_groups) + } + x +} + +pm_get_groups <- function(x) { + pm_groups <- attr(x, "pm_groups", exact = TRUE) + if (is.null(pm_groups)) character(0) else colnames(pm_groups)[!colnames(pm_groups) %in% c(".group_id", ".rows")] +} + +pm_get_group_details <- function(x) { + pm_groups <- attr(x, "pm_groups", exact = TRUE) + if (is.null(pm_groups)) character(0) else pm_groups +} + +pm_has_groups <- function(x) { + pm_groups <- pm_get_groups(x) + if (length(pm_groups) == 0L) FALSE else TRUE +} + +pm_apply_grouped_function <- function(fn, .data, drop = FALSE, ...) { + pm_groups <- pm_get_groups(.data) + grouped <- pm_split_into_groups(.data, pm_groups, drop) + res <- do.call(rbind, unname(lapply(grouped, fn, ...))) + if (any(pm_groups %in% colnames(res))) { + class(res) <- c("grouped_data", class(res)) + res <- pm_set_groups(res, pm_groups[pm_groups %in% colnames(res)]) } res } -pm_group_by_drop_default <- function(.tbl) { - if ("grouped_df" %in% class(.tbl)) pm_group_by_drop_default.grouped_df(.tbl) else pm_group_by_drop_default.data.frame(.tbl) -} -pm_group_by_drop_default.data.frame <- function(.tbl) { - TRUE -} -pm_group_by_drop_default.grouped_df <- function(.tbl) { - tryCatch({ - !identical(attr(pm_group_data(.tbl), ".drop"), FALSE) - }, error = function(e) { - TRUE - }) -} -pm_add_group_columns <- function(.data, vars) { - vars <- vars[!vapply(vars, is.null, FALSE)] - types <- do.call(c, lapply(vars, typeof)) - test <- any(types == "language") - needs_mutate <- if (test) unname(which(types == "language")) else NULL - if (!is.null(needs_mutate)) { - .data <- do.call(pm_mutate, c(list(.data = pm_ungroup(.data)), vars[needs_mutate])) - } - list(data = .data, pm_groups = names(vars)) + +pm_print.grouped_data <- function(x, ..., digits = NULL, quote = FALSE, right = TRUE, row.names = TRUE, max = NULL) { + class(x) <- "data.frame" + print(x, ..., digits = digits, quote = quote, right = right, row.names = row.names, max = max) + cat("\nGroups: ", paste(pm_get_groups(x), collapse = ", "), "\n\n") } + pm_group_data <- function(.data) { - if ("grouped_df" %in% class(.data)) pm_group_data.grouped_df(.data) else pm_group_data.data.frame(.data) + if (!pm_has_groups(.data)) { + return(data.frame(.rows = I(list(seq_len(nrow(.data)))))) + } + pm_groups <- pm_get_groups(.data) + pm_group_data_worker(.data, pm_groups) } -pm_group_data.data.frame <- function(.data) { - structure(list(.rows = list(seq_len(nrow(.data)))), class = "data.frame", row.names = c(NA, -1L)) -} -pm_group_data.grouped_df <- function(.data) { - attr(.data, "pm_groups") + +pm_group_data_worker <- function(.data, pm_groups) { + res <- unique(.data[, pm_groups, drop = FALSE]) + class(res) <- "data.frame" + nrow_res <- nrow(res) + rows <- rep(list(NA), nrow_res) + for (i in seq_len(nrow_res)) { + rows[[i]] <- which(interaction(.data[, pm_groups]) %in% interaction(res[i, pm_groups])) + } + res$`.rows` <- rows + res <- res[do.call(order, lapply(pm_groups, function(x) res[, x])), , drop = FALSE] + rownames(res) <- NULL + res } + pm_group_rows <- function(.data) { pm_group_data(.data)[[".rows"]] } + pm_group_indices <- function(.data) { - if (!pm_has_groups(.data)) return(rep(1L, nrow(.data))) - pm_groups <- pm_group_vars(.data) + if (!pm_has_groups(.data)) { + return(rep(1L, nrow(.data))) + } + pm_groups <- pm_get_groups(.data) res <- unique(.data[, pm_groups, drop = FALSE]) res <- res[do.call(order, lapply(pm_groups, function(x) res[, x])), , drop = FALSE] class(res) <- "data.frame" @@ -601,115 +382,64 @@ pm_group_indices <- function(.data) { } rows } + pm_group_vars <- function(x) { - pm_groups <- attr(x, "pm_groups", exact = TRUE) - if (is.null(pm_groups)) character(0) else colnames(pm_groups)[!colnames(pm_groups) %in% c(".pm_group_id", ".rows")] + pm_get_groups(x) } + pm_groups <- function(x) { - pm_as_symbols(pm_group_vars(x)) + lapply(pm_get_groups(x), as.symbol) } + pm_group_size <- function(x) { lengths(pm_group_rows(x)) } + pm_n_groups <- function(x) { nrow(pm_group_data(x)) } -pm_group_split <- function(.data, ..., .keep = TRUE) { - dots_len <- length(pm_dotdotdot(...)) > 0L - if (pm_has_groups(.data) && isTRUE(dots_len)) { - warning("... is ignored in pm_group_split(), please use pm_group_by(..., .add = TRUE) %pm>% pm_group_split()") - } - if (!pm_has_groups(.data) && isTRUE(dots_len)) { - .data <- pm_group_by(.data, ...) - } - if (!pm_has_groups(.data) && !isTRUE(dots_len)) { - return(list(.data)) - } - pm_context$setup(.data) - on.exit(pm_context$clean(), add = TRUE) - pm_groups <- pm_group_vars(.data) - attr(pm_context$.data, "pm_groups") <- NULL - res <- pm_split_into_groups(pm_context$.data, pm_groups) - names(res) <- NULL - if (!isTRUE(.keep)) { - res <- lapply(res, function(x) x[, !colnames(x) %in% pm_groups]) - } - any_empty <- unlist(lapply(res, function(x) !(nrow(x) == 0L))) - res[any_empty] -} +# pm_group_split <- function(.data, ..., .keep = TRUE) { +# dots_len <- ...length() > 0L +# if (pm_has_groups(.data) && isTRUE(dots_len)) { +# warning("... is ignored in pm_group_split(), please use pm_group_by(..., .add = TRUE) %pm>% pm_group_split()") +# } +# if (!pm_has_groups(.data) && isTRUE(dots_len)) { +# .data <- pm_group_by(.data, ...) +# } +# if (!pm_has_groups(.data) && isFALSE(dots_len)) { +# return(list(.data)) +# } +# pm_context$setup(.data) +# on.exit(pm_context$clean(), add = TRUE) +# pm_groups <- pm_get_groups(.data) +# attr(pm_context$.data, "pm_groups") <- NULL +# res <- pm_split_into_groups(pm_context$.data, pm_groups) +# names(res) <- NULL +# if (isFALSE(.keep)) { +# res <- lapply(res, function(x) x[, !colnames(x) %in% pm_groups]) +# } +# any_empty <- unlist(lapply(res, function(x) !(nrow(x) == 0L))) +# res[any_empty] +# } + pm_group_keys <- function(.data) { - pm_groups <- pm_group_vars(.data) + pm_groups <- pm_get_groups(.data) pm_context$setup(.data) - res <- pm_context$get_columns(pm_context$get_colnames() %in% pm_groups) + res <- pm_context$.data[, pm_context$get_colnames() %in% pm_groups, drop = FALSE] res <- res[!duplicated(res), , drop = FALSE] - if (nrow(res) == 0L) return(res) + if (nrow(res) == 0L) { + return(res) + } class(res) <- "data.frame" res <- res[do.call(order, lapply(pm_groups, function(x) res[, x])), , drop = FALSE] rownames(res) <- NULL res } + pm_split_into_groups <- function(.data, pm_groups, drop = FALSE, ...) { class(.data) <- "data.frame" - pm_group_factors <- lapply(pm_groups, function(x, .data) as.factor(.data[, x]), .data) - split(x = .data, f = pm_group_factors, drop = drop, ...) -} -pm_groups_set <- function(x, pm_groups, drop = pm_group_by_drop_default(x)) { - attr(x, "pm_groups") <- if (is.null(pm_groups) || length(pm_groups) == 0L) { - NULL - } else { - pm_calculate_groups(x, pm_groups, drop) - } - x -} -pm_get_group_details <- function(x) { - pm_groups <- attr(x, "pm_groups", exact = TRUE) - if (is.null(pm_groups)) character(0) else pm_groups -} -pm_has_groups <- function(x) { - pm_groups <- pm_group_vars(x) - if (length(pm_groups) == 0L) FALSE else TRUE -} -pm_apply_grouped_function <- function(fn, .data, drop = FALSE, ...) { - pm_groups <- pm_group_vars(.data) - grouped <- pm_split_into_groups(.data, pm_groups, drop) - res <- do.call(rbind, unname(lapply(grouped, fn, ...))) - if (any(pm_groups %in% colnames(res))) { - class(res) <- c("grouped_df", class(res)) - res <- pm_groups_set(res, pm_groups[pm_groups %in% colnames(res)]) - } - res -} -pm_calculate_groups <- function(data, pm_groups, drop = pm_group_by_drop_default(data)) { - data <- pm_ungroup(data) - unknown <- setdiff(pm_groups, colnames(data)) - if (length(unknown) > 0L) { - stop(sprintf("`pm_groups` missing from `data`: %s.", paste0(pm_groups, collapse = ", "))) - } - unique_groups <- unique(data[, pm_groups, drop = FALSE]) - is_factor <- do.call(c, lapply(unique_groups, function(x) is.factor(x))) - n_comb <- nrow(unique_groups) - rows <- rep(list(NA), n_comb) - data_groups <- interaction(data[, pm_groups, drop = TRUE]) - for (i in seq_len(n_comb)) { - rows[[i]] <- which(data_groups %in% interaction(unique_groups[i, pm_groups])) - } - if (!isTRUE(drop) && any(is_factor)) { - na_lvls <- do.call( - expand.grid, - lapply(unique_groups, function(x) if (is.factor(x)) levels(x)[!(levels(x) %in% x)] else NA) - ) - unique_groups <- rbind(unique_groups, na_lvls) - for (i in seq_len(nrow(na_lvls))) { - rows[[length(rows) + 1]] <- integer(0) - } - } - unique_groups[[".rows"]] <- rows - unique_groups <- unique_groups[do.call(order, lapply(pm_groups, function(x) unique_groups[, x])), , drop = FALSE] - rownames(unique_groups) <- NULL - structure(unique_groups, .drop = drop) -} -pm_is.grouped_df <- function(x) { - inherits(x, "grouped_df") + group_factors <- lapply(pm_groups, function(x, .data) as.factor(.data[, x]), .data) + split(x = .data, f = group_factors, drop = drop, ...) } pm_if_else <- function(condition, true, false, missing = NULL) { if (!is.logical(condition)) stop("`condition` must be a logical vector.") @@ -727,12 +457,15 @@ pm_if_else <- function(condition, true, false, missing = NULL) { attributes(res) <- attributes(true) res } + pm_anti_join <- function(x, y, by = NULL) { pm_filter_join_worker(x, y, by, type = "anti") } + pm_semi_join <- function(x, y, by = NULL) { pm_filter_join_worker(x, y, by, type = "semi") } + pm_filter_join_worker <- function(x, y, by = NULL, type = c("anti", "semi")) { type <- match.arg(type, choices = c("anti", "semi"), several.ok = FALSE) if (is.null(by)) { @@ -743,45 +476,41 @@ pm_filter_join_worker <- function(x, y, by = NULL, type = c("anti", "semi")) { if (type == "anti") rows <- !rows res <- x[rows, , drop = FALSE] rownames(res) <- NULL - pm_reconstruct_attrs(res, x) + res } -pm_inner_join <- function(x, y, by = NULL, suffix = c(".x", ".y"), ..., na_matches = c("na", "never")) { - pm_join_worker(x = x, y = y, by = by, suffix = suffix, sort = FALSE, ..., keep = FALSE, na_matches = na_matches) + +pm_inner_join <- function(x, y, by = NULL, suffix = c(".x", ".y")) { + pm_join_worker(x = x, y = y, by = by, suffix = suffix, sort = FALSE) } -pm_left_join <- function(x, y, by = NULL, suffix = c(".x", ".y"), ..., keep = FALSE, na_matches = c("na", "never")) { - pm_join_worker(x = x, y = y, by = by, suffix = suffix, all.x = TRUE, ..., keep = keep, na_matches = na_matches) + +# pm_left_join <- function(x, y, by = NULL, suffix = c(".x", ".y")) { +# pm_join_worker(x = x, y = y, by = by, suffix = suffix, all.x = TRUE) +# } + +pm_right_join <- function(x, y, by = NULL, suffix = c(".x", ".y")) { + pm_join_worker(x = x, y = y, by = by, suffix = suffix, all.y = TRUE) } -pm_right_join <- function(x, y, by = NULL, suffix = c(".x", ".y"), ..., keep = FALSE, na_matches = c("na", "never")) { - pm_join_worker(x = x, y = y, by = by, suffix = suffix, all.y = TRUE, ..., keep = keep, na_matches = na_matches) + +pm_full_join <- function(x, y, by = NULL, suffix = c(".x", ".y")) { + pm_join_worker(x = x, y = y, by = by, suffix = suffix, all = TRUE) } -pm_full_join <- function(x, y, by = NULL, suffix = c(".x", ".y"), ..., keep = FALSE, na_matches = c("na", "never")) { - pm_join_worker(x = x, y = y, by = by, suffix = suffix, all = TRUE, ..., keep = keep, na_matches = na_matches) -} -pm_join_worker <- function(x, y, by = NULL, suffix = c(".x", ".y"), keep = FALSE, na_matches = c("na", "never"), ...) { - na_matches <- match.arg(arg = na_matches, choices = c("na", "never"), several.ok = FALSE) - incomparables <- if (na_matches == "never") NA else NULL + +pm_join_worker <- function(x, y, by = NULL, suffix = c(".x", ".y"), ...) { x[, ".join_id"] <- seq_len(nrow(x)) - merged <- if (is.null(by)) { + if (is.null(by)) { by <- intersect(names(x), names(y)) pm_join_message(by) - merge( - x = x, y = y, by = by, suffixes = suffix, incomparables = incomparables, ... - )[, union(names(x), names(y)), drop = FALSE] + merged <- merge(x = x, y = y, by = by, suffixes = suffix, ...)[, union(names(x), names(y))] } else if (is.null(names(by))) { - merge(x = x, y = y, by = by, suffixes = suffix, incomparables = incomparables, ...) + merged <- merge(x = x, y = y, by = by, suffixes = suffix, ...) } else { - merge(x = x, y = y, by.x = names(by), by.y = by, suffixes = suffix, incomparables = incomparables, ...) - } - merged <- merged[order(merged[, ".join_id"]), colnames(merged) != ".join_id", drop = FALSE] - if (isTRUE(keep)) { - keep_pos <- match(by, names(merged)) - x_by <- paste0(by, suffix[1L]) - colnames(merged)[keep_pos] <- x_by - merged[, paste0(by, suffix[2L])] <- merged[, x_by] + merged <- merge(x = x, y = y, by.x = names(by), by.y = by, suffixes = suffix, ...) } + merged <- merged[order(merged[, ".join_id"]), colnames(merged) != ".join_id"] rownames(merged) <- NULL - pm_reconstruct_attrs(merged, x) + merged } + pm_join_message <- function(by) { if (length(by) > 1L) { message("Joining, by = c(\"", paste0(by, collapse = "\", \""), "\")\n", sep = "") @@ -789,54 +518,12 @@ pm_join_message <- function(by) { message("Joining, by = \"", by, "\"\n", sep = "") } } -pm_as_function <- function(x, env = parent.frame()) { - if (is.function(x)) return(x) - if (pm_is_formula(x)) { - if (length(x) > 2) stop("Can't convert a two-sided formula to a function") - env <- attr(x, ".Environment", exact = TRUE) - rhs <- as.list(x)[[2]] - return(as.function(list(... = substitute(), .x = quote(..1), .y = quote(..2), . = quote(..1), rhs), envir = env)) +pm_lag <- function(x, pm_n = 1L, default = NA) { + if (inherits(x, "ts")) stop("`x` must be a vector, not a `ts` object, do you want `stats::pm_lag()`?") + if (length(pm_n) != 1L || !is.numeric(pm_n) || pm_n < 0L) stop("`pm_n` must be a nonnegative integer scalar") + if (pm_n == 0L) { + return(x) } - if (pm_is_string(x)) return(get(x, envir = env, mode = "function")) - stop("Can't convert an object of class ", class(x), " to a function.") -} -pm_is_formula <- function(x) { - inherits(x, "formula") -} -pm_is_string <- function(x) { - is.character(x) && length(x) == 1L -} -pm_is_wholenumber <- function(x) { - x %% 1L == 0L -} -pm_names_are_invalid <- function(x) { - x == "" | is.na(x) -} -pm_is_named <- function(x) { - nms <- names(x) - if (is.null(nms)) return(FALSE) - if (any(pm_names_are_invalid(nms))) return(FALSE) - TRUE -} -pm_have_name <- function(x) { - nms <- names(x) - if (is.null(nms)) rep(FALSE, length(x)) else !pm_names_are_invalid(nms) -} -pm_is_empty_list <- function(x) { - inherits(x, "list") && length(x) == 0L -} -pm_as_symbols <- function(x) { - lapply(x, as.symbol) -} -pm_is_df_or_vector <- function(x) { - res <- is.data.frame(x) || is.atomic(x) - if (!isTRUE(res)) stop("You must pass vector(s) and/or data.frame(s).") - TRUE -} -pm_lag <- function(x, n = 1L, default = NA) { - if (inherits(x, "ts")) stop("`x` must be a vector, not a `ts` object, do you want `stats::lag()`?") - if (length(n) != 1L || !is.numeric(n) || n < 0L) stop("`n` must be a nonnegative integer scalar") - if (n == 0L) return(x) tryCatch( storage.mode(default) <- typeof(x), warning = function(w) { @@ -844,14 +531,17 @@ pm_lag <- function(x, n = 1L, default = NA) { } ) xlen <- length(x) - n <- pmin(n, xlen) - res <- c(rep(default, n), x[seq_len(xlen - n)]) + pm_n <- pmin(pm_n, xlen) + res <- c(rep(default, pm_n), x[seq_len(xlen - pm_n)]) attributes(res) <- attributes(x) res } -pm_lead <- function(x, n = 1L, default = NA) { - if (length(n) != 1L || !is.numeric(n) || n < 0L) stop("n must be a nonnegative integer scalar") - if (n == 0L) return(x) + +pm_lead <- function(x, pm_n = 1L, default = NA) { + if (length(pm_n) != 1L || !is.numeric(pm_n) || pm_n < 0L) stop("pm_n must be a nonnegative integer scalar") + if (pm_n == 0L) { + return(x) + } tryCatch( storage.mode(default) <- typeof(x), warning = function(w) { @@ -859,426 +549,114 @@ pm_lead <- function(x, n = 1L, default = NA) { } ) xlen <- length(x) - n <- pmin(n, xlen) - res <- c(x[-seq_len(n)], rep(default, n)) + pm_n <- pmin(pm_n, xlen) + res <- c(x[-seq_len(pm_n)], rep(default, pm_n)) attributes(res) <- attributes(x) res } -pm_lst <- function(...) { - fn_call <- match.call() - list_to_eval <- as.list(fn_call)[-1] - out <- vector(mode = "list", length = length(list_to_eval)) - names(out) <- names(list_to_eval) - exprs <- lapply(substitute(list(...)), deparse)[-1] - for (element in seq_along(list_to_eval)) { - value <- list_to_eval[[element]] - if (is.language(value)) { - value <- eval( - value, - envir = if (length(out) == 0) { - list_to_eval - } else { - pm_drop_dup_list(out[1:(element - 1)]) - } - ) - } - if (is.null(value)) { - out[element] <- list(NULL) - } else { - out[[element]] <- value - } - invalid_name <- is.null(names(out)[element]) || - is.na(names(out)[element]) || - names(out)[element] == "" - if (invalid_name) { - if (exprs[[element]] != "NULL" || (exprs[[element]] == "NULL" && is.null(out[[element]]))) { - names(out)[element] <- exprs[[element]] - } - } - } - out -} -pm_drop_dup_list <- function(x) { - list_names <- names(x) - if (identical(list_names, unique(list_names))) return(x) - count <- table(list_names) - dupes <- names(count[count > 1]) - uniques <- names(count[count == 1]) - to_drop <- do.call(c, lapply( - dupes, - function(x) { - matches <- which(list_names == x) - matches[-length(matches)] - } - )) - x[uniques] <- Filter(Negate(is.null), x[uniques]) - return(x[-to_drop]) -} pm_mutate <- function(.data, ...) { - if ("grouped_df" %in% class(.data)) pm_mutate.grouped_df(.data, ...) else pm_mutate.data.frame(.data, ...) -} -pm_mutate.data.frame <- function( - .data, - ..., - .keep = c("all", "used", "unused", "none"), - .before = NULL, - .after = NULL -) { - keep <- match.arg(arg = .keep, choices = c("all", "used", "unused", "none"), several.ok = FALSE) - res <- pm_mutate_df(.data = .data, ...) - data <- res$data - new_cols <- res$new_cols - .before <- substitute(.before) - .after <- substitute(.after) - if (!is.null(.before) || !is.null(.after)) { - new <- setdiff(new_cols, names(.data)) - data <- do.call(pm_relocate, c(list(.data = data), new, .before = .before, .after = .after)) - } - if (keep == "all") { - data - } else if (keep == "unused") { - unused <- setdiff(colnames(.data), res$used_cols) - keep <- intersect(colnames(data), c(pm_group_vars(.data), unused, new_cols)) - select(.data = data, keep) - } else if (keep == "used") { - keep <- intersect(colnames(data), c(pm_group_vars(.data), res$used_cols, new_cols)) - select(.data = data, keep) - } else if (keep == "none") { - keep <- c(setdiff(pm_group_vars(.data), new_cols), intersect(new_cols, colnames(data))) - select(.data = data, keep) + pm_check_is_dataframe(.data) + if ("grouped_data" %in% class(.data)) { + pm_mutate.grouped_data(.data, ...) + } else { + pm_mutate.default(.data, ...) } } -pm_mutate.grouped_df <- function(.data, ...) { - pm_context$pm_group_env <- parent.frame(n = 1) - on.exit(rm(list = c("pm_group_env"), envir = pm_context), add = TRUE) - rows <- rownames(.data) - res <- pm_apply_grouped_function("pm_mutate", .data, drop = TRUE, ...) - res[rows, , drop = FALSE] -} -pm_mutate_df <- function(.data, ...) { + +pm_mutate.default <- function(.data, ...) { conditions <- pm_dotdotdot(..., .impute_names = TRUE) - cond_nms <- names(pm_dotdotdot(..., .impute_names = FALSE)) - if (length(conditions) == 0L) { - return(list( - data = .data, - used_cols = NULL, - new_cols = NULL - )) - } - used <- unname(do.call(c, lapply(conditions, pm_find_used))) - used <- used[used %in% colnames(.data)] + .data[, setdiff(names(conditions), names(.data))] <- NA pm_context$setup(.data) on.exit(pm_context$clean(), add = TRUE) for (i in seq_along(conditions)) { - not_named <- (is.null(cond_nms) || cond_nms[i] == "") - res <- eval( - conditions[[i]], - envir = pm_context$as_env(), - enclos = if (!is.null(pm_context$pm_group_env)) pm_context$pm_group_env else parent.frame(n = 2) - ) - res_nms <- names(res) - if (is.data.frame(res)) { - if (not_named) { - pm_context$.data[, res_nms] <- res - } else { - pm_context$.data[[cond_nms[i]]] <- res - } - } else if (is.atomic(res)) { - cond_nms[i] <- names(conditions)[[i]] - pm_context$.data[[cond_nms[i]]] <- res - } else { - if (is.null(res_nms)) names(res) <- names(conditions)[[i]] - pm_context$.data[[names(res)]] <- res - } + pm_context$.data[, names(conditions)[i]] <- eval(conditions[[i]], envir = pm_context$.data) } - list( - data = pm_context$.data, - used_cols = used, - new_cols = cond_nms - ) + pm_context$.data } -pm_find_used <- function(expr) { - if (is.symbol(expr)) { - as.character(expr) - } else { - unique(unlist(lapply(expr[-1], pm_find_used))) - } + +pm_mutate.grouped_data <- function(.data, ...) { + rows <- rownames(.data) + res <- pm_apply_grouped_function("pm_mutate", .data, drop = TRUE, ...) + res[rows, ] } pm_n_distinct <- function(..., na.rm = FALSE) { - res <- do.call(cbind, list(...)) - if (isTRUE(na.rm)) res <- res[!is.na(res), , drop = FALSE] - nrow(unique(res)) + res <- c(...) + if (is.list(res)) { + return(nrow(unique(as.data.frame(res, stringsAsFactors = FALSE)))) + } + if (isTRUE(na.rm)) res <- res[!is.na(res)] + length(unique(res)) } -pm_nth <- function(x, n, order_by = NULL, default = pm_default_missing(x)) { - if (length(n) != 1 || !is.numeric(n)) stop("`n` must be a single integer.") - n <- trunc(n) - if (n == 0 || n > length(x) || n < -length(x)) return(default) - if (n < 0) n <- length(x) + n + 1 - if (is.null(order_by)) x[[n]] else x[[order(order_by)[[n]]]] +pm_na_if <- function(x, y) { + y_len <- length(y) + x_len <- length(x) + if (!(y_len %in% c(1L, x_len))) stop("`y` must be length ", x_len, " (same as `x`) or 1, not ", y_len) + x[x == y] <- NA + x } -pm_first <- function(x, order_by = NULL, default = pm_default_missing(x)) { - pm_nth(x, 1L, order_by = order_by, default = default) -} -pm_last <- function(x, order_by = NULL, default = pm_default_missing(x)) { - pm_nth(x, -1L, order_by = order_by, default = default) -} -pm_default_missing <- function(x) { - pm_default_missing.data.frame(x) -} -pm_default_missing.data.frame <- function(x) { - if (!is.object(x) && is.list(x)) NULL else x[NA_real_] -} -pm_default_missing.data.frame <- function(x) { - rep(NA, nrow(x)) +pm_near <- function(x, y, tol = .Machine$double.eps^0.5) { + abs(x - y) < tol } `%pm>%` <- function(lhs, rhs) { - rhs_call <- pm_insert_dot(substitute(rhs)) - eval(rhs_call, envir = list(`.` = lhs), enclos = parent.frame()) -} -pm_insert_dot <- function(expr) { - if (is.symbol(expr) || expr[[1]] == quote(`(`)) { - expr <- as.call(c(expr, quote(`.`))) - } else if (length(expr) == 1) { - expr <- as.call(c(expr[[1]], quote(`.`))) - } else if ( - expr[[1]] != quote(`{`) && - !any(vapply(expr[-1], identical, quote(`.`), FUN.VALUE = logical(1))) && - !any(vapply(expr[-1], identical, quote(`!!!.`), FUN.VALUE = logical(1))) - ) { - expr <- as.call(c(expr[[1]], quote(`.`), as.list(expr[-1]))) - } - expr -} -pm_pivot_longer <- function( - data, - cols, - names_to = "name", - names_prefix = NULL, - names_sep = NULL, - names_pattern = NULL, - values_to = "value", - values_drop_na = FALSE, - ... -) { - if (missing(cols)) { - stop("`cols` must select at least one column.") - } - cols <- names(pm_eval_select_pos(data, substitute(cols))) - if (any(names_to %in% setdiff(names(data), cols))) { - stop( - paste0( - "Some values of the columns specified in 'names_to' are already present - as column names. Either use another value in `names_to` or pm_rename the - following columns: ", - paste(names_to[which(names_to %in% setdiff(names(data), cols))], sep = ", ") - ), - call. = FALSE) - } - if (length(cols) == 0L) { - stop("No columns found for reshaping data.", call. = FALSE) - } - data[["_Row"]] <- as.numeric(rownames(data)) - names_to_2 <- paste(names_to, collapse = "_") - long <- stats::reshape( - as.data.frame(data, stringsAsFactors = FALSE), - varying = cols, - idvar = "_Row", - v.names = values_to, - timevar = names_to_2, - direction = "long" - ) - long <- long[do.call(order, long[, c("_Row", names_to_2)]), ] - long[["_Row"]] <- NULL - long[[names_to_2]] <- cols[long[[names_to_2]]] - if (length(names_to) > 1) { - if (is.null(names_pattern)) { - for (i in seq_along(names_to)) { - new_vals <- unlist(lapply( - strsplit(unique(long[[names_to_2]]), names_sep, fixed = TRUE), - function(x) x[i] - )) - long[[names_to[i]]] <- new_vals - } - } else { - tmp <- regmatches( - unique(long[[names_to_2]]), - regexec(names_pattern, unique(long[[names_to_2]])) - ) - tmp <- as.data.frame(do.call(rbind, tmp), stringsAsFactors = FALSE) - names(tmp) <- c(names_to_2, names_to) - long <- cbind(long, tmp[match(long[[names_to_2]], tmp[[names_to_2]]), -1]) - } - long[[names_to_2]] <- NULL - } - long <- pm_relocate(.data = long, "value", .after = -1) - if (!is.null(names_prefix)) { - if (length(names_to) > 1) { - stop("`names_prefix` only works when `names_to` is of length 1.", call. = FALSE) - } - long[[names_to]] <- gsub(paste0("^", names_prefix), "", long[[names_to]]) - } - if (values_drop_na) { - long <- long[!is.na(long[, values_to]), ] - } - rownames(long) <- NULL - attributes(long)$reshapeLong <- NULL - long -} -pm_pivot_wider <- function( - data, - id_cols = NULL, - values_from = "Value", - names_from = "Name", - names_sep = "_", - names_prefix = "", - names_glue = NULL, - values_fill = NULL, - ... -) { - old_names <- names(data) - names_from <- names(pm_eval_select_pos(data, substitute(names_from))) - values_from <- names(pm_eval_select_pos(data, substitute(values_from))) - variable_attr <- lapply(data, attributes) - if (is.null(id_cols)) { - row_index <- do.call( - paste, - c(data[, !names(data) %in% c(values_from, names_from), drop = FALSE], sep = "_") - ) - if (length(row_index) == 0) row_index <- rep("", nrow(data)) - data[["_Rows"]] <- row_index - id_cols <- "_Rows" - } - current_colnames <- colnames(data) - current_colnames <- current_colnames[current_colnames != "_Rows"] - if (is.null(names_glue)) { - future_colnames <- unique(do.call(paste, c(data[, names_from, drop = FALSE], sep = names_sep))) - } else { - vars <- regmatches(names_glue, gregexpr("\\{\\K[^{}]+(?=\\})", names_glue, perl = TRUE))[[1]] - tmp_data <- unique(data[, vars]) - future_colnames <- unique(apply(tmp_data, 1, function(x) { - tmp_vars <- list() - for (i in seq_along(vars)) { - tmp_vars[[i]] <- x[vars[i]] - } - tmp_colname <- gsub("\\{\\K[^{}]+(?=\\})", "", names_glue, perl = TRUE) - tmp_colname <- gsub("\\{\\}", "%s", tmp_colname) - do.call(sprintf, c(fmt = tmp_colname, tmp_vars)) - })) - } - if (any(future_colnames %in% current_colnames)) { - stop( - paste0( - "Some values of the columns specified in 'names_from' are already present - as column names. Either use `name_prefix` or pm_rename the following columns: ", - paste(current_colnames[which(current_colnames %in% future_colnames)], sep = ", ") - ), - call. = FALSE - ) - } - data$new_time <- do.call(paste, c(data[, names_from, drop = FALSE], sep = "_")) - data[, names_from] <- NULL - wide <- stats::reshape( - as.data.frame(data, stringsAsFactors = FALSE), - v.names = values_from, - idvar = id_cols, - timevar = "new_time", - sep = names_sep, - direction = "wide" - ) - if ("_Rows" %in% names(wide)) wide[["_Rows"]] <- NULL - rownames(wide) <- NULL - if (length(values_from) == 1) { - to_rename <- which(startsWith(names(wide), paste0(values_from, names_sep))) - names(wide)[to_rename] <- future_colnames - } - if (length(values_from) > 1) { - for (i in values_from) { - tmp1 <- wide[, which(!startsWith(names(wide), i))] - tmp2 <- wide[, which(startsWith(names(wide), i))] - wide <- cbind(tmp1, tmp2) - } - } - new_cols <- setdiff(names(wide), old_names) - names(wide)[which(names(wide) %in% new_cols)] <- paste0(names_prefix, new_cols) - if (!is.null(values_fill)) { - if (length(values_fill) == 1) { - if (is.numeric(wide[[new_cols[1]]])) { - if (!is.numeric(values_fill)) { - stop(paste0("`values_fill` must be of type numeric."), call. = FALSE) - } else { - for (i in new_cols) { - wide[[i]] <- pm_replace_na(wide[[i]], replace = values_fill) - } - } - } else if (is.character(wide[[new_cols[1]]])) { - if (!is.character(values_fill)) { - stop(paste0("`values_fill` must be of type character."), call. = FALSE) - } else { - for (i in new_cols) { - wide[[i]] <- pm_replace_na(wide[[i]], replace = values_fill) - } - } - } else if (is.factor(wide[[new_cols[1]]])) { - if (!is.factor(values_fill)) { - stop(paste0("`values_fill` must be of type factor."), call. = FALSE) - } else { - for (i in new_cols) { - wide[[i]] <- pm_replace_na(wide[[i]], replace = values_fill) - } - } - } - } else { - stop("`values_fill` must be of length 1.", call. = FALSE) - } - } - attributes(wide)$reshapeWide <- NULL - for (i in colnames(wide)) { - attributes(wide[[i]]) <- variable_attr[[i]] - } - wide + lhs <- substitute(lhs) + rhs <- substitute(rhs) + eval(as.call(c(rhs[[1L]], lhs, as.list(rhs[-1L]))), envir = parent.frame()) } pm_pull <- function(.data, var = -1) { - var_list <- as.list(seq_along(.data)) - names(var_list) <- names(.data) - .var <- eval(substitute(var), var_list) - if (.var < 0L) .var <- length(var_list) + .var + 1L - .data[[.var]] + var_deparse <- pm_deparse_var(var) + col_names <- colnames(.data) + if (!(var_deparse %in% col_names) & grepl("^[[:digit:]]+L|[[:digit:]]", var_deparse)) { + var <- as.integer(gsub("L", "", var_deparse)) + var <- pm_if_else(var < 1L, rev(col_names)[abs(var)], col_names[var]) + } else if (var_deparse %in% col_names) { + var <- var_deparse + } + .data[, var, drop = TRUE] +} +pm_set_names <- function(object = nm, nm) { + names(object) <- nm + object +} + +pm_vec_head <- function(x, pm_n = 6L, ...) { + stopifnot(length(pm_n) == 1L) + pm_n <- if (pm_n < 0L) max(length(x) + pm_n, 0L) else min(pm_n, length(x)) + x[seq_len(pm_n)] } pm_relocate <- function(.data, ..., .before = NULL, .after = NULL) { - pm_relocate.data.frame(.data, ..., .before = NULL, .after = NULL) -} -pm_relocate.data.frame <- function(.data, ..., .before = NULL, .after = NULL) { + pm_check_is_dataframe(.data) data_names <- colnames(.data) col_pos <- pm_select_positions(.data, ...) - if (!missing(.before) && !is.null(.before)) .before <- colnames(.data)[pm_eval_select_pos(.data, substitute(.before))] - if (!missing(.after) && !is.null(.after)) .after <- colnames(.data)[pm_eval_select_pos(.data, substitute(.after))] + + .before <- pm_deparse_var(.before) + .after <- pm_deparse_var(.after) has_before <- !is.null(.before) has_after <- !is.null(.after) + if (has_before && has_after) { stop("You must supply only one of `.before` and `.after`") } else if (has_before) { - where <- min(match(.before, data_names)) - col_pos <- c(setdiff(col_pos, where), where) + pm_where <- min(match(.before, data_names)) + col_pos <- c(setdiff(col_pos, pm_where), pm_where) } else if (has_after) { - where <- max(match(.after, data_names)) - col_pos <- c(where, setdiff(col_pos, where)) + pm_where <- max(match(.after, data_names)) + col_pos <- c(pm_where, setdiff(col_pos, pm_where)) } else { - where <- 1L - col_pos <- union(col_pos, where) + pm_where <- 1L + col_pos <- union(col_pos, pm_where) } - lhs <- setdiff(seq(1L, where - 1L), col_pos) - rhs <- setdiff(seq(where + 1L, ncol(.data)), col_pos) + lhs <- setdiff(seq(1L, pm_where - 1L), col_pos) + rhs <- setdiff(seq(pm_where + 1L, ncol(.data)), col_pos) col_pos <- unique(c(lhs, col_pos, rhs)) col_pos <- col_pos[col_pos <= length(data_names)] + res <- .data[col_pos] - if (pm_has_groups(.data)) res <- pm_groups_set(res, pm_group_vars(.data)) + if (pm_has_groups(.data)) res <- pm_set_groups(res, pm_get_groups(.data)) res } pm_rename <- function(.data, ...) { - pm_rename.data.frame(.data, ...) -} -pm_rename.data.frame <- function(.data, ...) { - new_names <- names(pm_dotdotdot(...)) + pm_check_is_dataframe(.data) + new_names <- names(pm_deparse_dots(...)) if (length(new_names) == 0L) { warning("You didn't give any new names") return(.data) @@ -1293,31 +671,88 @@ pm_rename.data.frame <- function(.data, ...) { colnames(.data)[col_pos] <- new_names .data } -pm_rename_with <- function(.data, .fn, .cols = everything(), ...) { - pm_rename_with.data.frame(.data, .fn, .cols = everything(), ...) -} -pm_rename_with.data.frame <- function(.data, .fn, .cols = everything(), ...) { + +pm_rename_with <- function(.data, .fn, .cols = pm_everything(), ...) { if (!is.function(.fn)) stop("`", .fn, "` is not a valid function") - grouped <- pm_is.grouped_df(.data) + grouped <- inherits(.data, "grouped_data") if (grouped) grp_pos <- which(colnames(.data) %in% pm_group_vars(.data)) - col_pos <- pm_eval_select_pos(.data = .data, .pm_group_pos = TRUE, .cols = substitute(.cols)) + col_pos <- eval(substitute(pm_select_positions(.data, .cols))) cols <- colnames(.data)[col_pos] new_cols <- .fn(cols, ...) if (any(duplicated(new_cols))) { stop("New names must be unique however `", deparse(substitute(.fn)), "` returns duplicate column names") } colnames(.data)[col_pos] <- new_cols - if (grouped) .data <- pm_groups_set(.data, colnames(.data)[grp_pos]) + if (grouped) .data <- pm_set_groups(.data, colnames(.data)[grp_pos]) .data } +pm_replace_with <- function(x, i, val, arg_name) { + if (is.null(val)) { + return(x) + } + pm_check_length(val, x, arg_name) + pm_check_type(val, x, arg_name) + pm_check_class(val, x, arg_name) + i[is.na(i)] <- FALSE + if (length(val) == 1L) { + x[i] <- val + } else { + x[i] <- val[i] + } + x +} + +pm_check_length <- function(x, y, arg_name) { + length_x <- length(x) + length_y <- length(y) + if (all(length_x %in% c(1L, length_y))) { + return() + } + if (length_y == 1) { + stop(arg_name, " must be length 1, not ", paste(length_x, sep = ", ")) + } else { + stop(arg_name, " must be length ", length_y, " or 1, not ", length_x) + } +} + +pm_check_type <- function(x, y, arg_name) { + x_type <- typeof(x) + y_type <- typeof(y) + if (identical(x_type, y_type)) { + return() + } + stop(arg_name, " must be `", y_type, "`, not `", x_type, "`") +} + +pm_check_class <- function(x, y, arg_name) { + if (!is.object(x)) { + return() + } + exp_classes <- class(y) + out_classes <- class(x) + if (identical(out_classes, exp_classes)) { + return() + } + stop(arg_name, " must have class `", exp_classes, "`, not class `", out_classes, "`") +} +pm_rownames_to_column <- function(.data, var = "rowname") { + pm_check_is_dataframe(.data) + col_names <- colnames(.data) + if (var %in% col_names) stop("Column `", var, "` already exists in `.data`") + .data[, var] <- rownames(.data) + rownames(.data) <- NULL + .data[, c(var, setdiff(col_names, var))] +} pm_starts_with <- function(match, ignore.case = TRUE, vars = pm_peek_vars()) { grep(pattern = paste0("^", paste0(match, collapse = "|^")), x = vars, ignore.case = ignore.case) } + pm_ends_with <- function(match, ignore.case = TRUE, vars = pm_peek_vars()) { grep(pattern = paste0(paste0(match, collapse = "$|"), "$"), x = vars, ignore.case = ignore.case) } + pm_contains <- function(match, ignore.case = TRUE, vars = pm_peek_vars()) { - matches <- lapply( + pm_matches <- lapply( match, function(x) { if (isTRUE(ignore.case)) { @@ -1331,11 +766,13 @@ pm_contains <- function(match, ignore.case = TRUE, vars = pm_peek_vars()) { } } ) - unique(unlist(matches)) + unique(unlist(pm_matches)) } + pm_matches <- function(match, ignore.case = TRUE, perl = FALSE, vars = pm_peek_vars()) { grep(pattern = match, x = vars, ignore.case = ignore.case, perl = perl) } + pm_num_range <- function(prefix, range, width = NULL, vars = pm_peek_vars()) { if (!is.null(width)) { range <- sprintf(paste0("%0", width, "d"), range) @@ -1348,6 +785,7 @@ pm_num_range <- function(prefix, range, width = NULL, vars = pm_peek_vars()) { x[!is.na(x)] } } + pm_all_of <- function(x, vars = pm_peek_vars()) { x_ <- !x %in% vars if (any(x_)) { @@ -1361,35 +799,36 @@ pm_all_of <- function(x, vars = pm_peek_vars()) { which(vars %in% x) } } + pm_any_of <- function(x, vars = pm_peek_vars()) { which(vars %in% x) } + pm_everything <- function(vars = pm_peek_vars()) { seq_along(vars) } + pm_last_col <- function(offset = 0L, vars = pm_peek_vars()) { if (!pm_is_wholenumber(offset)) stop("`offset` must be an integer") - n <- length(vars) - if (offset && n <= offset) { + pm_n <- length(vars) + if (offset && pm_n <= offset) { stop("`offset` must be smaller than the number of `vars`") - } else if (n == 0) { - stop("Can't select last column when `vars` is empty") + } else if (pm_n == 0) { + stop("Can't pm_select last column when `vars` is empty") } else { - n - offset + pm_n - offset } } + pm_peek_vars <- function() { pm_select_env$get_colnames() } -pm_select_positions <- function(.data, ..., .pm_group_pos = FALSE) { +pm_select_positions <- function(.data, ..., .group_pos = FALSE) { cols <- pm_dotdotdot(...) - cols <- cols[!vapply(cols, is.null, FALSE)] - if (length(cols) == 0L) return(integer(0)) pm_select_env$setup(.data = .data, calling_frame = parent.frame(2L)) on.exit(pm_select_env$clean(), add = TRUE) data_names <- pm_select_env$get_colnames() pos <- unlist(lapply(cols, pm_eval_expr)) - if (length(pos) > 0) pos <- if (pos[1] >= 0) pos[pos >= 0] else pos[pos < 0] col_len <- pm_select_env$get_ncol() if (any(pos > col_len)) { oor <- pos[which(pos > col_len)] @@ -1399,45 +838,23 @@ pm_select_positions <- function(.data, ..., .pm_group_pos = FALSE) { if (oor_len > 1) " don't " else " doesn't ", "exist. There are only ", col_len, " columns." ) } - if (isTRUE(.pm_group_pos)) { - pm_groups <- pm_group_vars(.data) + if (isTRUE(.group_pos)) { + pm_groups <- pm_get_groups(.data) missing_groups <- !(pm_groups %in% cols) if (any(missing_groups)) { sel_missing <- pm_groups[missing_groups] + message("Adding missing grouping variables: `", paste(sel_missing, collapse = "`, `"), "`") readd <- match(sel_missing, data_names) - readd <- readd[!(readd %in% pos)] - if (length(readd) > 0L) { - message("Adding missing grouping variables: `", paste(sel_missing, collapse = "`, `"), "`") - if (length(names(cols)) > 0L) names(readd) <- data_names[readd] - pos <- c(readd, pos) - } + if (length(names(cols)) > 0L) names(readd) <- data_names[readd] + pos <- c(readd, pos) } } - if (length(data_names[pos]) != 0L) { - nm_pos <- names(pos) - if (any(nm_pos == "")) { - names(pos)[which(nm_pos == "")] <- data_names[pos[which(nm_pos == "")]] - } - if (is.null(nm_pos)) { - names(pos) <- data_names[abs(pos)] - } - } - uniques <- pos[!duplicated(pos)] - res_nms <- data_names[uniques] - res <- match(res_nms, data_names) - if (length(res) != 0L) { - res <- if (length(setdiff(names(uniques), data_names)) > 0L) { - if (all(uniques > 0L)) structure(res, .Names = names(uniques)) else structure(res, .Names = res_nms) - } else { - structure(res, .Names = res_nms) - } - } - res + pos[!duplicated(pos)] } + pm_eval_expr <- function(x) { type <- typeof(x) - switch( - type, + switch(type, "integer" = x, "double" = as.integer(x), "character" = pm_select_char(x), @@ -1446,17 +863,19 @@ pm_eval_expr <- function(x) { stop("Expressions of type <", typeof(x), "> cannot be evaluated for use when subsetting.") ) } + pm_select_char <- function(expr) { pos <- match(expr, pm_select_env$get_colnames()) - if (any(is.na(pos))) stop("The following columns do not exist:\n ", paste(expr, collapse = "\n ")) + if (is.na(pos)) stop("Column `", expr, "` does not exist") pos } + pm_select_symbol <- function(expr) { expr_name <- as.character(expr) - if (grepl("^is\\.", expr_name) && is.function(expr)) { + if (grepl("^is\\.", expr_name) && pm_is_function(expr)) { stop( - "Predicate functions must be wrapped in `where()`.\n\n", - sprintf(" data %%pm>%% select(where(%s))", expr_name) + "Predicate functions must be wrapped in `pm_where()`.\n\n", + sprintf(" data %%pm>%% pm_select(pm_where(%s))", expr_name) ) } res <- try(pm_select_char(as.character(expr)), silent = TRUE) @@ -1468,34 +887,25 @@ pm_select_symbol <- function(expr) { } res } + pm_eval_call <- function(x) { type <- as.character(x[[1]]) - if (length(type) > 1L) { - type <- "pm_context" - } - switch( - type, + switch(type, `:` = pm_select_seq(x), `!` = pm_select_negate(x), `-` = pm_select_minus(x), `c` = pm_select_c(x), `(` = pm_select_bracket(x), - `&` = pm_select_and(x), - pm_select_context(x) + pm_select_pm_context(x) ) } -pm_select_and <- function(expr) { - exprs <- as.list(expr)[-1] - res <- do.call(c, lapply(exprs, pm_eval_expr)) - if (all(res > 0) || all(res < 0)) return(unique(res)) - res <- res[!(duplicated(abs(res)) | duplicated(abs(res), fromLast = TRUE))] - res[res > 0] -} + pm_select_seq <- function(expr) { x <- pm_eval_expr(expr[[2]]) y <- pm_eval_expr(expr[[3]]) x:y } + pm_select_negate <- function(expr) { x <- if (pm_is_negated_colon(expr)) { expr <- call(":", expr[[2]][[2]], expr[[2]][[3]][[2]]) @@ -1505,24 +915,30 @@ pm_select_negate <- function(expr) { } x * -1L } + pm_is_negated_colon <- function(expr) { expr[[1]] == "!" && length(expr[[2]]) > 1L && expr[[2]][[1]] == ":" && expr[[2]][[3]][[1]] == "!" } + pm_select_minus <- function(expr) { x <- pm_eval_expr(expr[[2]]) x * -1L } + pm_select_c <- function(expr) { lst_expr <- as.list(expr) lst_expr[[1]] <- NULL unlist(lapply(lst_expr, pm_eval_expr)) } + pm_select_bracket <- function(expr) { pm_eval_expr(expr[[2]]) } -pm_select_context <- function(expr) { + +pm_select_pm_context <- function(expr) { eval(expr, envir = pm_select_env$.data) } + pm_select_env <- new.env() pm_select_env$setup <- function(.data, calling_frame) { pm_select_env$.data <- .data @@ -1534,123 +950,144 @@ pm_select_env$clean <- function() { pm_select_env$get_colnames <- function() colnames(pm_select_env$.data) pm_select_env$get_nrow <- function() nrow(pm_select_env$.data) pm_select_env$get_ncol <- function() ncol(pm_select_env$.data) -pm_eval_select_pos <- function(.data, .cols, .pm_group_pos = FALSE) { - do.call(pm_select_positions, list(.data = .data, .cols, .pm_group_pos = .pm_group_pos)) -} + pm_select <- function(.data, ...) { - col_pos <- pm_select_positions(.data, ..., .pm_group_pos = TRUE) + col_pos <- pm_select_positions(.data, ..., .group_pos = TRUE) + map_names <- names(col_pos) + map_names_length <- nchar(map_names) + if (any(map_names_length == 0L)) { + no_new_names <- which(map_names_length == 0L) + map_names[no_new_names] <- colnames(.data)[no_new_names] + } res <- .data[, col_pos, drop = FALSE] - if (length(names(res)) != 0) colnames(res) <- names(col_pos) - if (pm_has_groups(.data)) res <- pm_groups_set(res, pm_group_vars(.data)) + if (!is.null(map_names) && all(col_pos > 0L)) colnames(res) <- map_names + if (pm_has_groups(.data)) res <- pm_set_groups(res, pm_get_groups(.data)) res } -pm_summarise <- function(.data, ..., .pm_groups = NULL) { - if ("grouped_df" %in% class(.data)) pm_summarise.grouped_df(.data, ..., .pm_groups = NULL) else pm_summarise.data.frame(.data, ..., .pm_groups = NULL) +pm_summarise <- function(.data, ...) { + pm_check_is_dataframe(.data) + if ("grouped_data" %in% class(.data)) { + pm_summarise.grouped_data(.data, ...) + } else { + pm_summarise.default(.data, ...) + } } -pm_summarise.data.frame <- function(.data, ..., .pm_groups = NULL) { + +pm_summarise.default <- function(.data, ...) { fns <- pm_dotdotdot(...) pm_context$setup(.data) on.exit(pm_context$clean(), add = TRUE) - pm_groups_exist <- pm_context$is_grouped() + pm_groups_exist <- pm_has_groups(pm_context$.data) if (pm_groups_exist) { - group <- unique(pm_context$get_columns(pm_group_vars(pm_context$.data))) + group <- unique(pm_context$.data[, pm_get_groups(pm_context$.data), drop = FALSE]) } - if (pm_is_empty_list(fns)) { - if (pm_groups_exist) return(group) else return(data.frame()) - } - res <- vector(mode = "list", length = length(fns)) - pm_eval_env <- c(as.list(pm_context$.data), vector(mode = "list", length = length(fns))) - new_pos <- seq(length(pm_context$.data) + 1L, length(pm_eval_env), 1L) - for (i in seq_along(fns)) { - pm_eval_env[[new_pos[i]]] <- do.call(with, list(pm_eval_env, fns[[i]])) - nms <- if (!pm_is_named(pm_eval_env[[new_pos[i]]])) { - if (!is.null(names(fns)[[i]])) names(fns)[[i]] else deparse(fns[[i]]) - } else { - NULL + res <- lapply( + fns, + function(x) { + x_res <- do.call(with, list(pm_context$.data, x)) + if (is.list(x_res)) I(x_res) else x_res } - if (!is.null(nms)) names(pm_eval_env)[[new_pos[i]]] <- nms - res[[i]] <- pm_build_data_frame(pm_eval_env[[new_pos[i]]], nms = nms) - } - res <- do.call(cbind, res) + ) + res <- as.data.frame(res) + fn_names <- names(fns) + colnames(res) <- if (is.null(fn_names)) fns else fn_names if (pm_groups_exist) res <- cbind(group, res, row.names = NULL) res } -pm_summarise.grouped_df <- function(.data, ..., .pm_groups = NULL) { - if (!is.null(.pm_groups)) { - .pm_groups <- match.arg(arg = .pm_groups, choices = c("drop", "drop_last", "keep"), several.ok = FALSE) - } - pm_groups <- pm_group_vars(.data) + +pm_summarise.grouped_data <- function(.data, ...) { + pm_groups <- pm_get_groups(.data) res <- pm_apply_grouped_function("pm_summarise", .data, drop = TRUE, ...) - res <- res[pm_arrange_rows(res, pm_as_symbols(pm_groups)), , drop = FALSE] - verbose <- pm_summarise_verbose(.pm_groups) - if (is.null(.pm_groups)) { - all_one <- as.data.frame(table(res[, pm_groups])) - all_one <- all_one[all_one$Freq != 0, ] - .pm_groups <- if (all(all_one$Freq == 1)) "drop_last" else "keep" - } - if (.pm_groups == "drop_last") { - n <- length(pm_groups) - if (n > 1) { - if (verbose) pm_summarise_inform(pm_groups[-n]) - res <- pm_groups_set(res, pm_groups[-n], pm_group_by_drop_default(.data)) - } - } else if (.pm_groups == "keep") { - if (verbose) pm_summarise_inform(pm_groups) - res <- pm_groups_set(res, pm_groups, pm_group_by_drop_default(.data)) - } else if (.pm_groups == "drop") { - attr(res, "pm_groups") <- NULL - } + res <- res[do.call(order, lapply(pm_groups, function(x) res[, x])), ] rownames(res) <- NULL res } -pm_summarise_inform <- function(new_groups) { - message(sprintf( - "`pm_summarise()` has grouped output by %s. You can override using the `.pm_groups` argument.", - paste0("'", new_groups, "'", collapse = ", ") - )) -} -pm_summarise_verbose <- function(.pm_groups) { - is.null(.pm_groups) && - !identical(getOption("poorman.summarise.inform"), FALSE) -} + pm_transmute <- function(.data, ...) { - if ("grouped_df" %in% class(.data)) pm_transmute.grouped_df(.data, ...) else pm_transmute.data.frame(.data, ...) + pm_check_is_dataframe(.data) + if ("grouped_data" %in% class(.data)) { + pm_transmute.grouped_data(.data, ...) + } else { + pm_transmute.default(.data, ...) + } } -pm_transmute.data.frame <- function(.data, ...) { - pm_mutate(.data, ..., .keep = "none") + +pm_transmute.default <- function(.data, ...) { + conditions <- pm_deparse_dots(...) + mutated <- pm_mutate(.data, ...) + mutated[, names(conditions), drop = FALSE] } -pm_transmute.grouped_df <- function(.data, ...) { + +pm_transmute.grouped_data <- function(.data, ...) { rows <- rownames(.data) res <- pm_apply_grouped_function("pm_transmute", .data, drop = TRUE, ...) res[rows, ] } -pm_ungroup <- function(x, ...) { - if ("grouped_df" %in% class(x)) pm_ungroup.grouped_df(x, ...) else pm_ungroup.data.frame(x, ...) -} -pm_ungroup.data.frame <- function(x, ...) { - rm_groups <- pm_deparse_dots(...) - pm_groups <- pm_group_vars(x) - if (length(rm_groups) == 0L) rm_groups <- pm_groups - x <- pm_groups_set(x, pm_groups[!(pm_groups %in% rm_groups)]) - if (length(attr(x, "pm_groups")) == 0L) { - attr(x, "pm_groups") <- NULL - class(x) <- class(x)[!(class(x) %in% "grouped_df")] +pm_dotdotdot <- function(..., .impute_names = FALSE) { + dots <- eval(substitute(alist(...))) + if (isTRUE(.impute_names)) { + pm_deparse_dots <- lapply(dots, deparse) + names_dots <- names(dots) + unnamed <- if (is.null(names_dots)) rep(TRUE, length(dots)) else nchar(names_dots) == 0L + names(dots)[unnamed] <- pm_deparse_dots[unnamed] } - x + dots } -pm_ungroup.grouped_df <- function(x, ...) { - pm_ungroup.data.frame(...) + +pm_deparse_dots <- function(...) { + vapply(substitute(...()), deparse, NA_character_) } + +pm_deparse_var <- function(var, frame = if (is.null(pm_eval_env$env)) parent.frame() else pm_eval_env$env) { + sub_var <- eval(substitute(substitute(var)), frame) + if (is.symbol(sub_var)) var <- as.character(sub_var) + var +} + pm_check_is_dataframe <- function(.data) { parent_fn <- all.names(sys.call(-1L), max.names = 1L) if (!is.data.frame(.data)) stop(parent_fn, " must be given a data.frame") invisible() } + +pm_is_wholenumber <- function(x) { + x %% 1L == 0L +} + pm_seq2 <- function(from, to) { if (length(from) != 1) stop("`from` must be length one") if (length(to) != 1) stop("`to` must be length one") if (from > to) integer() else seq.int(from, to) } + +pm_is_function <- function(x, frame) { + res <- tryCatch( + is.function(x), + warning = function(w) FALSE, + error = function(e) FALSE + ) + if (isTRUE(res)) { + return(res) + } + res <- tryCatch( + is.function(eval(x)), + warning = function(w) FALSE, + error = function(e) FALSE + ) + if (isTRUE(res)) { + return(res) + } + res <- tryCatch( + is.function(eval(as.symbol(deparse(substitute(x))))), + warning = function(w) FALSE, + error = function(e) FALSE + ) + if (isTRUE(res)) { + return(res) + } + FALSE +} + pm_collapse_to_sentence <- function(x) { len_x <- length(x) if (len_x == 0L) { @@ -1663,28 +1100,8 @@ pm_collapse_to_sentence <- function(x) { paste(paste(x[1:(len_x - 1)], collapse = ", "), x[len_x], sep = " and ") } } -pm_build_data_frame <- function(x, nms = NULL) { - res <- if (is.atomic(x)) { - data.frame(x) - } else if (is.list(x) && !is.data.frame(x)) { - structure(list(x = x), class = "data.frame", row.names = c(NA, -1L)) - } else if (is.data.frame(x)) { - x - } - if (!is.null(nms)) colnames(res) <- nms - res -} -pm_is_nested <- function(lst) vapply(lst, function(x) inherits(x[1L], "list"), FALSE) -pm_squash <- function(lst) { - do.call(c, lapply(lst, function(x) if (is.list(x) && !is.data.frame(x)) pm_squash(x) else list(x))) -} -pm_flatten <- function(lst) { - nested <- pm_is_nested(lst) - res <- c(lst[!nested], unlist(lst[nested], recursive = FALSE)) - if (sum(nested)) Recall(res) else return(res) -} pm_where <- function(fn) { - if (!is.function(fn)) { + if (!pm_is_function(fn)) { stop(pm_deparse_var(fn), " is not a valid predicate function.") } preds <- unlist(lapply( @@ -1694,8 +1111,50 @@ pm_where <- function(fn) { }, fn )) - if (!is.logical(preds)) stop("`where()` must be used with functions that return `TRUE` or `FALSE`.") + if (!is.logical(preds)) stop("`pm_where()` must be used with functions that return `TRUE` or `FALSE`.") data_cols <- pm_select_env$get_colnames() cols <- data_cols[preds] which(data_cols %in% cols) } + +pm_cume_dist <- function(x) { + rank(x, ties.method = "max", na.last = "keep") / sum(!is.na(x)) +} + +pm_dense_rank <- function(x) { + match(x, sort(unique(x))) +} + +pm_min_rank <- function(x) { + rank(x, ties.method = "min", na.last = "keep") +} + +pm_ntile <- function(x = pm_row_number(), pm_n) { + if (!missing(x)) x <- pm_row_number(x) + len <- length(x) - sum(is.na(x)) + pm_n <- as.integer(floor(pm_n)) + if (len == 0L) { + rep(NA_integer_, length(x)) + } else { + pm_n_larger <- as.integer(len %% pm_n) + pm_n_smaller <- as.integer(pm_n - pm_n_larger) + size <- len / pm_n + larger_size <- as.integer(ceiling(size)) + smaller_size <- as.integer(floor(size)) + larger_threshold <- larger_size * pm_n_larger + bins <- pm_if_else( + x <= larger_threshold, + (x + (larger_size - 1L)) / larger_size, + (x + (-larger_threshold + smaller_size - 1L)) / smaller_size + pm_n_larger + ) + as.integer(floor(bins)) + } +} + +pm_percent_rank <- function(x) { + (pm_min_rank(x) - 1) / (sum(!is.na(x)) - 1) +} + +pm_row_number <- function(x) { + if (missing(x)) seq_len(pm_n()) else rank(x, ties.method = "first", na.last = "keep") +} diff --git a/R/ab_property.R b/R/ab_property.R index f0dc192f..a90b781f 100755 --- a/R/ab_property.R +++ b/R/ab_property.R @@ -113,7 +113,8 @@ #' set_ab_names(property = "atc") #' #' example_isolates %>% -#' set_ab_names(where(is.sir)) +#' set_ab_names(where(is.sir)) %>% +#' colnames() #' #' example_isolates %>% #' set_ab_names(NIT:VAN) %>% @@ -334,7 +335,7 @@ ab_url <- function(x, open = FALSE, ...) { ab_property <- function(x, property = "name", language = get_AMR_locale(), ...) { meet_criteria(x, allow_NA = TRUE) meet_criteria(property, is_in = colnames(AMR::antibiotics), has_length = 1) - language <- validate_language(language) + meet_criteria(language, is_in = c(LANGUAGES_SUPPORTED, ""), has_length = 1, allow_NULL = TRUE, allow_NA = TRUE) translate_into_language(ab_validate(x = x, property = property, ...), language = language) } @@ -359,7 +360,7 @@ set_ab_names <- function(data, ..., property = "name", language = get_AMR_locale if (is.data.frame(data)) { if (tryCatch(length(c(...)) > 1, error = function(e) TRUE)) { - df <- tryCatch(suppressWarnings(select(data, ...)), + df <- tryCatch(suppressWarnings(pm_select(data, ...)), error = function(e) { data[, c(...), drop = FALSE] }) @@ -434,7 +435,7 @@ set_ab_names <- function(data, ..., property = "name", language = get_AMR_locale ab_validate <- function(x, property, ...) { if (tryCatch(all(x[!is.na(x)] %in% AMR_env$AB_lookup$ab), error = function(e) FALSE)) { - # # special case for ab_* functions where class is already 'ab' + # special case for ab_* functions where class is already 'ab' x <- AMR_env$AB_lookup[match(x, AMR_env$AB_lookup$ab), property, drop = TRUE] } else { # try to catch an error when inputting an invalid argument diff --git a/R/ab_selectors.R b/R/ab_selectors.R index 37b86157..5ef59593 100755 --- a/R/ab_selectors.R +++ b/R/ab_selectors.R @@ -784,14 +784,14 @@ is_all <- function(el1) { find_ab_group <- function(ab_class_args) { ab_class_args <- gsub("[^a-zA-Z0-9]", ".*", ab_class_args) - AMR_env$AB_lookup %>% - filter(group %like% ab_class_args | + AMR_env$AB_lookup %pm>% + subset(group %like% ab_class_args | atc_group1 %like% ab_class_args | - atc_group2 %like% ab_class_args) %>% - pull(group) %>% - unique() %>% - tolower() %>% - sort() %>% + atc_group2 %like% ab_class_args) %pm>% + pm_pull(group) %pm>% + unique() %pm>% + tolower() %pm>% + sort() %pm>% paste(collapse = "/") } diff --git a/R/antibiogram.R b/R/antibiogram.R.bak similarity index 100% rename from R/antibiogram.R rename to R/antibiogram.R.bak diff --git a/R/atc_online.R b/R/atc_online.R index af645c5a..e2350ac2 100755 --- a/R/atc_online.R +++ b/R/atc_online.R @@ -139,9 +139,9 @@ atc_online_property <- function(atc_code, if (property == "groups") { out <- tryCatch( - read_html(atc_url) %>% - html_node("#content") %>% - html_children() %>% + read_html(atc_url) %pm>% + html_node("#content") %pm>% + html_children() %pm>% html_node("a"), error = function(e) NULL ) @@ -151,9 +151,9 @@ atc_online_property <- function(atc_code, } # get URLS of items - hrefs <- out %>% html_attr("href") + hrefs <- out %pm>% html_attr("href") # get text of items - texts <- out %>% html_text() + texts <- out %pm>% html_text() # select only text items where URL like "code=" texts <- texts[grepl("?code=", tolower(hrefs), fixed = TRUE)] # last one is antibiotics, skip it @@ -161,9 +161,9 @@ atc_online_property <- function(atc_code, returnvalue <- c(list(texts), returnvalue) } else { out <- tryCatch( - read_html(atc_url) %>% - html_nodes("table") %>% - html_table(header = TRUE) %>% + read_html(atc_url) %pm>% + html_nodes("table") %pm>% + html_table(header = TRUE) %pm>% as.data.frame(stringsAsFactors = FALSE), error = function(e) NULL ) diff --git a/R/av_property.R b/R/av_property.R index b190e3dd..7c7a9067 100755 --- a/R/av_property.R +++ b/R/av_property.R @@ -252,7 +252,7 @@ av_url <- function(x, open = FALSE, ...) { av_property <- function(x, property = "name", language = get_AMR_locale(), ...) { meet_criteria(x, allow_NA = TRUE) meet_criteria(property, is_in = colnames(AMR::antivirals), has_length = 1) - language <- validate_language(language) + meet_criteria(language, is_in = c(LANGUAGES_SUPPORTED, ""), has_length = 1, allow_NULL = TRUE, allow_NA = TRUE) translate_into_language(av_validate(x = x, property = property, ...), language = language) } diff --git a/R/bug_drug_combinations.R b/R/bug_drug_combinations.R index f54c7957..61bc1b1c 100755 --- a/R/bug_drug_combinations.R +++ b/R/bug_drug_combinations.R @@ -45,11 +45,8 @@ #' @return The function [bug_drug_combinations()] returns a [data.frame] with columns "mo", "ab", "S", "I", "R" and "total". #' @examples #' \donttest{ -#' #' # example_isolates is a data set available in the AMR package. -#' # run ?example_isolates for more info. -#' example_isolates -#' #' x <- bug_drug_combinations(example_isolates) +#' head(x) #' format(x, translate_ab = "name (atc)") #' #' # Use FUN to change to transformation of microorganism codes @@ -82,27 +79,7 @@ bug_drug_combinations <- function(x, } else { stop_ifnot(col_mo %in% colnames(x), "column '", col_mo, "' (`col_mo`) not found") } - - # use dplyr and tidyr if they are available, they are much faster! - if (identical(pivot_longer, import_fn("pivot_longer", "tidyr", error_on_fail = FALSE))) { - out <- x %>% - ungroup() %>% - mutate(mo = FUN(ungroup(x)[, col_mo, drop = TRUE], ...)) %>% - pivot_longer(where(is.sir), names_to = "ab") %>% - group_by(across(c(group_vars(x), mo, ab))) %>% - summarise(S = sum(value == "S", na.rm = TRUE), - I = sum(value == "I", na.rm = TRUE), - R = sum(value == "R", na.rm = TRUE), - .groups = "drop") %>% - mutate(total = S + I + R) - out <- out %>% arrange(mo, ab) - return(structure(out, - class = c("bug_drug_combinations", - ifelse(is_null_or_grouped_tbl(x), "grouped", character(0)), - class(out)))) - } - - # no dplyr or tidyr available, so use base R + x.bak <- x x <- as.data.frame(x, stringsAsFactors = FALSE) x[, col_mo] <- FUN(x[, col_mo, drop = TRUE], ...) @@ -173,7 +150,7 @@ bug_drug_combinations <- function(x, res <- do.call(rbind, unname(lapply(grouped, fn, ...))) if (any(groups %in% colnames(res))) { class(res) <- c("grouped_data", class(res)) - res <- pm_groups_set(res, groups[groups %in% colnames(res)]) + res <- pm_set_groups(res, groups[groups %in% colnames(res)]) } res } @@ -184,7 +161,6 @@ bug_drug_combinations <- function(x, out <- run_it(x) } rownames(out) <- NULL - out <- out %>% arrange(mo, ab) out <- as_original_data_class(out, class(x.bak)) # will remove tibble groups structure(out, class = c("bug_drug_combinations", ifelse(data_has_groups, "grouped", character(0)), class(out))) } @@ -200,12 +176,12 @@ format.bug_drug_combinations <- function(x, add_ab_group = TRUE, remove_intrinsic_resistant = FALSE, decimal.mark = getOption("OutDec"), - big.mark = ifelse(decimal.mark == ",", " ", ","), + big.mark = ifelse(decimal.mark == ",", ".", ","), ...) { meet_criteria(x, allow_class = "data.frame") meet_criteria(translate_ab, allow_class = c("character", "logical"), has_length = 1, allow_NA = TRUE) language <- validate_language(language) - meet_criteria(minimum, allow_class = c("numeric", "integer"), has_length = 1, is_positive_or_zero = TRUE, is_finite = TRUE) + meet_criteria(minimum, allow_class = c("numeric", "integer"), has_length = 1, is_positive = TRUE, is_finite = TRUE) meet_criteria(combine_SI, allow_class = "logical", has_length = 1) meet_criteria(add_ab_group, allow_class = "logical", has_length = 1) meet_criteria(remove_intrinsic_resistant, allow_class = "logical", has_length = 1) @@ -270,38 +246,46 @@ format.bug_drug_combinations <- function(x, .data } - y <- x %>% - mutate( + create_var <- function(.data, ...) { + dots <- list(...) + for (i in seq_len(length(dots))) { + .data[, names(dots)[i]] <- dots[[i]] + } + .data + } + + y <- x %pm>% + create_var( ab = as.ab(x$ab), ab_txt = give_ab_name(ab = x$ab, format = translate_ab, language = language) - ) %>% - group_by(ab, ab_txt, mo) %>% - summarise( + ) %pm>% + pm_group_by(ab, ab_txt, mo) %pm>% + pm_summarise( isolates = sum(isolates, na.rm = TRUE), total = sum(total, na.rm = TRUE) - ) %>% - ungroup() + ) %pm>% + pm_ungroup() - y <- y %>% - mutate(txt = paste0( + y <- y %pm>% + create_var(txt = paste0( percentage(y$isolates / y$total, decimal.mark = decimal.mark, big.mark = big.mark), " (", trimws(format(y$isolates, big.mark = big.mark)), "/", trimws(format(y$total, big.mark = big.mark)), ")" - )) %>% - select(ab, ab_txt, mo, txt) %>% - arrange(mo) + )) %pm>% + pm_select(ab, ab_txt, mo, txt) %pm>% + pm_arrange(mo) # replace tidyr::pivot_wider() from here for (i in unique(y$mo)) { mo_group <- y[which(y$mo == i), c("ab", "txt"), drop = FALSE] colnames(mo_group) <- c("ab", i) rownames(mo_group) <- NULL - y <- y %>% - left_join(mo_group, by = "ab") + y <- y %pm>% + pm_left_join(mo_group, by = "ab") } - y <- y %>% - distinct(ab, .keep_all = TRUE) %>% - select(-mo, -txt) %>% + y <- y %pm>% + pm_distinct(ab, .keep_all = TRUE) %pm>% + pm_select(-mo, -txt) %pm>% # replace tidyr::pivot_wider() until here remove_NAs() @@ -309,21 +293,21 @@ format.bug_drug_combinations <- function(x, .data[, c("ab_group", "ab_txt", colnames(.data)[!colnames(.data) %in% c("ab_group", "ab_txt", "ab")]), drop = FALSE] } - y <- y %>% - mutate(ab_group = ab_group(y$ab, language = language)) %>% - select_ab_vars() %>% - arrange(ab_group, ab_txt) - y <- y %>% - mutate(ab_group = ifelse(y$ab_group != lag(y$ab_group) | is.na(lag(y$ab_group)), y$ab_group, "")) + y <- y %pm>% + create_var(ab_group = ab_group(y$ab, language = language)) %pm>% + select_ab_vars() %pm>% + pm_arrange(ab_group, ab_txt) + y <- y %pm>% + create_var(ab_group = ifelse(y$ab_group != pm_lag(y$ab_group) | is.na(pm_lag(y$ab_group)), y$ab_group, "")) if (add_ab_group == FALSE) { - y <- y %>% - select(-ab_group) %>% - rename("Drug" = ab_txt) + y <- y %pm>% + pm_select(-ab_group) %pm>% + pm_rename("Drug" = ab_txt) colnames(y)[1] <- translate_into_language(colnames(y)[1], language, only_unknown = FALSE) } else { - y <- y %>% - rename( + y <- y %pm>% + pm_rename( "Group" = ab_group, "Drug" = ab_txt ) diff --git a/R/data.R b/R/data.R index ab7b2f88..5edbc4ab 100755 --- a/R/data.R +++ b/R/data.R @@ -27,7 +27,7 @@ # how to conduct AMR data analysis: https://msberends.github.io/AMR/ # # ==================================================================== # -#' Data Sets with `r format(nrow(antibiotics) + nrow(antivirals), big.mark = " ")` Antimicrobial Drugs +#' Data Sets with `r format(nrow(antibiotics) + nrow(antivirals), big.mark = ",")` Antimicrobial Drugs #' #' Two data sets containing all antibiotics/antimycotics and antivirals. Use [as.ab()] or one of the [`ab_*`][ab_property()] functions to retrieve values from the [antibiotics] data set. Three identifiers are included in this data set: an antibiotic ID (`ab`, primarily used in this package) as defined by WHONET/EARS-Net, an ATC code (`atc`) as defined by the WHO, and a Compound ID (`cid`) as found in PubChem. Other properties in this data set are derived from one or more of these codes. Note that some drugs have multiple ATC codes. #' @format @@ -82,10 +82,10 @@ #' @rdname antibiotics "antivirals" -#' Data Set with `r format(nrow(microorganisms), big.mark = " ")` Microorganisms +#' Data Set with `r format(nrow(microorganisms), big.mark = ",")` Microorganisms #' #' A data set containing the full microbial taxonomy (**last updated: `r documentation_date(max(TAXONOMY_VERSION$GBIF$accessed_date, TAXONOMY_VERSION$LPSN$accessed_date))`**) of `r nr2char(length(unique(microorganisms$kingdom[!microorganisms$kingdom %like% "unknown"])))` kingdoms from the List of Prokaryotic names with Standing in Nomenclature (LPSN) and the Global Biodiversity Information Facility (GBIF). This data set is the backbone of this `AMR` package. MO codes can be looked up using [as.mo()]. -#' @format A [tibble][tibble::tibble] with `r format(nrow(microorganisms), big.mark = " ")` observations and `r ncol(microorganisms)` variables: +#' @format A [tibble][tibble::tibble] with `r format(nrow(microorganisms), big.mark = ",")` observations and `r ncol(microorganisms)` variables: #' - `mo`\cr ID of microorganism as used by this package #' - `fullname`\cr Full name, like `"Escherichia coli"`. For the taxonomic ranks genus, species and subspecies, this is the 'pasted' text of genus, species, and subspecies. For all taxonomic ranks higher than genus, this is the name of the taxon. #' - `status` \cr Status of the taxon, either `r vector_or(microorganisms$status)` @@ -150,10 +150,10 @@ #' microorganisms "microorganisms" -#' Data Set with `r format(nrow(microorganisms.codes), big.mark = " ")` Common Microorganism Codes +#' Data Set with `r format(nrow(microorganisms.codes), big.mark = ",")` Common Microorganism Codes #' #' A data set containing commonly used codes for microorganisms, from laboratory systems and WHONET. Define your own with [set_mo_source()]. They will all be searched when using [as.mo()] and consequently all the [`mo_*`][mo_property()] functions. -#' @format A [tibble][tibble::tibble] with `r format(nrow(microorganisms.codes), big.mark = " ")` observations and `r ncol(microorganisms.codes)` variables: +#' @format A [tibble][tibble::tibble] with `r format(nrow(microorganisms.codes), big.mark = ",")` observations and `r ncol(microorganisms.codes)` variables: #' - `code`\cr Commonly used code of a microorganism #' - `mo`\cr ID of the microorganism in the [microorganisms] data set #' @details @@ -163,10 +163,10 @@ #' microorganisms.codes "microorganisms.codes" -#' Data Set with `r format(nrow(example_isolates), big.mark = " ")` Example Isolates +#' Data Set with `r format(nrow(example_isolates), big.mark = ",")` Example Isolates #' -#' A data set containing `r format(nrow(example_isolates), big.mark = " ")` microbial isolates with their full antibiograms. This data set contains randomised fictitious data, but reflects reality and can be used to practise AMR data analysis. For examples, please read [the tutorial on our website](https://msberends.github.io/AMR/articles/AMR.html). -#' @format A [tibble][tibble::tibble] with `r format(nrow(example_isolates), big.mark = " ")` observations and `r ncol(example_isolates)` variables: +#' A data set containing `r format(nrow(example_isolates), big.mark = ",")` microbial isolates with their full antibiograms. This data set contains randomised fictitious data, but reflects reality and can be used to practise AMR data analysis. For examples, please read [the tutorial on our website](https://msberends.github.io/AMR/articles/AMR.html). +#' @format A [tibble][tibble::tibble] with `r format(nrow(example_isolates), big.mark = ",")` observations and `r ncol(example_isolates)` variables: #' - `date`\cr Date of receipt at the laboratory #' - `patient`\cr ID of the patient #' - `age`\cr Age of the patient @@ -182,8 +182,8 @@ #' Data Set with Unclean Data #' -#' A data set containing `r format(nrow(example_isolates_unclean), big.mark = " ")` microbial isolates that are not cleaned up and consequently not ready for AMR data analysis. This data set can be used for practice. -#' @format A [tibble][tibble::tibble] with `r format(nrow(example_isolates_unclean), big.mark = " ")` observations and `r ncol(example_isolates_unclean)` variables: +#' A data set containing `r format(nrow(example_isolates_unclean), big.mark = ",")` microbial isolates that are not cleaned up and consequently not ready for AMR data analysis. This data set can be used for practice. +#' @format A [tibble][tibble::tibble] with `r format(nrow(example_isolates_unclean), big.mark = ",")` observations and `r ncol(example_isolates_unclean)` variables: #' - `patient_id`\cr ID of the patient #' - `date`\cr date of receipt at the laboratory #' - `hospital`\cr ID of the hospital, from A to C @@ -195,10 +195,10 @@ #' example_isolates_unclean "example_isolates_unclean" -#' Data Set with `r format(nrow(WHONET), big.mark = " ")` Isolates - WHONET Example +#' Data Set with `r format(nrow(WHONET), big.mark = ",")` Isolates - WHONET Example #' #' This example data set has the exact same structure as an export file from WHONET. Such files can be used with this package, as this example data set shows. The antibiotic results are from our [example_isolates] data set. All patient names are created using online surname generators and are only in place for practice purposes. -#' @format A [tibble][tibble::tibble] with `r format(nrow(WHONET), big.mark = " ")` observations and `r ncol(WHONET)` variables: +#' @format A [tibble][tibble::tibble] with `r format(nrow(WHONET), big.mark = ",")` observations and `r ncol(WHONET)` variables: #' - `Identification number`\cr ID of the sample #' - `Specimen number`\cr ID of the specimen #' - `Organism`\cr Name of the microorganism. Before analysis, you should transform this to a valid microbial class, using [as.mo()]. @@ -234,7 +234,7 @@ #' Data Set with Clinical Breakpoints for SIR Interpretation #' #' Data set containing clinical breakpoints to interpret MIC and disk diffusion to SIR values, according to international guidelines. Currently implemented guidelines are EUCAST (`r min(as.integer(gsub("[^0-9]", "", subset(clinical_breakpoints, guideline %like% "EUCAST")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(clinical_breakpoints, guideline %like% "EUCAST")$guideline)))`) and CLSI (`r min(as.integer(gsub("[^0-9]", "", subset(clinical_breakpoints, guideline %like% "CLSI")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(clinical_breakpoints, guideline %like% "CLSI")$guideline)))`). Use [as.sir()] to transform MICs or disks measurements to SIR values. -#' @format A [tibble][tibble::tibble] with `r format(nrow(clinical_breakpoints), big.mark = " ")` observations and `r ncol(clinical_breakpoints)` variables: +#' @format A [tibble][tibble::tibble] with `r format(nrow(clinical_breakpoints), big.mark = ",")` observations and `r ncol(clinical_breakpoints)` variables: #' - `guideline`\cr Name of the guideline #' - `method`\cr Either `r vector_or(clinical_breakpoints$method)` #' - `site`\cr Body site, e.g. "Oral" or "Respiratory" @@ -258,7 +258,7 @@ #' Data Set with Bacterial Intrinsic Resistance #' #' Data set containing defined intrinsic resistance by EUCAST of all bug-drug combinations. -#' @format A [tibble][tibble::tibble] with `r format(nrow(intrinsic_resistant), big.mark = " ")` observations and `r ncol(intrinsic_resistant)` variables: +#' @format A [tibble][tibble::tibble] with `r format(nrow(intrinsic_resistant), big.mark = ",")` observations and `r ncol(intrinsic_resistant)` variables: #' - `mo`\cr Microorganism ID #' - `ab`\cr Antibiotic ID #' @details @@ -275,7 +275,7 @@ #' Data Set with Treatment Dosages as Defined by EUCAST #' #' EUCAST breakpoints used in this package are based on the dosages in this data set. They can be retrieved with [eucast_dosage()]. -#' @format A [tibble][tibble::tibble] with `r format(nrow(dosage), big.mark = " ")` observations and `r ncol(dosage)` variables: +#' @format A [tibble][tibble::tibble] with `r format(nrow(dosage), big.mark = ",")` observations and `r ncol(dosage)` variables: #' - `ab`\cr Antibiotic ID as used in this package (such as `AMC`), using the official EARS-Net (European Antimicrobial Resistance Surveillance Network) codes where available #' - `name`\cr Official name of the antimicrobial drug as used by WHONET/EARS-Net or the WHO #' - `type`\cr Type of the dosage, either `r vector_or(dosage$type)` diff --git a/R/disk.R b/R/disk.R index be130079..5278ab3b 100755 --- a/R/disk.R +++ b/R/disk.R @@ -114,9 +114,9 @@ as.disk <- function(x, na.rm = FALSE) { na_after <- length(x[is.na(x)]) if (na_before != na_after) { - list_missing <- x.bak[is.na(x) & !is.na(x.bak)] %>% - unique() %>% - sort() %>% + list_missing <- x.bak[is.na(x) & !is.na(x.bak)] %pm>% + unique() %pm>% + sort() %pm>% vector_and(quotes = TRUE) cur_col <- get_current_column() warning_("in `as.disk()`: ", na_after - na_before, " result", diff --git a/R/eucast_rules.R b/R/eucast_rules.R index ef581776..d98a460b 100755 --- a/R/eucast_rules.R +++ b/R/eucast_rules.R @@ -331,12 +331,12 @@ eucast_rules <- function(x, # Some helper functions --------------------------------------------------- get_antibiotic_names <- function(x) { - x <- x %>% - strsplit(",") %>% - unlist() %>% - trimws2() %>% - vapply(FUN.VALUE = character(1), function(x) if (x %in% AMR::antibiotics$ab) ab_name(x, language = NULL, tolower = TRUE, fast_mode = TRUE) else x) %>% - sort() %>% + x <- x %pm>% + strsplit(",") %pm>% + unlist() %pm>% + trimws2() %pm>% + vapply(FUN.VALUE = character(1), function(x) if (x %in% AMR::antibiotics$ab) ab_name(x, language = NULL, tolower = TRUE, fast_mode = TRUE) else x) %pm>% + sort() %pm>% paste(collapse = ", ") x <- gsub("_", " ", x, fixed = TRUE) x <- gsub("except CAZ", paste("except", ab_name("CAZ", language = NULL, tolower = TRUE)), x, fixed = TRUE) @@ -419,10 +419,10 @@ eucast_rules <- function(x, # save original table, with the new .rowid column x.bak <- x # keep only unique rows for MO and ABx - x <- x %>% - arrange(`.rowid`) %>% + x <- x %pm>% + pm_arrange(`.rowid`) %pm>% # big speed gain! only analyse unique rows: - distinct(`.rowid`, .keep_all = TRUE) %>% + pm_distinct(`.rowid`, .keep_all = TRUE) %pm>% as.data.frame(stringsAsFactors = FALSE) x[, col_mo] <- as.mo(as.character(x[, col_mo, drop = TRUE]), info = info) # rename col_mo to prevent interference with joined columns @@ -925,16 +925,16 @@ eucast_rules <- function(x, # Print overview ---------------------------------------------------------- if (isTRUE(info) || isTRUE(verbose)) { - verbose_info <- x.bak %>% - mutate(row = seq_len(NROW(x.bak))) %>% - select(`.rowid`, row) %>% - right_join(verbose_info, + verbose_info <- x.bak %pm>% + pm_mutate(row = pm_row_number()) %pm>% + pm_select(`.rowid`, row) %pm>% + pm_right_join(verbose_info, by = c(".rowid" = "rowid") - ) %>% - select(-`.rowid`) %>% - select(row, everything()) %>% - filter(!is.na(new) | is.na(new) & !is.na(old)) %>% - arrange(row, rule_group, rule_name, col) + ) %pm>% + pm_select(-`.rowid`) %pm>% + pm_select(row, pm_everything()) %pm>% + pm_filter(!is.na(new) | is.na(new) & !is.na(old)) %pm>% + pm_arrange(row, rule_group, rule_name, col) rownames(verbose_info) <- NULL } @@ -949,7 +949,7 @@ eucast_rules <- function(x, cat(word_wrap(paste0( "The rules ", paste0(wouldve, "affected "), font_bold( - formatnr(n_distinct(verbose_info$row)), + formatnr(pm_n_distinct(verbose_info$row)), "out of", formatnr(nrow(x.bak)), "rows" ), @@ -957,8 +957,8 @@ eucast_rules <- function(x, font_bold(formatnr(nrow(verbose_info)), "edits\n") ))) - total_n_added <- verbose_info %>% filter(is.na(old)) %>% nrow() - total_n_changed <- verbose_info %>% filter(!is.na(old)) %>% nrow() + total_n_added <- verbose_info %pm>% pm_filter(is.na(old)) %pm>% nrow() + total_n_changed <- verbose_info %pm>% pm_filter(!is.na(old)) %pm>% nrow() # print added values if (total_n_added == 0) { @@ -968,15 +968,15 @@ eucast_rules <- function(x, } cat(colour(paste0( "=> ", wouldve, "added ", - font_bold(formatnr(verbose_info %>% - filter(is.na(old)) %>% + font_bold(formatnr(verbose_info %pm>% + pm_filter(is.na(old)) %pm>% nrow()), "test results"), "\n" ))) if (total_n_added > 0) { - added_summary <- verbose_info %>% - filter(is.na(old)) %>% - count(new, name = "n") + added_summary <- verbose_info %pm>% + pm_filter(is.na(old)) %pm>% + pm_count(new, name = "n") cat(paste(" -", paste0( formatnr(added_summary$n), " test result", ifelse(added_summary$n > 1, "s", ""), @@ -997,16 +997,16 @@ eucast_rules <- function(x, } cat(colour(paste0( "=> ", wouldve, "changed ", - font_bold(formatnr(verbose_info %>% - filter(!is.na(old)) %>% + font_bold(formatnr(verbose_info %pm>% + pm_filter(!is.na(old)) %pm>% nrow()), "test results"), "\n" ))) if (total_n_changed > 0) { - changed_summary <- verbose_info %>% - filter(!is.na(old)) %>% - mutate(new = ifelse(is.na(new), "NA", new)) %>% - count(old, new, name = "n") + changed_summary <- verbose_info %pm>% + pm_filter(!is.na(old)) %pm>% + pm_mutate(new = ifelse(is.na(new), "NA", new)) %pm>% + pm_count(old, new, name = "n") cat(paste(" -", paste0( formatnr(changed_summary$n), " test result", ifelse(changed_summary$n > 1, "s", ""), " changed from ", @@ -1049,8 +1049,8 @@ eucast_rules <- function(x, # x was analysed with only unique rows, so join everything together again x <- x[, c(cols_ab, ".rowid"), drop = FALSE] x.bak <- x.bak[, setdiff(colnames(x.bak), cols_ab), drop = FALSE] - x.bak <- x.bak %>% - left_join(x, by = ".rowid") + x.bak <- x.bak %pm>% + pm_left_join(x, by = ".rowid") x.bak <- x.bak[, old_cols, drop = FALSE] # reset original attributes attributes(x.bak) <- old_attributes @@ -1103,8 +1103,8 @@ edit_sir <- function(x, if (w$message %like% "invalid factor level") { xyz <- vapply(FUN.VALUE = logical(1), cols, function(col) { new_edits[, col] <<- factor( - x = as.character(pull(new_edits, col)), - levels = unique(c(to, levels(pull(new_edits, col)))) + x = as.character(pm_pull(new_edits, col)), + levels = unique(c(to, levels(pm_pull(new_edits, col)))) ) TRUE }) @@ -1159,22 +1159,22 @@ edit_sir <- function(x, "rowid", "col", "mo_fullname", "old", "new", "rule", "rule_group", "rule_name", "rule_source" ) - verbose_new <- verbose_new %>% filter(old != new | is.na(old) | is.na(new) & !is.na(old)) + verbose_new <- verbose_new %pm>% pm_filter(old != new | is.na(old) | is.na(new) & !is.na(old)) # save changes to data set 'verbose_info' track_changes$verbose_info <- rbind(track_changes$verbose_info, verbose_new, stringsAsFactors = FALSE ) # count adds and changes - track_changes$added <- track_changes$added + verbose_new %>% - filter(is.na(old)) %>% - pull(rowid) %>% - get_original_rows() %>% + track_changes$added <- track_changes$added + verbose_new %pm>% + pm_filter(is.na(old)) %pm>% + pm_pull(rowid) %pm>% + get_original_rows() %pm>% length() - track_changes$changed <- track_changes$changed + verbose_new %>% - filter(!is.na(old)) %>% - pull(rowid) %>% - get_original_rows() %>% + track_changes$changed <- track_changes$changed + verbose_new %pm>% + pm_filter(!is.na(old)) %pm>% + pm_pull(rowid) %pm>% + get_original_rows() %pm>% length() } } diff --git a/R/first_isolate.R b/R/first_isolate.R index b237ed14..89c1c1cb 100755 --- a/R/first_isolate.R +++ b/R/first_isolate.R @@ -33,7 +33,7 @@ #' @param x a [data.frame] containing isolates. Can be left blank for automatic determination, see *Examples*. #' @param col_date column name of the result date (or date that is was received on the lab), defaults to the first column with a date class #' @param col_patient_id column name of the unique IDs of the patients, defaults to the first column that starts with 'patient' or 'patid' (case insensitive) -#' @param col_mo column name of the names or codes of the microorganisms (see [as.mo()]), defaults to the first column of class [`mo`]. Values will be coerced using [as.mo()]. +#' @param col_mo column name of the IDs of the microorganisms (see [as.mo()]), defaults to the first column of class [`mo`]. Values will be coerced using [as.mo()]. #' @param col_testcode column name of the test codes. Use `col_testcode = NULL` to **not** exclude certain test codes (such as test codes for screening). In that case `testcodes_exclude` will be ignored. #' @param col_specimen column name of the specimen type or group #' @param col_icu column name of the logicals (`TRUE`/`FALSE`) whether a ward or department is an Intensive Care Unit (ICU). This can also be a [logical] vector with the same length as rows in `x`. @@ -71,14 +71,17 @@ #' | **Isolate-based** | `first_isolate(x, method = "isolate-based")` | #' | *(= all isolates)* | | #' | | | +#' | | | #' | **Patient-based** | `first_isolate(x, method = "patient-based")` | #' | *(= first isolate per patient)* | | #' | | | +#' | | | #' | **Episode-based** | `first_isolate(x, method = "episode-based")`, or: | #' | *(= first isolate per episode)* | | #' | - 7-Day interval from initial isolate | - `first_isolate(x, method = "e", episode_days = 7)` | #' | - 30-Day interval from initial isolate | - `first_isolate(x, method = "e", episode_days = 30)` | #' | | | +#' | | | #' | **Phenotype-based** | `first_isolate(x, method = "phenotype-based")`, or: | #' | *(= first isolate per phenotype)* | | #' | - Major difference in any antimicrobial result | - `first_isolate(x, type = "points")` | @@ -130,7 +133,7 @@ #' # `example_isolates` is a data set available in the AMR package. #' # See ?example_isolates. #' -#' example_isolates[first_isolate(info = TRUE), ] +#' example_isolates[first_isolate(), ] #' \donttest{ #' # get all first Gram-negatives #' example_isolates[which(first_isolate(info = FALSE) & mo_is_gram_negative()), ] @@ -138,7 +141,7 @@ #' if (require("dplyr")) { #' # filter on first isolates using dplyr: #' example_isolates %>% -#' filter(first_isolate(info = TRUE)) +#' filter(first_isolate()) #' } #' if (require("dplyr")) { #' # short-hand version: @@ -149,7 +152,7 @@ #' # flag the first isolates per group: #' example_isolates %>% #' group_by(ward) %>% -#' mutate(first = first_isolate(info = FALSE)) %>% +#' mutate(first = first_isolate()) %>% #' select(ward, date, patient, mo, first) #' } #' } @@ -391,17 +394,17 @@ first_isolate <- function(x = NULL, } else { # filtering on specimen and only analyse these rows to save time x <- x[order( - pull(x, col_specimen), + pm_pull(x, col_specimen), x$newvar_patient_id, x$newvar_genus_species, x$newvar_date ), ] rownames(x) <- NULL suppressWarnings( - row.start <- which(x %>% pull(col_specimen) == specimen_group) %>% min(na.rm = TRUE) + row.start <- which(x %pm>% pm_pull(col_specimen) == specimen_group) %pm>% min(na.rm = TRUE) ) suppressWarnings( - row.end <- which(x %>% pull(col_specimen) == specimen_group) %>% max(na.rm = TRUE) + row.end <- which(x %pm>% pm_pull(col_specimen) == specimen_group) %pm>% max(na.rm = TRUE) ) } @@ -424,7 +427,7 @@ first_isolate <- function(x = NULL, } return(TRUE) } - if (length(c(row.start:row.end)) == n_distinct(x[c(row.start:row.end), col_mo, drop = TRUE])) { + if (length(c(row.start:row.end)) == pm_n_distinct(x[c(row.start:row.end), col_mo, drop = TRUE])) { if (isTRUE(info)) { message_("=> Found ", font_bold(paste(length(c(row.start:row.end)), "first isolates")), ", as all isolates were different microbial species", @@ -462,7 +465,7 @@ first_isolate <- function(x = NULL, } } - x$other_pat_or_mo <- !(x$newvar_patient_id == lag(x$newvar_patient_id) & x$newvar_genus_species == lag(x$newvar_genus_species)) + x$other_pat_or_mo <- !(x$newvar_patient_id == pm_lag(x$newvar_patient_id) & x$newvar_genus_species == pm_lag(x$newvar_genus_species)) x$episode_group <- paste(x$newvar_patient_id, x$newvar_genus_species) x$more_than_episode_ago <- unlist( @@ -482,21 +485,29 @@ first_isolate <- function(x = NULL, # with key antibiotics x$other_key_ab <- !antimicrobials_equal( y = x$newvar_key_ab, - z = lag(x$newvar_key_ab), + z = pm_lag(x$newvar_key_ab), type = type, ignore_I = ignore_I, points_threshold = points_threshold ) - x$newvar_first_isolate <- x$newvar_row_index_sorted >= row.start & - x$newvar_row_index_sorted <= row.end & - x$newvar_genus_species != "" & - (x$other_pat_or_mo | x$more_than_episode_ago | x$other_key_ab) + x$newvar_first_isolate <- pm_if_else( + x$newvar_row_index_sorted >= row.start & + x$newvar_row_index_sorted <= row.end & + x$newvar_genus_species != "" & + (x$other_pat_or_mo | x$more_than_episode_ago | x$other_key_ab), + TRUE, + FALSE + ) } else { # no key antibiotics - x$newvar_first_isolate <- x$newvar_row_index_sorted >= row.start & - x$newvar_row_index_sorted <= row.end & - x$newvar_genus_species != "" & - (x$other_pat_or_mo | x$more_than_episode_ago) + x$newvar_first_isolate <- pm_if_else( + x$newvar_row_index_sorted >= row.start & + x$newvar_row_index_sorted <= row.end & + x$newvar_genus_species != "" & + (x$other_pat_or_mo | x$more_than_episode_ago), + TRUE, + FALSE + ) } # first one as TRUE @@ -507,14 +518,12 @@ first_isolate <- function(x = NULL, } if (!is.null(col_icu)) { if (icu_exclude == TRUE) { - if (isTRUE(info)) { - message_("Excluding ", format(sum(col_icu, na.rm = TRUE), big.mark = ","), " isolates from ICU.", - add_fn = font_black, - as_note = FALSE - ) - } + message_("Excluding ", format(sum(col_icu, na.rm = TRUE), big.mark = ","), " isolates from ICU.", + add_fn = font_black, + as_note = FALSE + ) x[which(col_icu), "newvar_first_isolate"] <- FALSE - } else if (isTRUE(info)) { + } else { message_("Including isolates from ICU.", add_fn = font_black, as_note = FALSE @@ -523,7 +532,7 @@ first_isolate <- function(x = NULL, } decimal.mark <- getOption("OutDec") - big.mark <- ifelse(decimal.mark != ",", ",", " ") + big.mark <- ifelse(decimal.mark != ",", ",", ".") if (isTRUE(info)) { # print group name if used in dplyr::group_by() diff --git a/R/get_episode.R b/R/get_episode.R index 8d2398dc..9d13a6cd 100755 --- a/R/get_episode.R +++ b/R/get_episode.R @@ -109,7 +109,7 @@ get_episode <- function(x, episode_days, ...) { meet_criteria(x, allow_class = c("Date", "POSIXt"), allow_NA = TRUE) meet_criteria(episode_days, allow_class = c("numeric", "integer"), has_length = 1, is_positive = TRUE, is_finite = FALSE) - + exec_episode( x = x, episode_days = episode_days, @@ -127,10 +127,10 @@ is_new_episode <- function(x, episode_days, ...) { exec_episode <- function(x, type, episode_days, ...) { x <- as.double(as.POSIXct(x)) # as.POSIXct() required for Date classes - + # since x is now in seconds, get seconds from episode_days as well episode_seconds <- episode_days * 60 * 60 * 24 - + if (length(x) == 1) { # this will also match 1 NA, which is fine return(1) } else if (length(x) == 2 && !all(is.na(x))) { @@ -140,7 +140,7 @@ exec_episode <- function(x, type, episode_days, ...) { return(c(1, 1)) } } - + # we asked on StackOverflow: # https://stackoverflow.com/questions/42122245/filter-one-row-every-year run_episodes <- function(x, episode_seconds) { @@ -157,7 +157,7 @@ exec_episode <- function(x, type, episode_days, ...) { } indices } - + ord <- order(x) out <- run_episodes(x[ord], episode_seconds)[order(ord)] out[is.na(x) & ord != 1] <- NA # every NA expect for the first must remain NA diff --git a/R/ggplot_sir.R b/R/ggplot_sir.R index e999f65d..ea2e50b3 100755 --- a/R/ggplot_sir.R +++ b/R/ggplot_sir.R @@ -202,7 +202,7 @@ ggplot_sir <- function(data, meet_criteria(limits, allow_class = c("numeric", "integer"), has_length = 2, allow_NULL = TRUE, allow_NA = TRUE) meet_criteria(translate_ab, allow_class = c("character", "logical"), has_length = 1, allow_NA = TRUE) meet_criteria(combine_SI, allow_class = "logical", has_length = 1) - meet_criteria(minimum, allow_class = c("numeric", "integer"), has_length = 1, is_positive_or_zero = TRUE, is_finite = TRUE) + meet_criteria(minimum, allow_class = c("numeric", "integer"), has_length = 1, is_finite = TRUE) language <- validate_language(language) meet_criteria(nrow, allow_class = c("numeric", "integer"), has_length = 1, allow_NULL = TRUE, is_positive = TRUE, is_finite = TRUE) meet_criteria(colours, allow_class = c("character", "logical")) @@ -300,7 +300,7 @@ geom_sir <- function(position = NULL, meet_criteria(x, allow_class = "character", has_length = 1) meet_criteria(fill, allow_class = "character", has_length = 1) meet_criteria(translate_ab, allow_class = c("character", "logical"), has_length = 1, allow_NA = TRUE) - meet_criteria(minimum, allow_class = c("numeric", "integer"), has_length = 1, is_positive_or_zero = TRUE, is_finite = TRUE) + meet_criteria(minimum, allow_class = c("numeric", "integer"), has_length = 1, is_finite = TRUE) language <- validate_language(language) meet_criteria(combine_SI, allow_class = "logical", has_length = 1) @@ -486,7 +486,7 @@ labels_sir_count <- function(position = NULL, meet_criteria(position, allow_class = "character", has_length = 1, is_in = c("fill", "stack", "dodge"), allow_NULL = TRUE) meet_criteria(x, allow_class = "character", has_length = 1) meet_criteria(translate_ab, allow_class = c("character", "logical"), has_length = 1, allow_NA = TRUE) - meet_criteria(minimum, allow_class = c("numeric", "integer"), has_length = 1, is_positive_or_zero = TRUE, is_finite = TRUE) + meet_criteria(minimum, allow_class = c("numeric", "integer"), has_length = 1, is_finite = TRUE) language <- validate_language(language) meet_criteria(combine_SI, allow_class = "logical", has_length = 1) meet_criteria(datalabels.size, allow_class = c("numeric", "integer"), has_length = 1, is_positive = TRUE, is_finite = TRUE) @@ -519,11 +519,11 @@ labels_sir_count <- function(position = NULL, language = language ) transformed$gr <- transformed[, x_name, drop = TRUE] - transformed %>% - group_by(gr) %>% - mutate(lbl = paste0("n=", isolates)) %>% - ungroup() %>% - select(-gr) + transformed %pm>% + pm_group_by(gr) %pm>% + pm_mutate(lbl = paste0("n=", isolates)) %pm>% + pm_ungroup() %pm>% + pm_select(-gr) } ) } diff --git a/R/join_microorganisms.R b/R/join_microorganisms.R index 23904055..e3817ff9 100755 --- a/R/join_microorganisms.R +++ b/R/join_microorganisms.R @@ -159,9 +159,14 @@ join_microorganisms <- function(type, x, by, suffix, ...) { by <- stats::setNames("mo", by) } - # this will use dplyr if available, and the slower poorman otherwise, see R/aaa_helper_pm_functions.R - join_fn <- get(type, envir = asNamespace("AMR")) - + # use dplyr if available - it's much faster than poorman alternatives + dplyr_join <- import_fn(name = type, pkg = "dplyr", error_on_fail = FALSE) + if (!is.null(dplyr_join)) { + join_fn <- dplyr_join + } else { + # otherwise use poorman, see R/aa_helper_pm_functions.R + join_fn <- get(paste0("pm_", type), envir = asNamespace("AMR")) + } MO_df <- AMR_env$MO_lookup[, colnames(AMR::microorganisms), drop = FALSE] if (type %like% "full|left|right|inner") { joined <- join_fn(x = x, y = MO_df, by = by, suffix = suffix, ...) diff --git a/R/mean_amr_distance.R b/R/mean_amr_distance.R index 174f43d0..34311469 100755 --- a/R/mean_amr_distance.R +++ b/R/mean_amr_distance.R @@ -137,7 +137,7 @@ mean_amr_distance.data.frame <- function(x, ..., combine_SI = TRUE) { if (!is.null(out)) { df <- df[, out, drop = FALSE] } else { - df <- select(df, ...) + df <- pm_select(df, ...) } } df_classes <- colnames(df)[vapply(FUN.VALUE = logical(1), df, function(x) is.disk(x) | is.mic(x) | is.disk(x), USE.NAMES = FALSE)] diff --git a/R/mic.R b/R/mic.R index aeb2276b..3091a485 100755 --- a/R/mic.R +++ b/R/mic.R @@ -219,14 +219,14 @@ as.mic <- function(x, na.rm = FALSE) { ## previously unempty values now empty - should return a warning later on x[x.bak != "" & x == ""] <- "invalid" - na_before <- x[is.na(x) | x == ""] %>% length() + na_before <- x[is.na(x) | x == ""] %pm>% length() x[!x %in% valid_mic_levels] <- NA - na_after <- x[is.na(x) | x == ""] %>% length() + na_after <- x[is.na(x) | x == ""] %pm>% length() if (na_before != na_after) { - list_missing <- x.bak[is.na(x) & !is.na(x.bak) & x.bak != ""] %>% - unique() %>% - sort() %>% + list_missing <- x.bak[is.na(x) & !is.na(x.bak) & x.bak != ""] %pm>% + unique() %pm>% + sort() %pm>% vector_and(quotes = TRUE) cur_col <- get_current_column() warning_("in `as.mic()`: ", na_after - na_before, " result", diff --git a/R/mo.R b/R/mo.R index 0a43cac8..345fb814 100755 --- a/R/mo.R +++ b/R/mo.R @@ -561,10 +561,10 @@ pillar_shaft.mo <- function(x, ...) { # markup NA and UNKNOWN out[is.na(x)] <- font_na(" NA") out[x == "UNKNOWN"] <- font_na(" UNKNOWN") - + # markup manual codes out[x %in% AMR_env$MO_lookup$mo & !x %in% AMR::microorganisms$mo] <- font_blue(out[x %in% AMR_env$MO_lookup$mo & !x %in% AMR::microorganisms$mo], collapse = NULL) - + df <- tryCatch(get_current_data(arg_name = "x", call = 0), error = function(e) NULL ) @@ -579,7 +579,7 @@ pillar_shaft.mo <- function(x, ...) { (!is.null(df) && !all(unlist(df[, which(mo_cols), drop = FALSE]) %in% all_mos))) { # markup old mo codes out[!x %in% all_mos] <- font_italic( - font_na(font_stripstyle(out[!x %in% all_mos]), + font_na(x[!x %in% all_mos], collapse = NULL ), collapse = NULL @@ -627,7 +627,7 @@ freq.mo <- function(x, ...) { .add_header = list( `Gram-negative` = paste0( format(sum(grams == "Gram-negative", na.rm = TRUE), - big.mark = " ", + big.mark = ",", decimal.mark = "." ), " (", percentage(sum(grams == "Gram-negative", na.rm = TRUE) / length(grams), @@ -637,7 +637,7 @@ freq.mo <- function(x, ...) { ), `Gram-positive` = paste0( format(sum(grams == "Gram-positive", na.rm = TRUE), - big.mark = " ", + big.mark = ",", decimal.mark = "." ), " (", percentage(sum(grams == "Gram-positive", na.rm = TRUE) / length(grams), @@ -645,8 +645,8 @@ freq.mo <- function(x, ...) { ), ")" ), - `Nr. of genera` = n_distinct(mo_genus(x_noNA, language = NULL)), - `Nr. of species` = n_distinct(paste( + `Nr. of genera` = pm_n_distinct(mo_genus(x_noNA, language = NULL)), + `Nr. of species` = pm_n_distinct(paste( mo_genus(x_noNA, language = NULL), mo_species(x_noNA, language = NULL) )) @@ -1155,14 +1155,14 @@ repair_reference_df <- function(reference_df) { return(NULL) } # has valid own reference_df - reference_df <- reference_df %>% - filter(!is.na(mo)) + reference_df <- reference_df %pm>% + pm_filter(!is.na(mo)) # keep only first two columns, second must be mo if (colnames(reference_df)[1] == "mo") { - reference_df <- reference_df %>% select(2, "mo") + reference_df <- reference_df %pm>% pm_select(2, "mo") } else { - reference_df <- reference_df %>% select(1, "mo") + reference_df <- reference_df %pm>% pm_select(1, "mo") } # remove factors, just keep characters diff --git a/R/mo_property.R b/R/mo_property.R index d6a40eba..34f7abbf 100755 --- a/R/mo_property.R +++ b/R/mo_property.R @@ -31,7 +31,7 @@ #' #' Use these functions to return a specific property of a microorganism based on the latest accepted taxonomy. All input values will be evaluated internally with [as.mo()], which makes it possible to use microbial abbreviations, codes and names as input. See *Examples*. #' @param x any [character] (vector) that can be coerced to a valid microorganism code with [as.mo()]. Can be left blank for auto-guessing the column containing microorganism codes if used in a data set, see *Examples*. -#' @param property one of the column names of the [microorganisms] data set: `r vector_or(colnames(microorganisms), sort = FALSE, quotes = TRUE)` +#' @param property one of the column names of the [microorganisms] data set: `r vector_or(colnames(microorganisms), sort = FALSE, quotes = TRUE)`, or must be `"shortname"` #' @inheritParams as.mo #' @param ... other arguments passed on to [as.mo()], such as 'minimum_matching_score', 'ignore_pattern', and 'remove_from_input' #' @param ab any (vector of) text that can be coerced to a valid antibiotic drug code with [as.ab()] @@ -900,16 +900,12 @@ mo_validate <- function(x, property, language, keep_synonyms = keep_synonyms, .. } # get property reeaaally fast using match() - if (property == "snomed") { - x <- lapply(x, function(y) unlist(AMR_env$MO_lookup$snomed[match(y, AMR_env$MO_lookup$mo)])) - } else { - x <- AMR_env$MO_lookup[[property]][match(x, AMR_env$MO_lookup$mo)] - } - + x <- AMR_env$MO_lookup[[property]][match(x, AMR_env$MO_lookup$mo)] + if (property == "mo") { return(set_clean_class(x, new_class = c("mo", "character"))) } else if (property == "snomed") { - return(x) + return(sort(as.character(eval(parse(text = x))))) } else if (property == "prevalence") { return(as.double(x)) } else { diff --git a/R/pca.R b/R/pca.R index fa506d13..e8b42368 100755 --- a/R/pca.R +++ b/R/pca.R @@ -127,7 +127,7 @@ pca <- function(x, x <- cbind(x.bak[, vapply(FUN.VALUE = logical(1), x.bak, function(y) !is.numeric(y) & !all(is.na(y))), drop = FALSE], x) } - x <- ungroup(x) # would otherwise select the grouping vars + x <- pm_ungroup(x) # would otherwise select the grouping vars x <- x[rowSums(is.na(x)) == 0, ] # remove columns containing NAs pca_data <- x[, which(vapply(FUN.VALUE = logical(1), x, function(x) is.numeric(x))), drop = FALSE] diff --git a/R/plot.R b/R/plot.R index 5f6d1a5d..fb2431a3 100755 --- a/R/plot.R +++ b/R/plot.R @@ -602,7 +602,7 @@ plot.sir <- function(x, data$x <- factor(data$x, levels = c("S", "I", "R"), ordered = TRUE) - ymax <- ifelse(max(data$s) > 95, 105, 100) + ymax <- pm_if_else(max(data$s) > 95, 105, 100) plot( x = data$x, @@ -615,7 +615,7 @@ plot.sir <- function(x, axes = FALSE ) # x axis - axis(side = 1, at = 1:n_distinct(data$x), labels = levels(data$x), lwd = 0) + axis(side = 1, at = 1:pm_n_distinct(data$x), labels = levels(data$x), lwd = 0) # y axis, 0-100% axis(side = 2, at = seq(0, 100, 5)) diff --git a/R/proportion.R b/R/proportion.R index 133d73f2..0ff939b5 100755 --- a/R/proportion.R +++ b/R/proportion.R @@ -27,7 +27,7 @@ # how to conduct AMR data analysis: https://msberends.github.io/AMR/ # # ==================================================================== # -#' Calculate Antimicrobial Resistance +#' Calculate Microbial Resistance #' #' @description These functions can be used to calculate the (co-)resistance or susceptibility of microbial isolates (i.e. percentage of S, SI, I, IR or R). All functions support quasiquotation with pipes, can be used in `summarise()` from the `dplyr` package and also support grouped variables, see *Examples*. #' @@ -49,7 +49,7 @@ #' #' Use [sir_confidence_interval()] to calculate the confidence interval, which relies on [binom.test()], i.e., the Clopper-Pearson method. This function returns a vector of length 2 at default for antimicrobial *resistance*. Change the `side` argument to "left"/"min" or "right"/"max" to return a single value, and change the `ab_result` argument to e.g. `c("S", "I")` to test for antimicrobial *susceptibility*, see Examples. #' -#' **Remember that you should filter your data to let it contain only first isolates!** This is needed to exclude duplicates and to reduce selection bias. Use [first_isolate()] to determine them in your data set with one of the four available algorithms. +#' **Remember that you should filter your data to let it contain only first isolates!** This is needed to exclude duplicates and to reduce selection bias. Use [first_isolate()] to determine them in your data set. #' #' These functions are not meant to count isolates, but to calculate the proportion of resistance/susceptibility. Use the [`count()`][AMR::count()] functions to count isolates. The function [susceptibility()] is essentially equal to `count_susceptible() / count_all()`. *Low counts can influence the outcome - the `proportion` functions may camouflage this, since they only return the proportion (albeit being dependent on the `minimum` argument).* #' @@ -77,14 +77,11 @@ #' ``` #' #' Please note that, in combination therapies, for `only_all_tested = TRUE` applies that: -#' #' ``` #' count_S() + count_I() + count_R() = count_all() #' proportion_S() + proportion_I() + proportion_R() = 1 #' ``` -#' #' and that, in combination therapies, for `only_all_tested = FALSE` applies that: -#' #' ``` #' count_S() + count_I() + count_R() >= count_all() #' proportion_S() + proportion_I() + proportion_R() >= 1 @@ -101,8 +98,7 @@ #' @examples #' # example_isolates is a data set available in the AMR package. #' # run ?example_isolates for more info. -#' example_isolates -#' +#' #' # base R ------------------------------------------------------------ #' # determines %R #' resistance(example_isolates$AMX) diff --git a/R/random.R b/R/random.R index 20d20a58..39c6e882 100755 --- a/R/random.R +++ b/R/random.R @@ -91,10 +91,10 @@ random_sir <- function(size = NULL, prob_SIR = c(0.33, 0.33, 0.33), ...) { } random_exec <- function(type, size, mo = NULL, ab = NULL) { - df <- clinical_breakpoints %>% - filter(guideline %like% "EUCAST") %>% - arrange(pm_desc(guideline)) %>% - filter(guideline == max(guideline) & + df <- clinical_breakpoints %pm>% + pm_filter(guideline %like% "EUCAST") %pm>% + pm_arrange(pm_desc(guideline)) %pm>% + subset(guideline == max(guideline) & method == type) if (!is.null(mo)) { @@ -105,7 +105,7 @@ random_exec <- function(type, size, mo = NULL, ab = NULL) { as.mo(mo_family(mo_coerced)), as.mo(mo_order(mo_coerced)) ) - df_new <- df %>% + df_new <- df %pm>% subset(mo %in% mo_include) if (nrow(df_new) > 0) { df <- df_new @@ -116,7 +116,7 @@ random_exec <- function(type, size, mo = NULL, ab = NULL) { if (!is.null(ab)) { ab_coerced <- as.ab(ab) - df_new <- df %>% + df_new <- df %pm>% subset(ab %in% ab_coerced) if (nrow(df_new) > 0) { df <- df_new diff --git a/R/resistance_predict.R b/R/resistance_predict.R index 3f930921..378b489c 100755 --- a/R/resistance_predict.R +++ b/R/resistance_predict.R @@ -125,7 +125,7 @@ resistance_predict <- function(x, meet_criteria(year_min, allow_class = c("numeric", "integer"), has_length = 1, allow_NULL = TRUE, is_positive = TRUE, is_finite = TRUE) meet_criteria(year_max, allow_class = c("numeric", "integer"), has_length = 1, allow_NULL = TRUE, is_positive = TRUE, is_finite = TRUE) meet_criteria(year_every, allow_class = c("numeric", "integer"), has_length = 1, is_positive = TRUE, is_finite = TRUE) - meet_criteria(minimum, allow_class = c("numeric", "integer"), has_length = 1, is_positive_or_zero = TRUE, is_finite = TRUE) + meet_criteria(minimum, allow_class = c("numeric", "integer"), has_length = 1, is_finite = TRUE) meet_criteria(model, allow_class = c("character", "function"), has_length = 1, allow_NULL = TRUE) meet_criteria(I_as_S, allow_class = "logical", has_length = 1) meet_criteria(preserve_measurements, allow_class = "logical", has_length = 1) @@ -260,8 +260,8 @@ resistance_predict <- function(x, observed = df$R / (df$R + df$S), stringsAsFactors = FALSE ) - df_prediction <- df_prediction %>% - left_join(df_observations, by = "year") + df_prediction <- df_prediction %pm>% + pm_left_join(df_observations, by = "year") df_prediction$estimated <- df_prediction$value if (preserve_measurements == TRUE) { diff --git a/R/sir.R b/R/sir.R index c21603ea..696213a8 100755 --- a/R/sir.R +++ b/R/sir.R @@ -89,7 +89,7 @@ #' #' ### Machine-Readable Interpretation Guidelines #' -#' The repository of this package [contains a machine-readable version](https://github.com/msberends/AMR/blob/main/data-raw/clinical_breakpoints.txt) of all guidelines. This is a CSV file consisting of `r format(nrow(AMR::clinical_breakpoints), big.mark = " ")` rows and `r ncol(AMR::clinical_breakpoints)` columns. This file is machine-readable, since it contains one row for every unique combination of the test method (MIC or disk diffusion), the antimicrobial drug and the microorganism. **This allows for easy implementation of these rules in laboratory information systems (LIS)**. Note that it only contains interpretation guidelines for humans - interpretation guidelines from CLSI for animals were removed. +#' The repository of this package [contains a machine-readable version](https://github.com/msberends/AMR/blob/main/data-raw/clinical_breakpoints.txt) of all guidelines. This is a CSV file consisting of `r format(nrow(AMR::clinical_breakpoints), big.mark = ",")` rows and `r ncol(AMR::clinical_breakpoints)` columns. This file is machine-readable, since it contains one row for every unique combination of the test method (MIC or disk diffusion), the antimicrobial drug and the microorganism. **This allows for easy implementation of these rules in laboratory information systems (LIS)**. Note that it only contains interpretation guidelines for humans - interpretation guidelines from CLSI for animals were removed. #' #' ### Other #' @@ -373,9 +373,9 @@ as.sir.default <- function(x, ...) { if (!isFALSE(list(...)$warn)) { # so as.sir(..., warn = FALSE) will never throw a warning if (na_before != na_after) { - list_missing <- x.bak[is.na(x) & !is.na(x.bak) & x.bak != ""] %>% - unique() %>% - sort() %>% + list_missing <- x.bak[is.na(x) & !is.na(x.bak) & x.bak != ""] %pm>% + unique() %pm>% + sort() %pm>% vector_and(quotes = TRUE) cur_col <- get_current_column() warning_("in `as.sir()`: ", na_after - na_before, " result", @@ -543,7 +543,7 @@ as.sir.data.frame <- function(x, i <- 0 if (tryCatch(length(list(...)) > 0, error = function(e) TRUE)) { - sel <- colnames(select(x, ...)) + sel <- colnames(pm_select(x, ...)) } else { sel <- colnames(x) } @@ -597,10 +597,10 @@ as.sir.data.frame <- function(x, for (i in seq_len(length(ab_cols))) { if (types[i] == "mic") { - x[, ab_cols[i]] <- x %>% - pull(ab_cols[i]) %>% - as.character() %>% - as.mic() %>% + x[, ab_cols[i]] <- x %pm>% + pm_pull(ab_cols[i]) %pm>% + as.character() %pm>% + as.mic() %pm>% as.sir( mo = x_mo, mo.bak = x[, col_mo, drop = TRUE], @@ -614,10 +614,10 @@ as.sir.data.frame <- function(x, is_data.frame = TRUE ) } else if (types[i] == "disk") { - x[, ab_cols[i]] <- x %>% - pull(ab_cols[i]) %>% - as.character() %>% - as.disk() %>% + x[, ab_cols[i]] <- x %pm>% + pm_pull(ab_cols[i]) %pm>% + as.character() %pm>% + as.disk() %pm>% as.sir( mo = x_mo, mo.bak = x[, col_mo, drop = TRUE], @@ -848,21 +848,21 @@ as_sir_method <- function(method_short, mo_coerced <- mo if (identical(reference_data, AMR::clinical_breakpoints)) { - breakpoints <- reference_data %>% + breakpoints <- reference_data %pm>% subset(guideline == guideline_coerced & method == method_coerced & ab == ab_coerced) if (ab_coerced == "AMX" && nrow(breakpoints) == 0) { ab_coerced <- "AMP" - breakpoints <- reference_data %>% + breakpoints <- reference_data %pm>% subset(guideline == guideline_coerced & method == method_coerced & ab == ab_coerced) } } else { - breakpoints <- reference_data %>% + breakpoints <- reference_data %pm>% subset(method == method_coerced & ab == ab_coerced) } if (isFALSE(include_PKPD)) { # remove PKPD rules from the breakpoints table - breakpoints <- breakpoints %>% + breakpoints <- breakpoints %pm>% subset(mo != "UNKNOWN" & ref_tbl %unlike% "PK.*PD") } @@ -918,7 +918,7 @@ as_sir_method <- function(method_short, # gather all available breakpoints for current MO and sort on taxonomic rank # (this will prefer species breakpoints over order breakpoints) - breakpoints_current <- breakpoints %>% + breakpoints_current <- breakpoints %pm>% subset(mo %in% c( mo_current_genus, mo_current_family, mo_current_order, mo_current_class, @@ -927,14 +927,14 @@ as_sir_method <- function(method_short, )) if (any(uti, na.rm = TRUE)) { - breakpoints_current <- breakpoints_current %>% + breakpoints_current <- breakpoints_current %pm>% # be as specific as possible (i.e. prefer species over genus): - # the below `desc(uti)` will put `TRUE` on top and FALSE on bottom - arrange(rank_index, desc(uti)) # 'uti' is a column in data set 'clinical_breakpoints' + # the below `pm_desc(uti)` will put `TRUE` on top and FALSE on bottom + pm_arrange(rank_index, pm_desc(uti)) # 'uti' is a column in data set 'clinical_breakpoints' } else { - breakpoints_current <- breakpoints_current %>% + breakpoints_current <- breakpoints_current %pm>% # sort UTI = FALSE first, then UTI = TRUE - arrange(rank_index, uti) + pm_arrange(rank_index, uti) } # throw notes for different body sites @@ -945,8 +945,8 @@ as_sir_method <- function(method_short, } else if (nrow(breakpoints_current) > 1 && length(unique(breakpoints_current$site)) > 1 && any(is.na(uti)) && all(c(TRUE, FALSE) %in% breakpoints_current$uti, na.rm = TRUE) && message_not_thrown_before("as.sir", "siteUTI", mo_unique, ab_coerced)) { # both UTI and Non-UTI breakpoints available msgs <- c(msgs, paste0("Breakpoints for UTI ", font_underline("and"), " non-UTI available for ", ab_formatted, " in ", mo_formatted, " - assuming non-UTI. Use argument `uti` to set which isolates are from urine. See `?as.sir`.")) - breakpoints_current <- breakpoints_current %>% - filter(uti == FALSE) + breakpoints_current <- breakpoints_current %pm>% + pm_filter(uti == FALSE) } else if (nrow(breakpoints_current) > 1 && length(unique(breakpoints_current$site)) > 1 && all(breakpoints_current$uti == FALSE, na.rm = TRUE) && message_not_thrown_before("as.sir", "siteOther", mo_unique, ab_coerced)) { # breakpoints for multiple body sites available site <- breakpoints_current[1L, "site", drop = FALSE] # this is the one we'll take @@ -974,7 +974,7 @@ as_sir_method <- function(method_short, } if (method == "mic") { - new_sir <- case_when( + new_sir <- quick_case_when( is.na(values) ~ NA_sir_, values <= breakpoints_current$breakpoint_S ~ as.sir("S"), guideline_coerced %like% "EUCAST" & values > breakpoints_current$breakpoint_R ~ as.sir("R"), @@ -985,7 +985,7 @@ as_sir_method <- function(method_short, TRUE ~ NA_sir_ ) } else if (method == "disk") { - new_sir <- case_when( + new_sir <- quick_case_when( is.na(values) ~ NA_sir_, as.double(values) >= as.double(breakpoints_current$breakpoint_S) ~ as.sir("S"), guideline_coerced %like% "EUCAST" & as.double(values) < as.double(breakpoints_current$breakpoint_R) ~ as.sir("R"), diff --git a/R/sir_calc.R b/R/sir_calc.R index a5753a34..5b2d9713 100755 --- a/R/sir_calc.R +++ b/R/sir_calc.R @@ -31,8 +31,7 @@ dots2vars <- function(...) { # this function is to give more informative output about # variable names in count_* and proportion_* functions dots <- substitute(list(...)) - dots <- as.character(dots)[2:length(dots)] - paste0(dots[dots != "."], collapse = "+") + as.character(dots)[2:length(dots)] } sir_calc <- function(..., @@ -42,7 +41,7 @@ sir_calc <- function(..., only_all_tested = FALSE, only_count = FALSE) { meet_criteria(ab_result, allow_class = c("character", "numeric", "integer"), has_length = c(1, 2, 3)) - meet_criteria(minimum, allow_class = c("numeric", "integer"), has_length = 1, is_positive_or_zero = TRUE, is_finite = TRUE) + meet_criteria(minimum, allow_class = c("numeric", "integer"), has_length = 1, is_finite = TRUE) meet_criteria(as_percent, allow_class = "logical", has_length = 1) meet_criteria(only_all_tested, allow_class = "logical", has_length = 1) meet_criteria(only_count, allow_class = "logical", has_length = 1) @@ -68,7 +67,7 @@ sir_calc <- function(..., ndots <- length(dots) if (is.data.frame(dots_df)) { - # data.frame passed with other columns, like: example_isolates %>% proportion_S(AMC, GEN) + # data.frame passed with other columns, like: example_isolates %pm>% proportion_S(AMC, GEN) dots <- as.character(dots) # remove first element, it's the data.frame @@ -78,7 +77,7 @@ sir_calc <- function(..., dots <- dots[2:length(dots)] } if (length(dots) == 0 || all(dots == "df")) { - # for complete data.frames, like example_isolates %>% select(AMC, GEN) %>% proportion_S() + # for complete data.frames, like example_isolates %pm>% select(AMC, GEN) %pm>% proportion_S() # and the old sir function, which has "df" as name of the first argument x <- dots_df } else { @@ -93,14 +92,14 @@ sir_calc <- function(..., x <- dots_df[, dots, drop = FALSE] } } else if (ndots == 1) { - # only 1 variable passed (can also be data.frame), like: proportion_S(example_isolates$AMC) and example_isolates$AMC %>% proportion_S() + # only 1 variable passed (can also be data.frame), like: proportion_S(example_isolates$AMC) and example_isolates$AMC %pm>% proportion_S() x <- dots_df } else { # multiple variables passed without pipe, like: proportion_S(example_isolates$AMC, example_isolates$GEN) x <- NULL try(x <- as.data.frame(dots, stringsAsFactors = FALSE), silent = TRUE) if (is.null(x)) { - # support for example_isolates %>% group_by(ward) %>% summarise(amox = susceptibility(GEN, AMX)) + # support for example_isolates %pm>% group_by(ward) %pm>% summarise(amox = susceptibility(GEN, AMX)) x <- as.data.frame(list(...), stringsAsFactors = FALSE) } } @@ -134,7 +133,7 @@ sir_calc <- function(..., } x_transposed <- as.list(as.data.frame(t(x), stringsAsFactors = FALSE)) - if (isTRUE(only_all_tested)) { + if (only_all_tested == TRUE) { # no NAs in any column y <- apply( X = as.data.frame(lapply(x, as.integer), stringsAsFactors = FALSE), @@ -171,7 +170,7 @@ sir_calc <- function(..., if (only_count == TRUE) { return(numerator) } - + if (denominator < minimum) { if (data_vars != "") { data_vars <- paste(" for", data_vars) @@ -225,8 +224,8 @@ sir_calc_df <- function(type, # "proportion", "count" or "both" meet_criteria(type, is_in = c("proportion", "count", "both"), has_length = 1) meet_criteria(data, allow_class = "data.frame", contains_column_class = "sir") meet_criteria(translate_ab, allow_class = c("character", "logical"), has_length = 1, allow_NA = TRUE) - language <- validate_language(language) - meet_criteria(minimum, allow_class = c("numeric", "integer"), has_length = 1, is_positive_or_zero = TRUE, is_finite = TRUE) + meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) + meet_criteria(minimum, allow_class = c("numeric", "integer"), has_length = 1, is_finite = TRUE) meet_criteria(as_percent, allow_class = "logical", has_length = 1) meet_criteria(combine_SI, allow_class = "logical", has_length = 1) meet_criteria(confidence_level, allow_class = "numeric", has_length = 1) @@ -334,7 +333,7 @@ sir_calc_df <- function(type, # "proportion", "count" or "both" res <- do.call(rbind, unname(lapply(grouped, fn, ...))) if (any(groups %in% colnames(res))) { class(res) <- c("grouped_data", class(res)) - res <- pm_groups_set(res, groups[groups %in% colnames(res)]) + res <- pm_set_groups(res, groups[groups %in% colnames(res)]) } res } @@ -356,7 +355,6 @@ sir_calc_df <- function(type, # "proportion", "count" or "both" if (data_has_groups) { # ordering by the groups and two more: "antibiotic" and "interpretation" - # (pm_ungroup here, as we do not use dplyr for summarising) out <- pm_ungroup(out[do.call("order", out[, seq_len(length(groups) + 2), drop = FALSE]), , drop = FALSE]) } else { out <- out[order(out$antibiotic, out$interpretation), , drop = FALSE] diff --git a/R/translate.R b/R/translate.R index 15483133..e7a4682e 100755 --- a/R/translate.R +++ b/R/translate.R @@ -244,10 +244,9 @@ translate_into_language <- function(from, if (NROW(df_trans) == 0 | !any_form_in_patterns) { return(from) } - + lapply( - # starting from last row, since more general translation are on top, such as 'Group' - rev(seq_len(nrow(df_trans))), + seq_len(nrow(df_trans)), function(i) { from_unique_translated <<- gsub( pattern = df_trans$pattern[i], diff --git a/R/zzz.R b/R/zzz.R index 74a69558..1cc10a1b 100755 --- a/R/zzz.R +++ b/R/zzz.R @@ -123,7 +123,6 @@ if (utf8_supported && !is_latex) { s3_register("ggplot2::autoplot", "mic") s3_register("ggplot2::autoplot", "disk") s3_register("ggplot2::autoplot", "resistance_predict") - s3_register("ggplot2::autoplot", "antibiogram") # Support for fortify from the ggplot2 package s3_register("ggplot2::fortify", "sir") s3_register("ggplot2::fortify", "mic") @@ -181,7 +180,7 @@ if (utf8_supported && !is_latex) { if (pkg_is_available("tibble", also_load = FALSE)) { try(loadNamespace("tibble"), silent = TRUE) } - + # reference data - they have additional to improve algorithm speed # they cannot be part of R/sysdata.rda since CRAN thinks it would make the package too large (+3 MB) AMR_env$AB_lookup <- cbind(AMR::antibiotics, AB_LOOKUP) diff --git a/inst/tinytest/test-antibiogram.R b/inst/tinytest/test-antibiogram.R deleted file mode 100644 index eb53d40a..00000000 --- a/inst/tinytest/test-antibiogram.R +++ /dev/null @@ -1,132 +0,0 @@ -# ==================================================================== # -# TITLE # -# AMR: An R Package for Working with Antimicrobial Resistance Data # -# # -# SOURCE # -# https://github.com/msberends/AMR # -# # -# CITE AS # -# Berends MS, Luz CF, Friedrich AW, Sinha BNM, Albers CJ, Glasner C # -# (2022). AMR: An R Package for Working with Antimicrobial Resistance # -# Data. Journal of Statistical Software, 104(3), 1-31. # -# doi:10.18637/jss.v104.i03 # -# # -# Developed at the University of Groningen and the University Medical # -# Center Groningen in The Netherlands, in collaboration with many # -# colleagues from around the world, see our website. # -# # -# This R package is free software; you can freely use and distribute # -# it for both personal and commercial purposes under the terms of the # -# GNU General Public License version 2.0 (GNU GPL-2), as published by # -# the Free Software Foundation. # -# We created this package for both routine data analysis and academic # -# research and it was publicly released in the hope that it will be # -# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. # -# # -# Visit our website for the full manual and a complete tutorial about # -# how to conduct AMR data analysis: https://msberends.github.io/AMR/ # -# ==================================================================== # - - - -# Traditional antibiogram ---------------------------------------------- - -ab1 <- antibiogram(example_isolates, - antibiotics = c(aminoglycosides(), carbapenems())) - -ab2 <- antibiogram(example_isolates, - antibiotics = aminoglycosides(), - ab_transform = "atc", - mo_transform = "gramstain") - -ab3 <- antibiogram(example_isolates, - antibiotics = carbapenems(), - ab_transform = "name", - mo_transform = "name") - -expect_inherits(ab1, "antibiogram") -expect_inherits(ab2, "antibiogram") -expect_inherits(ab3, "antibiogram") -expect_equal(colnames(ab1), c("Pathogen (N min-max)", "AMK", "GEN", "IPM", "KAN", "MEM", "TOB")) -expect_equal(colnames(ab2), c("Pathogen (N min-max)", "J01GB01", "J01GB03", "J01GB04", "J01GB06")) -expect_equal(colnames(ab3), c("Pathogen (N min-max)", "Imipenem", "Meropenem")) -expect_equal(ab3$Meropenem, c(52, NA, 100, 100, NA)) - -# Combined antibiogram ------------------------------------------------- - -# combined antibiotics yield higher empiric coverage -ab4 <- antibiogram(example_isolates, - antibiotics = c("TZP", "TZP+TOB", "TZP+GEN"), - mo_transform = "gramstain") - -ab5 <- antibiogram(example_isolates, - antibiotics = c("TZP", "TZP+TOB"), - mo_transform = "gramstain", - ab_transform = "name", - sep = " & ", - add_total_n = FALSE) - -expect_inherits(ab4, "antibiogram") -expect_inherits(ab5, "antibiogram") -expect_equal(colnames(ab4), c("Pathogen (N min-max)", "TZP", "TZP + GEN", "TZP + TOB")) -expect_equal(colnames(ab5), c("Pathogen", "Piperacillin/tazobactam", "Piperacillin/tazobactam & Tobramycin")) - -# Syndromic antibiogram ------------------------------------------------ - -# the data set could contain a filter for e.g. respiratory specimens -ab6 <- antibiogram(example_isolates, - antibiotics = c(aminoglycosides(), carbapenems()), - syndromic_group = "ward") - -# with a custom language, though this will be determined automatically -# (i.e., this table will be in Spanish on Spanish systems) -ex1 <- example_isolates[which(mo_genus() == "Escherichia"), ] -ab7 <- antibiogram(ex1, - antibiotics = aminoglycosides(), - ab_transform = "name", - syndromic_group = ifelse(ex1$ward == "ICU", - "UCI", "No UCI"), - language = "es") - -expect_inherits(ab6, "antibiogram") -expect_inherits(ab7, "antibiogram") -expect_equal(colnames(ab6), c("Syndromic Group", "Pathogen (N min-max)", "AMK", "GEN", "IPM", "KAN", "MEM", "TOB")) -expect_equal(colnames(ab7), c("Grupo sindrómico", "Patógeno (N min-max)", "Amikacina", "Gentamicina", "Tobramicina")) - -# Weighted-incidence syndromic combination antibiogram (WISCA) --------- - -# the data set could contain a filter for e.g. respiratory specimens -ab8 <- antibiogram(example_isolates, - antibiotics = c("AMC", "AMC+CIP", "TZP", "TZP+TOB"), - mo_transform = "gramstain", - minimum = 10, # this should be >= 30, but now just as example - syndromic_group = ifelse(example_isolates$age >= 65 & - example_isolates$gender == "M", - "WISCA Group 1", "WISCA Group 2")) - -expect_inherits(ab8, "antibiogram") -expect_equal(colnames(ab8), c("Syndromic Group", "Pathogen (N min-max)", "AMC", "AMC + CIP", "TZP", "TZP + TOB")) - -# Generate plots with ggplot2 or base R -------------------------------- - -pdf(NULL) # prevent Rplots.pdf being created - -expect_silent(plot(ab1)) -expect_silent(plot(ab2)) -expect_silent(plot(ab3)) -expect_silent(plot(ab4)) -expect_silent(plot(ab5)) -expect_silent(plot(ab6)) -expect_silent(plot(ab7)) -expect_silent(plot(ab8)) - -if (AMR:::pkg_is_available("ggplot2")) { - expect_inherits(autoplot(ab1), "gg") - expect_inherits(autoplot(ab2), "gg") - expect_inherits(autoplot(ab3), "gg") - expect_inherits(autoplot(ab4), "gg") - expect_inherits(autoplot(ab5), "gg") - expect_inherits(autoplot(ab6), "gg") - expect_inherits(autoplot(ab7), "gg") - expect_inherits(autoplot(ab8), "gg") -} diff --git a/inst/tinytest/test-first_isolate.R b/inst/tinytest/test-first_isolate.R index 4a77ec01..7cc03773 100755 --- a/inst/tinytest/test-first_isolate.R +++ b/inst/tinytest/test-first_isolate.R @@ -228,7 +228,7 @@ expect_identical( # notice that all mo's are distinct, so all are TRUE -expect_true(all(first_isolate(AMR:::distinct(example_isolates, mo, .keep_all = TRUE), info = TRUE) == TRUE)) +expect_true(all(first_isolate(AMR:::pm_distinct(example_isolates, mo, .keep_all = TRUE), info = TRUE) == TRUE)) # only one isolate, so return fast expect_true(first_isolate(data.frame(mo = "Escherichia coli", date = Sys.Date(), patient = "patient"), info = TRUE)) diff --git a/inst/tinytest/test-zzz.R b/inst/tinytest/test-zzz.R index a951e12a..1d850428 100644 --- a/inst/tinytest/test-zzz.R +++ b/inst/tinytest/test-zzz.R @@ -32,21 +32,11 @@ # functions used by import_fn() import_functions <- c( - "%>%" = "dplyr", "%chin%" = "data.table", - "across" = "dplyr", "anti_join" = "dplyr", - "arrange" = "dplyr", - "bind_rows" = "dplyr", "chmatch" = "data.table", - "count" = "dplyr", "cur_column" = "dplyr", - "desc" = "dplyr", - "distinct" = "dplyr", - "everything" = "dplyr", "full_join" = "dplyr", - "group_by" = "dplyr", - "group_vars" = "dplyr", "has_internet" = "curl", "html_attr" = "rvest", "html_children" = "rvest", @@ -56,24 +46,13 @@ import_functions <- c( "html_text" = "rvest", "inner_join" = "dplyr", "insertText" = "rstudioapi", - "kable" = "knitr", - "lag" = "dplyr", "left_join" = "dplyr", - "mutate" = "dplyr", - "n_distinct" = "dplyr", "new_pillar_shaft_simple" = "pillar", - "pivot_longer" = "tidyr", "progress_bar" = "progress", - "pull" = "dplyr", "read_html" = "xml2", - "rename" = "dplyr", "right_join" = "dplyr", - "select" = "dplyr", "semi_join" = "dplyr", - "showQuestion" = "rstudioapi", - "summarise" = "dplyr", - "ungroup" = "dplyr", - "where" = "dplyr" + "showQuestion" = "rstudioapi" ) # functions that are called directly with :: @@ -92,7 +71,6 @@ call_functions <- c( "element_text" = "ggplot2", "expand_limits" = "ggplot2", "facet_wrap" = "ggplot2", - "geom_col" = "ggplot2", "geom_errorbar" = "ggplot2", "geom_path" = "ggplot2", "geom_point" = "ggplot2", @@ -137,7 +115,7 @@ for (i in seq_len(length(import_functions))) { # function should exist in foreign pkg namespace if (AMR:::pkg_is_available(pkg, also_load = FALSE, - min_version = if (pkg %in% c("dplyr", "tidyr")) "1.0.0" else NULL + min_version = if (pkg == "dplyr") "1.0.0" else NULL )) { tst <- !is.null(AMR:::import_fn(name = fn, pkg = pkg, error_on_fail = FALSE)) expect_true(tst, diff --git a/man/ab_property.Rd b/man/ab_property.Rd index 9868eea5..d115aa35 100644 --- a/man/ab_property.Rd +++ b/man/ab_property.Rd @@ -163,7 +163,8 @@ if (require("dplyr")) { set_ab_names(property = "atc") example_isolates \%>\% - set_ab_names(where(is.sir)) + set_ab_names(where(is.sir)) \%>\% + colnames() example_isolates \%>\% set_ab_names(NIT:VAN) \%>\% diff --git a/man/antibiogram.Rd b/man/antibiogram.Rd deleted file mode 100644 index 36f122e9..00000000 --- a/man/antibiogram.Rd +++ /dev/null @@ -1,234 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/antibiogram.R -\name{antibiogram} -\alias{antibiogram} -\alias{plot.antibiogram} -\alias{autoplot.antibiogram} -\alias{print.antibiogram} -\title{Generate Antibiogram: Traditional, Combined, Syndromic, or Weighted-Incidence Syndromic Combination (WISCA)} -\source{ -\itemize{ -\item Klinker KP \emph{et al.} (2021). \strong{Antimicrobial stewardship and antibiograms: importance of moving beyond traditional antibiograms}. \emph{Therapeutic Advances in Infectious Disease}, May 5;8:20499361211011373; \doi{10.1177/20499361211011373} -\item Barbieri E \emph{et al.} (2021). \strong{Development of a Weighted-Incidence Syndromic Combination Antibiogram (WISCA) to guide the choice of the empiric antibiotic treatment for urinary tract infection in paediatric patients: a Bayesian approach} \emph{Antimicrobial Resistance & Infection Control} May 1;10(1):74; \doi{10.1186/s13756-021-00939-2} -\item \strong{M39 Analysis and Presentation of Cumulative Antimicrobial Susceptibility Test Data, 5th Edition}, 2022, \emph{Clinical and Laboratory Standards Institute (CLSI)}. \url{https://clsi.org/standards/products/microbiology/documents/m39/}. -} -} -\usage{ -antibiogram( - x, - antibiotics = where(is.sir), - mo_transform = "shortname", - ab_transform = NULL, - syndromic_group = NULL, - add_total_n = TRUE, - only_all_tested = FALSE, - digits = 0, - col_mo = NULL, - language = get_AMR_locale(), - minimum = 30, - combine_SI = TRUE, - sep = " + " -) - -\method{plot}{antibiogram}(x, ...) - -\method{autoplot}{antibiogram}(object, ...) - -\method{print}{antibiogram}(x, as_kable = !interactive(), ...) -} -\arguments{ -\item{x}{a \link{data.frame} containing at least a column with microorganisms and columns with antibiotic results (class 'sir', see \code{\link[=as.sir]{as.sir()}})} - -\item{antibiotics}{vector of column names, or (any combinations of) \link[=antibiotic_class_selectors]{antibiotic selectors} such as \code{\link[=aminoglycosides]{aminoglycosides()}} or \code{\link[=carbapenems]{carbapenems()}}. For combination antibiograms, this can also be column names separated with \code{"+"}, such as "TZP+TOB" given that the data set contains columns "TZP" and "TOB". See \emph{Examples}.} - -\item{mo_transform}{a character to transform microorganism input - must be "name", "shortname", "gramstain", or one of the column names of the \link{microorganisms} data set: "mo", "fullname", "status", "kingdom", "phylum", "class", "order", "family", "genus", "species", "subspecies", "rank", "ref", "source", "lpsn", "lpsn_parent", "lpsn_renamed_to", "gbif", "gbif_parent", "gbif_renamed_to", "prevalence" or "snomed". Can also be \code{NULL} to not transform the input.} - -\item{ab_transform}{a character to transform antibiotic input - must be one of the column names of the \link{antibiotics} data set: "ab", "cid", "name", "group", "atc", "atc_group1", "atc_group2", "abbreviations", "synonyms", "oral_ddd", "oral_units", "iv_ddd", "iv_units" or "loinc". Can also be \code{NULL} to not transform the input.} - -\item{syndromic_group}{a column name of \code{x}, or values calculated to split rows of \code{x}, e.g. by using \code{\link[=ifelse]{ifelse()}} or \code{\link[dplyr:case_when]{case_when()}}. See \emph{Examples}.} - -\item{add_total_n}{a \link{logical} to indicate whether total available numbers per pathogen should be added to the table (defaults to \code{TRUE}). This will add the lowest and highest number of available isolate per antibiotic (e.g, if for \emph{E. coli} 200 isolates are available for ciprofloxacin and 150 for amoxicillin, the returned number will be "150-200").} - -\item{only_all_tested}{(for combination antibiograms): a \link{logical} to indicate that isolates must be tested for all antibiotics, see \emph{Details}} - -\item{digits}{number of digits to use for rounding} - -\item{col_mo}{column name of the names or codes of the microorganisms (see \code{\link[=as.mo]{as.mo()}}), defaults to the first column of class \code{\link{mo}}. Values will be coerced using \code{\link[=as.mo]{as.mo()}}.} - -\item{language}{language to translate text, which defaults to the system language (see \code{\link[=get_AMR_locale]{get_AMR_locale()}})} - -\item{minimum}{the minimum allowed number of available (tested) isolates. Any isolate count lower than \code{minimum} will return \code{NA} with a warning. The default number of \code{30} isolates is advised by the Clinical and Laboratory Standards Institute (CLSI) as best practice, see \emph{Source}.} - -\item{combine_SI}{a \link{logical} to indicate whether all susceptibility should be determined by results of either S or I, instead of only S (defaults to \code{TRUE})} - -\item{sep}{a separating character for antibiotic columns in combination antibiograms} - -\item{...}{method extensions} - -\item{object}{an \code{\link[=antibiogram]{antibiogram()}} object} - -\item{as_kable}{a \link{logical} to indicate whether the printing should be done using \code{\link[knitr:kable]{knitr::kable()}} (which is the default in non-interactive sessions)} -} -\description{ -Generate an antibiogram, and communicate the results in plots or tables. These functions follow the logic of Klinker \emph{et al.} (2021, \doi{10.1177/20499361211011373}) and Barbieri \emph{et al.} (2021, \doi{10.1186/s13756-021-00939-2}), and allow reporting in e.g. R Markdown and Quarto as well. -} -\details{ -This function returns a table with values between 0 and 100 for \emph{susceptibility}, not resistance. - -\strong{Remember that you should filter your data to let it contain only first isolates!} This is needed to exclude duplicates and to reduce selection bias. Use \code{\link[=first_isolate]{first_isolate()}} to determine them in your data set with one of the four available algorithms. - -There are four antibiogram types, as proposed by Klinker \emph{et al.} (2021, \doi{10.1177/20499361211011373}), and they are all supported by \code{\link[=antibiogram]{antibiogram()}}: -\enumerate{ -\item \strong{Traditional Antibiogram} - -Case example: Susceptibility of \emph{Pseudomonas aeruginosa} to piperacillin/tazobactam (TZP) - -Code example: - -\if{html}{\out{
}}\preformatted{antibiogram(your_data, - antibiotics = "TZP") -}\if{html}{\out{
}} -\item \strong{Combination Antibiogram} - -Case example: Additional susceptibility of \emph{Pseudomonas aeruginosa} to TZP + tobramycin versus TZP alone - -Code example: - -\if{html}{\out{
}}\preformatted{antibiogram(your_data, - antibiotics = c("TZP", "TZP+TOB", "TZP+GEN")) -}\if{html}{\out{
}} -\item \strong{Syndromic Antibiogram} - -Case example: Susceptibility of \emph{Pseudomonas aeruginosa} to TZP among respiratory specimens (obtained among ICU patients only) - -Code example: - -\if{html}{\out{
}}\preformatted{antibiogram(your_data, - antibiotics = penicillins(), - syndromic_group = "ward") -}\if{html}{\out{
}} -\item \strong{Weighted-Incidence Syndromic Combination Antibiogram (WISCA)} - -Case example: Susceptibility of \emph{Pseudomonas aeruginosa} to TZP among respiratory specimens (obtained among ICU patients only) for male patients age >=65 years with heart failure - -Code example: - -\if{html}{\out{
}}\preformatted{antibiogram(your_data, - antibiotics = c("TZP", "TZP+TOB", "TZP+GEN"), - syndromic_group = ifelse(your_data$age >= 65 & your_data$gender == "Male", - "Group 1", "Group 2")) -}\if{html}{\out{
}} -} - -All types of antibiograms can be generated with the functions as described on this page, and can be plotted (using \code{\link[ggplot2:autoplot]{ggplot2::autoplot()}} or base \R \code{\link[=plot]{plot()}}/\code{\link[=barplot]{barplot()}}) or printed into R Markdown / Quarto formats for reports. Use functions from specific 'table reporting' packages to transform the output of \code{\link[=antibiogram]{antibiogram()}} to your needs, e.g. \code{flextable::as_flextable()} or \code{gt::gt()}. - -Note that for combination antibiograms, it is important to realise that susceptibility can be calculated in two ways, which can be set with the \code{only_all_tested} argument (defaults to \code{FALSE}). See this example for two antibiotics, Drug A and Drug B, about how \code{\link[=antibiogram]{antibiogram()}} works to calculate the \%SI: - -\if{html}{\out{
}}\preformatted{-------------------------------------------------------------------- - only_all_tested = FALSE only_all_tested = TRUE - ----------------------- ----------------------- - Drug A Drug B include as include as include as include as - numerator denominator numerator denominator --------- -------- ---------- ----------- ---------- ----------- - S or I S or I X X X X - R S or I X X X X - S or I X X - - - S or I R X X X X - R R - X - X - R - - - - - S or I X X - - - R - - - - - - - - - --------------------------------------------------------------------- -}\if{html}{\out{
}} - -Printing the antibiogram in non-interactive sessions will be done by \code{\link[knitr:kable]{knitr::kable()}}, with support for \link[knitr:kable]{all their implemented formats}, such as "markdown". The knitr format will be automatically determined if printed inside a knitr document (LaTeX, HTML, etc.). -} -\examples{ -# example_isolates is a data set available in the AMR package. -# run ?example_isolates for more info. -example_isolates - - -# Traditional antibiogram ---------------------------------------------- - -antibiogram(example_isolates, - antibiotics = c(aminoglycosides(), carbapenems())) - -antibiogram(example_isolates, - antibiotics = aminoglycosides(), - ab_transform = "atc", - mo_transform = "gramstain") - -antibiogram(example_isolates, - antibiotics = carbapenems(), - ab_transform = "name", - mo_transform = "name") - - -# Combined antibiogram ------------------------------------------------- - -# combined antibiotics yield higher empiric coverage -antibiogram(example_isolates, - antibiotics = c("TZP", "TZP+TOB", "TZP+GEN"), - mo_transform = "gramstain") - -antibiogram(example_isolates, - antibiotics = c("TZP", "TZP+TOB"), - mo_transform = "gramstain", - ab_transform = "name", - sep = " & ") - - -# Syndromic antibiogram ------------------------------------------------ - -# the data set could contain a filter for e.g. respiratory specimens -antibiogram(example_isolates, - antibiotics = c(aminoglycosides(), carbapenems()), - syndromic_group = "ward") - -# with a custom language, though this will be determined automatically -# (i.e., this table will be in Spanish on Spanish systems) -ex1 <- example_isolates[which(mo_genus() == "Escherichia"), ] -antibiogram(ex1, - antibiotics = aminoglycosides(), - ab_transform = "name", - syndromic_group = ifelse(ex1$ward == "ICU", - "UCI", "No UCI"), - language = "es") - - -# Weighted-incidence syndromic combination antibiogram (WISCA) --------- - -# the data set could contain a filter for e.g. respiratory specimens -antibiogram(example_isolates, - antibiotics = c("AMC", "AMC+CIP", "TZP", "TZP+TOB"), - mo_transform = "gramstain", - minimum = 10, # this should be >= 30, but now just as example - syndromic_group = ifelse(example_isolates$age >= 65 & - example_isolates$gender == "M", - "WISCA Group 1", "WISCA Group 2")) - - -# Generate plots with ggplot2 or base R -------------------------------- - -ab1 <- antibiogram(example_isolates, - antibiotics = c("AMC", "CIP", "TZP", "TZP+TOB"), - mo_transform = "gramstain") -ab2 <- antibiogram(example_isolates, - antibiotics = c("AMC", "CIP", "TZP", "TZP+TOB"), - mo_transform = "gramstain", - syndromic_group = "ward") - -plot(ab1) - -if (requireNamespace("ggplot2")) { - ggplot2::autoplot(ab1) -} - -plot(ab2) - -if (requireNamespace("ggplot2")) { - ggplot2::autoplot(ab2) -} -} diff --git a/man/antibiotic_class_selectors.Rd b/man/antibiotic_class_selectors.Rd index 35b65574..63789862 100644 --- a/man/antibiotic_class_selectors.Rd +++ b/man/antibiotic_class_selectors.Rd @@ -110,7 +110,7 @@ not_intrinsic_resistant( \item{filter}{an \link{expression} to be evaluated in the \link{antibiotics} data set, such as \code{name \%like\% "trim"}} -\item{col_mo}{column name of the names or codes of the microorganisms (see \code{\link[=as.mo]{as.mo()}}), defaults to the first column of class \code{\link{mo}}. Values will be coerced using \code{\link[=as.mo]{as.mo()}}.} +\item{col_mo}{column name of the IDs of the microorganisms (see \code{\link[=as.mo]{as.mo()}}), defaults to the first column of class \code{\link{mo}}. Values will be coerced using \code{\link[=as.mo]{as.mo()}}.} \item{version_expertrules}{the version number to use for the EUCAST Expert Rules and Intrinsic Resistance guideline. Can be either "3.3", "3.2" or "3.1".} } diff --git a/man/as.sir.Rd b/man/as.sir.Rd index c9059e5c..92a88299 100644 --- a/man/as.sir.Rd +++ b/man/as.sir.Rd @@ -94,7 +94,7 @@ sir_interpretation_history(clean = FALSE) \item{include_PKPD}{a \link{logical} to indicate that PK/PD clinical breakpoints must be applied as a last resort, defaults to \code{TRUE}. Can also be set with the option \code{\link[=AMR-options]{AMR_include_PKPD}}.} -\item{col_mo}{column name of the names or codes of the microorganisms (see \code{\link[=as.mo]{as.mo()}}), defaults to the first column of class \code{\link{mo}}. Values will be coerced using \code{\link[=as.mo]{as.mo()}}.} +\item{col_mo}{column name of the IDs of the microorganisms (see \code{\link[=as.mo]{as.mo()}}), defaults to the first column of class \code{\link{mo}}. Values will be coerced using \code{\link[=as.mo]{as.mo()}}.} \item{clean}{a \link{logical} to indicate whether previously stored results should be forgotten after returning the 'logbook' with results} } @@ -156,7 +156,7 @@ After using \code{\link[=as.sir]{as.sir()}}, you can use the \code{\link[=eucast \subsection{Machine-Readable Interpretation Guidelines}{ -The repository of this package \href{https://github.com/msberends/AMR/blob/main/data-raw/clinical_breakpoints.txt}{contains a machine-readable version} of all guidelines. This is a CSV file consisting of 18 308 rows and 11 columns. This file is machine-readable, since it contains one row for every unique combination of the test method (MIC or disk diffusion), the antimicrobial drug and the microorganism. \strong{This allows for easy implementation of these rules in laboratory information systems (LIS)}. Note that it only contains interpretation guidelines for humans - interpretation guidelines from CLSI for animals were removed. +The repository of this package \href{https://github.com/msberends/AMR/blob/main/data-raw/clinical_breakpoints.txt}{contains a machine-readable version} of all guidelines. This is a CSV file consisting of 18,308 rows and 11 columns. This file is machine-readable, since it contains one row for every unique combination of the test method (MIC or disk diffusion), the antimicrobial drug and the microorganism. \strong{This allows for easy implementation of these rules in laboratory information systems (LIS)}. Note that it only contains interpretation guidelines for humans - interpretation guidelines from CLSI for animals were removed. } \subsection{Other}{ diff --git a/man/bug_drug_combinations.Rd b/man/bug_drug_combinations.Rd index 1d9b0d4d..7374195f 100644 --- a/man/bug_drug_combinations.Rd +++ b/man/bug_drug_combinations.Rd @@ -16,14 +16,14 @@ bug_drug_combinations(x, col_mo = NULL, FUN = mo_shortname, ...) add_ab_group = TRUE, remove_intrinsic_resistant = FALSE, decimal.mark = getOption("OutDec"), - big.mark = ifelse(decimal.mark == ",", " ", ","), + big.mark = ifelse(decimal.mark == ",", ".", ","), ... ) } \arguments{ \item{x}{a data set with antibiotic columns, such as \code{amox}, \code{AMX} and \code{AMC}} -\item{col_mo}{column name of the names or codes of the microorganisms (see \code{\link[=as.mo]{as.mo()}}), defaults to the first column of class \code{\link{mo}}. Values will be coerced using \code{\link[=as.mo]{as.mo()}}.} +\item{col_mo}{column name of the IDs of the microorganisms (see \code{\link[=as.mo]{as.mo()}}), defaults to the first column of class \code{\link{mo}}. Values will be coerced using \code{\link[=as.mo]{as.mo()}}.} \item{FUN}{the function to call on the \code{mo} column to transform the microorganism codes, defaults to \code{\link[=mo_shortname]{mo_shortname()}}} @@ -59,11 +59,8 @@ The function \code{\link[=format]{format()}} calculates the resistance per bug-d } \examples{ \donttest{ -#' # example_isolates is a data set available in the AMR package. -# run ?example_isolates for more info. -example_isolates - x <- bug_drug_combinations(example_isolates) +head(x) format(x, translate_ab = "name (atc)") # Use FUN to change to transformation of microorganism codes diff --git a/man/clinical_breakpoints.Rd b/man/clinical_breakpoints.Rd index 3fcd1c2e..63c80045 100644 --- a/man/clinical_breakpoints.Rd +++ b/man/clinical_breakpoints.Rd @@ -5,7 +5,7 @@ \alias{clinical_breakpoints} \title{Data Set with Clinical Breakpoints for SIR Interpretation} \format{ -A \link[tibble:tibble]{tibble} with 18 308 observations and 11 variables: +A \link[tibble:tibble]{tibble} with 18,308 observations and 11 variables: \itemize{ \item \code{guideline}\cr Name of the guideline \item \code{method}\cr Either "DISK" or "MIC" diff --git a/man/eucast_rules.Rd b/man/eucast_rules.Rd index a0a6d533..c44b32ff 100644 --- a/man/eucast_rules.Rd +++ b/man/eucast_rules.Rd @@ -38,7 +38,7 @@ eucast_dosage(ab, administration = "iv", version_breakpoints = 12) \arguments{ \item{x}{a data set with antibiotic columns, such as \code{amox}, \code{AMX} and \code{AMC}} -\item{col_mo}{column name of the names or codes of the microorganisms (see \code{\link[=as.mo]{as.mo()}}), defaults to the first column of class \code{\link{mo}}. Values will be coerced using \code{\link[=as.mo]{as.mo()}}.} +\item{col_mo}{column name of the IDs of the microorganisms (see \code{\link[=as.mo]{as.mo()}}), defaults to the first column of class \code{\link{mo}}. Values will be coerced using \code{\link[=as.mo]{as.mo()}}.} \item{info}{a \link{logical} to indicate whether progress should be printed to the console, defaults to only print while in interactive sessions} diff --git a/man/example_isolates.Rd b/man/example_isolates.Rd index 8445c54b..4e6e59b9 100644 --- a/man/example_isolates.Rd +++ b/man/example_isolates.Rd @@ -3,9 +3,9 @@ \docType{data} \name{example_isolates} \alias{example_isolates} -\title{Data Set with 2 000 Example Isolates} +\title{Data Set with 2,000 Example Isolates} \format{ -A \link[tibble:tibble]{tibble} with 2 000 observations and 46 variables: +A \link[tibble:tibble]{tibble} with 2,000 observations and 46 variables: \itemize{ \item \code{date}\cr Date of receipt at the laboratory \item \code{patient}\cr ID of the patient @@ -20,7 +20,7 @@ A \link[tibble:tibble]{tibble} with 2 000 observations and 46 variables: example_isolates } \description{ -A data set containing 2 000 microbial isolates with their full antibiograms. This data set contains randomised fictitious data, but reflects reality and can be used to practise AMR data analysis. For examples, please read \href{https://msberends.github.io/AMR/articles/AMR.html}{the tutorial on our website}. +A data set containing 2,000 microbial isolates with their full antibiograms. This data set contains randomised fictitious data, but reflects reality and can be used to practise AMR data analysis. For examples, please read \href{https://msberends.github.io/AMR/articles/AMR.html}{the tutorial on our website}. } \details{ Like all data sets in this package, this data set is publicly available for download in the following formats: R, MS Excel, Apache Feather, Apache Parquet, SPSS, SAS, and Stata. Please visit \href{https://msberends.github.io/AMR/articles/datasets.html}{our website for the download links}. The actual files are of course available on \href{https://github.com/msberends/AMR/tree/main/data-raw}{our GitHub repository}. diff --git a/man/example_isolates_unclean.Rd b/man/example_isolates_unclean.Rd index 37bcced6..0c0b2477 100644 --- a/man/example_isolates_unclean.Rd +++ b/man/example_isolates_unclean.Rd @@ -5,7 +5,7 @@ \alias{example_isolates_unclean} \title{Data Set with Unclean Data} \format{ -A \link[tibble:tibble]{tibble} with 3 000 observations and 8 variables: +A \link[tibble:tibble]{tibble} with 3,000 observations and 8 variables: \itemize{ \item \code{patient_id}\cr ID of the patient \item \code{date}\cr date of receipt at the laboratory @@ -18,7 +18,7 @@ A \link[tibble:tibble]{tibble} with 3 000 observations and 8 variables: example_isolates_unclean } \description{ -A data set containing 3 000 microbial isolates that are not cleaned up and consequently not ready for AMR data analysis. This data set can be used for practice. +A data set containing 3,000 microbial isolates that are not cleaned up and consequently not ready for AMR data analysis. This data set can be used for practice. } \details{ Like all data sets in this package, this data set is publicly available for download in the following formats: R, MS Excel, Apache Feather, Apache Parquet, SPSS, SAS, and Stata. Please visit \href{https://msberends.github.io/AMR/articles/datasets.html}{our website for the download links}. The actual files are of course available on \href{https://github.com/msberends/AMR/tree/main/data-raw}{our GitHub repository}. diff --git a/man/first_isolate.Rd b/man/first_isolate.Rd index 70a1000e..8069d151 100644 --- a/man/first_isolate.Rd +++ b/man/first_isolate.Rd @@ -52,7 +52,7 @@ filter_first_isolate( \item{col_patient_id}{column name of the unique IDs of the patients, defaults to the first column that starts with 'patient' or 'patid' (case insensitive)} -\item{col_mo}{column name of the names or codes of the microorganisms (see \code{\link[=as.mo]{as.mo()}}), defaults to the first column of class \code{\link{mo}}. Values will be coerced using \code{\link[=as.mo]{as.mo()}}.} +\item{col_mo}{column name of the IDs of the microorganisms (see \code{\link[=as.mo]{as.mo()}}), defaults to the first column of class \code{\link{mo}}. Values will be coerced using \code{\link[=as.mo]{as.mo()}}.} \item{col_testcode}{column name of the test codes. Use \code{col_testcode = NULL} to \strong{not} exclude certain test codes (such as test codes for screening). In that case \code{testcodes_exclude} will be ignored.} @@ -109,14 +109,17 @@ All mentioned methods are covered in the \code{\link[=first_isolate]{first_isola \strong{Isolate-based} \tab \code{first_isolate(x, method = "isolate-based")} \cr \emph{(= all isolates)} \tab \cr \tab \cr + \tab \cr \strong{Patient-based} \tab \code{first_isolate(x, method = "patient-based")} \cr \emph{(= first isolate per patient)} \tab \cr \tab \cr + \tab \cr \strong{Episode-based} \tab \code{first_isolate(x, method = "episode-based")}, or: \cr \emph{(= first isolate per episode)} \tab \cr - 7-Day interval from initial isolate \tab - \code{first_isolate(x, method = "e", episode_days = 7)} \cr - 30-Day interval from initial isolate \tab - \code{first_isolate(x, method = "e", episode_days = 30)} \cr \tab \cr + \tab \cr \strong{Phenotype-based} \tab \code{first_isolate(x, method = "phenotype-based")}, or: \cr \emph{(= first isolate per phenotype)} \tab \cr - Major difference in any antimicrobial result \tab - \code{first_isolate(x, type = "points")} \cr @@ -165,7 +168,7 @@ The default method is phenotype-based (using \code{type = "points"}) and episode # `example_isolates` is a data set available in the AMR package. # See ?example_isolates. -example_isolates[first_isolate(info = TRUE), ] +example_isolates[first_isolate(), ] \donttest{ # get all first Gram-negatives example_isolates[which(first_isolate(info = FALSE) & mo_is_gram_negative()), ] @@ -173,7 +176,7 @@ example_isolates[which(first_isolate(info = FALSE) & mo_is_gram_negative()), ] if (require("dplyr")) { # filter on first isolates using dplyr: example_isolates \%>\% - filter(first_isolate(info = TRUE)) + filter(first_isolate()) } if (require("dplyr")) { # short-hand version: @@ -184,7 +187,7 @@ if (require("dplyr")) { # flag the first isolates per group: example_isolates \%>\% group_by(ward) \%>\% - mutate(first = first_isolate(info = FALSE)) \%>\% + mutate(first = first_isolate()) \%>\% select(ward, date, patient, mo, first) } } diff --git a/man/intrinsic_resistant.Rd b/man/intrinsic_resistant.Rd index 0d7797a0..58bf2fa6 100644 --- a/man/intrinsic_resistant.Rd +++ b/man/intrinsic_resistant.Rd @@ -5,7 +5,7 @@ \alias{intrinsic_resistant} \title{Data Set with Bacterial Intrinsic Resistance} \format{ -A \link[tibble:tibble]{tibble} with 134 634 observations and 2 variables: +A \link[tibble:tibble]{tibble} with 134,634 observations and 2 variables: \itemize{ \item \code{mo}\cr Microorganism ID \item \code{ab}\cr Antibiotic ID diff --git a/man/key_antimicrobials.Rd b/man/key_antimicrobials.Rd index 814ff4e4..2fe3c999 100644 --- a/man/key_antimicrobials.Rd +++ b/man/key_antimicrobials.Rd @@ -35,7 +35,7 @@ antimicrobials_equal( \arguments{ \item{x}{a \link{data.frame} with antibiotics columns, like \code{AMX} or \code{amox}. Can be left blank to determine automatically} -\item{col_mo}{column name of the names or codes of the microorganisms (see \code{\link[=as.mo]{as.mo()}}), defaults to the first column of class \code{\link{mo}}. Values will be coerced using \code{\link[=as.mo]{as.mo()}}.} +\item{col_mo}{column name of the IDs of the microorganisms (see \code{\link[=as.mo]{as.mo()}}), defaults to the first column of class \code{\link{mo}}. Values will be coerced using \code{\link[=as.mo]{as.mo()}}.} \item{universal}{names of \strong{broad-spectrum} antimicrobial drugs, case-insensitive. Set to \code{NULL} to ignore. See \emph{Details} for the default antimicrobial drugs} diff --git a/man/mdro.Rd b/man/mdro.Rd index 0720f6d7..ce862310 100644 --- a/man/mdro.Rd +++ b/man/mdro.Rd @@ -48,7 +48,7 @@ eucast_exceptional_phenotypes(x = NULL, only_sir_columns = FALSE, ...) \item{guideline}{a specific guideline to follow, see sections \emph{Supported international / national guidelines} and \emph{Using Custom Guidelines} below. When left empty, the publication by Magiorakos \emph{et al.} (see below) will be followed.} -\item{col_mo}{column name of the names or codes of the microorganisms (see \code{\link[=as.mo]{as.mo()}}), defaults to the first column of class \code{\link{mo}}. Values will be coerced using \code{\link[=as.mo]{as.mo()}}.} +\item{col_mo}{column name of the IDs of the microorganisms (see \code{\link[=as.mo]{as.mo()}}), defaults to the first column of class \code{\link{mo}}. Values will be coerced using \code{\link[=as.mo]{as.mo()}}.} \item{info}{a \link{logical} to indicate whether progress should be printed to the console, defaults to only print while in interactive sessions} diff --git a/man/microorganisms.Rd b/man/microorganisms.Rd index 0d6929f2..c7cacf77 100644 --- a/man/microorganisms.Rd +++ b/man/microorganisms.Rd @@ -3,9 +3,9 @@ \docType{data} \name{microorganisms} \alias{microorganisms} -\title{Data Set with 52 142 Microorganisms} +\title{Data Set with 52,142 Microorganisms} \format{ -A \link[tibble:tibble]{tibble} with 52 142 observations and 22 variables: +A \link[tibble:tibble]{tibble} with 52,142 observations and 22 variables: \itemize{ \item \code{mo}\cr ID of microorganism as used by this package \item \code{fullname}\cr Full name, like \code{"Escherichia coli"}. For the taxonomic ranks genus, species and subspecies, this is the 'pasted' text of genus, species, and subspecies. For all taxonomic ranks higher than genus, this is the name of the taxon. diff --git a/man/microorganisms.codes.Rd b/man/microorganisms.codes.Rd index 6c4c6ff6..97578416 100644 --- a/man/microorganisms.codes.Rd +++ b/man/microorganisms.codes.Rd @@ -3,9 +3,9 @@ \docType{data} \name{microorganisms.codes} \alias{microorganisms.codes} -\title{Data Set with 5 910 Common Microorganism Codes} +\title{Data Set with 5,910 Common Microorganism Codes} \format{ -A \link[tibble:tibble]{tibble} with 5 910 observations and 2 variables: +A \link[tibble:tibble]{tibble} with 5,910 observations and 2 variables: \itemize{ \item \code{code}\cr Commonly used code of a microorganism \item \code{mo}\cr ID of the microorganism in the \link{microorganisms} data set diff --git a/man/mo_property.Rd b/man/mo_property.Rd index 03e85534..d93445e7 100644 --- a/man/mo_property.Rd +++ b/man/mo_property.Rd @@ -278,7 +278,7 @@ mo_property( \item{open}{browse the URL using \code{\link[utils:browseURL]{browseURL()}}} -\item{property}{one of the column names of the \link{microorganisms} data set: "mo", "fullname", "status", "kingdom", "phylum", "class", "order", "family", "genus", "species", "subspecies", "rank", "ref", "source", "lpsn", "lpsn_parent", "lpsn_renamed_to", "gbif", "gbif_parent", "gbif_renamed_to", "prevalence" or "snomed"} +\item{property}{one of the column names of the \link{microorganisms} data set: "mo", "fullname", "status", "kingdom", "phylum", "class", "order", "family", "genus", "species", "subspecies", "rank", "ref", "source", "lpsn", "lpsn_parent", "lpsn_renamed_to", "gbif", "gbif_parent", "gbif_renamed_to", "prevalence" or "snomed", or must be \code{"shortname"}} } \value{ \itemize{ diff --git a/man/proportion.Rd b/man/proportion.Rd index 38c3b081..521a5d45 100644 --- a/man/proportion.Rd +++ b/man/proportion.Rd @@ -13,7 +13,7 @@ \alias{proportion_S} \alias{proportion_df} \alias{sir_df} -\title{Calculate Antimicrobial Resistance} +\title{Calculate Microbial Resistance} \source{ \strong{M39 Analysis and Presentation of Cumulative Antimicrobial Susceptibility Test Data, 5th Edition}, 2022, \emph{Clinical and Laboratory Standards Institute (CLSI)}. \url{https://clsi.org/standards/products/microbiology/documents/m39/}. } @@ -98,7 +98,7 @@ The function \code{\link[=resistance]{resistance()}} is equal to the function \c Use \code{\link[=sir_confidence_interval]{sir_confidence_interval()}} to calculate the confidence interval, which relies on \code{\link[=binom.test]{binom.test()}}, i.e., the Clopper-Pearson method. This function returns a vector of length 2 at default for antimicrobial \emph{resistance}. Change the \code{side} argument to "left"/"min" or "right"/"max" to return a single value, and change the \code{ab_result} argument to e.g. \code{c("S", "I")} to test for antimicrobial \emph{susceptibility}, see Examples. -\strong{Remember that you should filter your data to let it contain only first isolates!} This is needed to exclude duplicates and to reduce selection bias. Use \code{\link[=first_isolate]{first_isolate()}} to determine them in your data set with one of the four available algorithms. +\strong{Remember that you should filter your data to let it contain only first isolates!} This is needed to exclude duplicates and to reduce selection bias. Use \code{\link[=first_isolate]{first_isolate()}} to determine them in your data set. These functions are not meant to count isolates, but to calculate the proportion of resistance/susceptibility. Use the \code{\link[=count]{count()}} functions to count isolates. The function \code{\link[=susceptibility]{susceptibility()}} is essentially equal to \code{count_susceptible() / count_all()}. \emph{Low counts can influence the outcome - the \code{proportion} functions may camouflage this, since they only return the proportion (albeit being dependent on the \code{minimum} argument).} @@ -162,7 +162,6 @@ This AMR package honours this insight. Use \code{\link[=susceptibility]{suscepti \examples{ # example_isolates is a data set available in the AMR package. # run ?example_isolates for more info. -example_isolates # base R ------------------------------------------------------------ # determines \%R diff --git a/tests/tinytest.R b/tests/tinytest.R index 2f0bf5ea..01ef1c7f 100644 --- a/tests/tinytest.R +++ b/tests/tinytest.R @@ -39,11 +39,6 @@ if (tryCatch(isTRUE(AMR:::import_fn("isJob", "rstudioapi")()), error = function( .libPaths(c(Sys.getenv("R_LIBS_USER_GH_ACTIONS"), .libPaths())) if (AMR:::pkg_is_available("tinytest", also_load = TRUE)) { library(AMR) - if (identical(AMR:::import_fn("select", "dplyr"), AMR:::select)) { - message("\n\n------------------------------------\nThis test will rely on {dplyr} verbs\n------------------------------------\n\n") - } else { - message("\n\n---------------------------------------------------------------------\nThis test will rely on {poorman} verbs (installed state dplyr: ", AMR:::pkg_is_available("dplyr", also_load = FALSE), ")\n---------------------------------------------------------------------\n\n") - } # set language set_AMR_locale("English") # set some functions if on old R