From 822e9de82c4c02fee3128d29b839c27d1686f6ba Mon Sep 17 00:00:00 2001 From: "Matthijs S. Berends" Date: Wed, 8 Feb 2023 13:48:06 +0100 Subject: [PATCH] pm update, unit test fix? --- DESCRIPTION | 4 +- NEWS.md | 2 +- R/aa_helper_pm_functions.R | 1741 ++++++++++++++++++---------- R/aaa_helper_functions.R | 68 +- R/ab_selectors.R | 1 - R/bug_drug_combinations.R | 5 +- R/eucast_rules.R | 2 +- data-raw/antibiograms.Rmd | 62 + data-raw/antibiograms.html | 848 ++++++++++++++ data-raw/poorman_prepend.R | 4 +- data-raw/reproduction_of_poorman.R | 94 +- inst/tinytest/test-zzz.R | 1 - tests/tinytest.R | 6 +- 13 files changed, 2118 insertions(+), 720 deletions(-) mode change 100755 => 100644 R/aa_helper_pm_functions.R create mode 100644 data-raw/antibiograms.Rmd create mode 100644 data-raw/antibiograms.html diff --git a/DESCRIPTION b/DESCRIPTION index a668bfb7..a7ca8cb0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR -Version: 1.8.2.9109 -Date: 2023-02-06 +Version: 1.8.2.9110 +Date: 2023-02-08 Title: Antimicrobial Resistance Data Analysis Description: Functions to simplify and standardise antimicrobial resistance (AMR) data analysis and to work with microbial and antimicrobial properties by diff --git a/NEWS.md b/NEWS.md index 854c0a55..b4b4464a 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# AMR 1.8.2.9109 +# AMR 1.8.2.9110 *(this beta version will eventually become v2.0! We're happy to reach a new major milestone soon!)* diff --git a/R/aa_helper_pm_functions.R b/R/aa_helper_pm_functions.R old mode 100755 new mode 100644 index 3f9d6d1d..67b27e4b --- a/R/aa_helper_pm_functions.R +++ b/R/aa_helper_pm_functions.R @@ -32,11 +32,11 @@ # Source file: data-raw/reproduction_of_poorman.R # ------------------------------------------------ -# poorman: a package to replace all dplyr functions with base R so we can lose dependency on dplyr. +# {poorman}: a package to replace all dplyr functions with base R so we can lose dependency on {dplyr}. # These functions were downloaded from https://github.com/nathaneastwood/poorman, -# from this commit: https://github.com/nathaneastwood/poorman/tree/52eb6947e0b4430cd588976ed8820013eddf955f. +# from this commit: https://github.com/nathaneastwood/poorman/tree/3cc0a9920b1eb559dd166f548561244189586b3a. # -# All functions are prefixed with 'pm_' to make it obvious that they are dplyr substitutes. +# All functions are prefixed with 'pm_' to make it obvious that they are {dplyr} substitutes. # # All code below was released under MIT license, that permits 'free of charge, to any person obtaining a # copy of the software and associated documentation files (the "Software"), to deal in the Software @@ -44,183 +44,368 @@ # distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software # is furnished to do so', given that a copyright notice is given in the software. # -# Copyright notice on 19 September 2020, the day this code was downloaded, as found on -# https://github.com/nathaneastwood/poorman/blob/52eb6947e0b4430cd588976ed8820013eddf955f/LICENSE: +# Copyright notice on 8 February 2023, the day this code was downloaded, as found on +# https://github.com/nathaneastwood/poorman/blob/3cc0a9920b1eb559dd166f548561244189586b3a/LICENSE: # YEAR: 2020 # COPYRIGHT HOLDER: Nathan Eastwood -pm_arrange <- function(.data, ...) { - pm_check_is_dataframe(.data) - if ("grouped_data" %in% class(.data)) { - pm_arrange.grouped_data(.data, ...) +pm_across <- function(.cols = everything(), .fns = NULL, ..., .names = NULL) { + setup <- pm_setup_across(substitute(.cols), .fns, .names) + if (length(setup$names) == 1 && grepl("\\{\\.col\\}|\\{\\.fn\\}", setup$names)) { + ref <- setup$names + id <- 1 + fn_names <- unique(names(setup$funs)) + for (i in seq_along(setup$cols)) { + .col <- setup$cols[i] + for (j in seq_along(fn_names)) { + .fn <- fn_names[j] + setup$names[id] <- gluestick(ref) + id <- id + 1 + } + } + } + cols <- setup$cols + n_cols <- length(cols) + if (n_cols == 0L) return(data.frame()) + funs <- setup$funs + data <- pm_context$get_columns(cols) + names <- setup$names + if (is.null(funs)) { + data <- data.frame(data) + if (is.null(names)) { + return(data) + } else { + return(setNames(data, names)) + } + } + n_fns <- length(funs) + res <- vector(mode = "list", length = n_fns * n_cols) + k <- 1L + for (i in seq_len(n_cols)) { + pm_context$cur_column <- cols[[i]] + col <- data[[i]] + for (j in seq_len(n_fns)) { + res[[k]] <- funs[[j]](col, ...) + k <- k + 1L + } + } + if (is.null(names(res))) names(res) <- names + as.data.frame(res) +} +pm_if_any <- function(.cols, .fns = NULL, ..., .names = NULL) { + df <- do.call(across, list(.cols = substitute(.cols), .fns = .fns, ..., .names = .names)) + if (nrow(df) == 0L) return(FALSE) + check_if_types(df) + Reduce(`|`, df) +} +pm_if_all <- function(.cols, .fns = NULL, ..., .names = NULL) { + df <- do.call(across, list(.cols = substitute(.cols), .fns = .fns, ..., .names = .names)) + if (nrow(df) == 0L) return(FALSE) + check_if_types(df) + Reduce(`&`, df) +} +pm_check_if_types <- function(df) { + types <- vapply(df, class, NA_character_) + not_logical <- types != "logical" + if (any(not_logical)) { + stop( + "Cannot convert the following columns to :\n ", + paste0(colnames(df)[not_logical], " <", types, "> ", collapse = "\n ") + ) + } +} +pm_setup_across <- function(.cols, .fns, .names) { + cols <- pm_eval_select_pos(.data = pm_context$.data, .cols, .pm_group_pos = FALSE) + cols <- pm_context$get_colnames()[cols] + if (pm_context$is_grouped()) cols <- setdiff(cols, pm_group_vars(pm_context$.data)) + funs <- if (is.null(.fns)) NULL else if (!is.list(.fns)) list(.fns) else .fns + if (is.null(funs)) return(list(cols = cols, funs = funs, names = .names)) + f_nms <- names(funs) + if (is.null(f_nms) && !is.null(.fns)) names(funs) <- seq_along(funs) + if (any(nchar(f_nms) == 0L)) { + miss <- which(nchar(f_nms) == 0L) + names(funs)[miss] <- miss + f_nms <- names(funs) + } + funs <- lapply(funs, as_function) + names <- if (!is.null(.names)) { + .names } else { - pm_arrange.default(.data, ...) + if (length(funs) == 1L && is.null(f_nms)) { + cols + } else { + nms <- do.call(paste, c(rev(expand.grid(names(funs), cols)), sep = "_")) + if (length(nms) == 0L) nms <- NULL + nms + } } + list(cols = cols, funs = funs, names = names) } - -pm_arrange.default <- function(.data, ...) { - pm_context$setup(.data) - on.exit(pm_context$clean(), add = TRUE) - rows <- eval(substitute(order(...)), envir = pm_context$.data) - .data[rows, , drop = FALSE] +pm_arrange <- function(.data, ...) { + pm_arrange.data.frame(.data, ...) } - -pm_arrange.grouped_data <- function(.data, ...) { - pm_apply_grouped_function("pm_arrange", .data, drop = TRUE, ...) -} -pm_between <- function(x, left, right) { - if (!is.null(attr(x, "class")) && !inherits(x, c("Date", "POSIXct"))) { - warning("`pm_between()` called on numeric vector with S3 class") +pm_arrange.data.frame <- function(.data, ..., .by_group = FALSE) { + dots <- pm_dotdotdot(...) + is_grouped <- pm_has_groups(.data) + if (isTRUE(.by_group)) dots <- c(groups(.data), dots) + rows <- pm_arrange_rows(.data = .data, dots) + row_number <- attr(.data, "row.names") + out <- .data[rows, , drop = FALSE] + if (is.numeric(row_number)) { + row.names(out) <- row_number + } + if (is_grouped) { + attr(out, "groups") <- pm_calculate_groups(out, pm_group_vars(out)) + } + out +} +pm_arrange_rows <- function(.data, dots) { + if (length(dots) == 0L) return(seq_len(nrow(.data))) + for (i in seq_along(dots)) { + tmp <- deparse(dots[[i]]) + if (startsWith(tmp, "desc(")) { + tmp <- gsub("^desc\\(", "-", tmp) + tmp <- gsub("\\)$", "", tmp) + } + dots[[i]] <- parse(text = tmp, keep.source = FALSE)[[1]] + } + used <- unname(do.call(c, lapply(dots, pm_find_used))) + used <- used[used %in% colnames(.data)] + for (i in seq_along(dots)) { + if (is.character(.data[[used[[i]]]])) { + .data[[used[[i]]]] <- factor(.data[[used[[i]]]]) + } + if (is.factor(.data[[used[[i]]]]) && + (startsWith(deparse(dots[[i]]), "desc(") || + startsWith(deparse(dots[[i]]), "-"))) { + dots[[i]] <- bquote(-xtfrm(.(as.name(used[[i]])))) + } + } + data <- do.call(pm_transmute, c(list(.data = pm_ungroup(.data)), dots)) + do.call(order, c(data, list(decreasing = FALSE, na.last = TRUE))) +} +pm_bind_cols <- function(...) { + lsts <- list(...) + lsts <- squash(lsts) + lsts <- Filter(Negate(is.null), lsts) + if (length(lsts) == 0L) return(data.frame()) + lapply(lsts, function(x) is_df_or_vector(x)) + lsts <- do.call(cbind, lsts) + if (!is.data.frame(lsts)) lsts <- as.data.frame(lsts) + lsts +} +pm_bind_rows <- function(..., .id = NULL) { + lsts <- list(...) + lsts <- flatten(lsts) + lsts <- Filter(Negate(is.null), lsts) + lapply(lsts, function(x) is_df_or_vector(x)) + lapply(lsts, function(x) if (is.atomic(x) && !is_named(x)) stop("Vectors must be named.")) + if (!missing(.id)) { + lsts <- lapply(seq_along(lsts), function(i) { + nms <- names(lsts) + id_df <- data.frame(id = if (is.null(nms)) as.character(i) else nms[i], stringsAsFactors = FALSE) + colnames(id_df) <- .id + cbind(id_df, lsts[[i]]) + }) + } + nms <- unique(unlist(lapply(lsts, names))) + lsts <- lapply( + lsts, + function(x) { + if (!is.data.frame(x)) x <- data.frame(as.list(x), stringsAsFactors = FALSE) + for (i in nms[!nms %in% names(x)]) x[[i]] <- NA + x + } + ) + names(lsts) <- NULL + do.call(rbind, lsts) +} +pm_case_when <- function(...) { + fs <- list(...) + lapply(fs, function(x) if (!inherits(x, "formula")) stop("`case_when()` requires formula inputs.")) + n <- length(fs) + if (n == 0L) stop("No cases provided.") + query <- vector("list", n) + value <- vector("list", n) + default_env <- parent.frame() + for (i in seq_len(n)) { + query[[i]] <- eval(fs[[i]][[2]], envir = default_env) + value[[i]] <- eval(fs[[i]][[3]], envir = default_env) + if (!is.logical(query[[i]])) stop(fs[[i]][[2]], " does not return a `logical` vector.") + } + m <- validate_case_when_length(query, value, fs) + out <- value[[1]][rep(NA_integer_, m)] + replaced <- rep(FALSE, m) + for (i in seq_len(n)) { + out <- replace_with(out, query[[i]] & !replaced, value[[i]], NULL) + replaced <- replaced | (query[[i]] & !is.na(query[[i]])) + } + out +} +pm_validate_case_when_length <- function(query, value, fs) { + lhs_lengths <- lengths(query) + rhs_lengths <- lengths(value) + all_lengths <- unique(c(lhs_lengths, rhs_lengths)) + if (length(all_lengths) <= 1L) return(all_lengths[[1L]]) + non_atomic_lengths <- all_lengths[all_lengths != 1L] + len <- non_atomic_lengths[[1L]] + if (length(non_atomic_lengths) == 1L) return(len) + inconsistent_lengths <- non_atomic_lengths[-1L] + lhs_problems <- lhs_lengths %in% inconsistent_lengths + rhs_problems <- rhs_lengths %in% inconsistent_lengths + problems <- lhs_problems | rhs_problems + if (any(problems)) { + stop( + "The following formulas must be length ", len, " or 1, not ", + paste(inconsistent_lengths, collapse = ", "), ".\n ", + paste(fs[problems], collapse = "\n ") + ) } - if (!is.double(x)) x <- as.numeric(x) - x >= as.numeric(left) & x <= as.numeric(right) } pm_context <- new.env() - -# Data pm_context$setup <- function(.data) pm_context$.data <- .data pm_context$get_data <- function() pm_context$.data +pm_context$get_columns <- function(cols) pm_context$.data[, cols, drop = FALSE] +pm_context$cur_column <- NULL pm_context$get_nrow <- function() nrow(pm_context$.data) pm_context$get_colnames <- function() colnames(pm_context$.data) -pm_context$clean <- function() rm(list = c(".data"), envir = pm_context) - - +pm_context$is_grouped <- function() pm_has_groups(pm_context$.data) +pm_context$as_env <- function() { + if (any(pm_is_nested(pm_context$.data))) { + lapply(as.list(pm_context$.data), function(x) if (is.data.frame(x[[1]])) x[[1]] else x) + } else { + pm_context$.data + } +} +pm_context$pm_group_env <- NULL +pm_context$clean <- function() { + rm(list = c(".data"), envir = pm_context) + if (!is.null(pm_context$cur_column)) rm(list = c("cur_column"), envir = pm_context) +} pm_n <- function() { - pm_check_group_pm_context("`pm_n()`") + check_pm_context("`n()`", pm_context$.data) pm_context$get_nrow() } - pm_cur_data <- function() { - pm_check_group_pm_context("`pm_cur_data()`") + check_pm_context("`cur_data()`", pm_context$.data) data <- pm_context$get_data() - data[, !(colnames(data) %in% pm_get_groups(data)), drop = FALSE] + data[, !(colnames(data) %in% pm_group_vars(data)), drop = FALSE] +} +pm_cur_data_all <- function() { + check_pm_context("`cur_data_all()`", pm_context$.data) + pm_ungroup(pm_context$get_data()) } - pm_cur_group <- function() { - pm_check_group_pm_context("`pm_cur_group()`") + check_pm_context("`cur_group()`", pm_context$.data) data <- pm_context$get_data() - res <- data[1L, pm_get_groups(data), drop = FALSE] + res <- data[1L, pm_group_vars(data), drop = FALSE] rownames(res) <- NULL res } - -pm_cur_group_id <- function() { - pm_check_group_pm_context("`pm_cur_group_id()`") +pm_cur_pm_group_id <- function() { + check_pm_context("`cur_pm_group_id()`", pm_context$.data) data <- pm_context$get_data() - res <- data[1L, pm_get_groups(data), drop = FALSE] - details <- pm_get_group_details(data) - details[, ".group_id"] <- seq_len(nrow(details)) - res <- suppressMessages(pm_semi_join(details, res)) - list(res[, ".group_id"]) + res <- data[1L, pm_group_vars(data), drop = FALSE] + details <- get_pm_group_details(data) + details[, ".pm_group_id"] <- seq_len(nrow(details)) + res <- suppressMessages(semi_join(details, res)) + res[, ".pm_group_id"] } - -pm_cur_group_rows <- function() { - pm_check_group_pm_context("`pm_cur_group_rows()`") +pm_cur_pm_group_rows <- function() { + check_pm_context("`cur_pm_group_rows()`", pm_context$.data) data <- pm_context$get_data() - res <- data[1L, pm_get_groups(data), drop = FALSE] - res <- suppressMessages(pm_semi_join(pm_get_group_details(data), res)) + res <- data[1L, pm_group_vars(data), drop = FALSE] + res <- suppressMessages(semi_join(get_pm_group_details(data), res)) unlist(res[, ".rows"]) } - -pm_check_group_pm_context <- function(fn) { - if (is.null(pm_context$.data)) { - stop(fn, " must only be used inside poorman verbs") +pm_cur_column <- function() { + check_pm_context("`cur_column()`", pm_context$cur_column, "`across`") + pm_context$cur_column +} +pm_check_pm_context <- function(fn, pm_context, name = NULL) { + if (is.null(pm_context)) { + stop(fn, " must only be used inside ", if (is.null(name)) "poorman verbs" else name) } } pm_count <- function(x, ..., wt = NULL, sort = FALSE, name = NULL) { - pm_groups <- pm_get_groups(x) + groups <- pm_group_vars(x) if (!missing(...)) x <- pm_group_by(x, ..., .add = TRUE) wt <- pm_deparse_var(wt) - res <- do.call(pm_tally, list(x, wt, sort, name)) - if (length(pm_groups) > 0L) res <- do.call(pm_group_by, list(res, as.name(pm_groups))) + res <- do.call(tally, list(x, wt, sort, name)) + if (length(groups) > 0L) res <- do.call(pm_group_by, list(res, as.name(groups))) res } - pm_tally <- function(x, wt = NULL, sort = FALSE, name = NULL) { - name <- pm_check_name(x, name) + name <- check_name(x, name) wt <- pm_deparse_var(wt) - res <- do.call(pm_summarise, pm_set_names(list(x, pm_tally_n(x, wt)), c(".data", name))) + res <- do.call(pm_summarise, setNames(list(x, tally_n(x, wt)), c(".data", name))) res <- pm_ungroup(res) - if (isTRUE(sort)) res <- do.call(pm_arrange, list(res, call("pm_desc", as.name(name)))) + if (isTRUE(sort)) res <- do.call(pm_arrange, list(res, call("desc", as.name(name)))) rownames(res) <- NULL res } - pm_add_count <- function(x, ..., wt = NULL, sort = FALSE, name = NULL) { - name <- pm_check_name(x, name) + name <- check_name(x, name) row_names <- rownames(x) wt <- pm_deparse_var(wt) if (!missing(...)) x <- pm_group_by(x, ..., .add = TRUE) - res <- do.call(pm_add_tally, list(x, wt, sort, name)) + res <- do.call(add_tally, list(x, wt, sort, name)) res[row_names, ] } - pm_add_tally <- function(x, wt = NULL, sort = FALSE, name = NULL) { wt <- pm_deparse_var(wt) - pm_n <- pm_tally_n(x, wt) - name <- pm_check_name(x, name) - res <- do.call(pm_mutate, pm_set_names(list(x, pm_n), c(".data", name))) - + n <- tally_n(x, wt) + name <- check_name(x, name) + res <- do.call(pm_mutate, setNames(list(x, n), c(".data", name))) if (isTRUE(sort)) { - do.call(pm_arrange, list(res, call("pm_desc", as.name(name)))) + do.call(pm_arrange, list(res, call("desc", as.name(name)))) } else { res } } - pm_tally_n <- function(x, wt) { - if (is.null(wt) && "pm_n" %in% colnames(x)) { - message("Using `pm_n` as weighting variable") - wt <- "pm_n" + if (is.null(wt) && "n" %in% colnames(x)) { + message("Using `n` as weighting variable") + wt <- "n" } pm_context$setup(.data = x) on.exit(pm_context$clean(), add = TRUE) if (is.null(wt)) { - call("pm_n") + call("n") } else { call("sum", as.name(wt), na.rm = TRUE) } } - pm_check_name <- function(df, name) { if (is.null(name)) { - if ("pm_n" %in% colnames(df)) { + if ("n" %in% colnames(df)) { stop( - "Column 'pm_n' is already present in output\n", + "Column 'n' is already present in output\n", "* Use `name = \"new_name\"` to pick a new name" ) } - return("pm_n") + return("n") } - if (!is.character(name) || length(name) != 1) { stop("`name` must be a single string") } - name } pm_desc <- function(x) -xtfrm(x) -pm_distinct <- function(.data, ...) { - pm_check_is_dataframe(.data) - if ("grouped_data" %in% class(.data)) { - pm_distinct.grouped_data(.data, ...) - } else { - pm_distinct.default(.data, ...) - } +pm_distinct <- function(.data, ..., .keep_all = FALSE) { + if ("grouped_df" %in% class(.data)) pm_distinct.grouped_df(.data, ..., .keep_all = FALSE) else pm_distinct.data.frame(.data, ..., .keep_all = FALSE) } - -pm_distinct.default <- function(.data, ..., .keep_all = FALSE) { - if (ncol(.data) == 0L) { - return(.data[1, ]) - } - cols <- pm_deparse_dots(...) +pm_distinct.data.frame <- function(.data, ..., .keep_all = FALSE) { + if (ncol(.data) == 0L) return(.data[1, ]) + cols <- pm_dotdotdot(...) col_names <- names(cols) col_len <- length(cols) if (is.null(col_names) && col_len > 0L) names(cols) <- cols if (col_len == 0L) { res <- .data } else { - res <- pm_mutate(.data, ...) + mut <- pm_mutate_df(.data, ...) + res <- mut$data col_names <- names(cols) res <- if (!is.null(col_names)) { zero_names <- nchar(col_names) == 0L @@ -228,9 +413,9 @@ pm_distinct.default <- function(.data, ..., .keep_all = FALSE) { names(cols)[zero_names] <- cols[zero_names] col_names <- names(cols) } - suppressMessages(pm_select(res, col_names)) + suppressMessages(select(res, col_names)) } else { - suppressMessages(pm_select(res, cols)) + suppressMessages(select(res, cols)) } } res <- unique(res) @@ -238,26 +423,43 @@ pm_distinct.default <- function(.data, ..., .keep_all = FALSE) { res <- cbind(res, .data[rownames(res), setdiff(colnames(.data), colnames(res)), drop = FALSE]) } common_cols <- c(intersect(colnames(.data), colnames(res)), setdiff(col_names, colnames(.data))) + if (is.numeric(attr(res, "row.names"))) { + row.names(res) <- seq_len(nrow(res)) + } if (length(common_cols) > 0L) res[, common_cols, drop = FALSE] else res } - -pm_distinct.grouped_data <- function(.data, ..., .keep_all = FALSE) { +pm_distinct.grouped_df <- function(.data, ..., .keep_all = FALSE) { pm_apply_grouped_function("pm_distinct", .data, drop = TRUE, ..., .keep_all = .keep_all) } -pm_eval_env <- new.env() -pm_filter <- function(.data, ...) { - pm_check_is_dataframe(.data) - if ("grouped_data" %in% class(.data)) { - pm_filter.grouped_data(.data, ...) - } else { - pm_filter.default(.data, ...) +pm_dotdotdot <- function(..., .impute_names = FALSE) { + dots <- eval(substitute(alist(...))) + if (isTRUE(.impute_names)) { + pm_deparse_dots <- lapply(dots, deparse) + names_dots <- names(dots) + unnamed <- if (is.null(names_dots)) rep(TRUE, length(dots)) else nchar(names_dots) == 0L + names(dots)[unnamed] <- pm_deparse_dots[unnamed] } + dots } - -pm_filter.default <- function(.data, ...) { +pm_deparse_dots <- function(...) { + vapply(substitute(...()), deparse, NA_character_) +} +pm_deparse_var <- function(var, frame = if (is.null(pm_eval_env$env)) parent.frame() else pm_eval_env$env) { + sub_var <- eval(substitute(substitute(var)), frame) + if (is.symbol(sub_var)) var <- as.character(sub_var) + var +} +pm_eval_env <- new.env() +pm_filter <- function(.data, ..., .preserve = FALSE) { + if ("grouped_df" %in% class(.data)) pm_filter.grouped_df(.data, ..., .preserve = FALSE) else pm_filter.data.frame(.data, ..., .preserve = FALSE) +} +pm_filter.data.frame <- function(.data, ..., .preserve = FALSE) { conditions <- pm_dotdotdot(...) + if (length(conditions) == 0L) return(.data) + check_filter(conditions) cond_class <- vapply(conditions, typeof, NA_character_) - if (any(cond_class != "language")) stop("Conditions must be logical vectors") + cond_class <- cond_class[!cond_class %in% c("language", "logical")] + if (length(cond_class) > 0L) stop("Conditions must be logical vectors") pm_context$setup(.data) on.exit(pm_context$clean(), add = TRUE) pm_eval_env$env <- parent.frame() @@ -270,176 +472,216 @@ pm_filter.default <- function(.data, ...) { rows <- Reduce("&", rows) .data[rows & !is.na(rows), ] } - -pm_filter.grouped_data <- function(.data, ...) { +pm_filter.grouped_df <- function(.data, ..., .preserve = FALSE) { rows <- rownames(.data) res <- pm_apply_grouped_function("pm_filter", .data, drop = TRUE, ...) - res[rows[rows %in% rownames(res)], ] -} -pm_group_by <- function(.data, ..., .add = FALSE) { - pm_check_is_dataframe(.data) - pre_groups <- pm_get_groups(.data) - pm_groups <- pm_deparse_dots(...) - if (isTRUE(.add)) pm_groups <- unique(c(pre_groups, pm_groups)) - unknown <- !(pm_groups %in% colnames(.data)) - if (any(unknown)) stop("Invalid pm_groups: ", pm_groups[unknown]) - class(.data) <- c("grouped_data", class(.data)) - pm_set_groups(.data, pm_groups) -} - -pm_ungroup <- function(x, ...) { - pm_check_is_dataframe(x) - rm_groups <- pm_deparse_dots(...) - pm_groups <- pm_get_groups(x) - if (length(rm_groups) == 0L) rm_groups <- pm_groups - x <- pm_set_groups(x, pm_groups[!(pm_groups %in% rm_groups)]) - if (length(attr(x, "pm_groups")) == 0L) { - attr(x, "pm_groups") <- NULL - class(x) <- class(x)[!(class(x) %in% "grouped_data")] + res <- res[rows[rows %in% rownames(res)], ] + groups <- pm_group_vars(.data) + pre_filtered_groups <- pm_group_data(.data) + post_filtered_groups <- pm_calculate_groups(res, groups) + if (!(!.preserve && isTRUE(attr(pre_filtered_groups, ".drop")))) { + filtered_groups <- anti_join(pre_filtered_groups, post_filtered_groups, by = groups) + filtered_groups <- filtered_groups[, groups, drop = FALSE] + filtered_groups[[".rows"]] <- rep(list(integer()), length.out = nrow(filtered_groups)) + post_filtered_groups <- bind_rows(post_filtered_groups, filtered_groups) + ordered <- do.call(pm_arrange_rows, list(post_filtered_groups, pm_as_symbols(groups))) + post_filtered_groups <- post_filtered_groups[ordered, ] } - x + attr(res, "groups") <- post_filtered_groups + res } - -pm_set_groups <- function(x, pm_groups) { - attr(x, "pm_groups") <- if (is.null(pm_groups) || length(pm_groups) == 0L) { - NULL - } else { - pm_group_data_worker(x, pm_groups) +pm_check_filter <- function(conditions) { + named <- have_name(conditions) + for (i in which(named)) { + if (!is.logical(conditions[[i]])) { + stop( + sprintf("Problem with `pm_filter()` input `..%s`.\n", i), + sprintf("Input `..%s` is named.\n", i), + "This usually means that you've used `=` instead of `==`.\n", + sprintf("Did you mean `%s == %s`?", names(conditions)[[i]], conditions[[i]]) + ) + } } - x } - -pm_get_groups <- function(x) { - pm_groups <- attr(x, "pm_groups", exact = TRUE) - if (is.null(pm_groups)) character(0) else colnames(pm_groups)[!colnames(pm_groups) %in% c(".group_id", ".rows")] +pm_group_by <- function(.data, ..., .add = FALSE, .drop = pm_group_by_drop_default(.data)) { + pm_group_by.data.frame(.data, ..., .add = FALSE, .drop = pm_group_by_drop_default(.data)) } - -pm_get_group_details <- function(x) { - pm_groups <- attr(x, "pm_groups", exact = TRUE) - if (is.null(pm_groups)) character(0) else pm_groups -} - -pm_has_groups <- function(x) { - pm_groups <- pm_get_groups(x) - if (length(pm_groups) == 0L) FALSE else TRUE -} - -pm_apply_grouped_function <- function(fn, .data, drop = FALSE, ...) { - pm_groups <- pm_get_groups(.data) - grouped <- pm_split_into_groups(.data, pm_groups, drop) - res <- do.call(rbind, unname(lapply(grouped, fn, ...))) - if (any(pm_groups %in% colnames(res))) { - class(res) <- c("grouped_data", class(res)) - res <- pm_set_groups(res, pm_groups[pm_groups %in% colnames(res)]) +pm_group_by.data.frame <- function(.data, ..., .add = FALSE, .drop = pm_group_by_drop_default(.data)) { + vars <- pm_dotdotdot(..., .impute_names = TRUE) + if (all(vapply(vars, is.null, FALSE))) { + res <- pm_groups_set(.data, NULL) + class(res) <- class(res)[!(class(res) %in% "grouped_df")] + return(res) + } + new_cols <- pm_add_group_columns(.data, vars) + res <- new_cols$data + groups <- new_cols$groups + if (isTRUE(.add)) groups <- union(pm_group_vars(.data), groups) + unknown <- !(groups %in% colnames(res)) + if (any(unknown)) stop("Invalid groups: ", groups[unknown]) + if (length(groups) > 0L) { + res <- pm_groups_set(res, groups, .drop) + class(res) <- union("grouped_df", class(res)) } res } - -pm_print.grouped_data <- function(x, ..., digits = NULL, quote = FALSE, right = TRUE, row.names = TRUE, max = NULL) { - class(x) <- "data.frame" - print(x, ..., digits = digits, quote = quote, right = right, row.names = row.names, max = max) - cat("\nGroups: ", paste(pm_get_groups(x), collapse = ", "), "\n\n") +pm_group_by_drop_default <- function(.tbl) { + if ("grouped_df" %in% class(.tbl)) pm_group_by_drop_default.grouped_df(.tbl) else pm_group_by_drop_default.data.frame(.tbl) +} +pm_group_by_drop_default.data.frame <- function(.tbl) { + TRUE +} +pm_group_by_drop_default.grouped_df <- function(.tbl) { + tryCatch({ + !identical(attr(pm_group_data(.tbl), ".drop"), FALSE) + }, error = function(e) { + TRUE + }) +} +pm_add_group_columns <- function(.data, vars) { + vars <- vars[!vapply(vars, is.null, FALSE)] + types <- do.call(c, lapply(vars, typeof)) + test <- any(types == "language") + needs_mutate <- if (test) unname(which(types == "language")) else NULL + if (!is.null(needs_mutate)) { + .data <- do.call(pm_mutate, c(list(.data = pm_ungroup(.data)), vars[needs_mutate])) + } + list(data = .data, groups = names(vars)) } - pm_group_data <- function(.data) { - if (!pm_has_groups(.data)) { - return(data.frame(.rows = I(list(seq_len(nrow(.data)))))) - } - pm_groups <- pm_get_groups(.data) - pm_group_data_worker(.data, pm_groups) + if ("grouped_df" %in% class(.data)) pm_group_data.grouped_df(.data) else pm_group_data.data.frame(.data) } - -pm_group_data_worker <- function(.data, pm_groups) { - res <- unique(.data[, pm_groups, drop = FALSE]) - class(res) <- "data.frame" - nrow_res <- nrow(res) - rows <- rep(list(NA), nrow_res) - for (i in seq_len(nrow_res)) { - rows[[i]] <- which(interaction(.data[, pm_groups]) %in% interaction(res[i, pm_groups])) - } - res$`.rows` <- rows - res <- res[do.call(order, lapply(pm_groups, function(x) res[, x])), , drop = FALSE] - rownames(res) <- NULL - res +pm_group_data.data.frame <- function(.data) { + structure(list(.rows = list(seq_len(nrow(.data)))), class = "data.frame", row.names = c(NA, -1L)) +} +pm_group_data.grouped_df <- function(.data) { + attr(.data, "groups") } - pm_group_rows <- function(.data) { pm_group_data(.data)[[".rows"]] } - pm_group_indices <- function(.data) { - if (!pm_has_groups(.data)) { - return(rep(1L, nrow(.data))) - } - pm_groups <- pm_get_groups(.data) - res <- unique(.data[, pm_groups, drop = FALSE]) - res <- res[do.call(order, lapply(pm_groups, function(x) res[, x])), , drop = FALSE] + if (!pm_has_groups(.data)) return(rep(1L, nrow(.data))) + groups <- pm_group_vars(.data) + res <- unique(.data[, groups, drop = FALSE]) + res <- res[do.call(order, lapply(groups, function(x) res[, x])), , drop = FALSE] class(res) <- "data.frame" nrow_data <- nrow(.data) rows <- rep(NA, nrow_data) for (i in seq_len(nrow_data)) { - rows[i] <- which(interaction(res[, pm_groups]) %in% interaction(.data[i, pm_groups])) + rows[i] <- which(interaction(res[, groups]) %in% interaction(.data[i, groups])) } rows } - pm_group_vars <- function(x) { - pm_get_groups(x) + groups <- attr(x, "groups", exact = TRUE) + if (is.null(groups)) character(0) else colnames(groups)[!colnames(groups) %in% c(".pm_group_id", ".rows")] } - pm_groups <- function(x) { - lapply(pm_get_groups(x), as.symbol) + pm_as_symbols(pm_group_vars(x)) } - pm_group_size <- function(x) { lengths(pm_group_rows(x)) } - pm_n_groups <- function(x) { nrow(pm_group_data(x)) } -# pm_group_split <- function(.data, ..., .keep = TRUE) { -# dots_len <- ...length() > 0L -# if (pm_has_groups(.data) && isTRUE(dots_len)) { -# warning("... is ignored in pm_group_split(), please use pm_group_by(..., .add = TRUE) %pm>% pm_group_split()") -# } -# if (!pm_has_groups(.data) && isTRUE(dots_len)) { -# .data <- pm_group_by(.data, ...) -# } -# if (!pm_has_groups(.data) && isFALSE(dots_len)) { -# return(list(.data)) -# } -# pm_context$setup(.data) -# on.exit(pm_context$clean(), add = TRUE) -# pm_groups <- pm_get_groups(.data) -# attr(pm_context$.data, "pm_groups") <- NULL -# res <- pm_split_into_groups(pm_context$.data, pm_groups) -# names(res) <- NULL -# if (isFALSE(.keep)) { -# res <- lapply(res, function(x) x[, !colnames(x) %in% pm_groups]) -# } -# any_empty <- unlist(lapply(res, function(x) !(nrow(x) == 0L))) -# res[any_empty] -# } - -pm_group_keys <- function(.data) { - pm_groups <- pm_get_groups(.data) - pm_context$setup(.data) - res <- pm_context$.data[, pm_context$get_colnames() %in% pm_groups, drop = FALSE] - res <- res[!duplicated(res), , drop = FALSE] - if (nrow(res) == 0L) { - return(res) +pm_group_split <- function(.data, ..., .keep = TRUE) { + dots_len <- length(pm_dotdotdot(...)) > 0L + if (pm_has_groups(.data) && isTRUE(dots_len)) { + warning("... is ignored in pm_group_split(), please use pm_group_by(..., .add = TRUE) %pm>% pm_group_split()") } + if (!pm_has_groups(.data) && isTRUE(dots_len)) { + .data <- pm_group_by(.data, ...) + } + if (!pm_has_groups(.data) && !isTRUE(dots_len)) { + return(list(.data)) + } + pm_context$setup(.data) + on.exit(pm_context$clean(), add = TRUE) + groups <- pm_group_vars(.data) + attr(pm_context$.data, "groups") <- NULL + res <- pm_split_into_groups(pm_context$.data, groups) + names(res) <- NULL + if (!isTRUE(.keep)) { + res <- lapply(res, function(x) x[, !colnames(x) %in% groups]) + } + any_empty <- unlist(lapply(res, function(x) !(nrow(x) == 0L))) + res[any_empty] +} +pm_group_keys <- function(.data) { + groups <- pm_group_vars(.data) + pm_context$setup(.data) + res <- pm_context$get_columns(pm_context$get_colnames() %in% groups) + res <- res[!duplicated(res), , drop = FALSE] + if (nrow(res) == 0L) return(res) class(res) <- "data.frame" - res <- res[do.call(order, lapply(pm_groups, function(x) res[, x])), , drop = FALSE] + res <- res[do.call(order, lapply(groups, function(x) res[, x])), , drop = FALSE] rownames(res) <- NULL res } - -pm_split_into_groups <- function(.data, pm_groups, drop = FALSE, ...) { +pm_split_into_groups <- function(.data, groups, drop = FALSE, ...) { class(.data) <- "data.frame" - group_factors <- lapply(pm_groups, function(x, .data) as.factor(.data[, x]), .data) - split(x = .data, f = group_factors, drop = drop, ...) + pm_group_factors <- lapply(groups, function(x, .data) as.factor(.data[, x]), .data) + split(x = .data, f = pm_group_factors, drop = drop, ...) +} +pm_groups_set <- function(x, groups, drop = pm_group_by_drop_default(x)) { + attr(x, "groups") <- if (is.null(groups) || length(groups) == 0L) { + NULL + } else { + pm_calculate_groups(x, groups, drop) + } + x +} +pm_get_pm_group_details <- function(x) { + groups <- attr(x, "groups", exact = TRUE) + if (is.null(groups)) character(0) else groups +} +pm_has_groups <- function(x) { + groups <- pm_group_vars(x) + if (length(groups) == 0L) FALSE else TRUE +} +pm_apply_grouped_function <- function(fn, .data, drop = FALSE, ...) { + groups <- pm_group_vars(.data) + grouped <- pm_split_into_groups(.data, groups, drop) + res <- do.call(rbind, unname(lapply(grouped, fn, ...))) + if (any(groups %in% colnames(res))) { + class(res) <- c("grouped_df", class(res)) + res <- pm_groups_set(res, groups[groups %in% colnames(res)]) + } + res +} +pm_calculate_groups <- function(data, groups, drop = pm_group_by_drop_default(data)) { + data <- pm_ungroup(data) + unknown <- setdiff(groups, colnames(data)) + if (length(unknown) > 0L) { + stop(sprintf("`groups` missing from `data`: %s.", paste0(groups, collapse = ", "))) + } + unique_groups <- unique(data[, groups, drop = FALSE]) + is_factor <- do.call(c, lapply(unique_groups, function(x) is.factor(x))) + n_comb <- nrow(unique_groups) + rows <- rep(list(NA), n_comb) + data_groups <- interaction(data[, groups, drop = TRUE]) + for (i in seq_len(n_comb)) { + rows[[i]] <- which(data_groups %in% interaction(unique_groups[i, groups])) + } + if (!isTRUE(drop) && any(is_factor)) { + na_lvls <- do.call( + expand.grid, + lapply(unique_groups, function(x) if (is.factor(x)) levels(x)[!(levels(x) %in% x)] else NA) + ) + unique_groups <- rbind(unique_groups, na_lvls) + for (i in seq_len(nrow(na_lvls))) { + rows[[length(rows) + 1]] <- integer(0) + } + } + unique_groups[[".rows"]] <- rows + unique_groups <- unique_groups[do.call(order, lapply(groups, function(x) unique_groups[, x])), , drop = FALSE] + rownames(unique_groups) <- NULL + structure(unique_groups, .drop = drop) +} +pm_is.grouped_df <- function(x) { + inherits(x, "grouped_df") } pm_if_else <- function(condition, true, false, missing = NULL) { if (!is.logical(condition)) stop("`condition` must be a logical vector.") @@ -457,60 +699,61 @@ pm_if_else <- function(condition, true, false, missing = NULL) { attributes(res) <- attributes(true) res } - pm_anti_join <- function(x, y, by = NULL) { pm_filter_join_worker(x, y, by, type = "anti") } - pm_semi_join <- function(x, y, by = NULL) { pm_filter_join_worker(x, y, by, type = "semi") } - pm_filter_join_worker <- function(x, y, by = NULL, type = c("anti", "semi")) { type <- match.arg(type, choices = c("anti", "semi"), several.ok = FALSE) if (is.null(by)) { by <- intersect(names(x), names(y)) - pm_join_message(by) + join_message(by) } rows <- interaction(x[, by]) %in% interaction(y[, by]) if (type == "anti") rows <- !rows res <- x[rows, , drop = FALSE] rownames(res) <- NULL - res + reconstruct_attrs(res, x) } - -pm_inner_join <- function(x, y, by = NULL, suffix = c(".x", ".y")) { - pm_join_worker(x = x, y = y, by = by, suffix = suffix, sort = FALSE) +pm_inner_join <- function(x, y, by = NULL, suffix = c(".x", ".y"), ..., na_matches = c("na", "never")) { + join_worker(x = x, y = y, by = by, suffix = suffix, sort = FALSE, ..., keep = FALSE, na_matches = na_matches) } - -# pm_left_join <- function(x, y, by = NULL, suffix = c(".x", ".y")) { -# pm_join_worker(x = x, y = y, by = by, suffix = suffix, all.x = TRUE) -# } - -pm_right_join <- function(x, y, by = NULL, suffix = c(".x", ".y")) { - pm_join_worker(x = x, y = y, by = by, suffix = suffix, all.y = TRUE) +pm_left_join <- function(x, y, by = NULL, suffix = c(".x", ".y"), ..., keep = FALSE, na_matches = c("na", "never")) { + join_worker(x = x, y = y, by = by, suffix = suffix, all.x = TRUE, ..., keep = keep, na_matches = na_matches) } - -pm_full_join <- function(x, y, by = NULL, suffix = c(".x", ".y")) { - pm_join_worker(x = x, y = y, by = by, suffix = suffix, all = TRUE) +pm_right_join <- function(x, y, by = NULL, suffix = c(".x", ".y"), ..., keep = FALSE, na_matches = c("na", "never")) { + join_worker(x = x, y = y, by = by, suffix = suffix, all.y = TRUE, ..., keep = keep, na_matches = na_matches) } - -pm_join_worker <- function(x, y, by = NULL, suffix = c(".x", ".y"), ...) { +pm_full_join <- function(x, y, by = NULL, suffix = c(".x", ".y"), ..., keep = FALSE, na_matches = c("na", "never")) { + join_worker(x = x, y = y, by = by, suffix = suffix, all = TRUE, ..., keep = keep, na_matches = na_matches) +} +pm_join_worker <- function(x, y, by = NULL, suffix = c(".x", ".y"), keep = FALSE, na_matches = c("na", "never"), ...) { + na_matches <- match.arg(arg = na_matches, choices = c("na", "never"), several.ok = FALSE) + incomparables <- if (na_matches == "never") NA else NULL x[, ".join_id"] <- seq_len(nrow(x)) - if (is.null(by)) { + merged <- if (is.null(by)) { by <- intersect(names(x), names(y)) - pm_join_message(by) - merged <- merge(x = x, y = y, by = by, suffixes = suffix, ...)[, union(names(x), names(y))] + join_message(by) + merge( + x = x, y = y, by = by, suffixes = suffix, incomparables = incomparables, ... + )[, union(names(x), names(y)), drop = FALSE] } else if (is.null(names(by))) { - merged <- merge(x = x, y = y, by = by, suffixes = suffix, ...) + merge(x = x, y = y, by = by, suffixes = suffix, incomparables = incomparables, ...) } else { - merged <- merge(x = x, y = y, by.x = names(by), by.y = by, suffixes = suffix, ...) + merge(x = x, y = y, by.x = names(by), by.y = by, suffixes = suffix, incomparables = incomparables, ...) + } + merged <- merged[order(merged[, ".join_id"]), colnames(merged) != ".join_id", drop = FALSE] + if (isTRUE(keep)) { + keep_pos <- match(by, names(merged)) + x_by <- paste0(by, suffix[1L]) + colnames(merged)[keep_pos] <- x_by + merged[, paste0(by, suffix[2L])] <- merged[, x_by] } - merged <- merged[order(merged[, ".join_id"]), colnames(merged) != ".join_id"] rownames(merged) <- NULL - merged + reconstruct_attrs(merged, x) } - pm_join_message <- function(by) { if (length(by) > 1L) { message("Joining, by = c(\"", paste0(by, collapse = "\", \""), "\")\n", sep = "") @@ -518,12 +761,54 @@ pm_join_message <- function(by) { message("Joining, by = \"", by, "\"\n", sep = "") } } -pm_lag <- function(x, pm_n = 1L, default = NA) { - if (inherits(x, "ts")) stop("`x` must be a vector, not a `ts` object, do you want `stats::lag()`?") - if (length(pm_n) != 1L || !is.numeric(pm_n) || pm_n < 0L) stop("`pm_n` must be a nonnegative integer scalar") - if (pm_n == 0L) { - return(x) +pm_as_function <- function(x, env = parent.frame()) { + if (is.function(x)) return(x) + if (is_formula(x)) { + if (length(x) > 2) stop("Can't convert a two-sided formula to a function") + env <- attr(x, ".Environment", exact = TRUE) + rhs <- as.list(x)[[2]] + return(as.function(list(... = substitute(), .x = quote(..1), .y = quote(..2), . = quote(..1), rhs), envir = env)) } + if (is_string(x)) return(get(x, envir = env, mode = "function")) + stop("Can't convert an object of class ", class(x), " to a function.") +} +pm_is_formula <- function(x) { + inherits(x, "formula") +} +pm_is_string <- function(x) { + is.character(x) && length(x) == 1L +} +pm_is_wholenumber <- function(x) { + x %% 1L == 0L +} +pm_names_are_invalid <- function(x) { + x == "" | is.na(x) +} +pm_is_named <- function(x) { + nms <- names(x) + if (is.null(nms)) return(FALSE) + if (any(names_are_invalid(nms))) return(FALSE) + TRUE +} +pm_have_name <- function(x) { + nms <- names(x) + if (is.null(nms)) rep(FALSE, length(x)) else !names_are_invalid(nms) +} +pm_is_empty_list <- function(x) { + inherits(x, "list") && length(x) == 0L +} +pm_as_symbols <- function(x) { + lapply(x, as.symbol) +} +pm_is_df_or_vector <- function(x) { + res <- is.data.frame(x) || is.atomic(x) + if (!isTRUE(res)) stop("You must pass vector(s) and/or data.frame(s).") + TRUE +} +pm_lag <- function(x, n = 1L, default = NA) { + if (inherits(x, "ts")) stop("`x` must be a vector, not a `ts` object, do you want `stats::lag()`?") + if (length(n) != 1L || !is.numeric(n) || n < 0L) stop("`n` must be a nonnegative integer scalar") + if (n == 0L) return(x) tryCatch( storage.mode(default) <- typeof(x), warning = function(w) { @@ -531,17 +816,14 @@ pm_lag <- function(x, pm_n = 1L, default = NA) { } ) xlen <- length(x) - pm_n <- pmin(pm_n, xlen) - res <- c(rep(default, pm_n), x[seq_len(xlen - pm_n)]) + n <- pmin(n, xlen) + res <- c(rep(default, n), x[seq_len(xlen - n)]) attributes(res) <- attributes(x) res } - -pm_lead <- function(x, pm_n = 1L, default = NA) { - if (length(pm_n) != 1L || !is.numeric(pm_n) || pm_n < 0L) stop("pm_n must be a nonnegative integer scalar") - if (pm_n == 0L) { - return(x) - } +pm_lead <- function(x, n = 1L, default = NA) { + if (length(n) != 1L || !is.numeric(n) || n < 0L) stop("n must be a nonnegative integer scalar") + if (n == 0L) return(x) tryCatch( storage.mode(default) <- typeof(x), warning = function(w) { @@ -549,114 +831,426 @@ pm_lead <- function(x, pm_n = 1L, default = NA) { } ) xlen <- length(x) - pm_n <- pmin(pm_n, xlen) - res <- c(x[-seq_len(pm_n)], rep(default, pm_n)) + n <- pmin(n, xlen) + res <- c(x[-seq_len(n)], rep(default, n)) attributes(res) <- attributes(x) res } +pm_lst <- function(...) { + fn_call <- match.call() + list_to_eval <- as.list(fn_call)[-1] + out <- vector(mode = "list", length = length(list_to_eval)) + names(out) <- names(list_to_eval) + exprs <- lapply(substitute(list(...)), deparse)[-1] + for (element in seq_along(list_to_eval)) { + value <- list_to_eval[[element]] + if (is.language(value)) { + value <- eval( + value, + envir = if (length(out) == 0) { + list_to_eval + } else { + drop_dup_list(out[1:(element - 1)]) + } + ) + } + if (is.null(value)) { + out[element] <- list(NULL) + } else { + out[[element]] <- value + } + invalid_name <- is.null(names(out)[element]) || + is.na(names(out)[element]) || + names(out)[element] == "" + if (invalid_name) { + if (exprs[[element]] != "NULL" || (exprs[[element]] == "NULL" && is.null(out[[element]]))) { + names(out)[element] <- exprs[[element]] + } + } + } + out +} +pm_drop_dup_list <- function(x) { + list_names <- names(x) + if (identical(list_names, unique(list_names))) return(x) + count <- table(list_names) + dupes <- names(count[count > 1]) + uniques <- names(count[count == 1]) + to_drop <- do.call(c, lapply( + dupes, + function(x) { + matches <- which(list_names == x) + matches[-length(matches)] + } + )) + x[uniques] <- Filter(Negate(is.null), x[uniques]) + return(x[-to_drop]) +} pm_mutate <- function(.data, ...) { - pm_check_is_dataframe(.data) - if ("grouped_data" %in% class(.data)) { - pm_mutate.grouped_data(.data, ...) - } else { - pm_mutate.default(.data, ...) + if ("grouped_df" %in% class(.data)) pm_mutate.grouped_df(.data, ...) else pm_mutate.data.frame(.data, ...) +} +pm_mutate.data.frame <- function( + .data, + ..., + .keep = c("all", "used", "unused", "none"), + .before = NULL, + .after = NULL +) { + keep <- match.arg(arg = .keep, choices = c("all", "used", "unused", "none"), several.ok = FALSE) + res <- pm_mutate_df(.data = .data, ...) + data <- res$data + new_cols <- res$new_cols + .before <- substitute(.before) + .after <- substitute(.after) + if (!is.null(.before) || !is.null(.after)) { + new <- setdiff(new_cols, names(.data)) + data <- do.call(pm_relocate, c(list(.data = data), new, .before = .before, .after = .after)) + } + if (keep == "all") { + data + } else if (keep == "unused") { + unused <- setdiff(colnames(.data), res$used_cols) + keep <- intersect(colnames(data), c(pm_group_vars(.data), unused, new_cols)) + select(.data = data, keep) + } else if (keep == "used") { + keep <- intersect(colnames(data), c(pm_group_vars(.data), res$used_cols, new_cols)) + select(.data = data, keep) + } else if (keep == "none") { + keep <- c(setdiff(pm_group_vars(.data), new_cols), intersect(new_cols, colnames(data))) + select(.data = data, keep) } } - -pm_mutate.default <- function(.data, ...) { +pm_mutate.grouped_df <- function(.data, ...) { + pm_context$pm_group_env <- parent.frame(n = 1) + on.exit(rm(list = c("pm_group_env"), envir = pm_context), add = TRUE) + rows <- rownames(.data) + res <- pm_apply_grouped_function("pm_mutate", .data, drop = TRUE, ...) + res[rows, , drop = FALSE] +} +pm_mutate_df <- function(.data, ...) { conditions <- pm_dotdotdot(..., .impute_names = TRUE) - .data[, setdiff(names(conditions), names(.data))] <- NA + cond_nms <- names(pm_dotdotdot(..., .impute_names = FALSE)) + if (length(conditions) == 0L) { + return(list( + data = .data, + used_cols = NULL, + new_cols = NULL + )) + } + used <- unname(do.call(c, lapply(conditions, pm_find_used))) + used <- used[used %in% colnames(.data)] pm_context$setup(.data) on.exit(pm_context$clean(), add = TRUE) for (i in seq_along(conditions)) { - pm_context$.data[, names(conditions)[i]] <- eval(conditions[[i]], envir = pm_context$.data) + not_named <- (is.null(cond_nms) || cond_nms[i] == "") + res <- eval( + conditions[[i]], + envir = pm_context$as_env(), + enclos = if (!is.null(pm_context$pm_group_env)) pm_context$pm_group_env else parent.frame(n = 2) + ) + res_nms <- names(res) + if (is.data.frame(res)) { + if (not_named) { + pm_context$.data[, res_nms] <- res + } else { + pm_context$.data[[cond_nms[i]]] <- res + } + } else if (is.atomic(res)) { + cond_nms[i] <- names(conditions)[[i]] + pm_context$.data[[cond_nms[i]]] <- res + } else { + if (is.null(res_nms)) names(res) <- names(conditions)[[i]] + pm_context$.data[[names(res)]] <- res + } } - pm_context$.data + list( + data = pm_context$.data, + used_cols = used, + new_cols = cond_nms + ) } - -pm_mutate.grouped_data <- function(.data, ...) { - rows <- rownames(.data) - res <- pm_apply_grouped_function("pm_mutate", .data, drop = TRUE, ...) - res[rows, ] +pm_find_used <- function(expr) { + if (is.symbol(expr)) { + as.character(expr) + } else { + unique(unlist(lapply(expr[-1], pm_find_used))) + } } pm_n_distinct <- function(..., na.rm = FALSE) { - res <- c(...) - if (is.list(res)) { - return(nrow(unique(as.data.frame(res, stringsAsFactors = FALSE)))) - } - if (isTRUE(na.rm)) res <- res[!is.na(res)] - length(unique(res)) + res <- do.call(cbind, list(...)) + if (isTRUE(na.rm)) res <- res[!is.na(res), , drop = FALSE] + nrow(unique(res)) } -pm_na_if <- function(x, y) { - y_len <- length(y) - x_len <- length(x) - if (!(y_len %in% c(1L, x_len))) stop("`y` must be length ", x_len, " (same as `x`) or 1, not ", y_len) - x[x == y] <- NA - x +pm_nth <- function(x, n, order_by = NULL, default = pm_default_missing(x)) { + if (length(n) != 1 || !is.numeric(n)) stop("`n` must be a single integer.") + n <- trunc(n) + if (n == 0 || n > length(x) || n < -length(x)) return(default) + if (n < 0) n <- length(x) + n + 1 + if (is.null(order_by)) x[[n]] else x[[order(order_by)[[n]]]] } -pm_near <- function(x, y, tol = .Machine$double.eps^0.5) { - abs(x - y) < tol +pm_first <- function(x, order_by = NULL, default = pm_default_missing(x)) { + nth(x, 1L, order_by = order_by, default = default) +} +pm_last <- function(x, order_by = NULL, default = pm_default_missing(x)) { + nth(x, -1L, order_by = order_by, default = default) +} +pm_default_missing <- function(x) { + pm_default_missing.data.frame(x) +} +pm_default_missing.data.frame <- function(x) { + if (!is.object(x) && is.list(x)) NULL else x[NA_real_] +} +pm_default_missing.data.frame <- function(x) { + rep(NA, nrow(x)) } `%pm>%` <- function(lhs, rhs) { - lhs <- substitute(lhs) - rhs <- substitute(rhs) - eval(as.call(c(rhs[[1L]], lhs, as.list(rhs[-1L]))), envir = parent.frame()) + rhs_call <- pm_insert_dot(substitute(rhs)) + eval(rhs_call, envir = list(`.` = lhs), enclos = parent.frame()) +} +pm_insert_dot <- function(expr) { + if (is.symbol(expr) || expr[[1]] == quote(`(`)) { + expr <- as.call(c(expr, quote(`.`))) + } else if (length(expr) == 1) { + expr <- as.call(c(expr[[1]], quote(`.`))) + } else if ( + expr[[1]] != quote(`{`) && + !any(vapply(expr[-1], identical, quote(`.`), FUN.VALUE = logical(1))) && + !any(vapply(expr[-1], identical, quote(`!!!.`), FUN.VALUE = logical(1))) + ) { + expr <- as.call(c(expr[[1]], quote(`.`), as.list(expr[-1]))) + } + expr +} +pm_pivot_longer <- function( + data, + cols, + names_to = "name", + names_prefix = NULL, + names_sep = NULL, + names_pattern = NULL, + values_to = "value", + values_drop_na = FALSE, + ... +) { + if (missing(cols)) { + stop("`cols` must select at least one column.") + } + cols <- names(pm_eval_select_pos(data, substitute(cols))) + if (any(names_to %in% setdiff(names(data), cols))) { + stop( + paste0( + "Some values of the columns specified in 'names_to' are already present + as column names. Either use another value in `names_to` or pm_rename the + following columns: ", + paste(names_to[which(names_to %in% setdiff(names(data), cols))], sep = ", ") + ), + call. = FALSE) + } + if (length(cols) == 0L) { + stop("No columns found for reshaping data.", call. = FALSE) + } + data[["_Row"]] <- as.numeric(rownames(data)) + names_to_2 <- paste(names_to, collapse = "_") + long <- stats::reshape( + as.data.frame(data, stringsAsFactors = FALSE), + varying = cols, + idvar = "_Row", + v.names = values_to, + timevar = names_to_2, + direction = "long" + ) + long <- long[do.call(order, long[, c("_Row", names_to_2)]), ] + long[["_Row"]] <- NULL + long[[names_to_2]] <- cols[long[[names_to_2]]] + if (length(names_to) > 1) { + if (is.null(names_pattern)) { + for (i in seq_along(names_to)) { + new_vals <- unlist(lapply( + strsplit(unique(long[[names_to_2]]), names_sep, fixed = TRUE), + function(x) x[i] + )) + long[[names_to[i]]] <- new_vals + } + } else { + tmp <- regmatches( + unique(long[[names_to_2]]), + regexec(names_pattern, unique(long[[names_to_2]])) + ) + tmp <- as.data.frame(do.call(rbind, tmp), stringsAsFactors = FALSE) + names(tmp) <- c(names_to_2, names_to) + long <- cbind(long, tmp[match(long[[names_to_2]], tmp[[names_to_2]]), -1]) + } + long[[names_to_2]] <- NULL + } + long <- pm_relocate(.data = long, "value", .after = -1) + if (!is.null(names_prefix)) { + if (length(names_to) > 1) { + stop("`names_prefix` only works when `names_to` is of length 1.", call. = FALSE) + } + long[[names_to]] <- gsub(paste0("^", names_prefix), "", long[[names_to]]) + } + if (values_drop_na) { + long <- long[!is.na(long[, values_to]), ] + } + rownames(long) <- NULL + attributes(long)$reshapeLong <- NULL + long +} +pm_pivot_wider <- function( + data, + id_cols = NULL, + values_from = "Value", + names_from = "Name", + names_sep = "_", + names_prefix = "", + names_glue = NULL, + values_fill = NULL, + ... +) { + old_names <- names(data) + names_from <- names(pm_eval_select_pos(data, substitute(names_from))) + values_from <- names(pm_eval_select_pos(data, substitute(values_from))) + variable_attr <- lapply(data, attributes) + if (is.null(id_cols)) { + row_index <- do.call( + paste, + c(data[, !names(data) %in% c(values_from, names_from), drop = FALSE], sep = "_") + ) + if (length(row_index) == 0) row_index <- rep("", nrow(data)) + data[["_Rows"]] <- row_index + id_cols <- "_Rows" + } + current_colnames <- colnames(data) + current_colnames <- current_colnames[current_colnames != "_Rows"] + if (is.null(names_glue)) { + future_colnames <- unique(do.call(paste, c(data[, names_from, drop = FALSE], sep = names_sep))) + } else { + vars <- regmatches(names_glue, gregexpr("\\{\\K[^{}]+(?=\\})", names_glue, perl = TRUE))[[1]] + tmp_data <- unique(data[, vars]) + future_colnames <- unique(apply(tmp_data, 1, function(x) { + tmp_vars <- list() + for (i in seq_along(vars)) { + tmp_vars[[i]] <- x[vars[i]] + } + tmp_colname <- gsub("\\{\\K[^{}]+(?=\\})", "", names_glue, perl = TRUE) + tmp_colname <- gsub("\\{\\}", "%s", tmp_colname) + do.call(sprintf, c(fmt = tmp_colname, tmp_vars)) + })) + } + if (any(future_colnames %in% current_colnames)) { + stop( + paste0( + "Some values of the columns specified in 'names_from' are already present + as column names. Either use `name_prefix` or pm_rename the following columns: ", + paste(current_colnames[which(current_colnames %in% future_colnames)], sep = ", ") + ), + call. = FALSE + ) + } + data$new_time <- do.call(paste, c(data[, names_from, drop = FALSE], sep = "_")) + data[, names_from] <- NULL + wide <- stats::reshape( + as.data.frame(data, stringsAsFactors = FALSE), + v.names = values_from, + idvar = id_cols, + timevar = "new_time", + sep = names_sep, + direction = "wide" + ) + if ("_Rows" %in% names(wide)) wide[["_Rows"]] <- NULL + rownames(wide) <- NULL + if (length(values_from) == 1) { + to_rename <- which(startsWith(names(wide), paste0(values_from, names_sep))) + names(wide)[to_rename] <- future_colnames + } + if (length(values_from) > 1) { + for (i in values_from) { + tmp1 <- wide[, which(!startsWith(names(wide), i))] + tmp2 <- wide[, which(startsWith(names(wide), i))] + wide <- cbind(tmp1, tmp2) + } + } + new_cols <- setdiff(names(wide), old_names) + names(wide)[which(names(wide) %in% new_cols)] <- paste0(names_prefix, new_cols) + if (!is.null(values_fill)) { + if (length(values_fill) == 1) { + if (is.numeric(wide[[new_cols[1]]])) { + if (!is.numeric(values_fill)) { + stop(paste0("`values_fill` must be of type numeric."), call. = FALSE) + } else { + for (i in new_cols) { + wide[[i]] <- replace_na(wide[[i]], replace = values_fill) + } + } + } else if (is.character(wide[[new_cols[1]]])) { + if (!is.character(values_fill)) { + stop(paste0("`values_fill` must be of type character."), call. = FALSE) + } else { + for (i in new_cols) { + wide[[i]] <- replace_na(wide[[i]], replace = values_fill) + } + } + } else if (is.factor(wide[[new_cols[1]]])) { + if (!is.factor(values_fill)) { + stop(paste0("`values_fill` must be of type factor."), call. = FALSE) + } else { + for (i in new_cols) { + wide[[i]] <- replace_na(wide[[i]], replace = values_fill) + } + } + } + } else { + stop("`values_fill` must be of length 1.", call. = FALSE) + } + } + attributes(wide)$reshapeWide <- NULL + for (i in colnames(wide)) { + attributes(wide[[i]]) <- variable_attr[[i]] + } + wide } pm_pull <- function(.data, var = -1) { - var_deparse <- pm_deparse_var(var) - col_names <- colnames(.data) - if (!(var_deparse %in% col_names) & grepl("^[[:digit:]]+L|[[:digit:]]", var_deparse)) { - var <- as.integer(gsub("L", "", var_deparse)) - var <- pm_if_else(var < 1L, rev(col_names)[abs(var)], col_names[var]) - } else if (var_deparse %in% col_names) { - var <- var_deparse - } - .data[, var, drop = TRUE] -} -pm_set_names <- function(object = nm, nm) { - names(object) <- nm - object -} - -pm_vec_head <- function(x, pm_n = 6L, ...) { - stopifnot(length(pm_n) == 1L) - pm_n <- if (pm_n < 0L) max(length(x) + pm_n, 0L) else min(pm_n, length(x)) - x[seq_len(pm_n)] + var_list <- as.list(seq_along(.data)) + names(var_list) <- names(.data) + .var <- eval(substitute(var), var_list) + if (.var < 0L) .var <- length(var_list) + .var + 1L + .data[[.var]] } pm_relocate <- function(.data, ..., .before = NULL, .after = NULL) { - pm_check_is_dataframe(.data) + pm_relocate.data.frame(.data, ..., .before = NULL, .after = NULL) +} +pm_relocate.data.frame <- function(.data, ..., .before = NULL, .after = NULL) { data_names <- colnames(.data) col_pos <- pm_select_positions(.data, ...) - - .before <- pm_deparse_var(.before) - .after <- pm_deparse_var(.after) + if (!missing(.before) && !is.null(.before)) .before <- colnames(.data)[pm_eval_select_pos(.data, substitute(.before))] + if (!missing(.after) && !is.null(.after)) .after <- colnames(.data)[pm_eval_select_pos(.data, substitute(.after))] has_before <- !is.null(.before) has_after <- !is.null(.after) - if (has_before && has_after) { stop("You must supply only one of `.before` and `.after`") } else if (has_before) { - pm_where <- min(match(.before, data_names)) - col_pos <- c(setdiff(col_pos, pm_where), pm_where) + where <- min(match(.before, data_names)) + col_pos <- c(setdiff(col_pos, where), where) } else if (has_after) { - pm_where <- max(match(.after, data_names)) - col_pos <- c(pm_where, setdiff(col_pos, pm_where)) + where <- max(match(.after, data_names)) + col_pos <- c(where, setdiff(col_pos, where)) } else { - pm_where <- 1L - col_pos <- union(col_pos, pm_where) + where <- 1L + col_pos <- union(col_pos, where) } - lhs <- setdiff(seq(1L, pm_where - 1L), col_pos) - rhs <- setdiff(seq(pm_where + 1L, ncol(.data)), col_pos) + lhs <- setdiff(seq(1L, where - 1L), col_pos) + rhs <- setdiff(seq(where + 1L, ncol(.data)), col_pos) col_pos <- unique(c(lhs, col_pos, rhs)) col_pos <- col_pos[col_pos <= length(data_names)] - res <- .data[col_pos] - if (pm_has_groups(.data)) res <- pm_set_groups(res, pm_get_groups(.data)) + if (pm_has_groups(.data)) res <- pm_groups_set(res, pm_group_vars(.data)) res } pm_rename <- function(.data, ...) { - pm_check_is_dataframe(.data) - new_names <- names(pm_deparse_dots(...)) + pm_rename.data.frame(.data, ...) +} +pm_rename.data.frame <- function(.data, ...) { + new_names <- names(pm_dotdotdot(...)) if (length(new_names) == 0L) { warning("You didn't give any new names") return(.data) @@ -671,88 +1265,31 @@ pm_rename <- function(.data, ...) { colnames(.data)[col_pos] <- new_names .data } - -pm_rename_with <- function(.data, .fn, .cols = pm_everything(), ...) { +pm_rename_with <- function(.data, .fn, .cols = everything(), ...) { + pm_rename_with.data.frame(.data, .fn, .cols = everything(), ...) +} +pm_rename_with.data.frame <- function(.data, .fn, .cols = everything(), ...) { if (!is.function(.fn)) stop("`", .fn, "` is not a valid function") - grouped <- inherits(.data, "grouped_data") + grouped <- is.grouped_df(.data) if (grouped) grp_pos <- which(colnames(.data) %in% pm_group_vars(.data)) - col_pos <- eval(substitute(pm_select_positions(.data, .cols))) + col_pos <- pm_eval_select_pos(.data = .data, .pm_group_pos = TRUE, .cols = substitute(.cols)) cols <- colnames(.data)[col_pos] new_cols <- .fn(cols, ...) if (any(duplicated(new_cols))) { stop("New names must be unique however `", deparse(substitute(.fn)), "` returns duplicate column names") } colnames(.data)[col_pos] <- new_cols - if (grouped) .data <- pm_set_groups(.data, colnames(.data)[grp_pos]) + if (grouped) .data <- pm_groups_set(.data, colnames(.data)[grp_pos]) .data } -pm_replace_with <- function(x, i, val, arg_name) { - if (is.null(val)) { - return(x) - } - pm_check_length(val, x, arg_name) - pm_check_type(val, x, arg_name) - pm_check_class(val, x, arg_name) - i[is.na(i)] <- FALSE - if (length(val) == 1L) { - x[i] <- val - } else { - x[i] <- val[i] - } - x -} - -pm_check_length <- function(x, y, arg_name) { - length_x <- length(x) - length_y <- length(y) - if (all(length_x %in% c(1L, length_y))) { - return() - } - if (length_y == 1) { - stop(arg_name, " must be length 1, not ", paste(length_x, sep = ", ")) - } else { - stop(arg_name, " must be length ", length_y, " or 1, not ", length_x) - } -} - -pm_check_type <- function(x, y, arg_name) { - x_type <- typeof(x) - y_type <- typeof(y) - if (identical(x_type, y_type)) { - return() - } - stop(arg_name, " must be `", y_type, "`, not `", x_type, "`") -} - -pm_check_class <- function(x, y, arg_name) { - if (!is.object(x)) { - return() - } - exp_classes <- class(y) - out_classes <- class(x) - if (identical(out_classes, exp_classes)) { - return() - } - stop(arg_name, " must have class `", exp_classes, "`, not class `", out_classes, "`") -} -pm_rownames_to_column <- function(.data, var = "rowname") { - pm_check_is_dataframe(.data) - col_names <- colnames(.data) - if (var %in% col_names) stop("Column `", var, "` already exists in `.data`") - .data[, var] <- rownames(.data) - rownames(.data) <- NULL - .data[, c(var, setdiff(col_names, var))] -} -pm_starts_with <- function(match, ignore.case = TRUE, vars = pm_peek_vars()) { +pm_starts_with <- function(match, ignore.case = TRUE, vars = peek_vars()) { grep(pattern = paste0("^", paste0(match, collapse = "|^")), x = vars, ignore.case = ignore.case) } - -pm_ends_with <- function(match, ignore.case = TRUE, vars = pm_peek_vars()) { +pm_ends_with <- function(match, ignore.case = TRUE, vars = peek_vars()) { grep(pattern = paste0(paste0(match, collapse = "$|"), "$"), x = vars, ignore.case = ignore.case) } - -pm_contains <- function(match, ignore.case = TRUE, vars = pm_peek_vars()) { - pm_matches <- lapply( +pm_contains <- function(match, ignore.case = TRUE, vars = peek_vars()) { + matches <- lapply( match, function(x) { if (isTRUE(ignore.case)) { @@ -766,14 +1303,12 @@ pm_contains <- function(match, ignore.case = TRUE, vars = pm_peek_vars()) { } } ) - unique(unlist(pm_matches)) + unique(unlist(matches)) } - -pm_matches <- function(match, ignore.case = TRUE, perl = FALSE, vars = pm_peek_vars()) { +pm_matches <- function(match, ignore.case = TRUE, perl = FALSE, vars = peek_vars()) { grep(pattern = match, x = vars, ignore.case = ignore.case, perl = perl) } - -pm_num_range <- function(prefix, range, width = NULL, vars = pm_peek_vars()) { +pm_num_range <- function(prefix, range, width = NULL, vars = peek_vars()) { if (!is.null(width)) { range <- sprintf(paste0("%0", width, "d"), range) } @@ -785,8 +1320,7 @@ pm_num_range <- function(prefix, range, width = NULL, vars = pm_peek_vars()) { x[!is.na(x)] } } - -pm_all_of <- function(x, vars = pm_peek_vars()) { +pm_all_of <- function(x, vars = peek_vars()) { x_ <- !x %in% vars if (any(x_)) { which_x_ <- which(x_) @@ -799,62 +1333,83 @@ pm_all_of <- function(x, vars = pm_peek_vars()) { which(vars %in% x) } } - -pm_any_of <- function(x, vars = pm_peek_vars()) { +pm_any_of <- function(x, vars = peek_vars()) { which(vars %in% x) } - -pm_everything <- function(vars = pm_peek_vars()) { +pm_everything <- function(vars = peek_vars()) { seq_along(vars) } - -pm_last_col <- function(offset = 0L, vars = pm_peek_vars()) { - if (!pm_is_wholenumber(offset)) stop("`offset` must be an integer") - pm_n <- length(vars) - if (offset && pm_n <= offset) { +pm_last_col <- function(offset = 0L, vars = peek_vars()) { + if (!is_wholenumber(offset)) stop("`offset` must be an integer") + n <- length(vars) + if (offset && n <= offset) { stop("`offset` must be smaller than the number of `vars`") - } else if (pm_n == 0) { - stop("Can't pm_select last column when `vars` is empty") + } else if (n == 0) { + stop("Can't select last column when `vars` is empty") } else { - pm_n - offset + n - offset } } - pm_peek_vars <- function() { pm_select_env$get_colnames() } -pm_select_positions <- function(.data, ..., .group_pos = FALSE) { +pm_select_positions <- function(.data, ..., .pm_group_pos = FALSE) { cols <- pm_dotdotdot(...) + cols <- cols[!vapply(cols, is.null, FALSE)] + if (length(cols) == 0L) return(integer(0)) pm_select_env$setup(.data = .data, calling_frame = parent.frame(2L)) on.exit(pm_select_env$clean(), add = TRUE) data_names <- pm_select_env$get_colnames() pos <- unlist(lapply(cols, pm_eval_expr)) + if (length(pos) > 0) pos <- if (pos[1] >= 0) pos[pos >= 0] else pos[pos < 0] col_len <- pm_select_env$get_ncol() if (any(pos > col_len)) { oor <- pos[which(pos > col_len)] oor_len <- length(oor) stop( - "Location", if (oor_len > 1) "s " else " ", pm_collapse_to_sentence(oor), + "Location", if (oor_len > 1) "s " else " ", collapse_to_sentence(oor), if (oor_len > 1) " don't " else " doesn't ", "exist. There are only ", col_len, " columns." ) } - if (isTRUE(.group_pos)) { - pm_groups <- pm_get_groups(.data) - missing_groups <- !(pm_groups %in% cols) + if (isTRUE(.pm_group_pos)) { + groups <- pm_group_vars(.data) + missing_groups <- !(groups %in% cols) if (any(missing_groups)) { - sel_missing <- pm_groups[missing_groups] - message("Adding missing grouping variables: `", paste(sel_missing, collapse = "`, `"), "`") + sel_missing <- groups[missing_groups] readd <- match(sel_missing, data_names) - if (length(names(cols)) > 0L) names(readd) <- data_names[readd] - pos <- c(readd, pos) + readd <- readd[!(readd %in% pos)] + if (length(readd) > 0L) { + message("Adding missing grouping variables: `", paste(sel_missing, collapse = "`, `"), "`") + if (length(names(cols)) > 0L) names(readd) <- data_names[readd] + pos <- c(readd, pos) + } } } - pos[!duplicated(pos)] + if (length(data_names[pos]) != 0L) { + nm_pos <- names(pos) + if (any(nm_pos == "")) { + names(pos)[which(nm_pos == "")] <- data_names[pos[which(nm_pos == "")]] + } + if (is.null(nm_pos)) { + names(pos) <- data_names[abs(pos)] + } + } + uniques <- pos[!duplicated(pos)] + res_nms <- data_names[uniques] + res <- match(res_nms, data_names) + if (length(res) != 0L) { + res <- if (length(setdiff(names(uniques), data_names)) > 0L) { + if (all(uniques > 0L)) structure(res, .Names = names(uniques)) else structure(res, .Names = res_nms) + } else { + structure(res, .Names = res_nms) + } + } + res } - pm_eval_expr <- function(x) { type <- typeof(x) - switch(type, + switch( + type, "integer" = x, "double" = as.integer(x), "character" = pm_select_char(x), @@ -863,19 +1418,17 @@ pm_eval_expr <- function(x) { stop("Expressions of type <", typeof(x), "> cannot be evaluated for use when subsetting.") ) } - pm_select_char <- function(expr) { pos <- match(expr, pm_select_env$get_colnames()) - if (is.na(pos)) stop("Column `", expr, "` does not exist") + if (any(is.na(pos))) stop("The following columns do not exist:\n ", paste(expr, collapse = "\n ")) pos } - pm_select_symbol <- function(expr) { expr_name <- as.character(expr) - if (grepl("^is\\.", expr_name) && pm_is_function(expr)) { + if (grepl("^is\\.", expr_name) && is.function(expr)) { stop( - "Predicate functions must be wrapped in `pm_where()`.\n\n", - sprintf(" data %%pm>%% pm_select(pm_where(%s))", expr_name) + "Predicate functions must be wrapped in `where()`.\n\n", + sprintf(" data %%pm>%% select(where(%s))", expr_name) ) } res <- try(pm_select_char(as.character(expr)), silent = TRUE) @@ -887,27 +1440,36 @@ pm_select_symbol <- function(expr) { } res } - pm_eval_call <- function(x) { type <- as.character(x[[1]]) - switch(type, + if (length(type) > 1L) { + type <- "pm_context" + } + switch( + type, `:` = pm_select_seq(x), `!` = pm_select_negate(x), `-` = pm_select_minus(x), `c` = pm_select_c(x), `(` = pm_select_bracket(x), - pm_select_pm_context(x) + `&` = pm_select_and(x), + pm_select_context(x) ) } - +pm_select_and <- function(expr) { + exprs <- as.list(expr)[-1] + res <- do.call(c, lapply(exprs, pm_eval_expr)) + if (all(res > 0) || all(res < 0)) return(unique(res)) + res <- res[!(duplicated(abs(res)) | duplicated(abs(res), fromLast = TRUE))] + res[res > 0] +} pm_select_seq <- function(expr) { x <- pm_eval_expr(expr[[2]]) y <- pm_eval_expr(expr[[3]]) x:y } - pm_select_negate <- function(expr) { - x <- if (pm_is_negated_colon(expr)) { + x <- if (is_negated_colon(expr)) { expr <- call(":", expr[[2]][[2]], expr[[2]][[3]][[2]]) pm_eval_expr(expr) } else { @@ -915,30 +1477,24 @@ pm_select_negate <- function(expr) { } x * -1L } - pm_is_negated_colon <- function(expr) { expr[[1]] == "!" && length(expr[[2]]) > 1L && expr[[2]][[1]] == ":" && expr[[2]][[3]][[1]] == "!" } - pm_select_minus <- function(expr) { x <- pm_eval_expr(expr[[2]]) x * -1L } - pm_select_c <- function(expr) { lst_expr <- as.list(expr) lst_expr[[1]] <- NULL unlist(lapply(lst_expr, pm_eval_expr)) } - pm_select_bracket <- function(expr) { pm_eval_expr(expr[[2]]) } - -pm_select_pm_context <- function(expr) { +pm_select_context <- function(expr) { eval(expr, envir = pm_select_env$.data) } - pm_select_env <- new.env() pm_select_env$setup <- function(.data, calling_frame) { pm_select_env$.data <- .data @@ -950,144 +1506,123 @@ pm_select_env$clean <- function() { pm_select_env$get_colnames <- function() colnames(pm_select_env$.data) pm_select_env$get_nrow <- function() nrow(pm_select_env$.data) pm_select_env$get_ncol <- function() ncol(pm_select_env$.data) - +pm_eval_select_pos <- function(.data, .cols, .pm_group_pos = FALSE) { + do.call(pm_select_positions, list(.data = .data, .cols, .pm_group_pos = .pm_group_pos)) +} pm_select <- function(.data, ...) { - col_pos <- pm_select_positions(.data, ..., .group_pos = TRUE) - map_names <- names(col_pos) - map_names_length <- nchar(map_names) - if (any(map_names_length == 0L)) { - no_new_names <- which(map_names_length == 0L) - map_names[no_new_names] <- colnames(.data)[no_new_names] - } + col_pos <- pm_select_positions(.data, ..., .pm_group_pos = TRUE) res <- .data[, col_pos, drop = FALSE] - if (!is.null(map_names) && all(col_pos > 0L)) colnames(res) <- map_names - if (pm_has_groups(.data)) res <- pm_set_groups(res, pm_get_groups(.data)) + if (length(names(res)) != 0) colnames(res) <- names(col_pos) + if (pm_has_groups(.data)) res <- pm_groups_set(res, pm_group_vars(.data)) res } -pm_summarise <- function(.data, ...) { - pm_check_is_dataframe(.data) - if ("grouped_data" %in% class(.data)) { - pm_summarise.grouped_data(.data, ...) - } else { - pm_summarise.default(.data, ...) - } +pm_summarise <- function(.data, ..., .groups = NULL) { + if ("grouped_df" %in% class(.data)) pm_summarise.grouped_df(.data, ..., .groups = NULL) else pm_summarise.data.frame(.data, ..., .groups = NULL) } - -pm_summarise.default <- function(.data, ...) { +pm_summarise.data.frame <- function(.data, ..., .groups = NULL) { fns <- pm_dotdotdot(...) pm_context$setup(.data) on.exit(pm_context$clean(), add = TRUE) - pm_groups_exist <- pm_has_groups(pm_context$.data) - if (pm_groups_exist) { - group <- unique(pm_context$.data[, pm_get_groups(pm_context$.data), drop = FALSE]) + groups_exist <- pm_context$is_grouped() + if (groups_exist) { + group <- unique(pm_context$get_columns(pm_group_vars(pm_context$.data))) } - res <- lapply( - fns, - function(x) { - x_res <- do.call(with, list(pm_context$.data, x)) - if (is.list(x_res)) I(x_res) else x_res + if (is_empty_list(fns)) { + if (groups_exist) return(group) else return(data.frame()) + } + res <- vector(mode = "list", length = length(fns)) + pm_eval_env <- c(as.list(pm_context$.data), vector(mode = "list", length = length(fns))) + new_pos <- seq(length(pm_context$.data) + 1L, length(pm_eval_env), 1L) + for (i in seq_along(fns)) { + pm_eval_env[[new_pos[i]]] <- do.call(with, list(pm_eval_env, fns[[i]])) + nms <- if (!is_named(pm_eval_env[[new_pos[i]]])) { + if (!is.null(names(fns)[[i]])) names(fns)[[i]] else deparse(fns[[i]]) + } else { + NULL } - ) - res <- as.data.frame(res) - fn_names <- names(fns) - colnames(res) <- if (is.null(fn_names)) fns else fn_names - if (pm_groups_exist) res <- cbind(group, res, row.names = NULL) + if (!is.null(nms)) names(pm_eval_env)[[new_pos[i]]] <- nms + res[[i]] <- build_data_frame(pm_eval_env[[new_pos[i]]], nms = nms) + } + res <- do.call(cbind, res) + if (groups_exist) res <- cbind(group, res, row.names = NULL) res } - -pm_summarise.grouped_data <- function(.data, ...) { - pm_groups <- pm_get_groups(.data) +pm_summarise.grouped_df <- function(.data, ..., .groups = NULL) { + if (!is.null(.groups)) { + .groups <- match.arg(arg = .groups, choices = c("drop", "drop_last", "keep"), several.ok = FALSE) + } + groups <- pm_group_vars(.data) res <- pm_apply_grouped_function("pm_summarise", .data, drop = TRUE, ...) - res <- res[do.call(order, lapply(pm_groups, function(x) res[, x])), ] + res <- res[pm_arrange_rows(res, pm_as_symbols(groups)), , drop = FALSE] + verbose <- pm_summarise_verbose(.groups) + if (is.null(.groups)) { + all_one <- as.data.frame(table(res[, groups])) + all_one <- all_one[all_one$Freq != 0, ] + .groups <- if (all(all_one$Freq == 1)) "drop_last" else "keep" + } + if (.groups == "drop_last") { + n <- length(groups) + if (n > 1) { + if (verbose) pm_summarise_inform(groups[-n]) + res <- pm_groups_set(res, groups[-n], pm_group_by_drop_default(.data)) + } + } else if (.groups == "keep") { + if (verbose) pm_summarise_inform(groups) + res <- pm_groups_set(res, groups, pm_group_by_drop_default(.data)) + } else if (.groups == "drop") { + attr(res, "groups") <- NULL + } rownames(res) <- NULL res } - +pm_summarise_inform <- function(new_groups) { + message(sprintf( + "`pm_summarise()` has grouped output by %s. You can override using the `.groups` argument.", + paste0("'", new_groups, "'", collapse = ", ") + )) +} +pm_summarise_verbose <- function(.groups) { + is.null(.groups) && + !identical(getOption("poorman.summarise.inform"), FALSE) +} pm_transmute <- function(.data, ...) { - pm_check_is_dataframe(.data) - if ("grouped_data" %in% class(.data)) { - pm_transmute.grouped_data(.data, ...) - } else { - pm_transmute.default(.data, ...) - } + if ("grouped_df" %in% class(.data)) pm_transmute.grouped_df(.data, ...) else pm_transmute.data.frame(.data, ...) } - -pm_transmute.default <- function(.data, ...) { - conditions <- pm_deparse_dots(...) - mutated <- pm_mutate(.data, ...) - mutated[, names(conditions), drop = FALSE] +pm_transmute.data.frame <- function(.data, ...) { + pm_mutate(.data, ..., .keep = "none") } - -pm_transmute.grouped_data <- function(.data, ...) { +pm_transmute.grouped_df <- function(.data, ...) { rows <- rownames(.data) res <- pm_apply_grouped_function("pm_transmute", .data, drop = TRUE, ...) res[rows, ] } -pm_dotdotdot <- function(..., .impute_names = FALSE) { - dots <- eval(substitute(alist(...))) - if (isTRUE(.impute_names)) { - pm_deparse_dots <- lapply(dots, deparse) - names_dots <- names(dots) - unnamed <- if (is.null(names_dots)) rep(TRUE, length(dots)) else nchar(names_dots) == 0L - names(dots)[unnamed] <- pm_deparse_dots[unnamed] +pm_ungroup <- function(x, ...) { + if ("grouped_df" %in% class(x)) pm_ungroup.grouped_df(x, ...) else pm_ungroup.data.frame(x, ...) +} +pm_ungroup.data.frame <- function(x, ...) { + rm_groups <- pm_deparse_dots(...) + groups <- pm_group_vars(x) + if (length(rm_groups) == 0L) rm_groups <- groups + x <- pm_groups_set(x, groups[!(groups %in% rm_groups)]) + if (length(attr(x, "groups")) == 0L) { + attr(x, "groups") <- NULL + class(x) <- class(x)[!(class(x) %in% "grouped_df")] } - dots + x } - -pm_deparse_dots <- function(...) { - vapply(substitute(...()), deparse, NA_character_) +pm_ungroup.grouped_df <- function(x, ...) { + pm_ungroup.data.frame(...) } - -pm_deparse_var <- function(var, frame = if (is.null(pm_eval_env$env)) parent.frame() else pm_eval_env$env) { - sub_var <- eval(substitute(substitute(var)), frame) - if (is.symbol(sub_var)) var <- as.character(sub_var) - var -} - pm_check_is_dataframe <- function(.data) { parent_fn <- all.names(sys.call(-1L), max.names = 1L) if (!is.data.frame(.data)) stop(parent_fn, " must be given a data.frame") invisible() } - -pm_is_wholenumber <- function(x) { - x %% 1L == 0L -} - pm_seq2 <- function(from, to) { if (length(from) != 1) stop("`from` must be length one") if (length(to) != 1) stop("`to` must be length one") if (from > to) integer() else seq.int(from, to) } - -pm_is_function <- function(x, frame) { - res <- tryCatch( - is.function(x), - warning = function(w) FALSE, - error = function(e) FALSE - ) - if (isTRUE(res)) { - return(res) - } - res <- tryCatch( - is.function(eval(x)), - warning = function(w) FALSE, - error = function(e) FALSE - ) - if (isTRUE(res)) { - return(res) - } - res <- tryCatch( - is.function(eval(as.symbol(deparse(substitute(x))))), - warning = function(w) FALSE, - error = function(e) FALSE - ) - if (isTRUE(res)) { - return(res) - } - FALSE -} - pm_collapse_to_sentence <- function(x) { len_x <- length(x) if (len_x == 0L) { @@ -1100,8 +1635,28 @@ pm_collapse_to_sentence <- function(x) { paste(paste(x[1:(len_x - 1)], collapse = ", "), x[len_x], sep = " and ") } } +pm_build_data_frame <- function(x, nms = NULL) { + res <- if (is.atomic(x)) { + data.frame(x) + } else if (is.list(x) && !is.data.frame(x)) { + structure(list(x = x), class = "data.frame", row.names = c(NA, -1L)) + } else if (is.data.frame(x)) { + x + } + if (!is.null(nms)) colnames(res) <- nms + res +} +pm_is_nested <- function(lst) vapply(lst, function(x) inherits(x[1L], "list"), FALSE) +pm_squash <- function(lst) { + do.call(c, lapply(lst, function(x) if (is.list(x) && !is.data.frame(x)) squash(x) else list(x))) +} +pm_flatten <- function(lst) { + nested <- pm_is_nested(lst) + res <- c(lst[!nested], unlist(lst[nested], recursive = FALSE)) + if (sum(nested)) Recall(res) else return(res) +} pm_where <- function(fn) { - if (!pm_is_function(fn)) { + if (!is.function(fn)) { stop(pm_deparse_var(fn), " is not a valid predicate function.") } preds <- unlist(lapply( @@ -1111,50 +1666,8 @@ pm_where <- function(fn) { }, fn )) - if (!is.logical(preds)) stop("`pm_where()` must be used with functions that return `TRUE` or `FALSE`.") + if (!is.logical(preds)) stop("`where()` must be used with functions that return `TRUE` or `FALSE`.") data_cols <- pm_select_env$get_colnames() cols <- data_cols[preds] which(data_cols %in% cols) } - -pm_cume_dist <- function(x) { - rank(x, ties.method = "max", na.last = "keep") / sum(!is.na(x)) -} - -pm_dense_rank <- function(x) { - match(x, sort(unique(x))) -} - -pm_min_rank <- function(x) { - rank(x, ties.method = "min", na.last = "keep") -} - -pm_ntile <- function(x = pm_row_number(), pm_n) { - if (!missing(x)) x <- pm_row_number(x) - len <- length(x) - sum(is.na(x)) - pm_n <- as.integer(floor(pm_n)) - if (len == 0L) { - rep(NA_integer_, length(x)) - } else { - pm_n_larger <- as.integer(len %% pm_n) - pm_n_smaller <- as.integer(pm_n - pm_n_larger) - size <- len / pm_n - larger_size <- as.integer(ceiling(size)) - smaller_size <- as.integer(floor(size)) - larger_threshold <- larger_size * pm_n_larger - bins <- pm_if_else( - x <= larger_threshold, - (x + (larger_size - 1L)) / larger_size, - (x + (-larger_threshold + smaller_size - 1L)) / smaller_size + pm_n_larger - ) - as.integer(floor(bins)) - } -} - -pm_percent_rank <- function(x) { - (pm_min_rank(x) - 1) / (sum(!is.na(x)) - 1) -} - -pm_row_number <- function(x) { - if (missing(x)) seq_len(pm_n()) else rank(x, ties.method = "first", na.last = "keep") -} diff --git a/R/aaa_helper_functions.R b/R/aaa_helper_functions.R index 41e9669c..efcda3dd 100755 --- a/R/aaa_helper_functions.R +++ b/R/aaa_helper_functions.R @@ -1425,13 +1425,15 @@ case_when <- function(...) { } -# dplyr implementations ---- +# dplyr/tidyr implementations ---- -# take {dplyr} functions if available, and the slower {poorman} functions otherwise -if (pkg_is_available("dplyr", also_load = FALSE)) { +# take {dplyr} and {tidyr} functions if available, and the slower {poorman} functions otherwise +if (pkg_is_available("dplyr", min_version = "1.0.0", also_load = FALSE)) { `%>%` <- import_fn("%>%", "dplyr", error_on_fail = FALSE) + across <- import_fn("across", "dplyr", error_on_fail = FALSE) anti_join <- import_fn("anti_join", "dplyr", error_on_fail = FALSE) arrange <- import_fn("arrange", "dplyr", error_on_fail = FALSE) + bind_rows <- import_fn("bind_rows", "dplyr", error_on_fail = FALSE) count <- import_fn("count", "dplyr", error_on_fail = FALSE) desc <- import_fn("desc", "dplyr", error_on_fail = FALSE) distinct <- import_fn("distinct", "dplyr", error_on_fail = FALSE) @@ -1443,22 +1445,22 @@ if (pkg_is_available("dplyr", also_load = FALSE)) { inner_join <- import_fn("inner_join", "dplyr", error_on_fail = FALSE) lag <- import_fn("lag", "dplyr", error_on_fail = FALSE) left_join <- import_fn("left_join", "dplyr", error_on_fail = FALSE) + mutate <- import_fn("mutate", "dplyr", error_on_fail = FALSE) n_distinct <- import_fn("n_distinct", "dplyr", error_on_fail = FALSE) pull <- import_fn("pull", "dplyr", error_on_fail = FALSE) rename <- import_fn("rename", "dplyr", error_on_fail = FALSE) right_join <- import_fn("right_join", "dplyr", error_on_fail = FALSE) - row_number <- import_fn("row_number", "dplyr", error_on_fail = FALSE) select <- import_fn("select", "dplyr", error_on_fail = FALSE) semi_join <- import_fn("semi_join", "dplyr", error_on_fail = FALSE) summarise <- import_fn("summarise", "dplyr", error_on_fail = FALSE) ungroup <- import_fn("ungroup", "dplyr", error_on_fail = FALSE) - mutate <- import_fn("mutate", "dplyr", error_on_fail = FALSE) - bind_rows <- import_fn("bind_rows", "dplyr", error_on_fail = FALSE) where <- import_fn("where", "dplyr", error_on_fail = FALSE) } else { `%>%` <- `%pm>%` + across <- pm_across anti_join <- pm_anti_join arrange <- pm_arrange + bind_rows <- pm_bind_rows count <- pm_count desc <- pm_desc distinct <- pm_distinct @@ -1470,62 +1472,22 @@ if (pkg_is_available("dplyr", also_load = FALSE)) { inner_join <- pm_inner_join lag <- pm_lag left_join <- pm_left_join + mutate <- pm_mutate n_distinct <- pm_n_distinct pull <- pm_pull rename <- pm_rename right_join <- pm_right_join - row_number <- pm_row_number select <- pm_select semi_join <- pm_semi_join summarise <- pm_summarise ungroup <- pm_ungroup - mutate <- function(.data, ...) { - # pm_mutate is buggy, use this simple alternative - dots <- list(...) - for (i in seq_len(length(dots))) { - .data[, names(dots)[i]] <- dots[[i]] - } - .data - } - bind_rows <- function(..., fill = NA) { - # this AMAZING code is from ChatGPT when I asked for a base R dplyr::bind_rows alternative - dfs <- list(...) - all_cols <- unique(unlist(lapply(dfs, colnames))) - mat_list <- lapply(dfs, function(x) { - mat <- matrix(NA, nrow = nrow(x), ncol = length(all_cols)) - colnames(mat) <- all_cols - mat[, colnames(x)] <- as.matrix(x) - mat - }) - mat <- do.call(rbind, mat_list) - as.data.frame(mat, stringsAsFactors = FALSE) - } - where <- function(fn) { - # adapted from https://github.com/nathaneastwood/poorman/blob/52eb6947e0b4430cd588976ed8820013eddf955f/R/where.R#L17-L32 - if (!is.function(fn)) { - stop_("`", deparse(substitute(fn)), "()` is not a valid predicate function.") - } - df <- pm_select_env$.data - cols <- pm_select_env$get_colnames() - if (is.null(df)) { - df <- get_current_data("where", call = FALSE) - cols <- colnames(df) - } - preds <- unlist(lapply( - df, - function(x, fn) { - do.call("fn", list(x)) - }, - fn - )) - if (!is.logical(preds)) stop_("`where()` must be used with functions that return `TRUE` or `FALSE`.") - data_cols <- cols - cols <- data_cols[preds] - which(data_cols %in% cols) - } - + where <- pm_where +} +if (pkg_is_available("tidyr", min_version = "1.0.0", also_load = FALSE)) { + pivot_longer <- import_fn("pivot_longer", "tidyr", error_on_fail = FALSE) +} else { + pivot_longer <- pm_pivot_longer } - # Faster data.table implementations ---- diff --git a/R/ab_selectors.R b/R/ab_selectors.R index 6337dc5f..37b86157 100755 --- a/R/ab_selectors.R +++ b/R/ab_selectors.R @@ -49,7 +49,6 @@ #' @return (internally) a [character] vector of column names, with additional class `"ab_selector"` #' @export #' @inheritSection AMR Reference Data Publicly Available - #' @examples #' # `example_isolates` is a data set available in the AMR package. #' # See ?example_isolates. diff --git a/R/bug_drug_combinations.R b/R/bug_drug_combinations.R index 43835c5c..a6bbe91e 100755 --- a/R/bug_drug_combinations.R +++ b/R/bug_drug_combinations.R @@ -85,10 +85,7 @@ bug_drug_combinations <- function(x, } # use dplyr and tidyr if they are available, they are much faster! - if (pkg_is_available("dplyr", min_version = "1.0.0", also_load = FALSE) && - pkg_is_available("tidyr", min_version = "1.0.0", also_load = FALSE)) { - across <- import_fn("across", "dplyr") - pivot_longer <- import_fn("pivot_longer", "tidyr") + if (identical(pivot_longer, import_fn("pivot_longer", "tidyr", error_on_fail = FALSE))) { out <- x %>% ungroup() %>% mutate(mo = FUN(ungroup(x)[, col_mo, drop = TRUE], ...)) %>% diff --git a/R/eucast_rules.R b/R/eucast_rules.R index 3183067c..ef581776 100755 --- a/R/eucast_rules.R +++ b/R/eucast_rules.R @@ -926,7 +926,7 @@ eucast_rules <- function(x, # Print overview ---------------------------------------------------------- if (isTRUE(info) || isTRUE(verbose)) { verbose_info <- x.bak %>% - mutate(row = row_number()) %>% + mutate(row = seq_len(NROW(x.bak))) %>% select(`.rowid`, row) %>% right_join(verbose_info, by = c(".rowid" = "rowid") diff --git a/data-raw/antibiograms.Rmd b/data-raw/antibiograms.Rmd new file mode 100644 index 00000000..db02966b --- /dev/null +++ b/data-raw/antibiograms.Rmd @@ -0,0 +1,62 @@ +--- +title: "Generating antibiograms with the AMR package" +author: "AMR package developers" +date: "`r Sys.Date()`" +output: html_document +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set(echo = TRUE, message = FALSE) +library(AMR) +``` + +This is an example R Markdown file to show the use of `antibiogram()` of the AMR package. + +For starters, this is what our `example_isolates` data set looks like: + +```{r} +example_isolates +``` + + +### Traditional Antibiogram + +```{r trad} +print( + antibiogram(example_isolates, + antibiotics = c(aminoglycosides(), carbapenems())) +) +``` + +### Combined Antibiogram + +```{r comb} +print( + antibiogram(example_isolates, + antibiotics = c("TZP", "TZP+TOB", "TZP+GEN")) +) +``` + +### Syndromic Antibiogram + +```{r synd} +print( + antibiogram(example_isolates, + antibiotics = c(aminoglycosides(), carbapenems()), + syndromic_group = "ward") +) +``` + +### Weighted-Incidence Syndromic Combination Antibiogram (WISCA) + +```{r wisca} +print( + antibiogram(example_isolates, + antibiotics = c("AMC", "AMC+CIP", "TZP", "TZP+TOB"), + mo_transform = "gramstain", + minimum = 10, # this should be >= 30, but now just as example + syndromic_group = ifelse(example_isolates$age >= 65 & + example_isolates$gender == "M", + "WISCA Group 1", "WISCA Group 2")) +) +``` diff --git a/data-raw/antibiograms.html b/data-raw/antibiograms.html new file mode 100644 index 00000000..def4fd20 --- /dev/null +++ b/data-raw/antibiograms.html @@ -0,0 +1,848 @@ + + + + + + + + + + + + + + + +Generating antibiograms with the AMR package + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + +

This is an example R Markdown file to show the use of +antibiogram() of the AMR package.

+

For starters, this is what our example_isolates data set +looks like:

+
example_isolates
+
## # A tibble: 2,000 × 46
+##    date       patient   age gender ward     mo           PEN   OXA   FLC   AMX  
+##    <date>     <chr>   <dbl> <chr>  <chr>    <mo>         <sir> <sir> <sir> <sir>
+##  1 2002-01-02 A77334     65 F      Clinical B_ESCHR_COLI R     NA    NA    NA   
+##  2 2002-01-03 A77334     65 F      Clinical B_ESCHR_COLI R     NA    NA    NA   
+##  3 2002-01-07 067927     45 F      ICU      B_STPHY_EPDR R     NA    R     NA   
+##  4 2002-01-07 067927     45 F      ICU      B_STPHY_EPDR R     NA    R     NA   
+##  5 2002-01-13 067927     45 F      ICU      B_STPHY_EPDR R     NA    R     NA   
+##  6 2002-01-13 067927     45 F      ICU      B_STPHY_EPDR R     NA    R     NA   
+##  7 2002-01-14 462729     78 M      Clinical B_STPHY_AURS R     NA    S     R    
+##  8 2002-01-14 462729     78 M      Clinical B_STPHY_AURS R     NA    S     R    
+##  9 2002-01-16 067927     45 F      ICU      B_STPHY_EPDR R     NA    R     NA   
+## 10 2002-01-17 858515     79 F      ICU      B_STPHY_EPDR R     NA    S     NA   
+## # … with 1,990 more rows, and 36 more variables: AMC <sir>, AMP <sir>,
+## #   TZP <sir>, CZO <sir>, FEP <sir>, CXM <sir>, FOX <sir>, CTX <sir>,
+## #   CAZ <sir>, CRO <sir>, GEN <sir>, TOB <sir>, AMK <sir>, KAN <sir>,
+## #   TMP <sir>, SXT <sir>, NIT <sir>, FOS <sir>, LNZ <sir>, CIP <sir>,
+## #   MFX <sir>, VAN <sir>, TEC <sir>, TCY <sir>, TGC <sir>, DOX <sir>,
+## #   ERY <sir>, CLI <sir>, AZM <sir>, IPM <sir>, MEM <sir>, MTR <sir>,
+## #   CHL <sir>, COL <sir>, MUP <sir>, RIF <sir>
+
+

Traditional Antibiogram

+
print(
+  antibiogram(example_isolates,
+              antibiotics = c(aminoglycosides(), carbapenems()))
+)
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Pathogen (N min-max)AMKGENIPMKANMEMTOB
CoNS (43-309)0865205222
E. coli (0-462)10098100NA10097
E. faecalis (0-39)001000NA0
K. pneumoniae (0-58)NA90100NA10090
P. aeruginosa (17-30)NA100NA0NA100
P. mirabilis (0-34)NA9494NANA94
S. aureus (2-233)NA99NANANA98
S. epidermidis (8-163)079NA0NA51
S. hominis (3-80)NA92NANANA85
S. pneumoniae (11-117)00NA0NA0
+
+
+

Combined Antibiogram

+
print(
+  antibiogram(example_isolates,
+              antibiotics = c("TZP", "TZP+TOB", "TZP+GEN"))
+)
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Pathogen (N min-max)TZPTZP + GENTZP + TOB
CoNS (29-274)3097NA
E. coli (416-461)9410099
K. pneumoniae (53-58)899393
P. aeruginosa (27-30)NA100100
P. mirabilis (27-34)NA100100
S. aureus (7-231)NA100100
S. epidermidis (5-128)NA100100
S. hominis (0-74)NA100100
S. pneumoniae (112-112)100100100
+
+
+

Syndromic Antibiogram

+
print(
+  antibiogram(example_isolates,
+              antibiotics = c(aminoglycosides(), carbapenems()),
+              syndromic_group = "ward")
+)
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Syndromic GroupPathogen (N min-max)AMKGENIPMKANMEMTOB
ClinicalCoNS (23-205)NA8957NA5726
ICUCoNS (10-73)NA79NANANANA
OutpatientCoNS (3-31)NA84NANANANA
ClinicalE. coli (0-299)10098100NA10098
ICUE. coli (0-137)10099100NA10096
ClinicalK. pneumoniae (0-51)NA92100NA10092
ClinicalP. mirabilis (0-30)NA100NANANA100
ClinicalS. aureus (2-150)NA99NANANA97
ICUS. aureus (0-66)NA100NANANANA
ClinicalS. epidermidis (4-79)NA82NANANA55
ICUS. epidermidis (4-75)NA72NANANA41
ClinicalS. hominis (1-45)NA96NANANA94
ClinicalS. pneumoniae (5-78)00NA0NA0
ICUS. pneumoniae (5-30)00NA0NA0
+
+
+

Weighted-Incidence Syndromic Combination Antibiogram (WISCA)

+
print(
+  antibiogram(example_isolates,
+              antibiotics = c("AMC", "AMC+CIP", "TZP", "TZP+TOB"),
+              mo_transform = "gramstain",
+              minimum = 10, # this should be >= 30, but now just as example
+              syndromic_group = ifelse(example_isolates$age >= 65 &
+                                         example_isolates$gender == "M",
+                                       "WISCA Group 1", "WISCA Group 2"))
+)
+ ++++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Syndromic GroupPathogen (N min-max)AMCAMC + CIPTZPTZP + TOB
WISCA Group 1Gram-negative (261-285)76958999
WISCA Group 2Gram-negative (380-442)76988898
WISCA Group 1Gram-positive (123-406)76898195
WISCA Group 2Gram-positive (222-732)76898895
+
+ + + + +
+ + + + + + + + + + + + + + + diff --git a/data-raw/poorman_prepend.R b/data-raw/poorman_prepend.R index 6b55d897..ec8bf91d 100644 --- a/data-raw/poorman_prepend.R +++ b/data-raw/poorman_prepend.R @@ -32,11 +32,11 @@ # Source file: data-raw/reproduction_of_poorman.R # ------------------------------------------------ -# poorman: a package to replace all dplyr functions with base R so we can lose dependency on dplyr. +# {poorman}: a package to replace all dplyr functions with base R so we can lose dependency on {dplyr}. # These functions were downloaded from https://github.com/nathaneastwood/poorman, # from this commit: https://github.com/nathaneastwood/poorman/tree/{commit}. # -# All functions are prefixed with 'pm_' to make it obvious that they are dplyr substitutes. +# All functions are prefixed with 'pm_' to make it obvious that they are {dplyr} substitutes. # # All code below was released under MIT license, that permits 'free of charge, to any person obtaining a # copy of the software and associated documentation files (the "Software"), to deal in the Software diff --git a/data-raw/reproduction_of_poorman.R b/data-raw/reproduction_of_poorman.R index 7ff3a3ab..cf1a8645 100644 --- a/data-raw/reproduction_of_poorman.R +++ b/data-raw/reproduction_of_poorman.R @@ -1,22 +1,28 @@ # get complete filenames of all R files in the GitHub repository of nathaneastwood/poorman -commit <- "52eb6947e0b4430cd588976ed8820013eddf955f" +library(magrittr) +commit <- "3cc0a9920b1eb559dd166f548561244189586b3a" files <- xml2::read_html(paste0("https://github.com/nathaneastwood/poorman/tree/", commit, "/R")) %>% rvest::html_nodes("a") %>% rvest::html_attr("href") +files <- files[files %like% "/blob/.*R$"] # get full URLs of all raw R files files <- sort(paste0("https://raw.githubusercontent.com", gsub("blob/", "", files[files %like% "/R/.*.R$"]))) # remove files with only pkg specific code files <- files[files %unlike% "(zzz|init)[.]R$"] # also, there's a lot of functions we don't use -files <- files[files %unlike% "(slice|glimpse|recode|replace_na|coalesce)[.]R$"] +files <- files[files %unlike% "/(between|coalesce|cumulative|fill|glimpse|gluestick|group_cols|na_if|near|nest_by|poorman-package|print|recode|reconstruct|replace_na|replace_with|rownames|slice|union_all|unite|window_rank|with_groups)[.]R$"] # add our prepend file, containing info about the source of the data -intro <- readLines("data-raw/poorman_prepend.R") +intro <- readLines("data-raw/poorman_prepend.R") %>% + # add commit to intro part + gsub("{commit}", commit, ., fixed = TRUE) %>% + # add date to intro part + gsub("{date}", trimws(format(Sys.Date(), "%e %B %Y")), ., fixed = TRUE) # copyright info: -copyright <- paste0("# ", readLines("https://raw.githubusercontent.com/nathaneastwood/poorman/master/LICENSE")) +copyright <- paste0("# ", readLines(paste0("https://raw.githubusercontent.com/nathaneastwood/poorman/", commit, "/LICENSE"))) # read all contents to a character vector contents <- character(0) @@ -25,31 +31,35 @@ sapply(files, function(file) { contents <<- c(contents, readLines(file)) invisible() }) -contents <- c( - intro, - copyright, - "", - contents -) # remove lines starting with "#'" and NULL and write to file contents <- contents[!grepl("^(#'|NULL|\"_PACKAGE)", contents)] +contents.bak <- contents +# grouped attributes same as dplyr +contents <- gsub("grouped_data", "grouped_df", contents, fixed = TRUE) # now make it independent on UseMethod, since we will not export these functions -contents <- gsub('UseMethod[(]"(.*?)"[)]', - 'if ("grouped_data" %in% class(.data)) {||| \\1.grouped_data(.data, ...)||| } else {||| \\1.default(.data, ...)||| }', - paste(contents, collapse = "|||"), - perl = TRUE -) %>% - # add commit to intro part - gsub("{commit}", commit, ., fixed = TRUE) %>% - # add date to intro part - gsub("{date}", format(Sys.Date(), "%e %B %Y"), ., fixed = TRUE) %>% - strsplit(split = "|||", fixed = TRUE) %>% - unlist() %>% - # add "pm_" as prefix to all functions - gsub("^([a-z_.]+) <- function", "pm_\\1 <- function", .) - +has_usemethods <- gsub("^([a-z_]+).*", "\\1", contents[which(contents %like% "usemethod") - 1]) +for (use in has_usemethods) { + relevant_row <- which(contents %like% paste0("^", use, " <- function")) + 1 + function_call <- trimws(gsub(".*function(.*)\\{.*", "\\1", contents[relevant_row - 1])) + function_call1 <- trimws(gsub("[()]", "", strsplit(function_call, ",")[[1]][1])) + if (any(contents %like% paste0(use, ".grouped_df"))) { + # this function will have methods for data.frame and grouped_df + contents[relevant_row] <- paste0(" if (\"grouped_df\" %in% class(", function_call1, ")) ", use, ".grouped_df", function_call, " else ", use, ".data.frame", function_call) + } else { + # this function will only have data.frame as method + contents[relevant_row] <- paste0(" ", use, ".data.frame", function_call) + } + # add pm_ prefix + contents[relevant_row - 1] <- paste0("pm_", contents[relevant_row - 1]) + +} +# correct for NextMethod +contents <- gsub("NextMethod\\(\"(.*)\"\\)", "\\1.data.frame(...)", contents) +# correct for 'default' method +contents <- gsub(".default <-", ".data.frame <-", contents, fixed = TRUE) +contents <- gsub("pm_group_by_drop.data.frame", "pm_group_by_drop", contents, fixed = TRUE) # now get all those pm_* functions to replace all untransformed function name calls as well new_pm_names <- sort(gsub("pm_(.*?) <-.*", "\\1", contents[grepl("^pm_", contents)])) for (i in seq_len(length(new_pm_names))) { @@ -57,29 +67,39 @@ for (i in seq_len(length(new_pm_names))) { # starting with a space or a straight bracket or an opening parenthesis, ending with nothing or a non-character or a closing parenthesis contents <- gsub(paste0("( |\\[|\\()", new_pm_names[i], "($|[^a-z]|\\))"), paste0("\\1pm_", new_pm_names[i], "\\2"), contents) } - # replace %>% with %pm>% +contents[which(contents %like% "^\\|\\|") - 1] <- paste0(contents[which(contents %like% "^\\|\\|") - 1], " ||") +contents[which(contents %like% "^\\|\\|")] <- gsub("^\\|\\|", "", contents[which(contents %like% "^\\|\\|")]) contents <- gsub("%>%", "%pm>%", contents, fixed = TRUE) # fix for new lines, since n() also existed contents <- gsub("\\pm_n", "\\n", contents, fixed = TRUE) # prefix other functions also with "pm_" contents <- gsub("^([a-z_]+)(\\$|)", "pm_\\1\\2", contents) -# prefix environments -contents <- gsub("eval_env", "pm_eval_env", contents, fixed = TRUE) -contents <- gsub("select_env", "pm_select_env", contents, fixed = TRUE) -contents <- gsub("context", "pm_context", contents, fixed = TRUE) +# prefix environmental objects and functions +contents <- gsub("(eval_env|select_env|select_context|context|dotdotdot|as_symbols|insert_dot|deparse_|groups_set|apply_grouped_function|split_into_groups|calculate_groups|has_groups|eval_select_pos|select_positions|eval_expr|eval_call|add_group_columns|find_used|is_nested|setup_|select_|group_)", "pm_\\1", contents) # now some items are overprefixed contents <- gsub("(pm_)+", "pm_", contents) -# special case for pm_distinct(), we need '.keep_all' to work -contents <- gsub("pm_distinct <- function(.data, ..., .keep_all = FALSE)", "pm_distinct <- function(.data, ...)", contents, fixed = TRUE) -# pm_pull does not correct for tibbles, misses the drop argument -contents[contents == ".data[, var]"] <- ".data[, var, drop = TRUE]" +contents <- gsub("pm_if (\"grouped_df", "if (\"grouped_df", contents, fixed = TRUE) +# remove comments and empty lines +contents <- gsub("#.*", "", contents) +contents <- contents[trimws(contents) != ""] +# fix for their relocate() +contents <- gsub("if (!missing(.before))", "if (!missing(.before) && !is.null(.before))", contents, fixed = TRUE) +contents <- gsub("if (!missing(.after))", "if (!missing(.after) && !is.null(.after))", contents, fixed = TRUE) +contents[which(contents %like% "reshape\\($") + 1] <- gsub("data", "as.data.frame(data, stringsAsFactors = FALSE)", contents[which(contents %like% "reshape\\($") + 1]) +contents <- gsub('pm_relocate(.data = long, values_to, .after = -1)', 'pm_relocate(.data = long, "value", .after = -1)', contents, fixed = TRUE) # who needs US spelling? -contents <- contents[!grepl("summarize", contents)] +contents <- contents[contents %unlike% "summarize"] + +# add intro +contents <- c( + intro, + copyright, + "", + contents +) writeLines(contents, "R/aa_helper_pm_functions.R") -# after this, comment out: -# pm_left_join() since we use a faster version -# pm_group_split() since we don't use it and it relies on R 3.5.0 for the use of ...length(), which is hard to support without C++ code +# note: pm_left_join() will be overwritten by aaa_helper_functions.R, which contains a faster implementation diff --git a/inst/tinytest/test-zzz.R b/inst/tinytest/test-zzz.R index ee30ad30..a951e12a 100644 --- a/inst/tinytest/test-zzz.R +++ b/inst/tinytest/test-zzz.R @@ -68,7 +68,6 @@ import_functions <- c( "read_html" = "xml2", "rename" = "dplyr", "right_join" = "dplyr", - "row_number" = "dplyr", "select" = "dplyr", "semi_join" = "dplyr", "showQuestion" = "rstudioapi", diff --git a/tests/tinytest.R b/tests/tinytest.R index 74a3ce07..2f0bf5ea 100644 --- a/tests/tinytest.R +++ b/tests/tinytest.R @@ -40,11 +40,9 @@ if (tryCatch(isTRUE(AMR:::import_fn("isJob", "rstudioapi")()), error = function( if (AMR:::pkg_is_available("tinytest", also_load = TRUE)) { library(AMR) if (identical(AMR:::import_fn("select", "dplyr"), AMR:::select)) { - print("This test will rely on {dplyr} verbs") - message("This test will rely on {dplyr} verbs") + message("\n\n------------------------------------\nThis test will rely on {dplyr} verbs\n------------------------------------\n\n") } else { - print("This test will rely on {poorman} verbs") - message("This test will rely on {poorman} verbs") + message("\n\n---------------------------------------------------------------------\nThis test will rely on {poorman} verbs (installed state dplyr: ", AMR:::pkg_is_available("dplyr", also_load = FALSE), ")\n---------------------------------------------------------------------\n\n") } # set language set_AMR_locale("English")