2020-09-18 16:05:53 +02:00
|
|
|
# ==================================================================== #
|
|
|
|
# TITLE #
|
2022-10-05 09:12:22 +02:00
|
|
|
# AMR: An R Package for Working with Antimicrobial Resistance Data #
|
2020-09-18 16:05:53 +02:00
|
|
|
# #
|
|
|
|
# SOURCE #
|
|
|
|
# https://github.com/msberends/AMR #
|
|
|
|
# #
|
2022-10-05 09:12:22 +02:00
|
|
|
# CITE AS #
|
|
|
|
# Berends MS, Luz CF, Friedrich AW, Sinha BNM, Albers CJ, Glasner C #
|
|
|
|
# (2022). AMR: An R Package for Working with Antimicrobial Resistance #
|
|
|
|
# Data. Journal of Statistical Software, 104(3), 1-31. #
|
|
|
|
# doi:10.18637/jss.v104.i03 #
|
|
|
|
# #
|
2022-12-27 15:16:15 +01:00
|
|
|
# Developed at the University of Groningen and the University Medical #
|
|
|
|
# Center Groningen in The Netherlands, in collaboration with many #
|
|
|
|
# colleagues from around the world, see our website. #
|
2020-09-18 16:05:53 +02:00
|
|
|
# #
|
|
|
|
# This R package is free software; you can freely use and distribute #
|
|
|
|
# it for both personal and commercial purposes under the terms of the #
|
|
|
|
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
|
|
|
|
# the Free Software Foundation. #
|
|
|
|
# We created this package for both routine data analysis and academic #
|
|
|
|
# research and it was publicly released in the hope that it will be #
|
|
|
|
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
|
2020-10-08 11:16:03 +02:00
|
|
|
# #
|
|
|
|
# Visit our website for the full manual and a complete tutorial about #
|
2021-02-02 23:57:35 +01:00
|
|
|
# how to conduct AMR data analysis: https://msberends.github.io/AMR/ #
|
2020-09-18 16:05:53 +02:00
|
|
|
# ==================================================================== #
|
|
|
|
|
|
|
|
# ------------------------------------------------
|
|
|
|
# THIS FILE WAS CREATED AUTOMATICALLY!
|
|
|
|
# Source file: data-raw/reproduction_of_poorman.R
|
|
|
|
# ------------------------------------------------
|
|
|
|
|
2023-02-08 13:48:06 +01:00
|
|
|
# {poorman}: a package to replace all dplyr functions with base R so we can lose dependency on {dplyr}.
|
2020-09-18 16:05:53 +02:00
|
|
|
# These functions were downloaded from https://github.com/nathaneastwood/poorman,
|
2023-02-08 13:48:06 +01:00
|
|
|
# from this commit: https://github.com/nathaneastwood/poorman/tree/3cc0a9920b1eb559dd166f548561244189586b3a.
|
2020-09-18 16:05:53 +02:00
|
|
|
#
|
2023-02-08 13:48:06 +01:00
|
|
|
# All functions are prefixed with 'pm_' to make it obvious that they are {dplyr} substitutes.
|
2020-09-18 16:05:53 +02:00
|
|
|
#
|
2022-08-28 10:31:50 +02:00
|
|
|
# All code below was released under MIT license, that permits 'free of charge, to any person obtaining a
|
2020-09-18 16:05:53 +02:00
|
|
|
# copy of the software and associated documentation files (the "Software"), to deal in the Software
|
|
|
|
# without restriction, including without limitation the rights to use, copy, modify, merge, publish,
|
2022-08-28 10:31:50 +02:00
|
|
|
# distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
|
2020-09-18 16:05:53 +02:00
|
|
|
# is furnished to do so', given that a copyright notice is given in the software.
|
|
|
|
#
|
2023-02-08 13:48:06 +01:00
|
|
|
# Copyright notice on 8 February 2023, the day this code was downloaded, as found on
|
|
|
|
# https://github.com/nathaneastwood/poorman/blob/3cc0a9920b1eb559dd166f548561244189586b3a/LICENSE:
|
2020-09-18 16:05:53 +02:00
|
|
|
# YEAR: 2020
|
|
|
|
# COPYRIGHT HOLDER: Nathan Eastwood
|
|
|
|
|
2023-02-08 13:48:06 +01:00
|
|
|
pm_across <- function(.cols = everything(), .fns = NULL, ..., .names = NULL) {
|
|
|
|
setup <- pm_setup_across(substitute(.cols), .fns, .names)
|
|
|
|
if (length(setup$names) == 1 && grepl("\\{\\.col\\}|\\{\\.fn\\}", setup$names)) {
|
|
|
|
ref <- setup$names
|
|
|
|
id <- 1
|
|
|
|
fn_names <- unique(names(setup$funs))
|
|
|
|
for (i in seq_along(setup$cols)) {
|
|
|
|
.col <- setup$cols[i]
|
|
|
|
for (j in seq_along(fn_names)) {
|
|
|
|
.fn <- fn_names[j]
|
2023-02-08 16:51:41 +01:00
|
|
|
setup$names[id] <- pm_gluestick(ref)
|
2023-02-08 13:48:06 +01:00
|
|
|
id <- id + 1
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
cols <- setup$cols
|
|
|
|
n_cols <- length(cols)
|
|
|
|
if (n_cols == 0L) return(data.frame())
|
|
|
|
funs <- setup$funs
|
|
|
|
data <- pm_context$get_columns(cols)
|
|
|
|
names <- setup$names
|
|
|
|
if (is.null(funs)) {
|
|
|
|
data <- data.frame(data)
|
|
|
|
if (is.null(names)) {
|
|
|
|
return(data)
|
|
|
|
} else {
|
2023-02-08 16:51:41 +01:00
|
|
|
return(stats::setNames(data, names))
|
2023-02-08 13:48:06 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
n_fns <- length(funs)
|
|
|
|
res <- vector(mode = "list", length = n_fns * n_cols)
|
|
|
|
k <- 1L
|
|
|
|
for (i in seq_len(n_cols)) {
|
|
|
|
pm_context$cur_column <- cols[[i]]
|
|
|
|
col <- data[[i]]
|
|
|
|
for (j in seq_len(n_fns)) {
|
|
|
|
res[[k]] <- funs[[j]](col, ...)
|
|
|
|
k <- k + 1L
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (is.null(names(res))) names(res) <- names
|
|
|
|
as.data.frame(res)
|
|
|
|
}
|
|
|
|
pm_if_any <- function(.cols, .fns = NULL, ..., .names = NULL) {
|
|
|
|
df <- do.call(across, list(.cols = substitute(.cols), .fns = .fns, ..., .names = .names))
|
|
|
|
if (nrow(df) == 0L) return(FALSE)
|
2023-02-08 16:51:41 +01:00
|
|
|
pm_check_if_types(df)
|
2023-02-08 13:48:06 +01:00
|
|
|
Reduce(`|`, df)
|
|
|
|
}
|
|
|
|
pm_if_all <- function(.cols, .fns = NULL, ..., .names = NULL) {
|
|
|
|
df <- do.call(across, list(.cols = substitute(.cols), .fns = .fns, ..., .names = .names))
|
|
|
|
if (nrow(df) == 0L) return(FALSE)
|
2023-02-08 16:51:41 +01:00
|
|
|
pm_check_if_types(df)
|
2023-02-08 13:48:06 +01:00
|
|
|
Reduce(`&`, df)
|
|
|
|
}
|
|
|
|
pm_check_if_types <- function(df) {
|
|
|
|
types <- vapply(df, class, NA_character_)
|
|
|
|
not_logical <- types != "logical"
|
|
|
|
if (any(not_logical)) {
|
|
|
|
stop(
|
|
|
|
"Cannot convert the following columns to <logical>:\n ",
|
|
|
|
paste0(colnames(df)[not_logical], " <", types, "> ", collapse = "\n ")
|
|
|
|
)
|
2020-09-18 16:05:53 +02:00
|
|
|
}
|
|
|
|
}
|
2023-02-08 13:48:06 +01:00
|
|
|
pm_setup_across <- function(.cols, .fns, .names) {
|
|
|
|
cols <- pm_eval_select_pos(.data = pm_context$.data, .cols, .pm_group_pos = FALSE)
|
|
|
|
cols <- pm_context$get_colnames()[cols]
|
|
|
|
if (pm_context$is_grouped()) cols <- setdiff(cols, pm_group_vars(pm_context$.data))
|
|
|
|
funs <- if (is.null(.fns)) NULL else if (!is.list(.fns)) list(.fns) else .fns
|
|
|
|
if (is.null(funs)) return(list(cols = cols, funs = funs, names = .names))
|
|
|
|
f_nms <- names(funs)
|
|
|
|
if (is.null(f_nms) && !is.null(.fns)) names(funs) <- seq_along(funs)
|
|
|
|
if (any(nchar(f_nms) == 0L)) {
|
|
|
|
miss <- which(nchar(f_nms) == 0L)
|
|
|
|
names(funs)[miss] <- miss
|
|
|
|
f_nms <- names(funs)
|
|
|
|
}
|
2023-02-08 16:51:41 +01:00
|
|
|
funs <- lapply(funs, pm_as_function)
|
2023-02-08 13:48:06 +01:00
|
|
|
names <- if (!is.null(.names)) {
|
|
|
|
.names
|
|
|
|
} else {
|
|
|
|
if (length(funs) == 1L && is.null(f_nms)) {
|
|
|
|
cols
|
|
|
|
} else {
|
|
|
|
nms <- do.call(paste, c(rev(expand.grid(names(funs), cols)), sep = "_"))
|
|
|
|
if (length(nms) == 0L) nms <- NULL
|
|
|
|
nms
|
|
|
|
}
|
|
|
|
}
|
|
|
|
list(cols = cols, funs = funs, names = names)
|
2020-09-18 16:05:53 +02:00
|
|
|
}
|
2023-02-08 13:48:06 +01:00
|
|
|
pm_arrange <- function(.data, ...) {
|
|
|
|
pm_arrange.data.frame(.data, ...)
|
|
|
|
}
|
|
|
|
pm_arrange.data.frame <- function(.data, ..., .by_group = FALSE) {
|
|
|
|
dots <- pm_dotdotdot(...)
|
|
|
|
is_grouped <- pm_has_groups(.data)
|
2023-02-08 16:51:41 +01:00
|
|
|
if (isTRUE(.by_group)) dots <- c(pm_groups(.data), dots)
|
2023-02-08 13:48:06 +01:00
|
|
|
rows <- pm_arrange_rows(.data = .data, dots)
|
|
|
|
row_number <- attr(.data, "row.names")
|
|
|
|
out <- .data[rows, , drop = FALSE]
|
|
|
|
if (is.numeric(row_number)) {
|
|
|
|
row.names(out) <- row_number
|
|
|
|
}
|
|
|
|
if (is_grouped) {
|
2023-02-08 16:51:41 +01:00
|
|
|
attr(out, "pm_groups") <- pm_calculate_groups(out, pm_group_vars(out))
|
2023-02-08 13:48:06 +01:00
|
|
|
}
|
|
|
|
out
|
|
|
|
}
|
|
|
|
pm_arrange_rows <- function(.data, dots) {
|
|
|
|
if (length(dots) == 0L) return(seq_len(nrow(.data)))
|
|
|
|
for (i in seq_along(dots)) {
|
|
|
|
tmp <- deparse(dots[[i]])
|
|
|
|
if (startsWith(tmp, "desc(")) {
|
|
|
|
tmp <- gsub("^desc\\(", "-", tmp)
|
|
|
|
tmp <- gsub("\\)$", "", tmp)
|
|
|
|
}
|
|
|
|
dots[[i]] <- parse(text = tmp, keep.source = FALSE)[[1]]
|
|
|
|
}
|
|
|
|
used <- unname(do.call(c, lapply(dots, pm_find_used)))
|
|
|
|
used <- used[used %in% colnames(.data)]
|
|
|
|
for (i in seq_along(dots)) {
|
|
|
|
if (is.character(.data[[used[[i]]]])) {
|
|
|
|
.data[[used[[i]]]] <- factor(.data[[used[[i]]]])
|
|
|
|
}
|
|
|
|
if (is.factor(.data[[used[[i]]]]) &&
|
|
|
|
(startsWith(deparse(dots[[i]]), "desc(") ||
|
|
|
|
startsWith(deparse(dots[[i]]), "-"))) {
|
|
|
|
dots[[i]] <- bquote(-xtfrm(.(as.name(used[[i]]))))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
data <- do.call(pm_transmute, c(list(.data = pm_ungroup(.data)), dots))
|
|
|
|
do.call(order, c(data, list(decreasing = FALSE, na.last = TRUE)))
|
|
|
|
}
|
|
|
|
pm_bind_cols <- function(...) {
|
|
|
|
lsts <- list(...)
|
2023-02-08 16:51:41 +01:00
|
|
|
lsts <- pm_squash(lsts)
|
2023-02-08 13:48:06 +01:00
|
|
|
lsts <- Filter(Negate(is.null), lsts)
|
|
|
|
if (length(lsts) == 0L) return(data.frame())
|
2023-02-08 16:51:41 +01:00
|
|
|
lapply(lsts, function(x) pm_is_df_or_vector(x))
|
2023-02-08 13:48:06 +01:00
|
|
|
lsts <- do.call(cbind, lsts)
|
|
|
|
if (!is.data.frame(lsts)) lsts <- as.data.frame(lsts)
|
|
|
|
lsts
|
|
|
|
}
|
|
|
|
pm_bind_rows <- function(..., .id = NULL) {
|
|
|
|
lsts <- list(...)
|
2023-02-08 16:51:41 +01:00
|
|
|
lsts <- pm_flatten(lsts)
|
2023-02-08 13:48:06 +01:00
|
|
|
lsts <- Filter(Negate(is.null), lsts)
|
2023-02-08 16:51:41 +01:00
|
|
|
lapply(lsts, function(x) pm_is_df_or_vector(x))
|
|
|
|
lapply(lsts, function(x) if (is.atomic(x) && !pm_is_named(x)) stop("Vectors must be named."))
|
2023-02-08 13:48:06 +01:00
|
|
|
if (!missing(.id)) {
|
|
|
|
lsts <- lapply(seq_along(lsts), function(i) {
|
|
|
|
nms <- names(lsts)
|
|
|
|
id_df <- data.frame(id = if (is.null(nms)) as.character(i) else nms[i], stringsAsFactors = FALSE)
|
|
|
|
colnames(id_df) <- .id
|
|
|
|
cbind(id_df, lsts[[i]])
|
|
|
|
})
|
|
|
|
}
|
|
|
|
nms <- unique(unlist(lapply(lsts, names)))
|
|
|
|
lsts <- lapply(
|
|
|
|
lsts,
|
|
|
|
function(x) {
|
|
|
|
if (!is.data.frame(x)) x <- data.frame(as.list(x), stringsAsFactors = FALSE)
|
|
|
|
for (i in nms[!nms %in% names(x)]) x[[i]] <- NA
|
|
|
|
x
|
|
|
|
}
|
|
|
|
)
|
|
|
|
names(lsts) <- NULL
|
|
|
|
do.call(rbind, lsts)
|
|
|
|
}
|
|
|
|
pm_case_when <- function(...) {
|
|
|
|
fs <- list(...)
|
|
|
|
lapply(fs, function(x) if (!inherits(x, "formula")) stop("`case_when()` requires formula inputs."))
|
|
|
|
n <- length(fs)
|
|
|
|
if (n == 0L) stop("No cases provided.")
|
|
|
|
query <- vector("list", n)
|
|
|
|
value <- vector("list", n)
|
|
|
|
default_env <- parent.frame()
|
|
|
|
for (i in seq_len(n)) {
|
|
|
|
query[[i]] <- eval(fs[[i]][[2]], envir = default_env)
|
|
|
|
value[[i]] <- eval(fs[[i]][[3]], envir = default_env)
|
|
|
|
if (!is.logical(query[[i]])) stop(fs[[i]][[2]], " does not return a `logical` vector.")
|
|
|
|
}
|
2023-02-08 16:51:41 +01:00
|
|
|
m <- pm_validate_case_when_length(query, value, fs)
|
2023-02-08 13:48:06 +01:00
|
|
|
out <- value[[1]][rep(NA_integer_, m)]
|
|
|
|
replaced <- rep(FALSE, m)
|
|
|
|
for (i in seq_len(n)) {
|
2023-02-08 16:51:41 +01:00
|
|
|
out <- pm_replace_with(out, query[[i]] & !replaced, value[[i]], NULL)
|
2023-02-08 13:48:06 +01:00
|
|
|
replaced <- replaced | (query[[i]] & !is.na(query[[i]]))
|
|
|
|
}
|
|
|
|
out
|
|
|
|
}
|
|
|
|
pm_validate_case_when_length <- function(query, value, fs) {
|
|
|
|
lhs_lengths <- lengths(query)
|
|
|
|
rhs_lengths <- lengths(value)
|
|
|
|
all_lengths <- unique(c(lhs_lengths, rhs_lengths))
|
|
|
|
if (length(all_lengths) <= 1L) return(all_lengths[[1L]])
|
|
|
|
non_atomic_lengths <- all_lengths[all_lengths != 1L]
|
|
|
|
len <- non_atomic_lengths[[1L]]
|
|
|
|
if (length(non_atomic_lengths) == 1L) return(len)
|
|
|
|
inconsistent_lengths <- non_atomic_lengths[-1L]
|
|
|
|
lhs_problems <- lhs_lengths %in% inconsistent_lengths
|
|
|
|
rhs_problems <- rhs_lengths %in% inconsistent_lengths
|
|
|
|
problems <- lhs_problems | rhs_problems
|
|
|
|
if (any(problems)) {
|
|
|
|
stop(
|
|
|
|
"The following formulas must be length ", len, " or 1, not ",
|
|
|
|
paste(inconsistent_lengths, collapse = ", "), ".\n ",
|
|
|
|
paste(fs[problems], collapse = "\n ")
|
|
|
|
)
|
2020-09-18 16:05:53 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
pm_context <- new.env()
|
|
|
|
pm_context$setup <- function(.data) pm_context$.data <- .data
|
|
|
|
pm_context$get_data <- function() pm_context$.data
|
2023-02-08 13:48:06 +01:00
|
|
|
pm_context$get_columns <- function(cols) pm_context$.data[, cols, drop = FALSE]
|
|
|
|
pm_context$cur_column <- NULL
|
2020-09-18 16:05:53 +02:00
|
|
|
pm_context$get_nrow <- function() nrow(pm_context$.data)
|
|
|
|
pm_context$get_colnames <- function() colnames(pm_context$.data)
|
2023-02-08 13:48:06 +01:00
|
|
|
pm_context$is_grouped <- function() pm_has_groups(pm_context$.data)
|
|
|
|
pm_context$as_env <- function() {
|
|
|
|
if (any(pm_is_nested(pm_context$.data))) {
|
|
|
|
lapply(as.list(pm_context$.data), function(x) if (is.data.frame(x[[1]])) x[[1]] else x)
|
|
|
|
} else {
|
|
|
|
pm_context$.data
|
|
|
|
}
|
|
|
|
}
|
|
|
|
pm_context$pm_group_env <- NULL
|
|
|
|
pm_context$clean <- function() {
|
|
|
|
rm(list = c(".data"), envir = pm_context)
|
|
|
|
if (!is.null(pm_context$cur_column)) rm(list = c("cur_column"), envir = pm_context)
|
|
|
|
}
|
2020-09-18 16:05:53 +02:00
|
|
|
pm_n <- function() {
|
2023-02-08 16:51:41 +01:00
|
|
|
pm_check_context("`n()`", pm_context$.data)
|
2020-09-18 16:05:53 +02:00
|
|
|
pm_context$get_nrow()
|
|
|
|
}
|
|
|
|
pm_cur_data <- function() {
|
2023-02-08 16:51:41 +01:00
|
|
|
pm_check_context("`cur_data()`", pm_context$.data)
|
2020-09-18 16:05:53 +02:00
|
|
|
data <- pm_context$get_data()
|
2023-02-08 13:48:06 +01:00
|
|
|
data[, !(colnames(data) %in% pm_group_vars(data)), drop = FALSE]
|
|
|
|
}
|
|
|
|
pm_cur_data_all <- function() {
|
2023-02-08 16:51:41 +01:00
|
|
|
pm_check_context("`cur_data_all()`", pm_context$.data)
|
2023-02-08 13:48:06 +01:00
|
|
|
pm_ungroup(pm_context$get_data())
|
2020-09-18 16:05:53 +02:00
|
|
|
}
|
|
|
|
pm_cur_group <- function() {
|
2023-02-08 16:51:41 +01:00
|
|
|
pm_check_context("`cur_group()`", pm_context$.data)
|
2020-09-18 16:05:53 +02:00
|
|
|
data <- pm_context$get_data()
|
2023-02-08 13:48:06 +01:00
|
|
|
res <- data[1L, pm_group_vars(data), drop = FALSE]
|
2020-09-18 16:05:53 +02:00
|
|
|
rownames(res) <- NULL
|
|
|
|
res
|
|
|
|
}
|
2023-02-08 16:51:41 +01:00
|
|
|
pm_cur_group_id <- function() {
|
|
|
|
pm_check_context("`cur_group_id()`", pm_context$.data)
|
2020-09-18 16:05:53 +02:00
|
|
|
data <- pm_context$get_data()
|
2023-02-08 13:48:06 +01:00
|
|
|
res <- data[1L, pm_group_vars(data), drop = FALSE]
|
2023-02-08 16:51:41 +01:00
|
|
|
details <- pm_get_group_details(data)
|
2023-02-08 13:48:06 +01:00
|
|
|
details[, ".pm_group_id"] <- seq_len(nrow(details))
|
|
|
|
res <- suppressMessages(semi_join(details, res))
|
|
|
|
res[, ".pm_group_id"]
|
|
|
|
}
|
2023-02-08 16:51:41 +01:00
|
|
|
pm_cur_group_rows <- function() {
|
|
|
|
pm_check_context("`cur_group_rows()`", pm_context$.data)
|
2020-09-18 16:05:53 +02:00
|
|
|
data <- pm_context$get_data()
|
2023-02-08 13:48:06 +01:00
|
|
|
res <- data[1L, pm_group_vars(data), drop = FALSE]
|
2023-02-08 16:51:41 +01:00
|
|
|
res <- suppressMessages(semi_join(pm_get_group_details(data), res))
|
2020-09-18 16:05:53 +02:00
|
|
|
unlist(res[, ".rows"])
|
|
|
|
}
|
2023-02-08 13:48:06 +01:00
|
|
|
pm_cur_column <- function() {
|
2023-02-08 16:51:41 +01:00
|
|
|
pm_check_context("`cur_column()`", pm_context$cur_column, "`across`")
|
2023-02-08 13:48:06 +01:00
|
|
|
pm_context$cur_column
|
|
|
|
}
|
2023-02-08 16:51:41 +01:00
|
|
|
pm_check_context <- function(fn, pm_context, name = NULL) {
|
2023-02-08 13:48:06 +01:00
|
|
|
if (is.null(pm_context)) {
|
|
|
|
stop(fn, " must only be used inside ", if (is.null(name)) "poorman verbs" else name)
|
2020-09-18 16:05:53 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
pm_count <- function(x, ..., wt = NULL, sort = FALSE, name = NULL) {
|
2023-02-08 16:51:41 +01:00
|
|
|
pm_groups <- pm_group_vars(x)
|
2020-09-18 16:05:53 +02:00
|
|
|
if (!missing(...)) x <- pm_group_by(x, ..., .add = TRUE)
|
|
|
|
wt <- pm_deparse_var(wt)
|
2023-02-08 16:51:41 +01:00
|
|
|
res <- do.call(pm_tally, list(x, wt, sort, name))
|
|
|
|
if (length(pm_groups) > 0L) res <- do.call(pm_group_by, list(res, as.name(pm_groups)))
|
2020-09-18 16:05:53 +02:00
|
|
|
res
|
|
|
|
}
|
|
|
|
pm_tally <- function(x, wt = NULL, sort = FALSE, name = NULL) {
|
2023-02-08 16:51:41 +01:00
|
|
|
name <- pm_check_name(x, name)
|
2020-09-18 16:05:53 +02:00
|
|
|
wt <- pm_deparse_var(wt)
|
2023-02-08 16:51:41 +01:00
|
|
|
res <- do.call(pm_summarise, stats::setNames(list(x, pm_tally_n(x, wt)), c(".data", name)))
|
2020-09-18 16:05:53 +02:00
|
|
|
res <- pm_ungroup(res)
|
2023-02-08 13:48:06 +01:00
|
|
|
if (isTRUE(sort)) res <- do.call(pm_arrange, list(res, call("desc", as.name(name))))
|
2020-09-18 16:05:53 +02:00
|
|
|
rownames(res) <- NULL
|
|
|
|
res
|
|
|
|
}
|
|
|
|
pm_add_count <- function(x, ..., wt = NULL, sort = FALSE, name = NULL) {
|
2023-02-08 16:51:41 +01:00
|
|
|
name <- pm_check_name(x, name)
|
2020-09-18 16:05:53 +02:00
|
|
|
row_names <- rownames(x)
|
|
|
|
wt <- pm_deparse_var(wt)
|
|
|
|
if (!missing(...)) x <- pm_group_by(x, ..., .add = TRUE)
|
2023-02-08 16:51:41 +01:00
|
|
|
res <- do.call(pm_add_tally, list(x, wt, sort, name))
|
2020-09-18 16:05:53 +02:00
|
|
|
res[row_names, ]
|
|
|
|
}
|
|
|
|
pm_add_tally <- function(x, wt = NULL, sort = FALSE, name = NULL) {
|
|
|
|
wt <- pm_deparse_var(wt)
|
2023-02-08 16:51:41 +01:00
|
|
|
n <- pm_tally_n(x, wt)
|
|
|
|
name <- pm_check_name(x, name)
|
|
|
|
res <- do.call(pm_mutate, stats::setNames(list(x, n), c(".data", name)))
|
2020-09-18 16:05:53 +02:00
|
|
|
if (isTRUE(sort)) {
|
2023-02-08 13:48:06 +01:00
|
|
|
do.call(pm_arrange, list(res, call("desc", as.name(name))))
|
2020-09-18 16:05:53 +02:00
|
|
|
} else {
|
|
|
|
res
|
|
|
|
}
|
|
|
|
}
|
|
|
|
pm_tally_n <- function(x, wt) {
|
2023-02-08 13:48:06 +01:00
|
|
|
if (is.null(wt) && "n" %in% colnames(x)) {
|
|
|
|
message("Using `n` as weighting variable")
|
|
|
|
wt <- "n"
|
2020-09-18 16:05:53 +02:00
|
|
|
}
|
|
|
|
pm_context$setup(.data = x)
|
|
|
|
on.exit(pm_context$clean(), add = TRUE)
|
|
|
|
if (is.null(wt)) {
|
2023-02-08 13:48:06 +01:00
|
|
|
call("n")
|
2020-09-18 16:05:53 +02:00
|
|
|
} else {
|
|
|
|
call("sum", as.name(wt), na.rm = TRUE)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
pm_check_name <- function(df, name) {
|
|
|
|
if (is.null(name)) {
|
2023-02-08 13:48:06 +01:00
|
|
|
if ("n" %in% colnames(df)) {
|
2020-09-18 16:05:53 +02:00
|
|
|
stop(
|
2023-02-08 13:48:06 +01:00
|
|
|
"Column 'n' is already present in output\n",
|
2020-09-18 16:05:53 +02:00
|
|
|
"* Use `name = \"new_name\"` to pick a new name"
|
|
|
|
)
|
|
|
|
}
|
2023-02-08 13:48:06 +01:00
|
|
|
return("n")
|
2020-09-18 16:05:53 +02:00
|
|
|
}
|
|
|
|
if (!is.character(name) || length(name) != 1) {
|
|
|
|
stop("`name` must be a single string")
|
|
|
|
}
|
|
|
|
name
|
|
|
|
}
|
|
|
|
pm_desc <- function(x) -xtfrm(x)
|
2023-02-08 13:48:06 +01:00
|
|
|
pm_distinct <- function(.data, ..., .keep_all = FALSE) {
|
|
|
|
if ("grouped_df" %in% class(.data)) pm_distinct.grouped_df(.data, ..., .keep_all = FALSE) else pm_distinct.data.frame(.data, ..., .keep_all = FALSE)
|
2020-09-18 16:05:53 +02:00
|
|
|
}
|
2023-02-08 13:48:06 +01:00
|
|
|
pm_distinct.data.frame <- function(.data, ..., .keep_all = FALSE) {
|
|
|
|
if (ncol(.data) == 0L) return(.data[1, ])
|
|
|
|
cols <- pm_dotdotdot(...)
|
2020-09-18 16:05:53 +02:00
|
|
|
col_names <- names(cols)
|
|
|
|
col_len <- length(cols)
|
|
|
|
if (is.null(col_names) && col_len > 0L) names(cols) <- cols
|
|
|
|
if (col_len == 0L) {
|
|
|
|
res <- .data
|
|
|
|
} else {
|
2023-02-08 13:48:06 +01:00
|
|
|
mut <- pm_mutate_df(.data, ...)
|
|
|
|
res <- mut$data
|
2020-09-18 16:05:53 +02:00
|
|
|
col_names <- names(cols)
|
|
|
|
res <- if (!is.null(col_names)) {
|
|
|
|
zero_names <- nchar(col_names) == 0L
|
|
|
|
if (any(zero_names)) {
|
|
|
|
names(cols)[zero_names] <- cols[zero_names]
|
|
|
|
col_names <- names(cols)
|
|
|
|
}
|
2023-02-08 13:48:06 +01:00
|
|
|
suppressMessages(select(res, col_names))
|
2020-09-18 16:05:53 +02:00
|
|
|
} else {
|
2023-02-08 13:48:06 +01:00
|
|
|
suppressMessages(select(res, cols))
|
2020-09-18 16:05:53 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
res <- unique(res)
|
|
|
|
if (isTRUE(.keep_all)) {
|
|
|
|
res <- cbind(res, .data[rownames(res), setdiff(colnames(.data), colnames(res)), drop = FALSE])
|
|
|
|
}
|
|
|
|
common_cols <- c(intersect(colnames(.data), colnames(res)), setdiff(col_names, colnames(.data)))
|
2023-02-08 13:48:06 +01:00
|
|
|
if (is.numeric(attr(res, "row.names"))) {
|
|
|
|
row.names(res) <- seq_len(nrow(res))
|
|
|
|
}
|
2020-09-18 16:05:53 +02:00
|
|
|
if (length(common_cols) > 0L) res[, common_cols, drop = FALSE] else res
|
|
|
|
}
|
2023-02-08 13:48:06 +01:00
|
|
|
pm_distinct.grouped_df <- function(.data, ..., .keep_all = FALSE) {
|
2020-09-18 16:05:53 +02:00
|
|
|
pm_apply_grouped_function("pm_distinct", .data, drop = TRUE, ..., .keep_all = .keep_all)
|
|
|
|
}
|
2023-02-08 13:48:06 +01:00
|
|
|
pm_dotdotdot <- function(..., .impute_names = FALSE) {
|
|
|
|
dots <- eval(substitute(alist(...)))
|
|
|
|
if (isTRUE(.impute_names)) {
|
|
|
|
pm_deparse_dots <- lapply(dots, deparse)
|
|
|
|
names_dots <- names(dots)
|
|
|
|
unnamed <- if (is.null(names_dots)) rep(TRUE, length(dots)) else nchar(names_dots) == 0L
|
|
|
|
names(dots)[unnamed] <- pm_deparse_dots[unnamed]
|
2020-09-18 16:05:53 +02:00
|
|
|
}
|
2023-02-08 13:48:06 +01:00
|
|
|
dots
|
2020-09-18 16:05:53 +02:00
|
|
|
}
|
2023-02-08 13:48:06 +01:00
|
|
|
pm_deparse_dots <- function(...) {
|
|
|
|
vapply(substitute(...()), deparse, NA_character_)
|
|
|
|
}
|
|
|
|
pm_deparse_var <- function(var, frame = if (is.null(pm_eval_env$env)) parent.frame() else pm_eval_env$env) {
|
|
|
|
sub_var <- eval(substitute(substitute(var)), frame)
|
|
|
|
if (is.symbol(sub_var)) var <- as.character(sub_var)
|
|
|
|
var
|
|
|
|
}
|
|
|
|
pm_eval_env <- new.env()
|
|
|
|
pm_filter <- function(.data, ..., .preserve = FALSE) {
|
|
|
|
if ("grouped_df" %in% class(.data)) pm_filter.grouped_df(.data, ..., .preserve = FALSE) else pm_filter.data.frame(.data, ..., .preserve = FALSE)
|
|
|
|
}
|
|
|
|
pm_filter.data.frame <- function(.data, ..., .preserve = FALSE) {
|
2020-09-18 16:05:53 +02:00
|
|
|
conditions <- pm_dotdotdot(...)
|
2023-02-08 13:48:06 +01:00
|
|
|
if (length(conditions) == 0L) return(.data)
|
2023-02-08 16:51:41 +01:00
|
|
|
pm_check_filter(conditions)
|
2020-09-18 16:05:53 +02:00
|
|
|
cond_class <- vapply(conditions, typeof, NA_character_)
|
2023-02-08 13:48:06 +01:00
|
|
|
cond_class <- cond_class[!cond_class %in% c("language", "logical")]
|
|
|
|
if (length(cond_class) > 0L) stop("Conditions must be logical vectors")
|
2020-09-18 16:05:53 +02:00
|
|
|
pm_context$setup(.data)
|
|
|
|
on.exit(pm_context$clean(), add = TRUE)
|
|
|
|
pm_eval_env$env <- parent.frame()
|
|
|
|
on.exit(rm(list = "env", envir = pm_eval_env), add = TRUE)
|
|
|
|
rows <- lapply(
|
|
|
|
conditions,
|
|
|
|
function(cond, frame) eval(cond, pm_context$.data, frame),
|
|
|
|
frame = pm_eval_env$env
|
|
|
|
)
|
|
|
|
rows <- Reduce("&", rows)
|
|
|
|
.data[rows & !is.na(rows), ]
|
|
|
|
}
|
2023-02-08 13:48:06 +01:00
|
|
|
pm_filter.grouped_df <- function(.data, ..., .preserve = FALSE) {
|
2020-09-18 16:05:53 +02:00
|
|
|
rows <- rownames(.data)
|
|
|
|
res <- pm_apply_grouped_function("pm_filter", .data, drop = TRUE, ...)
|
2023-02-08 13:48:06 +01:00
|
|
|
res <- res[rows[rows %in% rownames(res)], ]
|
2023-02-08 16:51:41 +01:00
|
|
|
pm_groups <- pm_group_vars(.data)
|
2023-02-08 13:48:06 +01:00
|
|
|
pre_filtered_groups <- pm_group_data(.data)
|
2023-02-08 16:51:41 +01:00
|
|
|
post_filtered_groups <- pm_calculate_groups(res, pm_groups)
|
2023-02-08 13:48:06 +01:00
|
|
|
if (!(!.preserve && isTRUE(attr(pre_filtered_groups, ".drop")))) {
|
2023-02-08 16:51:41 +01:00
|
|
|
filtered_groups <- anti_join(pre_filtered_groups, post_filtered_groups, by = pm_groups)
|
|
|
|
filtered_groups <- filtered_groups[, pm_groups, drop = FALSE]
|
2023-02-08 13:48:06 +01:00
|
|
|
filtered_groups[[".rows"]] <- rep(list(integer()), length.out = nrow(filtered_groups))
|
|
|
|
post_filtered_groups <- bind_rows(post_filtered_groups, filtered_groups)
|
2023-02-08 16:51:41 +01:00
|
|
|
ordered <- do.call(pm_arrange_rows, list(post_filtered_groups, pm_as_symbols(pm_groups)))
|
2023-02-08 13:48:06 +01:00
|
|
|
post_filtered_groups <- post_filtered_groups[ordered, ]
|
2020-09-18 16:05:53 +02:00
|
|
|
}
|
2023-02-08 16:51:41 +01:00
|
|
|
attr(res, "pm_groups") <- post_filtered_groups
|
2023-02-08 13:48:06 +01:00
|
|
|
res
|
2020-09-18 16:05:53 +02:00
|
|
|
}
|
2023-02-08 13:48:06 +01:00
|
|
|
pm_check_filter <- function(conditions) {
|
2023-02-08 16:51:41 +01:00
|
|
|
named <- pm_have_name(conditions)
|
2023-02-08 13:48:06 +01:00
|
|
|
for (i in which(named)) {
|
|
|
|
if (!is.logical(conditions[[i]])) {
|
|
|
|
stop(
|
|
|
|
sprintf("Problem with `pm_filter()` input `..%s`.\n", i),
|
|
|
|
sprintf("Input `..%s` is named.\n", i),
|
|
|
|
"This usually means that you've used `=` instead of `==`.\n",
|
|
|
|
sprintf("Did you mean `%s == %s`?", names(conditions)[[i]], conditions[[i]])
|
|
|
|
)
|
|
|
|
}
|
2020-09-18 16:05:53 +02:00
|
|
|
}
|
|
|
|
}
|
2023-02-08 16:51:41 +01:00
|
|
|
pm_gluestick <- function(fmt, src = parent.frame(), open = "{", close = "}", eval = TRUE) {
|
|
|
|
nchar_open <- nchar(open)
|
|
|
|
nchar_close <- nchar(close)
|
|
|
|
stopifnot(exprs = {
|
|
|
|
is.character(fmt)
|
|
|
|
length(fmt) == 1L
|
|
|
|
is.character(open)
|
|
|
|
length(open) == 1L
|
|
|
|
nchar_open > 0L
|
|
|
|
is.character(close)
|
|
|
|
length(close) == 1
|
|
|
|
nchar_close > 0
|
|
|
|
})
|
|
|
|
open <- gsub("(.)", "\\\\\\1", open)
|
|
|
|
close <- gsub("(.)", "\\\\\\1", close)
|
|
|
|
re <- paste0(open, ".*?", close)
|
|
|
|
matches <- gregexpr(re, fmt)
|
|
|
|
exprs <- regmatches(fmt, matches)[[1]]
|
|
|
|
exprs <- substr(exprs, nchar_open + 1L, nchar(exprs) - nchar_close)
|
|
|
|
fmt_sprintf <- gsub(re, "%s", fmt)
|
|
|
|
fmt_sprintf <- gsub("%(?!s)", "%%", fmt_sprintf, perl = TRUE)
|
|
|
|
args <- if (eval) {
|
|
|
|
lapply(exprs, function(expr) eval(parse(text = expr), envir = src))
|
|
|
|
} else {
|
|
|
|
unname(mget(exprs, envir = as.environment(src)))
|
|
|
|
}
|
|
|
|
do.call(sprintf, c(list(fmt_sprintf), args))
|
|
|
|
}
|
2023-02-08 13:48:06 +01:00
|
|
|
pm_group_by <- function(.data, ..., .add = FALSE, .drop = pm_group_by_drop_default(.data)) {
|
|
|
|
pm_group_by.data.frame(.data, ..., .add = FALSE, .drop = pm_group_by_drop_default(.data))
|
2020-09-18 16:05:53 +02:00
|
|
|
}
|
2023-02-08 13:48:06 +01:00
|
|
|
pm_group_by.data.frame <- function(.data, ..., .add = FALSE, .drop = pm_group_by_drop_default(.data)) {
|
|
|
|
vars <- pm_dotdotdot(..., .impute_names = TRUE)
|
|
|
|
if (all(vapply(vars, is.null, FALSE))) {
|
|
|
|
res <- pm_groups_set(.data, NULL)
|
|
|
|
class(res) <- class(res)[!(class(res) %in% "grouped_df")]
|
|
|
|
return(res)
|
|
|
|
}
|
|
|
|
new_cols <- pm_add_group_columns(.data, vars)
|
|
|
|
res <- new_cols$data
|
2023-02-08 16:51:41 +01:00
|
|
|
pm_groups <- new_cols$pm_groups
|
|
|
|
if (isTRUE(.add)) pm_groups <- union(pm_group_vars(.data), pm_groups)
|
|
|
|
unknown <- !(pm_groups %in% colnames(res))
|
|
|
|
if (any(unknown)) stop("Invalid pm_groups: ", pm_groups[unknown])
|
|
|
|
if (length(pm_groups) > 0L) {
|
|
|
|
res <- pm_groups_set(res, pm_groups, .drop)
|
2023-02-08 13:48:06 +01:00
|
|
|
class(res) <- union("grouped_df", class(res))
|
2020-09-18 16:05:53 +02:00
|
|
|
}
|
|
|
|
res
|
|
|
|
}
|
2023-02-08 13:48:06 +01:00
|
|
|
pm_group_by_drop_default <- function(.tbl) {
|
|
|
|
if ("grouped_df" %in% class(.tbl)) pm_group_by_drop_default.grouped_df(.tbl) else pm_group_by_drop_default.data.frame(.tbl)
|
|
|
|
}
|
|
|
|
pm_group_by_drop_default.data.frame <- function(.tbl) {
|
|
|
|
TRUE
|
|
|
|
}
|
|
|
|
pm_group_by_drop_default.grouped_df <- function(.tbl) {
|
|
|
|
tryCatch({
|
|
|
|
!identical(attr(pm_group_data(.tbl), ".drop"), FALSE)
|
|
|
|
}, error = function(e) {
|
|
|
|
TRUE
|
|
|
|
})
|
|
|
|
}
|
|
|
|
pm_add_group_columns <- function(.data, vars) {
|
|
|
|
vars <- vars[!vapply(vars, is.null, FALSE)]
|
|
|
|
types <- do.call(c, lapply(vars, typeof))
|
|
|
|
test <- any(types == "language")
|
|
|
|
needs_mutate <- if (test) unname(which(types == "language")) else NULL
|
|
|
|
if (!is.null(needs_mutate)) {
|
|
|
|
.data <- do.call(pm_mutate, c(list(.data = pm_ungroup(.data)), vars[needs_mutate]))
|
|
|
|
}
|
2023-02-08 16:51:41 +01:00
|
|
|
list(data = .data, pm_groups = names(vars))
|
2020-09-18 16:05:53 +02:00
|
|
|
}
|
|
|
|
pm_group_data <- function(.data) {
|
2023-02-08 13:48:06 +01:00
|
|
|
if ("grouped_df" %in% class(.data)) pm_group_data.grouped_df(.data) else pm_group_data.data.frame(.data)
|
2020-09-18 16:05:53 +02:00
|
|
|
}
|
2023-02-08 13:48:06 +01:00
|
|
|
pm_group_data.data.frame <- function(.data) {
|
|
|
|
structure(list(.rows = list(seq_len(nrow(.data)))), class = "data.frame", row.names = c(NA, -1L))
|
|
|
|
}
|
|
|
|
pm_group_data.grouped_df <- function(.data) {
|
2023-02-08 16:51:41 +01:00
|
|
|
attr(.data, "pm_groups")
|
2020-09-18 16:05:53 +02:00
|
|
|
}
|
|
|
|
pm_group_rows <- function(.data) {
|
|
|
|
pm_group_data(.data)[[".rows"]]
|
|
|
|
}
|
|
|
|
pm_group_indices <- function(.data) {
|
2023-02-08 13:48:06 +01:00
|
|
|
if (!pm_has_groups(.data)) return(rep(1L, nrow(.data)))
|
2023-02-08 16:51:41 +01:00
|
|
|
pm_groups <- pm_group_vars(.data)
|
|
|
|
res <- unique(.data[, pm_groups, drop = FALSE])
|
|
|
|
res <- res[do.call(order, lapply(pm_groups, function(x) res[, x])), , drop = FALSE]
|
2020-09-18 16:05:53 +02:00
|
|
|
class(res) <- "data.frame"
|
|
|
|
nrow_data <- nrow(.data)
|
|
|
|
rows <- rep(NA, nrow_data)
|
|
|
|
for (i in seq_len(nrow_data)) {
|
2023-02-08 16:51:41 +01:00
|
|
|
rows[i] <- which(interaction(res[, pm_groups]) %in% interaction(.data[i, pm_groups]))
|
2020-09-18 16:05:53 +02:00
|
|
|
}
|
|
|
|
rows
|
|
|
|
}
|
|
|
|
pm_group_vars <- function(x) {
|
2023-02-08 16:51:41 +01:00
|
|
|
pm_groups <- attr(x, "pm_groups", exact = TRUE)
|
|
|
|
if (is.null(pm_groups)) character(0) else colnames(pm_groups)[!colnames(pm_groups) %in% c(".pm_group_id", ".rows")]
|
2020-09-18 16:05:53 +02:00
|
|
|
}
|
|
|
|
pm_groups <- function(x) {
|
2023-02-08 13:48:06 +01:00
|
|
|
pm_as_symbols(pm_group_vars(x))
|
2020-09-18 16:05:53 +02:00
|
|
|
}
|
|
|
|
pm_group_size <- function(x) {
|
|
|
|
lengths(pm_group_rows(x))
|
|
|
|
}
|
|
|
|
pm_n_groups <- function(x) {
|
|
|
|
nrow(pm_group_data(x))
|
|
|
|
}
|
2023-02-08 13:48:06 +01:00
|
|
|
pm_group_split <- function(.data, ..., .keep = TRUE) {
|
|
|
|
dots_len <- length(pm_dotdotdot(...)) > 0L
|
|
|
|
if (pm_has_groups(.data) && isTRUE(dots_len)) {
|
|
|
|
warning("... is ignored in pm_group_split(<grouped_df>), please use pm_group_by(..., .add = TRUE) %pm>% pm_group_split()")
|
|
|
|
}
|
|
|
|
if (!pm_has_groups(.data) && isTRUE(dots_len)) {
|
|
|
|
.data <- pm_group_by(.data, ...)
|
|
|
|
}
|
|
|
|
if (!pm_has_groups(.data) && !isTRUE(dots_len)) {
|
|
|
|
return(list(.data))
|
|
|
|
}
|
|
|
|
pm_context$setup(.data)
|
|
|
|
on.exit(pm_context$clean(), add = TRUE)
|
2023-02-08 16:51:41 +01:00
|
|
|
pm_groups <- pm_group_vars(.data)
|
|
|
|
attr(pm_context$.data, "pm_groups") <- NULL
|
|
|
|
res <- pm_split_into_groups(pm_context$.data, pm_groups)
|
2023-02-08 13:48:06 +01:00
|
|
|
names(res) <- NULL
|
|
|
|
if (!isTRUE(.keep)) {
|
2023-02-08 16:51:41 +01:00
|
|
|
res <- lapply(res, function(x) x[, !colnames(x) %in% pm_groups])
|
2023-02-08 13:48:06 +01:00
|
|
|
}
|
|
|
|
any_empty <- unlist(lapply(res, function(x) !(nrow(x) == 0L)))
|
|
|
|
res[any_empty]
|
|
|
|
}
|
2020-09-18 16:05:53 +02:00
|
|
|
pm_group_keys <- function(.data) {
|
2023-02-08 16:51:41 +01:00
|
|
|
pm_groups <- pm_group_vars(.data)
|
2020-09-18 16:05:53 +02:00
|
|
|
pm_context$setup(.data)
|
2023-02-08 16:51:41 +01:00
|
|
|
res <- pm_context$get_columns(pm_context$get_colnames() %in% pm_groups)
|
2020-09-18 16:05:53 +02:00
|
|
|
res <- res[!duplicated(res), , drop = FALSE]
|
2023-02-08 13:48:06 +01:00
|
|
|
if (nrow(res) == 0L) return(res)
|
2020-09-18 16:05:53 +02:00
|
|
|
class(res) <- "data.frame"
|
2023-02-08 16:51:41 +01:00
|
|
|
res <- res[do.call(order, lapply(pm_groups, function(x) res[, x])), , drop = FALSE]
|
2020-09-18 16:05:53 +02:00
|
|
|
rownames(res) <- NULL
|
|
|
|
res
|
|
|
|
}
|
2023-02-08 16:51:41 +01:00
|
|
|
pm_split_into_groups <- function(.data, pm_groups, drop = FALSE, ...) {
|
2020-09-18 16:05:53 +02:00
|
|
|
class(.data) <- "data.frame"
|
2023-02-08 16:51:41 +01:00
|
|
|
pm_group_factors <- lapply(pm_groups, function(x, .data) as.factor(.data[, x]), .data)
|
2023-02-08 13:48:06 +01:00
|
|
|
split(x = .data, f = pm_group_factors, drop = drop, ...)
|
|
|
|
}
|
2023-02-08 16:51:41 +01:00
|
|
|
pm_groups_set <- function(x, pm_groups, drop = pm_group_by_drop_default(x)) {
|
|
|
|
attr(x, "pm_groups") <- if (is.null(pm_groups) || length(pm_groups) == 0L) {
|
2023-02-08 13:48:06 +01:00
|
|
|
NULL
|
|
|
|
} else {
|
2023-02-08 16:51:41 +01:00
|
|
|
pm_calculate_groups(x, pm_groups, drop)
|
2023-02-08 13:48:06 +01:00
|
|
|
}
|
|
|
|
x
|
|
|
|
}
|
2023-02-08 16:51:41 +01:00
|
|
|
pm_get_group_details <- function(x) {
|
|
|
|
pm_groups <- attr(x, "pm_groups", exact = TRUE)
|
|
|
|
if (is.null(pm_groups)) character(0) else pm_groups
|
2023-02-08 13:48:06 +01:00
|
|
|
}
|
|
|
|
pm_has_groups <- function(x) {
|
2023-02-08 16:51:41 +01:00
|
|
|
pm_groups <- pm_group_vars(x)
|
|
|
|
if (length(pm_groups) == 0L) FALSE else TRUE
|
2023-02-08 13:48:06 +01:00
|
|
|
}
|
|
|
|
pm_apply_grouped_function <- function(fn, .data, drop = FALSE, ...) {
|
2023-02-08 16:51:41 +01:00
|
|
|
pm_groups <- pm_group_vars(.data)
|
|
|
|
grouped <- pm_split_into_groups(.data, pm_groups, drop)
|
2023-02-08 13:48:06 +01:00
|
|
|
res <- do.call(rbind, unname(lapply(grouped, fn, ...)))
|
2023-02-08 16:51:41 +01:00
|
|
|
if (any(pm_groups %in% colnames(res))) {
|
2023-02-08 13:48:06 +01:00
|
|
|
class(res) <- c("grouped_df", class(res))
|
2023-02-08 16:51:41 +01:00
|
|
|
res <- pm_groups_set(res, pm_groups[pm_groups %in% colnames(res)])
|
2023-02-08 13:48:06 +01:00
|
|
|
}
|
|
|
|
res
|
|
|
|
}
|
2023-02-08 16:51:41 +01:00
|
|
|
pm_calculate_groups <- function(data, pm_groups, drop = pm_group_by_drop_default(data)) {
|
2023-02-08 13:48:06 +01:00
|
|
|
data <- pm_ungroup(data)
|
2023-02-08 16:51:41 +01:00
|
|
|
unknown <- setdiff(pm_groups, colnames(data))
|
2023-02-08 13:48:06 +01:00
|
|
|
if (length(unknown) > 0L) {
|
2023-02-08 16:51:41 +01:00
|
|
|
stop(sprintf("`pm_groups` missing from `data`: %s.", paste0(pm_groups, collapse = ", ")))
|
2023-02-08 13:48:06 +01:00
|
|
|
}
|
2023-02-08 16:51:41 +01:00
|
|
|
unique_groups <- unique(data[, pm_groups, drop = FALSE])
|
2023-02-08 13:48:06 +01:00
|
|
|
is_factor <- do.call(c, lapply(unique_groups, function(x) is.factor(x)))
|
|
|
|
n_comb <- nrow(unique_groups)
|
|
|
|
rows <- rep(list(NA), n_comb)
|
2023-02-08 16:51:41 +01:00
|
|
|
data_groups <- interaction(data[, pm_groups, drop = TRUE])
|
2023-02-08 13:48:06 +01:00
|
|
|
for (i in seq_len(n_comb)) {
|
2023-02-08 16:51:41 +01:00
|
|
|
rows[[i]] <- which(data_groups %in% interaction(unique_groups[i, pm_groups]))
|
2023-02-08 13:48:06 +01:00
|
|
|
}
|
|
|
|
if (!isTRUE(drop) && any(is_factor)) {
|
|
|
|
na_lvls <- do.call(
|
|
|
|
expand.grid,
|
|
|
|
lapply(unique_groups, function(x) if (is.factor(x)) levels(x)[!(levels(x) %in% x)] else NA)
|
|
|
|
)
|
|
|
|
unique_groups <- rbind(unique_groups, na_lvls)
|
|
|
|
for (i in seq_len(nrow(na_lvls))) {
|
|
|
|
rows[[length(rows) + 1]] <- integer(0)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
unique_groups[[".rows"]] <- rows
|
2023-02-08 16:51:41 +01:00
|
|
|
unique_groups <- unique_groups[do.call(order, lapply(pm_groups, function(x) unique_groups[, x])), , drop = FALSE]
|
2023-02-08 13:48:06 +01:00
|
|
|
rownames(unique_groups) <- NULL
|
|
|
|
structure(unique_groups, .drop = drop)
|
|
|
|
}
|
|
|
|
pm_is.grouped_df <- function(x) {
|
|
|
|
inherits(x, "grouped_df")
|
2020-09-18 16:05:53 +02:00
|
|
|
}
|
|
|
|
pm_if_else <- function(condition, true, false, missing = NULL) {
|
|
|
|
if (!is.logical(condition)) stop("`condition` must be a logical vector.")
|
|
|
|
cls_true <- class(true)
|
|
|
|
cls_false <- class(false)
|
|
|
|
cls_missing <- class(missing)
|
|
|
|
if (!identical(cls_true, cls_false)) {
|
|
|
|
stop("The class of `true` <", class(true), "> is not the same as the class of `false` <", class(false), ">")
|
|
|
|
}
|
|
|
|
if (!is.null(missing) && !identical(cls_true, cls_missing)) {
|
|
|
|
stop("`missing` must be a ", cls_true, " vector, not a ", cls_missing, " vector.")
|
|
|
|
}
|
|
|
|
res <- ifelse(condition, true, false)
|
|
|
|
if (!is.null(missing)) res[is.na(res)] <- missing
|
|
|
|
attributes(res) <- attributes(true)
|
|
|
|
res
|
|
|
|
}
|
2020-09-19 11:54:01 +02:00
|
|
|
pm_anti_join <- function(x, y, by = NULL) {
|
|
|
|
pm_filter_join_worker(x, y, by, type = "anti")
|
|
|
|
}
|
|
|
|
pm_semi_join <- function(x, y, by = NULL) {
|
|
|
|
pm_filter_join_worker(x, y, by, type = "semi")
|
|
|
|
}
|
|
|
|
pm_filter_join_worker <- function(x, y, by = NULL, type = c("anti", "semi")) {
|
|
|
|
type <- match.arg(type, choices = c("anti", "semi"), several.ok = FALSE)
|
|
|
|
if (is.null(by)) {
|
|
|
|
by <- intersect(names(x), names(y))
|
2023-02-08 16:51:41 +01:00
|
|
|
pm_join_message(by)
|
2020-09-19 11:54:01 +02:00
|
|
|
}
|
|
|
|
rows <- interaction(x[, by]) %in% interaction(y[, by])
|
|
|
|
if (type == "anti") rows <- !rows
|
|
|
|
res <- x[rows, , drop = FALSE]
|
|
|
|
rownames(res) <- NULL
|
2023-02-08 16:51:41 +01:00
|
|
|
pm_reconstruct_attrs(res, x)
|
2020-09-19 11:54:01 +02:00
|
|
|
}
|
2023-02-08 13:48:06 +01:00
|
|
|
pm_inner_join <- function(x, y, by = NULL, suffix = c(".x", ".y"), ..., na_matches = c("na", "never")) {
|
2023-02-08 16:51:41 +01:00
|
|
|
pm_join_worker(x = x, y = y, by = by, suffix = suffix, sort = FALSE, ..., keep = FALSE, na_matches = na_matches)
|
2020-09-18 16:05:53 +02:00
|
|
|
}
|
2023-02-08 13:48:06 +01:00
|
|
|
pm_left_join <- function(x, y, by = NULL, suffix = c(".x", ".y"), ..., keep = FALSE, na_matches = c("na", "never")) {
|
2023-02-08 16:51:41 +01:00
|
|
|
pm_join_worker(x = x, y = y, by = by, suffix = suffix, all.x = TRUE, ..., keep = keep, na_matches = na_matches)
|
2020-09-18 16:05:53 +02:00
|
|
|
}
|
2023-02-08 13:48:06 +01:00
|
|
|
pm_right_join <- function(x, y, by = NULL, suffix = c(".x", ".y"), ..., keep = FALSE, na_matches = c("na", "never")) {
|
2023-02-08 16:51:41 +01:00
|
|
|
pm_join_worker(x = x, y = y, by = by, suffix = suffix, all.y = TRUE, ..., keep = keep, na_matches = na_matches)
|
2020-09-18 16:05:53 +02:00
|
|
|
}
|
2023-02-08 13:48:06 +01:00
|
|
|
pm_full_join <- function(x, y, by = NULL, suffix = c(".x", ".y"), ..., keep = FALSE, na_matches = c("na", "never")) {
|
2023-02-08 16:51:41 +01:00
|
|
|
pm_join_worker(x = x, y = y, by = by, suffix = suffix, all = TRUE, ..., keep = keep, na_matches = na_matches)
|
2023-02-08 13:48:06 +01:00
|
|
|
}
|
|
|
|
pm_join_worker <- function(x, y, by = NULL, suffix = c(".x", ".y"), keep = FALSE, na_matches = c("na", "never"), ...) {
|
|
|
|
na_matches <- match.arg(arg = na_matches, choices = c("na", "never"), several.ok = FALSE)
|
|
|
|
incomparables <- if (na_matches == "never") NA else NULL
|
2020-09-18 16:05:53 +02:00
|
|
|
x[, ".join_id"] <- seq_len(nrow(x))
|
2023-02-08 13:48:06 +01:00
|
|
|
merged <- if (is.null(by)) {
|
2020-09-18 16:05:53 +02:00
|
|
|
by <- intersect(names(x), names(y))
|
2023-02-08 16:51:41 +01:00
|
|
|
pm_join_message(by)
|
2023-02-08 13:48:06 +01:00
|
|
|
merge(
|
|
|
|
x = x, y = y, by = by, suffixes = suffix, incomparables = incomparables, ...
|
|
|
|
)[, union(names(x), names(y)), drop = FALSE]
|
2020-09-18 16:05:53 +02:00
|
|
|
} else if (is.null(names(by))) {
|
2023-02-08 13:48:06 +01:00
|
|
|
merge(x = x, y = y, by = by, suffixes = suffix, incomparables = incomparables, ...)
|
2020-09-18 16:05:53 +02:00
|
|
|
} else {
|
2023-02-08 13:48:06 +01:00
|
|
|
merge(x = x, y = y, by.x = names(by), by.y = by, suffixes = suffix, incomparables = incomparables, ...)
|
|
|
|
}
|
|
|
|
merged <- merged[order(merged[, ".join_id"]), colnames(merged) != ".join_id", drop = FALSE]
|
|
|
|
if (isTRUE(keep)) {
|
|
|
|
keep_pos <- match(by, names(merged))
|
|
|
|
x_by <- paste0(by, suffix[1L])
|
|
|
|
colnames(merged)[keep_pos] <- x_by
|
|
|
|
merged[, paste0(by, suffix[2L])] <- merged[, x_by]
|
2020-09-18 16:05:53 +02:00
|
|
|
}
|
|
|
|
rownames(merged) <- NULL
|
2023-02-08 16:51:41 +01:00
|
|
|
pm_reconstruct_attrs(merged, x)
|
2020-09-18 16:05:53 +02:00
|
|
|
}
|
|
|
|
pm_join_message <- function(by) {
|
|
|
|
if (length(by) > 1L) {
|
|
|
|
message("Joining, by = c(\"", paste0(by, collapse = "\", \""), "\")\n", sep = "")
|
|
|
|
} else {
|
|
|
|
message("Joining, by = \"", by, "\"\n", sep = "")
|
|
|
|
}
|
|
|
|
}
|
2023-02-08 13:48:06 +01:00
|
|
|
pm_as_function <- function(x, env = parent.frame()) {
|
|
|
|
if (is.function(x)) return(x)
|
2023-02-08 16:51:41 +01:00
|
|
|
if (pm_is_formula(x)) {
|
2023-02-08 13:48:06 +01:00
|
|
|
if (length(x) > 2) stop("Can't convert a two-sided formula to a function")
|
|
|
|
env <- attr(x, ".Environment", exact = TRUE)
|
|
|
|
rhs <- as.list(x)[[2]]
|
|
|
|
return(as.function(list(... = substitute(), .x = quote(..1), .y = quote(..2), . = quote(..1), rhs), envir = env))
|
2022-08-28 10:31:50 +02:00
|
|
|
}
|
2023-02-08 16:51:41 +01:00
|
|
|
if (pm_is_string(x)) return(get(x, envir = env, mode = "function"))
|
2023-02-08 13:48:06 +01:00
|
|
|
stop("Can't convert an object of class ", class(x), " to a function.")
|
|
|
|
}
|
|
|
|
pm_is_formula <- function(x) {
|
|
|
|
inherits(x, "formula")
|
|
|
|
}
|
|
|
|
pm_is_string <- function(x) {
|
|
|
|
is.character(x) && length(x) == 1L
|
|
|
|
}
|
|
|
|
pm_is_wholenumber <- function(x) {
|
|
|
|
x %% 1L == 0L
|
|
|
|
}
|
|
|
|
pm_names_are_invalid <- function(x) {
|
|
|
|
x == "" | is.na(x)
|
|
|
|
}
|
|
|
|
pm_is_named <- function(x) {
|
|
|
|
nms <- names(x)
|
|
|
|
if (is.null(nms)) return(FALSE)
|
2023-02-08 16:51:41 +01:00
|
|
|
if (any(pm_names_are_invalid(nms))) return(FALSE)
|
2023-02-08 13:48:06 +01:00
|
|
|
TRUE
|
|
|
|
}
|
|
|
|
pm_have_name <- function(x) {
|
|
|
|
nms <- names(x)
|
2023-02-08 16:51:41 +01:00
|
|
|
if (is.null(nms)) rep(FALSE, length(x)) else !pm_names_are_invalid(nms)
|
2023-02-08 13:48:06 +01:00
|
|
|
}
|
|
|
|
pm_is_empty_list <- function(x) {
|
|
|
|
inherits(x, "list") && length(x) == 0L
|
|
|
|
}
|
|
|
|
pm_as_symbols <- function(x) {
|
|
|
|
lapply(x, as.symbol)
|
|
|
|
}
|
|
|
|
pm_is_df_or_vector <- function(x) {
|
|
|
|
res <- is.data.frame(x) || is.atomic(x)
|
|
|
|
if (!isTRUE(res)) stop("You must pass vector(s) and/or data.frame(s).")
|
|
|
|
TRUE
|
|
|
|
}
|
|
|
|
pm_lag <- function(x, n = 1L, default = NA) {
|
|
|
|
if (inherits(x, "ts")) stop("`x` must be a vector, not a `ts` object, do you want `stats::lag()`?")
|
|
|
|
if (length(n) != 1L || !is.numeric(n) || n < 0L) stop("`n` must be a nonnegative integer scalar")
|
|
|
|
if (n == 0L) return(x)
|
2020-09-18 16:05:53 +02:00
|
|
|
tryCatch(
|
|
|
|
storage.mode(default) <- typeof(x),
|
|
|
|
warning = function(w) {
|
|
|
|
stop("Cannot convert `default` <", typeof(default), "> to `x` <", typeof(x), ">")
|
|
|
|
}
|
|
|
|
)
|
|
|
|
xlen <- length(x)
|
2023-02-08 13:48:06 +01:00
|
|
|
n <- pmin(n, xlen)
|
|
|
|
res <- c(rep(default, n), x[seq_len(xlen - n)])
|
2020-09-18 16:05:53 +02:00
|
|
|
attributes(res) <- attributes(x)
|
|
|
|
res
|
|
|
|
}
|
2023-02-08 13:48:06 +01:00
|
|
|
pm_lead <- function(x, n = 1L, default = NA) {
|
|
|
|
if (length(n) != 1L || !is.numeric(n) || n < 0L) stop("n must be a nonnegative integer scalar")
|
|
|
|
if (n == 0L) return(x)
|
2020-09-18 16:05:53 +02:00
|
|
|
tryCatch(
|
|
|
|
storage.mode(default) <- typeof(x),
|
|
|
|
warning = function(w) {
|
|
|
|
stop("Cannot convert `default` <", typeof(default), "> to `x` <", typeof(x), ">")
|
|
|
|
}
|
|
|
|
)
|
|
|
|
xlen <- length(x)
|
2023-02-08 13:48:06 +01:00
|
|
|
n <- pmin(n, xlen)
|
|
|
|
res <- c(x[-seq_len(n)], rep(default, n))
|
2020-09-18 16:05:53 +02:00
|
|
|
attributes(res) <- attributes(x)
|
|
|
|
res
|
|
|
|
}
|
2023-02-08 13:48:06 +01:00
|
|
|
pm_lst <- function(...) {
|
|
|
|
fn_call <- match.call()
|
|
|
|
list_to_eval <- as.list(fn_call)[-1]
|
|
|
|
out <- vector(mode = "list", length = length(list_to_eval))
|
|
|
|
names(out) <- names(list_to_eval)
|
|
|
|
exprs <- lapply(substitute(list(...)), deparse)[-1]
|
|
|
|
for (element in seq_along(list_to_eval)) {
|
|
|
|
value <- list_to_eval[[element]]
|
|
|
|
if (is.language(value)) {
|
|
|
|
value <- eval(
|
|
|
|
value,
|
|
|
|
envir = if (length(out) == 0) {
|
|
|
|
list_to_eval
|
|
|
|
} else {
|
2023-02-08 16:51:41 +01:00
|
|
|
pm_drop_dup_list(out[1:(element - 1)])
|
2023-02-08 13:48:06 +01:00
|
|
|
}
|
|
|
|
)
|
|
|
|
}
|
|
|
|
if (is.null(value)) {
|
|
|
|
out[element] <- list(NULL)
|
|
|
|
} else {
|
|
|
|
out[[element]] <- value
|
|
|
|
}
|
|
|
|
invalid_name <- is.null(names(out)[element]) ||
|
|
|
|
is.na(names(out)[element]) ||
|
|
|
|
names(out)[element] == ""
|
|
|
|
if (invalid_name) {
|
|
|
|
if (exprs[[element]] != "NULL" || (exprs[[element]] == "NULL" && is.null(out[[element]]))) {
|
|
|
|
names(out)[element] <- exprs[[element]]
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
out
|
|
|
|
}
|
|
|
|
pm_drop_dup_list <- function(x) {
|
|
|
|
list_names <- names(x)
|
|
|
|
if (identical(list_names, unique(list_names))) return(x)
|
|
|
|
count <- table(list_names)
|
|
|
|
dupes <- names(count[count > 1])
|
|
|
|
uniques <- names(count[count == 1])
|
|
|
|
to_drop <- do.call(c, lapply(
|
|
|
|
dupes,
|
|
|
|
function(x) {
|
|
|
|
matches <- which(list_names == x)
|
|
|
|
matches[-length(matches)]
|
|
|
|
}
|
|
|
|
))
|
|
|
|
x[uniques] <- Filter(Negate(is.null), x[uniques])
|
|
|
|
return(x[-to_drop])
|
|
|
|
}
|
2020-09-18 16:05:53 +02:00
|
|
|
pm_mutate <- function(.data, ...) {
|
2023-02-08 13:48:06 +01:00
|
|
|
if ("grouped_df" %in% class(.data)) pm_mutate.grouped_df(.data, ...) else pm_mutate.data.frame(.data, ...)
|
|
|
|
}
|
|
|
|
pm_mutate.data.frame <- function(
|
|
|
|
.data,
|
|
|
|
...,
|
|
|
|
.keep = c("all", "used", "unused", "none"),
|
|
|
|
.before = NULL,
|
|
|
|
.after = NULL
|
|
|
|
) {
|
|
|
|
keep <- match.arg(arg = .keep, choices = c("all", "used", "unused", "none"), several.ok = FALSE)
|
|
|
|
res <- pm_mutate_df(.data = .data, ...)
|
|
|
|
data <- res$data
|
|
|
|
new_cols <- res$new_cols
|
|
|
|
.before <- substitute(.before)
|
|
|
|
.after <- substitute(.after)
|
|
|
|
if (!is.null(.before) || !is.null(.after)) {
|
|
|
|
new <- setdiff(new_cols, names(.data))
|
|
|
|
data <- do.call(pm_relocate, c(list(.data = data), new, .before = .before, .after = .after))
|
|
|
|
}
|
|
|
|
if (keep == "all") {
|
|
|
|
data
|
|
|
|
} else if (keep == "unused") {
|
|
|
|
unused <- setdiff(colnames(.data), res$used_cols)
|
|
|
|
keep <- intersect(colnames(data), c(pm_group_vars(.data), unused, new_cols))
|
|
|
|
select(.data = data, keep)
|
|
|
|
} else if (keep == "used") {
|
|
|
|
keep <- intersect(colnames(data), c(pm_group_vars(.data), res$used_cols, new_cols))
|
|
|
|
select(.data = data, keep)
|
|
|
|
} else if (keep == "none") {
|
|
|
|
keep <- c(setdiff(pm_group_vars(.data), new_cols), intersect(new_cols, colnames(data)))
|
|
|
|
select(.data = data, keep)
|
2020-09-18 16:05:53 +02:00
|
|
|
}
|
|
|
|
}
|
2023-02-08 13:48:06 +01:00
|
|
|
pm_mutate.grouped_df <- function(.data, ...) {
|
|
|
|
pm_context$pm_group_env <- parent.frame(n = 1)
|
|
|
|
on.exit(rm(list = c("pm_group_env"), envir = pm_context), add = TRUE)
|
|
|
|
rows <- rownames(.data)
|
|
|
|
res <- pm_apply_grouped_function("pm_mutate", .data, drop = TRUE, ...)
|
|
|
|
res[rows, , drop = FALSE]
|
|
|
|
}
|
|
|
|
pm_mutate_df <- function(.data, ...) {
|
2020-09-18 16:05:53 +02:00
|
|
|
conditions <- pm_dotdotdot(..., .impute_names = TRUE)
|
2023-02-08 13:48:06 +01:00
|
|
|
cond_nms <- names(pm_dotdotdot(..., .impute_names = FALSE))
|
|
|
|
if (length(conditions) == 0L) {
|
|
|
|
return(list(
|
|
|
|
data = .data,
|
|
|
|
used_cols = NULL,
|
|
|
|
new_cols = NULL
|
|
|
|
))
|
|
|
|
}
|
|
|
|
used <- unname(do.call(c, lapply(conditions, pm_find_used)))
|
|
|
|
used <- used[used %in% colnames(.data)]
|
2020-09-18 16:05:53 +02:00
|
|
|
pm_context$setup(.data)
|
|
|
|
on.exit(pm_context$clean(), add = TRUE)
|
|
|
|
for (i in seq_along(conditions)) {
|
2023-02-08 13:48:06 +01:00
|
|
|
not_named <- (is.null(cond_nms) || cond_nms[i] == "")
|
|
|
|
res <- eval(
|
|
|
|
conditions[[i]],
|
|
|
|
envir = pm_context$as_env(),
|
|
|
|
enclos = if (!is.null(pm_context$pm_group_env)) pm_context$pm_group_env else parent.frame(n = 2)
|
|
|
|
)
|
|
|
|
res_nms <- names(res)
|
|
|
|
if (is.data.frame(res)) {
|
|
|
|
if (not_named) {
|
|
|
|
pm_context$.data[, res_nms] <- res
|
|
|
|
} else {
|
|
|
|
pm_context$.data[[cond_nms[i]]] <- res
|
|
|
|
}
|
|
|
|
} else if (is.atomic(res)) {
|
|
|
|
cond_nms[i] <- names(conditions)[[i]]
|
|
|
|
pm_context$.data[[cond_nms[i]]] <- res
|
|
|
|
} else {
|
|
|
|
if (is.null(res_nms)) names(res) <- names(conditions)[[i]]
|
|
|
|
pm_context$.data[[names(res)]] <- res
|
|
|
|
}
|
2020-09-18 16:05:53 +02:00
|
|
|
}
|
2023-02-08 13:48:06 +01:00
|
|
|
list(
|
|
|
|
data = pm_context$.data,
|
|
|
|
used_cols = used,
|
|
|
|
new_cols = cond_nms
|
|
|
|
)
|
2020-09-18 16:05:53 +02:00
|
|
|
}
|
2023-02-08 13:48:06 +01:00
|
|
|
pm_find_used <- function(expr) {
|
|
|
|
if (is.symbol(expr)) {
|
|
|
|
as.character(expr)
|
|
|
|
} else {
|
|
|
|
unique(unlist(lapply(expr[-1], pm_find_used)))
|
|
|
|
}
|
2020-09-18 16:05:53 +02:00
|
|
|
}
|
|
|
|
pm_n_distinct <- function(..., na.rm = FALSE) {
|
2023-02-08 13:48:06 +01:00
|
|
|
res <- do.call(cbind, list(...))
|
|
|
|
if (isTRUE(na.rm)) res <- res[!is.na(res), , drop = FALSE]
|
|
|
|
nrow(unique(res))
|
2020-09-18 16:05:53 +02:00
|
|
|
}
|
2023-02-08 13:48:06 +01:00
|
|
|
pm_nth <- function(x, n, order_by = NULL, default = pm_default_missing(x)) {
|
|
|
|
if (length(n) != 1 || !is.numeric(n)) stop("`n` must be a single integer.")
|
|
|
|
n <- trunc(n)
|
|
|
|
if (n == 0 || n > length(x) || n < -length(x)) return(default)
|
|
|
|
if (n < 0) n <- length(x) + n + 1
|
|
|
|
if (is.null(order_by)) x[[n]] else x[[order(order_by)[[n]]]]
|
2020-09-18 16:05:53 +02:00
|
|
|
}
|
2023-02-08 13:48:06 +01:00
|
|
|
pm_first <- function(x, order_by = NULL, default = pm_default_missing(x)) {
|
2023-02-08 16:51:41 +01:00
|
|
|
pm_nth(x, 1L, order_by = order_by, default = default)
|
2020-09-18 16:05:53 +02:00
|
|
|
}
|
2023-02-08 13:48:06 +01:00
|
|
|
pm_last <- function(x, order_by = NULL, default = pm_default_missing(x)) {
|
2023-02-08 16:51:41 +01:00
|
|
|
pm_nth(x, -1L, order_by = order_by, default = default)
|
2020-09-18 16:05:53 +02:00
|
|
|
}
|
2023-02-08 13:48:06 +01:00
|
|
|
pm_default_missing <- function(x) {
|
|
|
|
pm_default_missing.data.frame(x)
|
2020-09-18 16:05:53 +02:00
|
|
|
}
|
2023-02-08 13:48:06 +01:00
|
|
|
pm_default_missing.data.frame <- function(x) {
|
|
|
|
if (!is.object(x) && is.list(x)) NULL else x[NA_real_]
|
|
|
|
}
|
|
|
|
pm_default_missing.data.frame <- function(x) {
|
|
|
|
rep(NA, nrow(x))
|
|
|
|
}
|
|
|
|
`%pm>%` <- function(lhs, rhs) {
|
|
|
|
rhs_call <- pm_insert_dot(substitute(rhs))
|
|
|
|
eval(rhs_call, envir = list(`.` = lhs), enclos = parent.frame())
|
|
|
|
}
|
|
|
|
pm_insert_dot <- function(expr) {
|
|
|
|
if (is.symbol(expr) || expr[[1]] == quote(`(`)) {
|
|
|
|
expr <- as.call(c(expr, quote(`.`)))
|
|
|
|
} else if (length(expr) == 1) {
|
|
|
|
expr <- as.call(c(expr[[1]], quote(`.`)))
|
|
|
|
} else if (
|
|
|
|
expr[[1]] != quote(`{`) &&
|
|
|
|
!any(vapply(expr[-1], identical, quote(`.`), FUN.VALUE = logical(1))) &&
|
|
|
|
!any(vapply(expr[-1], identical, quote(`!!!.`), FUN.VALUE = logical(1)))
|
|
|
|
) {
|
|
|
|
expr <- as.call(c(expr[[1]], quote(`.`), as.list(expr[-1])))
|
|
|
|
}
|
|
|
|
expr
|
|
|
|
}
|
|
|
|
pm_pivot_longer <- function(
|
|
|
|
data,
|
|
|
|
cols,
|
|
|
|
names_to = "name",
|
|
|
|
names_prefix = NULL,
|
|
|
|
names_sep = NULL,
|
|
|
|
names_pattern = NULL,
|
|
|
|
values_to = "value",
|
|
|
|
values_drop_na = FALSE,
|
|
|
|
...
|
|
|
|
) {
|
|
|
|
if (missing(cols)) {
|
|
|
|
stop("`cols` must select at least one column.")
|
|
|
|
}
|
|
|
|
cols <- names(pm_eval_select_pos(data, substitute(cols)))
|
|
|
|
if (any(names_to %in% setdiff(names(data), cols))) {
|
|
|
|
stop(
|
|
|
|
paste0(
|
|
|
|
"Some values of the columns specified in 'names_to' are already present
|
|
|
|
as column names. Either use another value in `names_to` or pm_rename the
|
|
|
|
following columns: ",
|
|
|
|
paste(names_to[which(names_to %in% setdiff(names(data), cols))], sep = ", ")
|
|
|
|
),
|
|
|
|
call. = FALSE)
|
|
|
|
}
|
|
|
|
if (length(cols) == 0L) {
|
|
|
|
stop("No columns found for reshaping data.", call. = FALSE)
|
|
|
|
}
|
|
|
|
data[["_Row"]] <- as.numeric(rownames(data))
|
|
|
|
names_to_2 <- paste(names_to, collapse = "_")
|
|
|
|
long <- stats::reshape(
|
|
|
|
as.data.frame(data, stringsAsFactors = FALSE),
|
|
|
|
varying = cols,
|
|
|
|
idvar = "_Row",
|
|
|
|
v.names = values_to,
|
|
|
|
timevar = names_to_2,
|
|
|
|
direction = "long"
|
|
|
|
)
|
|
|
|
long <- long[do.call(order, long[, c("_Row", names_to_2)]), ]
|
|
|
|
long[["_Row"]] <- NULL
|
|
|
|
long[[names_to_2]] <- cols[long[[names_to_2]]]
|
|
|
|
if (length(names_to) > 1) {
|
|
|
|
if (is.null(names_pattern)) {
|
|
|
|
for (i in seq_along(names_to)) {
|
|
|
|
new_vals <- unlist(lapply(
|
|
|
|
strsplit(unique(long[[names_to_2]]), names_sep, fixed = TRUE),
|
|
|
|
function(x) x[i]
|
|
|
|
))
|
|
|
|
long[[names_to[i]]] <- new_vals
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
tmp <- regmatches(
|
|
|
|
unique(long[[names_to_2]]),
|
|
|
|
regexec(names_pattern, unique(long[[names_to_2]]))
|
|
|
|
)
|
|
|
|
tmp <- as.data.frame(do.call(rbind, tmp), stringsAsFactors = FALSE)
|
|
|
|
names(tmp) <- c(names_to_2, names_to)
|
|
|
|
long <- cbind(long, tmp[match(long[[names_to_2]], tmp[[names_to_2]]), -1])
|
|
|
|
}
|
|
|
|
long[[names_to_2]] <- NULL
|
|
|
|
}
|
|
|
|
long <- pm_relocate(.data = long, "value", .after = -1)
|
|
|
|
if (!is.null(names_prefix)) {
|
|
|
|
if (length(names_to) > 1) {
|
|
|
|
stop("`names_prefix` only works when `names_to` is of length 1.", call. = FALSE)
|
|
|
|
}
|
|
|
|
long[[names_to]] <- gsub(paste0("^", names_prefix), "", long[[names_to]])
|
|
|
|
}
|
|
|
|
if (values_drop_na) {
|
|
|
|
long <- long[!is.na(long[, values_to]), ]
|
|
|
|
}
|
|
|
|
rownames(long) <- NULL
|
|
|
|
attributes(long)$reshapeLong <- NULL
|
|
|
|
long
|
|
|
|
}
|
|
|
|
pm_pivot_wider <- function(
|
|
|
|
data,
|
|
|
|
id_cols = NULL,
|
|
|
|
values_from = "Value",
|
|
|
|
names_from = "Name",
|
|
|
|
names_sep = "_",
|
|
|
|
names_prefix = "",
|
|
|
|
names_glue = NULL,
|
|
|
|
values_fill = NULL,
|
|
|
|
...
|
|
|
|
) {
|
|
|
|
old_names <- names(data)
|
|
|
|
names_from <- names(pm_eval_select_pos(data, substitute(names_from)))
|
|
|
|
values_from <- names(pm_eval_select_pos(data, substitute(values_from)))
|
|
|
|
variable_attr <- lapply(data, attributes)
|
|
|
|
if (is.null(id_cols)) {
|
|
|
|
row_index <- do.call(
|
|
|
|
paste,
|
|
|
|
c(data[, !names(data) %in% c(values_from, names_from), drop = FALSE], sep = "_")
|
|
|
|
)
|
|
|
|
if (length(row_index) == 0) row_index <- rep("", nrow(data))
|
|
|
|
data[["_Rows"]] <- row_index
|
|
|
|
id_cols <- "_Rows"
|
|
|
|
}
|
|
|
|
current_colnames <- colnames(data)
|
|
|
|
current_colnames <- current_colnames[current_colnames != "_Rows"]
|
|
|
|
if (is.null(names_glue)) {
|
|
|
|
future_colnames <- unique(do.call(paste, c(data[, names_from, drop = FALSE], sep = names_sep)))
|
|
|
|
} else {
|
|
|
|
vars <- regmatches(names_glue, gregexpr("\\{\\K[^{}]+(?=\\})", names_glue, perl = TRUE))[[1]]
|
|
|
|
tmp_data <- unique(data[, vars])
|
|
|
|
future_colnames <- unique(apply(tmp_data, 1, function(x) {
|
|
|
|
tmp_vars <- list()
|
|
|
|
for (i in seq_along(vars)) {
|
|
|
|
tmp_vars[[i]] <- x[vars[i]]
|
|
|
|
}
|
|
|
|
tmp_colname <- gsub("\\{\\K[^{}]+(?=\\})", "", names_glue, perl = TRUE)
|
|
|
|
tmp_colname <- gsub("\\{\\}", "%s", tmp_colname)
|
|
|
|
do.call(sprintf, c(fmt = tmp_colname, tmp_vars))
|
|
|
|
}))
|
|
|
|
}
|
|
|
|
if (any(future_colnames %in% current_colnames)) {
|
|
|
|
stop(
|
|
|
|
paste0(
|
|
|
|
"Some values of the columns specified in 'names_from' are already present
|
|
|
|
as column names. Either use `name_prefix` or pm_rename the following columns: ",
|
|
|
|
paste(current_colnames[which(current_colnames %in% future_colnames)], sep = ", ")
|
|
|
|
),
|
|
|
|
call. = FALSE
|
|
|
|
)
|
|
|
|
}
|
|
|
|
data$new_time <- do.call(paste, c(data[, names_from, drop = FALSE], sep = "_"))
|
|
|
|
data[, names_from] <- NULL
|
|
|
|
wide <- stats::reshape(
|
|
|
|
as.data.frame(data, stringsAsFactors = FALSE),
|
|
|
|
v.names = values_from,
|
|
|
|
idvar = id_cols,
|
|
|
|
timevar = "new_time",
|
|
|
|
sep = names_sep,
|
|
|
|
direction = "wide"
|
|
|
|
)
|
|
|
|
if ("_Rows" %in% names(wide)) wide[["_Rows"]] <- NULL
|
|
|
|
rownames(wide) <- NULL
|
|
|
|
if (length(values_from) == 1) {
|
|
|
|
to_rename <- which(startsWith(names(wide), paste0(values_from, names_sep)))
|
|
|
|
names(wide)[to_rename] <- future_colnames
|
|
|
|
}
|
|
|
|
if (length(values_from) > 1) {
|
|
|
|
for (i in values_from) {
|
|
|
|
tmp1 <- wide[, which(!startsWith(names(wide), i))]
|
|
|
|
tmp2 <- wide[, which(startsWith(names(wide), i))]
|
|
|
|
wide <- cbind(tmp1, tmp2)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
new_cols <- setdiff(names(wide), old_names)
|
|
|
|
names(wide)[which(names(wide) %in% new_cols)] <- paste0(names_prefix, new_cols)
|
|
|
|
if (!is.null(values_fill)) {
|
|
|
|
if (length(values_fill) == 1) {
|
|
|
|
if (is.numeric(wide[[new_cols[1]]])) {
|
|
|
|
if (!is.numeric(values_fill)) {
|
|
|
|
stop(paste0("`values_fill` must be of type numeric."), call. = FALSE)
|
|
|
|
} else {
|
|
|
|
for (i in new_cols) {
|
2023-02-08 16:51:41 +01:00
|
|
|
wide[[i]] <- pm_replace_na(wide[[i]], replace = values_fill)
|
2023-02-08 13:48:06 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
} else if (is.character(wide[[new_cols[1]]])) {
|
|
|
|
if (!is.character(values_fill)) {
|
|
|
|
stop(paste0("`values_fill` must be of type character."), call. = FALSE)
|
|
|
|
} else {
|
|
|
|
for (i in new_cols) {
|
2023-02-08 16:51:41 +01:00
|
|
|
wide[[i]] <- pm_replace_na(wide[[i]], replace = values_fill)
|
2023-02-08 13:48:06 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
} else if (is.factor(wide[[new_cols[1]]])) {
|
|
|
|
if (!is.factor(values_fill)) {
|
|
|
|
stop(paste0("`values_fill` must be of type factor."), call. = FALSE)
|
|
|
|
} else {
|
|
|
|
for (i in new_cols) {
|
2023-02-08 16:51:41 +01:00
|
|
|
wide[[i]] <- pm_replace_na(wide[[i]], replace = values_fill)
|
2023-02-08 13:48:06 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
stop("`values_fill` must be of length 1.", call. = FALSE)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
attributes(wide)$reshapeWide <- NULL
|
|
|
|
for (i in colnames(wide)) {
|
|
|
|
attributes(wide[[i]]) <- variable_attr[[i]]
|
|
|
|
}
|
|
|
|
wide
|
|
|
|
}
|
|
|
|
pm_pull <- function(.data, var = -1) {
|
|
|
|
var_list <- as.list(seq_along(.data))
|
|
|
|
names(var_list) <- names(.data)
|
|
|
|
.var <- eval(substitute(var), var_list)
|
|
|
|
if (.var < 0L) .var <- length(var_list) + .var + 1L
|
|
|
|
.data[[.var]]
|
2020-09-18 16:05:53 +02:00
|
|
|
}
|
|
|
|
pm_relocate <- function(.data, ..., .before = NULL, .after = NULL) {
|
2023-02-08 13:48:06 +01:00
|
|
|
pm_relocate.data.frame(.data, ..., .before = NULL, .after = NULL)
|
|
|
|
}
|
|
|
|
pm_relocate.data.frame <- function(.data, ..., .before = NULL, .after = NULL) {
|
2020-09-18 16:05:53 +02:00
|
|
|
data_names <- colnames(.data)
|
|
|
|
col_pos <- pm_select_positions(.data, ...)
|
2023-02-08 13:48:06 +01:00
|
|
|
if (!missing(.before) && !is.null(.before)) .before <- colnames(.data)[pm_eval_select_pos(.data, substitute(.before))]
|
|
|
|
if (!missing(.after) && !is.null(.after)) .after <- colnames(.data)[pm_eval_select_pos(.data, substitute(.after))]
|
2020-09-18 16:05:53 +02:00
|
|
|
has_before <- !is.null(.before)
|
|
|
|
has_after <- !is.null(.after)
|
|
|
|
if (has_before && has_after) {
|
|
|
|
stop("You must supply only one of `.before` and `.after`")
|
|
|
|
} else if (has_before) {
|
2023-02-08 13:48:06 +01:00
|
|
|
where <- min(match(.before, data_names))
|
|
|
|
col_pos <- c(setdiff(col_pos, where), where)
|
2020-09-18 16:05:53 +02:00
|
|
|
} else if (has_after) {
|
2023-02-08 13:48:06 +01:00
|
|
|
where <- max(match(.after, data_names))
|
|
|
|
col_pos <- c(where, setdiff(col_pos, where))
|
2020-09-18 16:05:53 +02:00
|
|
|
} else {
|
2023-02-08 13:48:06 +01:00
|
|
|
where <- 1L
|
|
|
|
col_pos <- union(col_pos, where)
|
2020-09-18 16:05:53 +02:00
|
|
|
}
|
2023-02-08 13:48:06 +01:00
|
|
|
lhs <- setdiff(seq(1L, where - 1L), col_pos)
|
|
|
|
rhs <- setdiff(seq(where + 1L, ncol(.data)), col_pos)
|
2020-09-18 16:05:53 +02:00
|
|
|
col_pos <- unique(c(lhs, col_pos, rhs))
|
|
|
|
col_pos <- col_pos[col_pos <= length(data_names)]
|
|
|
|
res <- .data[col_pos]
|
2023-02-08 13:48:06 +01:00
|
|
|
if (pm_has_groups(.data)) res <- pm_groups_set(res, pm_group_vars(.data))
|
2020-09-18 16:05:53 +02:00
|
|
|
res
|
|
|
|
}
|
|
|
|
pm_rename <- function(.data, ...) {
|
2023-02-08 13:48:06 +01:00
|
|
|
pm_rename.data.frame(.data, ...)
|
|
|
|
}
|
|
|
|
pm_rename.data.frame <- function(.data, ...) {
|
|
|
|
new_names <- names(pm_dotdotdot(...))
|
2020-09-18 16:05:53 +02:00
|
|
|
if (length(new_names) == 0L) {
|
|
|
|
warning("You didn't give any new names")
|
|
|
|
return(.data)
|
|
|
|
}
|
|
|
|
col_pos <- pm_select_positions(.data, ...)
|
|
|
|
old_names <- colnames(.data)[col_pos]
|
|
|
|
new_names_zero <- nchar(new_names) == 0L
|
|
|
|
if (any(new_names_zero)) {
|
|
|
|
warning("You didn't provide new names for: ", paste0("`", old_names[new_names_zero], collapse = ", "), "`")
|
|
|
|
new_names[new_names_zero] <- old_names[new_names_zero]
|
|
|
|
}
|
|
|
|
colnames(.data)[col_pos] <- new_names
|
|
|
|
.data
|
|
|
|
}
|
2023-02-08 13:48:06 +01:00
|
|
|
pm_rename_with <- function(.data, .fn, .cols = everything(), ...) {
|
|
|
|
pm_rename_with.data.frame(.data, .fn, .cols = everything(), ...)
|
|
|
|
}
|
|
|
|
pm_rename_with.data.frame <- function(.data, .fn, .cols = everything(), ...) {
|
2020-09-18 16:05:53 +02:00
|
|
|
if (!is.function(.fn)) stop("`", .fn, "` is not a valid function")
|
2023-02-08 16:51:41 +01:00
|
|
|
grouped <- pm_is.grouped_df(.data)
|
2020-09-18 16:05:53 +02:00
|
|
|
if (grouped) grp_pos <- which(colnames(.data) %in% pm_group_vars(.data))
|
2023-02-08 13:48:06 +01:00
|
|
|
col_pos <- pm_eval_select_pos(.data = .data, .pm_group_pos = TRUE, .cols = substitute(.cols))
|
2020-09-18 16:05:53 +02:00
|
|
|
cols <- colnames(.data)[col_pos]
|
|
|
|
new_cols <- .fn(cols, ...)
|
|
|
|
if (any(duplicated(new_cols))) {
|
|
|
|
stop("New names must be unique however `", deparse(substitute(.fn)), "` returns duplicate column names")
|
|
|
|
}
|
|
|
|
colnames(.data)[col_pos] <- new_cols
|
2023-02-08 13:48:06 +01:00
|
|
|
if (grouped) .data <- pm_groups_set(.data, colnames(.data)[grp_pos])
|
2020-09-18 16:05:53 +02:00
|
|
|
.data
|
|
|
|
}
|
2023-02-08 16:51:41 +01:00
|
|
|
pm_starts_with <- function(match, ignore.case = TRUE, vars = pm_peek_vars()) {
|
2020-09-18 16:05:53 +02:00
|
|
|
grep(pattern = paste0("^", paste0(match, collapse = "|^")), x = vars, ignore.case = ignore.case)
|
|
|
|
}
|
2023-02-08 16:51:41 +01:00
|
|
|
pm_ends_with <- function(match, ignore.case = TRUE, vars = pm_peek_vars()) {
|
2020-09-18 16:05:53 +02:00
|
|
|
grep(pattern = paste0(paste0(match, collapse = "$|"), "$"), x = vars, ignore.case = ignore.case)
|
|
|
|
}
|
2023-02-08 16:51:41 +01:00
|
|
|
pm_contains <- function(match, ignore.case = TRUE, vars = pm_peek_vars()) {
|
2023-02-08 13:48:06 +01:00
|
|
|
matches <- lapply(
|
2020-09-18 16:05:53 +02:00
|
|
|
match,
|
|
|
|
function(x) {
|
|
|
|
if (isTRUE(ignore.case)) {
|
|
|
|
match_u <- toupper(x)
|
|
|
|
match_l <- tolower(x)
|
|
|
|
pos_u <- grep(pattern = match_u, x = toupper(vars), fixed = TRUE)
|
|
|
|
pos_l <- grep(pattern = match_l, x = tolower(vars), fixed = TRUE)
|
|
|
|
unique(c(pos_l, pos_u))
|
|
|
|
} else {
|
|
|
|
grep(pattern = x, x = vars, fixed = TRUE)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
)
|
2023-02-08 13:48:06 +01:00
|
|
|
unique(unlist(matches))
|
2020-09-18 16:05:53 +02:00
|
|
|
}
|
2023-02-08 16:51:41 +01:00
|
|
|
pm_matches <- function(match, ignore.case = TRUE, perl = FALSE, vars = pm_peek_vars()) {
|
2020-09-18 16:05:53 +02:00
|
|
|
grep(pattern = match, x = vars, ignore.case = ignore.case, perl = perl)
|
|
|
|
}
|
2023-02-08 16:51:41 +01:00
|
|
|
pm_num_range <- function(prefix, range, width = NULL, vars = pm_peek_vars()) {
|
2020-09-18 16:05:53 +02:00
|
|
|
if (!is.null(width)) {
|
|
|
|
range <- sprintf(paste0("%0", width, "d"), range)
|
|
|
|
}
|
|
|
|
find <- paste0(prefix, range)
|
|
|
|
if (any(duplicated(vars))) {
|
|
|
|
stop("Column names must be unique")
|
|
|
|
} else {
|
|
|
|
x <- match(find, vars)
|
|
|
|
x[!is.na(x)]
|
|
|
|
}
|
|
|
|
}
|
2023-02-08 16:51:41 +01:00
|
|
|
pm_all_of <- function(x, vars = pm_peek_vars()) {
|
2020-09-18 16:05:53 +02:00
|
|
|
x_ <- !x %in% vars
|
|
|
|
if (any(x_)) {
|
|
|
|
which_x_ <- which(x_)
|
|
|
|
if (length(which_x_) == 1L) {
|
|
|
|
stop("The column ", x[which_x_], " does not exist.")
|
|
|
|
} else {
|
|
|
|
stop("The columns ", paste(x[which_x_], collapse = ", "), " do not exist.")
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
which(vars %in% x)
|
|
|
|
}
|
|
|
|
}
|
2023-02-08 16:51:41 +01:00
|
|
|
pm_any_of <- function(x, vars = pm_peek_vars()) {
|
2020-09-18 16:05:53 +02:00
|
|
|
which(vars %in% x)
|
|
|
|
}
|
2023-02-08 16:51:41 +01:00
|
|
|
pm_everything <- function(vars = pm_peek_vars()) {
|
2020-09-18 16:05:53 +02:00
|
|
|
seq_along(vars)
|
|
|
|
}
|
2023-02-08 16:51:41 +01:00
|
|
|
pm_last_col <- function(offset = 0L, vars = pm_peek_vars()) {
|
|
|
|
if (!pm_is_wholenumber(offset)) stop("`offset` must be an integer")
|
2023-02-08 13:48:06 +01:00
|
|
|
n <- length(vars)
|
|
|
|
if (offset && n <= offset) {
|
2020-09-18 16:05:53 +02:00
|
|
|
stop("`offset` must be smaller than the number of `vars`")
|
2023-02-08 13:48:06 +01:00
|
|
|
} else if (n == 0) {
|
|
|
|
stop("Can't select last column when `vars` is empty")
|
2020-09-18 16:05:53 +02:00
|
|
|
} else {
|
2023-02-08 13:48:06 +01:00
|
|
|
n - offset
|
2020-09-18 16:05:53 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
pm_peek_vars <- function() {
|
|
|
|
pm_select_env$get_colnames()
|
|
|
|
}
|
2023-02-08 13:48:06 +01:00
|
|
|
pm_select_positions <- function(.data, ..., .pm_group_pos = FALSE) {
|
2020-09-18 16:05:53 +02:00
|
|
|
cols <- pm_dotdotdot(...)
|
2023-02-08 13:48:06 +01:00
|
|
|
cols <- cols[!vapply(cols, is.null, FALSE)]
|
|
|
|
if (length(cols) == 0L) return(integer(0))
|
2020-09-18 16:05:53 +02:00
|
|
|
pm_select_env$setup(.data = .data, calling_frame = parent.frame(2L))
|
|
|
|
on.exit(pm_select_env$clean(), add = TRUE)
|
|
|
|
data_names <- pm_select_env$get_colnames()
|
|
|
|
pos <- unlist(lapply(cols, pm_eval_expr))
|
2023-02-08 13:48:06 +01:00
|
|
|
if (length(pos) > 0) pos <- if (pos[1] >= 0) pos[pos >= 0] else pos[pos < 0]
|
2020-09-18 16:05:53 +02:00
|
|
|
col_len <- pm_select_env$get_ncol()
|
|
|
|
if (any(pos > col_len)) {
|
|
|
|
oor <- pos[which(pos > col_len)]
|
|
|
|
oor_len <- length(oor)
|
|
|
|
stop(
|
2023-02-08 16:51:41 +01:00
|
|
|
"Location", if (oor_len > 1) "s " else " ", pm_collapse_to_sentence(oor),
|
2020-09-18 16:05:53 +02:00
|
|
|
if (oor_len > 1) " don't " else " doesn't ", "exist. There are only ", col_len, " columns."
|
|
|
|
)
|
|
|
|
}
|
2023-02-08 13:48:06 +01:00
|
|
|
if (isTRUE(.pm_group_pos)) {
|
2023-02-08 16:51:41 +01:00
|
|
|
pm_groups <- pm_group_vars(.data)
|
|
|
|
missing_groups <- !(pm_groups %in% cols)
|
2020-09-18 16:05:53 +02:00
|
|
|
if (any(missing_groups)) {
|
2023-02-08 16:51:41 +01:00
|
|
|
sel_missing <- pm_groups[missing_groups]
|
2020-09-18 16:05:53 +02:00
|
|
|
readd <- match(sel_missing, data_names)
|
2023-02-08 13:48:06 +01:00
|
|
|
readd <- readd[!(readd %in% pos)]
|
|
|
|
if (length(readd) > 0L) {
|
|
|
|
message("Adding missing grouping variables: `", paste(sel_missing, collapse = "`, `"), "`")
|
|
|
|
if (length(names(cols)) > 0L) names(readd) <- data_names[readd]
|
|
|
|
pos <- c(readd, pos)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (length(data_names[pos]) != 0L) {
|
|
|
|
nm_pos <- names(pos)
|
|
|
|
if (any(nm_pos == "")) {
|
|
|
|
names(pos)[which(nm_pos == "")] <- data_names[pos[which(nm_pos == "")]]
|
|
|
|
}
|
|
|
|
if (is.null(nm_pos)) {
|
|
|
|
names(pos) <- data_names[abs(pos)]
|
|
|
|
}
|
|
|
|
}
|
|
|
|
uniques <- pos[!duplicated(pos)]
|
|
|
|
res_nms <- data_names[uniques]
|
|
|
|
res <- match(res_nms, data_names)
|
|
|
|
if (length(res) != 0L) {
|
|
|
|
res <- if (length(setdiff(names(uniques), data_names)) > 0L) {
|
|
|
|
if (all(uniques > 0L)) structure(res, .Names = names(uniques)) else structure(res, .Names = res_nms)
|
|
|
|
} else {
|
|
|
|
structure(res, .Names = res_nms)
|
2020-09-18 16:05:53 +02:00
|
|
|
}
|
|
|
|
}
|
2023-02-08 13:48:06 +01:00
|
|
|
res
|
2020-09-18 16:05:53 +02:00
|
|
|
}
|
|
|
|
pm_eval_expr <- function(x) {
|
|
|
|
type <- typeof(x)
|
2023-02-08 13:48:06 +01:00
|
|
|
switch(
|
|
|
|
type,
|
2020-09-18 16:05:53 +02:00
|
|
|
"integer" = x,
|
|
|
|
"double" = as.integer(x),
|
|
|
|
"character" = pm_select_char(x),
|
|
|
|
"symbol" = pm_select_symbol(x),
|
|
|
|
"language" = pm_eval_call(x),
|
|
|
|
stop("Expressions of type <", typeof(x), "> cannot be evaluated for use when subsetting.")
|
|
|
|
)
|
|
|
|
}
|
|
|
|
pm_select_char <- function(expr) {
|
|
|
|
pos <- match(expr, pm_select_env$get_colnames())
|
2023-02-08 13:48:06 +01:00
|
|
|
if (any(is.na(pos))) stop("The following columns do not exist:\n ", paste(expr, collapse = "\n "))
|
2020-09-18 16:05:53 +02:00
|
|
|
pos
|
|
|
|
}
|
|
|
|
pm_select_symbol <- function(expr) {
|
|
|
|
expr_name <- as.character(expr)
|
2023-02-08 13:48:06 +01:00
|
|
|
if (grepl("^is\\.", expr_name) && is.function(expr)) {
|
2020-09-18 16:05:53 +02:00
|
|
|
stop(
|
2023-02-08 13:48:06 +01:00
|
|
|
"Predicate functions must be wrapped in `where()`.\n\n",
|
|
|
|
sprintf(" data %%pm>%% select(where(%s))", expr_name)
|
2020-09-18 16:05:53 +02:00
|
|
|
)
|
|
|
|
}
|
|
|
|
res <- try(pm_select_char(as.character(expr)), silent = TRUE)
|
|
|
|
if (inherits(res, "try-error")) {
|
|
|
|
res <- tryCatch(
|
|
|
|
unlist(lapply(eval(expr, envir = pm_select_env$calling_frame), pm_eval_expr)),
|
|
|
|
error = function(e) stop("Column ", expr, " does not exist.")
|
|
|
|
)
|
|
|
|
}
|
|
|
|
res
|
|
|
|
}
|
|
|
|
pm_eval_call <- function(x) {
|
|
|
|
type <- as.character(x[[1]])
|
2023-02-08 13:48:06 +01:00
|
|
|
if (length(type) > 1L) {
|
|
|
|
type <- "pm_context"
|
|
|
|
}
|
|
|
|
switch(
|
|
|
|
type,
|
2020-09-18 16:05:53 +02:00
|
|
|
`:` = pm_select_seq(x),
|
|
|
|
`!` = pm_select_negate(x),
|
|
|
|
`-` = pm_select_minus(x),
|
|
|
|
`c` = pm_select_c(x),
|
|
|
|
`(` = pm_select_bracket(x),
|
2023-02-08 13:48:06 +01:00
|
|
|
`&` = pm_select_and(x),
|
|
|
|
pm_select_context(x)
|
2020-09-18 16:05:53 +02:00
|
|
|
)
|
|
|
|
}
|
2023-02-08 13:48:06 +01:00
|
|
|
pm_select_and <- function(expr) {
|
|
|
|
exprs <- as.list(expr)[-1]
|
|
|
|
res <- do.call(c, lapply(exprs, pm_eval_expr))
|
|
|
|
if (all(res > 0) || all(res < 0)) return(unique(res))
|
|
|
|
res <- res[!(duplicated(abs(res)) | duplicated(abs(res), fromLast = TRUE))]
|
|
|
|
res[res > 0]
|
|
|
|
}
|
2020-09-18 16:05:53 +02:00
|
|
|
pm_select_seq <- function(expr) {
|
|
|
|
x <- pm_eval_expr(expr[[2]])
|
|
|
|
y <- pm_eval_expr(expr[[3]])
|
|
|
|
x:y
|
|
|
|
}
|
|
|
|
pm_select_negate <- function(expr) {
|
2023-02-08 16:51:41 +01:00
|
|
|
x <- if (pm_is_negated_colon(expr)) {
|
2020-09-18 16:05:53 +02:00
|
|
|
expr <- call(":", expr[[2]][[2]], expr[[2]][[3]][[2]])
|
|
|
|
pm_eval_expr(expr)
|
|
|
|
} else {
|
|
|
|
pm_eval_expr(expr[[2]])
|
|
|
|
}
|
|
|
|
x * -1L
|
|
|
|
}
|
|
|
|
pm_is_negated_colon <- function(expr) {
|
|
|
|
expr[[1]] == "!" && length(expr[[2]]) > 1L && expr[[2]][[1]] == ":" && expr[[2]][[3]][[1]] == "!"
|
|
|
|
}
|
|
|
|
pm_select_minus <- function(expr) {
|
|
|
|
x <- pm_eval_expr(expr[[2]])
|
|
|
|
x * -1L
|
|
|
|
}
|
|
|
|
pm_select_c <- function(expr) {
|
|
|
|
lst_expr <- as.list(expr)
|
|
|
|
lst_expr[[1]] <- NULL
|
|
|
|
unlist(lapply(lst_expr, pm_eval_expr))
|
|
|
|
}
|
|
|
|
pm_select_bracket <- function(expr) {
|
|
|
|
pm_eval_expr(expr[[2]])
|
|
|
|
}
|
2023-02-08 13:48:06 +01:00
|
|
|
pm_select_context <- function(expr) {
|
2020-09-18 16:05:53 +02:00
|
|
|
eval(expr, envir = pm_select_env$.data)
|
|
|
|
}
|
|
|
|
pm_select_env <- new.env()
|
|
|
|
pm_select_env$setup <- function(.data, calling_frame) {
|
|
|
|
pm_select_env$.data <- .data
|
|
|
|
pm_select_env$calling_frame <- calling_frame
|
|
|
|
}
|
|
|
|
pm_select_env$clean <- function() {
|
|
|
|
rm(list = c(".data", "calling_frame"), envir = pm_select_env)
|
|
|
|
}
|
|
|
|
pm_select_env$get_colnames <- function() colnames(pm_select_env$.data)
|
|
|
|
pm_select_env$get_nrow <- function() nrow(pm_select_env$.data)
|
|
|
|
pm_select_env$get_ncol <- function() ncol(pm_select_env$.data)
|
2023-02-08 13:48:06 +01:00
|
|
|
pm_eval_select_pos <- function(.data, .cols, .pm_group_pos = FALSE) {
|
|
|
|
do.call(pm_select_positions, list(.data = .data, .cols, .pm_group_pos = .pm_group_pos))
|
|
|
|
}
|
2020-09-19 11:54:01 +02:00
|
|
|
pm_select <- function(.data, ...) {
|
2023-02-08 13:48:06 +01:00
|
|
|
col_pos <- pm_select_positions(.data, ..., .pm_group_pos = TRUE)
|
2020-09-19 11:54:01 +02:00
|
|
|
res <- .data[, col_pos, drop = FALSE]
|
2023-02-08 13:48:06 +01:00
|
|
|
if (length(names(res)) != 0) colnames(res) <- names(col_pos)
|
|
|
|
if (pm_has_groups(.data)) res <- pm_groups_set(res, pm_group_vars(.data))
|
2020-09-19 11:54:01 +02:00
|
|
|
res
|
2020-09-18 16:05:53 +02:00
|
|
|
}
|
2023-02-08 16:51:41 +01:00
|
|
|
pm_summarise <- function(.data, ..., .pm_groups = NULL) {
|
|
|
|
if ("grouped_df" %in% class(.data)) pm_summarise.grouped_df(.data, ..., .pm_groups = NULL) else pm_summarise.data.frame(.data, ..., .pm_groups = NULL)
|
2020-09-18 16:05:53 +02:00
|
|
|
}
|
2023-02-08 16:51:41 +01:00
|
|
|
pm_summarise.data.frame <- function(.data, ..., .pm_groups = NULL) {
|
2020-09-18 16:05:53 +02:00
|
|
|
fns <- pm_dotdotdot(...)
|
|
|
|
pm_context$setup(.data)
|
|
|
|
on.exit(pm_context$clean(), add = TRUE)
|
2023-02-08 16:51:41 +01:00
|
|
|
pm_groups_exist <- pm_context$is_grouped()
|
|
|
|
if (pm_groups_exist) {
|
2023-02-08 13:48:06 +01:00
|
|
|
group <- unique(pm_context$get_columns(pm_group_vars(pm_context$.data)))
|
2020-09-18 16:05:53 +02:00
|
|
|
}
|
2023-02-08 16:51:41 +01:00
|
|
|
if (pm_is_empty_list(fns)) {
|
|
|
|
if (pm_groups_exist) return(group) else return(data.frame())
|
2023-02-08 13:48:06 +01:00
|
|
|
}
|
|
|
|
res <- vector(mode = "list", length = length(fns))
|
|
|
|
pm_eval_env <- c(as.list(pm_context$.data), vector(mode = "list", length = length(fns)))
|
|
|
|
new_pos <- seq(length(pm_context$.data) + 1L, length(pm_eval_env), 1L)
|
|
|
|
for (i in seq_along(fns)) {
|
|
|
|
pm_eval_env[[new_pos[i]]] <- do.call(with, list(pm_eval_env, fns[[i]]))
|
2023-02-08 16:51:41 +01:00
|
|
|
nms <- if (!pm_is_named(pm_eval_env[[new_pos[i]]])) {
|
2023-02-08 13:48:06 +01:00
|
|
|
if (!is.null(names(fns)[[i]])) names(fns)[[i]] else deparse(fns[[i]])
|
|
|
|
} else {
|
|
|
|
NULL
|
2020-09-18 16:05:53 +02:00
|
|
|
}
|
2023-02-08 13:48:06 +01:00
|
|
|
if (!is.null(nms)) names(pm_eval_env)[[new_pos[i]]] <- nms
|
2023-02-08 16:51:41 +01:00
|
|
|
res[[i]] <- pm_build_data_frame(pm_eval_env[[new_pos[i]]], nms = nms)
|
2023-02-08 13:48:06 +01:00
|
|
|
}
|
|
|
|
res <- do.call(cbind, res)
|
2023-02-08 16:51:41 +01:00
|
|
|
if (pm_groups_exist) res <- cbind(group, res, row.names = NULL)
|
2020-09-18 16:05:53 +02:00
|
|
|
res
|
|
|
|
}
|
2023-02-08 16:51:41 +01:00
|
|
|
pm_summarise.grouped_df <- function(.data, ..., .pm_groups = NULL) {
|
|
|
|
if (!is.null(.pm_groups)) {
|
|
|
|
.pm_groups <- match.arg(arg = .pm_groups, choices = c("drop", "drop_last", "keep"), several.ok = FALSE)
|
2023-02-08 13:48:06 +01:00
|
|
|
}
|
2023-02-08 16:51:41 +01:00
|
|
|
pm_groups <- pm_group_vars(.data)
|
2020-09-18 16:05:53 +02:00
|
|
|
res <- pm_apply_grouped_function("pm_summarise", .data, drop = TRUE, ...)
|
2023-02-08 16:51:41 +01:00
|
|
|
res <- res[pm_arrange_rows(res, pm_as_symbols(pm_groups)), , drop = FALSE]
|
|
|
|
verbose <- pm_summarise_verbose(.pm_groups)
|
|
|
|
if (is.null(.pm_groups)) {
|
|
|
|
all_one <- as.data.frame(table(res[, pm_groups]))
|
2023-02-08 13:48:06 +01:00
|
|
|
all_one <- all_one[all_one$Freq != 0, ]
|
2023-02-08 16:51:41 +01:00
|
|
|
.pm_groups <- if (all(all_one$Freq == 1)) "drop_last" else "keep"
|
2023-02-08 13:48:06 +01:00
|
|
|
}
|
2023-02-08 16:51:41 +01:00
|
|
|
if (.pm_groups == "drop_last") {
|
|
|
|
n <- length(pm_groups)
|
2023-02-08 13:48:06 +01:00
|
|
|
if (n > 1) {
|
2023-02-08 16:51:41 +01:00
|
|
|
if (verbose) pm_summarise_inform(pm_groups[-n])
|
|
|
|
res <- pm_groups_set(res, pm_groups[-n], pm_group_by_drop_default(.data))
|
2023-02-08 13:48:06 +01:00
|
|
|
}
|
2023-02-08 16:51:41 +01:00
|
|
|
} else if (.pm_groups == "keep") {
|
|
|
|
if (verbose) pm_summarise_inform(pm_groups)
|
|
|
|
res <- pm_groups_set(res, pm_groups, pm_group_by_drop_default(.data))
|
|
|
|
} else if (.pm_groups == "drop") {
|
|
|
|
attr(res, "pm_groups") <- NULL
|
2023-02-08 13:48:06 +01:00
|
|
|
}
|
2020-09-18 16:05:53 +02:00
|
|
|
rownames(res) <- NULL
|
|
|
|
res
|
|
|
|
}
|
2023-02-08 13:48:06 +01:00
|
|
|
pm_summarise_inform <- function(new_groups) {
|
|
|
|
message(sprintf(
|
2023-02-08 16:51:41 +01:00
|
|
|
"`pm_summarise()` has grouped output by %s. You can override using the `.pm_groups` argument.",
|
2023-02-08 13:48:06 +01:00
|
|
|
paste0("'", new_groups, "'", collapse = ", ")
|
|
|
|
))
|
|
|
|
}
|
2023-02-08 16:51:41 +01:00
|
|
|
pm_summarise_verbose <- function(.pm_groups) {
|
|
|
|
is.null(.pm_groups) &&
|
2023-02-08 13:48:06 +01:00
|
|
|
!identical(getOption("poorman.summarise.inform"), FALSE)
|
|
|
|
}
|
2020-09-18 16:05:53 +02:00
|
|
|
pm_transmute <- function(.data, ...) {
|
2023-02-08 13:48:06 +01:00
|
|
|
if ("grouped_df" %in% class(.data)) pm_transmute.grouped_df(.data, ...) else pm_transmute.data.frame(.data, ...)
|
2020-09-18 16:05:53 +02:00
|
|
|
}
|
2023-02-08 13:48:06 +01:00
|
|
|
pm_transmute.data.frame <- function(.data, ...) {
|
|
|
|
pm_mutate(.data, ..., .keep = "none")
|
2020-09-18 16:05:53 +02:00
|
|
|
}
|
2023-02-08 13:48:06 +01:00
|
|
|
pm_transmute.grouped_df <- function(.data, ...) {
|
2020-09-18 16:05:53 +02:00
|
|
|
rows <- rownames(.data)
|
|
|
|
res <- pm_apply_grouped_function("pm_transmute", .data, drop = TRUE, ...)
|
|
|
|
res[rows, ]
|
|
|
|
}
|
2023-02-08 13:48:06 +01:00
|
|
|
pm_ungroup <- function(x, ...) {
|
|
|
|
if ("grouped_df" %in% class(x)) pm_ungroup.grouped_df(x, ...) else pm_ungroup.data.frame(x, ...)
|
2020-09-18 16:05:53 +02:00
|
|
|
}
|
2023-02-08 13:48:06 +01:00
|
|
|
pm_ungroup.data.frame <- function(x, ...) {
|
|
|
|
rm_groups <- pm_deparse_dots(...)
|
2023-02-08 16:51:41 +01:00
|
|
|
pm_groups <- pm_group_vars(x)
|
|
|
|
if (length(rm_groups) == 0L) rm_groups <- pm_groups
|
|
|
|
x <- pm_groups_set(x, pm_groups[!(pm_groups %in% rm_groups)])
|
|
|
|
if (length(attr(x, "pm_groups")) == 0L) {
|
|
|
|
attr(x, "pm_groups") <- NULL
|
2023-02-08 13:48:06 +01:00
|
|
|
class(x) <- class(x)[!(class(x) %in% "grouped_df")]
|
|
|
|
}
|
|
|
|
x
|
2020-09-18 16:05:53 +02:00
|
|
|
}
|
2023-02-08 13:48:06 +01:00
|
|
|
pm_ungroup.grouped_df <- function(x, ...) {
|
|
|
|
pm_ungroup.data.frame(...)
|
2020-09-18 16:05:53 +02:00
|
|
|
}
|
|
|
|
pm_check_is_dataframe <- function(.data) {
|
|
|
|
parent_fn <- all.names(sys.call(-1L), max.names = 1L)
|
|
|
|
if (!is.data.frame(.data)) stop(parent_fn, " must be given a data.frame")
|
|
|
|
invisible()
|
|
|
|
}
|
2022-08-28 10:31:50 +02:00
|
|
|
pm_seq2 <- function(from, to) {
|
2020-09-18 16:05:53 +02:00
|
|
|
if (length(from) != 1) stop("`from` must be length one")
|
|
|
|
if (length(to) != 1) stop("`to` must be length one")
|
|
|
|
if (from > to) integer() else seq.int(from, to)
|
|
|
|
}
|
|
|
|
pm_collapse_to_sentence <- function(x) {
|
|
|
|
len_x <- length(x)
|
|
|
|
if (len_x == 0L) {
|
|
|
|
stop("Length of `x` is 0")
|
|
|
|
} else if (len_x == 1L) {
|
|
|
|
as.character(x)
|
|
|
|
} else if (len_x == 2L) {
|
|
|
|
paste(x, collapse = " and ")
|
|
|
|
} else {
|
|
|
|
paste(paste(x[1:(len_x - 1)], collapse = ", "), x[len_x], sep = " and ")
|
|
|
|
}
|
|
|
|
}
|
2023-02-08 13:48:06 +01:00
|
|
|
pm_build_data_frame <- function(x, nms = NULL) {
|
|
|
|
res <- if (is.atomic(x)) {
|
|
|
|
data.frame(x)
|
|
|
|
} else if (is.list(x) && !is.data.frame(x)) {
|
|
|
|
structure(list(x = x), class = "data.frame", row.names = c(NA, -1L))
|
|
|
|
} else if (is.data.frame(x)) {
|
|
|
|
x
|
|
|
|
}
|
|
|
|
if (!is.null(nms)) colnames(res) <- nms
|
|
|
|
res
|
|
|
|
}
|
|
|
|
pm_is_nested <- function(lst) vapply(lst, function(x) inherits(x[1L], "list"), FALSE)
|
|
|
|
pm_squash <- function(lst) {
|
2023-02-08 16:51:41 +01:00
|
|
|
do.call(c, lapply(lst, function(x) if (is.list(x) && !is.data.frame(x)) pm_squash(x) else list(x)))
|
2023-02-08 13:48:06 +01:00
|
|
|
}
|
|
|
|
pm_flatten <- function(lst) {
|
|
|
|
nested <- pm_is_nested(lst)
|
|
|
|
res <- c(lst[!nested], unlist(lst[nested], recursive = FALSE))
|
|
|
|
if (sum(nested)) Recall(res) else return(res)
|
|
|
|
}
|
2020-09-18 16:05:53 +02:00
|
|
|
pm_where <- function(fn) {
|
2023-02-08 13:48:06 +01:00
|
|
|
if (!is.function(fn)) {
|
2020-09-18 16:05:53 +02:00
|
|
|
stop(pm_deparse_var(fn), " is not a valid predicate function.")
|
|
|
|
}
|
|
|
|
preds <- unlist(lapply(
|
|
|
|
pm_select_env$.data,
|
|
|
|
function(x, fn) {
|
|
|
|
do.call("fn", list(x))
|
|
|
|
},
|
|
|
|
fn
|
|
|
|
))
|
2023-02-08 13:48:06 +01:00
|
|
|
if (!is.logical(preds)) stop("`where()` must be used with functions that return `TRUE` or `FALSE`.")
|
2020-09-18 16:05:53 +02:00
|
|
|
data_cols <- pm_select_env$get_colnames()
|
|
|
|
cols <- data_cols[preds]
|
|
|
|
which(data_cols %in% cols)
|
|
|
|
}
|