diff --git a/.github/workflows/lintr.yaml b/.github/workflows/lintr.yaml
index 82bb69ee..7931193b 100644
--- a/.github/workflows/lintr.yaml
+++ b/.github/workflows/lintr.yaml
@@ -62,5 +62,5 @@ jobs:
shell: Rscript {0}
- name: Lint
- run: lintr::lint_package(linters = lintr::with_defaults(line_length_linter = NULL, trailing_whitespace_linter = NULL, object_name_linter = NULL, cyclocomp_linter = NULL, object_usage_linter = NULL, object_length_linter = lintr::object_length_linter(length = 50L)), exclusions = list("R/aa_helper_functions_dplyr.R"))
+ run: lintr::lint_package(linters = lintr::with_defaults(line_length_linter = NULL, trailing_whitespace_linter = NULL, object_name_linter = NULL, cyclocomp_linter = NULL, object_usage_linter = NULL, object_length_linter = lintr::object_length_linter(length = 50L)), exclusions = list("R/aa_helper_pm_functions.R"))
shell: Rscript {0}
diff --git a/DESCRIPTION b/DESCRIPTION
index e9af8e61..b8a09a25 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
Package: AMR
-Version: 1.3.0.9022
-Date: 2020-09-18
+Version: 1.3.0.9023
+Date: 2020-09-19
Title: Antimicrobial Resistance Analysis
Authors@R: c(
person(role = c("aut", "cre"),
diff --git a/NEWS.md b/NEWS.md
index 3c6dacfb..3205a48c 100755
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,5 @@
-# AMR 1.3.0.9022
-## Last updated: 18 September 2020
+# AMR 1.3.0.9023
+## Last updated: 19 September 2020
Note: some changes in this version were suggested by anonymous reviewers from the journal we submitted our manuscipt to. We are those reviewers very grateful for going through our code so thoroughly!
diff --git a/R/aa_helper_functions.R b/R/aa_helper_functions.R
index f21d733a..20881558 100755
--- a/R/aa_helper_functions.R
+++ b/R/aa_helper_functions.R
@@ -26,7 +26,7 @@ pm_left_join <- function(x, y, by = NULL, suffix = c(".x", ".y")) {
if (is.na(by)) {
stop_("no common column found for pm_left_join()")
}
- join_message(by)
+ pm_join_message(by)
} else if (!is.null(names(by))) {
by <- unname(c(names(by), by))
}
diff --git a/R/aa_helper_pm_functions.R b/R/aa_helper_pm_functions.R
index 50556b9a..ef9d54a1 100644
--- a/R/aa_helper_pm_functions.R
+++ b/R/aa_helper_pm_functions.R
@@ -36,7 +36,7 @@
# distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
# is furnished to do so', given that a copyright notice is given in the software.
#
-# Copyright notice on 18 September 2020, the day this code was downloaded, as found on
+# Copyright notice on 19 September 2020, the day this code was downloaded, as found on
# https://github.com/nathaneastwood/poorman/blob/52eb6947e0b4430cd588976ed8820013eddf955f/LICENSE:
# YEAR: 2020
# COPYRIGHT HOLDER: Nathan Eastwood
@@ -67,28 +67,6 @@ pm_between <- function(x, left, right) {
if (!is.double(x)) x <- as.numeric(x)
x >= as.numeric(left) & x <= as.numeric(right)
}
-pm_coalesce <- function(...) {
- if (missing(..1)) stop("At least one argument must be supplied.")
-
- vectors <- list(...)
- vectors_lens <- unique(lengths(vectors))
- if (length(vectors_lens) > 2L || (length(vectors_lens) == 2L & !1 %in% vectors_lens)) {
- stop("Vectors must all be of length 1 and/or pm_n")
- }
- max_len <- max(vectors_lens)
-
- len_one <- lengths(vectors) == 1L
- vectors[len_one] <- lapply(vectors[len_one], function(x) rep(x, max_len))
-
- x <- vectors[[1]]
- vectors <- vectors[-1]
-
- for (i in seq_along(vectors)) {
- x_miss <- is.na(x)
- x[x_miss] <- vectors[[i]][x_miss]
- }
- x
-}
pm_context <- new.env()
# Data
@@ -288,23 +266,6 @@ pm_filter.grouped_data <- function(.data, ...) {
res <- pm_apply_grouped_function("pm_filter", .data, drop = TRUE, ...)
res[rows[rows %in% rownames(res)], ]
}
-pm_glimpse <- function(x, width = getOption("width"), ...) {
- if ("grouped_data" %in% class(.data)) {
- pm_glimpse.grouped_data(.data, ...)
- } else {
- pm_glimpse.default(.data, ...)
- }
-}
-
-pm_glimpse.default <- function (x, width = getOption("width"), max.level = 3, ...) {
- utils::str(x, width = width, max.level = max.level, ...)
- invisible(x)
-}
-
-pm_glimpse.data.frame <- function(x, width = getOption("width"), ...) {
- utils::str(x, width = width, ...)
- invisible(x)
-}
pm_group_by <- function(.data, ..., .add = FALSE) {
pm_check_is_dataframe(.data)
pre_groups <- pm_get_groups(.data)
@@ -481,6 +442,27 @@ pm_if_else <- function(condition, true, false, missing = NULL) {
res
}
+pm_anti_join <- function(x, y, by = NULL) {
+ pm_filter_join_worker(x, y, by, type = "anti")
+}
+
+pm_semi_join <- function(x, y, by = NULL) {
+ pm_filter_join_worker(x, y, by, type = "semi")
+}
+
+pm_filter_join_worker <- function(x, y, by = NULL, type = c("anti", "semi")) {
+ type <- match.arg(type, choices = c("anti", "semi"), several.ok = FALSE)
+ if (is.null(by)) {
+ by <- intersect(names(x), names(y))
+ pm_join_message(by)
+ }
+ rows <- interaction(x[, by]) %in% interaction(y[, by])
+ if (type == "anti") rows <- !rows
+ res <- x[rows, , drop = FALSE]
+ rownames(res) <- NULL
+ res
+}
+
pm_inner_join <- function(x, y, by = NULL, suffix = c(".x", ".y")) {
pm_join_worker(x = x, y = y, by = by, suffix = suffix, sort = FALSE)
}
@@ -520,27 +502,6 @@ pm_join_message <- function(by) {
message("Joining, by = \"", by, "\"\n", sep = "")
}
}
-
-pm_anti_join <- function(x, y, by = NULL) {
- pm_filter_join_worker(x, y, by, type = "anti")
-}
-
-pm_semi_join <- function(x, y, by = NULL) {
- pm_filter_join_worker(x, y, by, type = "semi")
-}
-
-pm_filter_join_worker <- function(x, y, by = NULL, type = c("anti", "semi")) {
- type <- match.arg(type, choices = c("anti", "semi"), several.ok = FALSE)
- if (is.null(by)) {
- by <- intersect(names(x), names(y))
- pm_join_message(by)
- }
- rows <- interaction(x[, by]) %in% interaction(y[, by])
- if (type == "anti") rows <- !rows
- res <- x[rows,, drop = FALSE]
- rownames(res) <- NULL
- res
-}
pm_lag <- function(x, pm_n = 1L, default = NA) {
if (inherits(x, "ts")) stop("`x` must be a vector, not a `ts` object, do you want `stats::pm_lag()`?")
if (length(pm_n) != 1L || !is.numeric(pm_n) || pm_n < 0L) stop("`pm_n` must be a nonnegative integer scalar")
@@ -630,163 +591,6 @@ pm_pull <- function(.data, var = -1) {
}
.data[, var]
}
-pm_recode <- function(.x, ..., .default = NULL, .missing = NULL) {
- if ("grouped_data" %in% class(.data)) {
- pm_recode.grouped_data(.data, ...)
- } else {
- pm_recode.default(.data, ...)
- }
-}
-
-pm_recode.numeric <- function(.x, ..., .default = NULL, .missing = NULL) {
- values <- pm_dotdotdot(...)
-
- nms <- pm_have_name(values)
- if (all(nms)) {
- vals <- as.double(names(values))
- } else if (all(!nms)) {
- vals <- seq_along(values)
- } else {
- stop("Either all values must be named, or none must be named.")
- }
-
- pm_n <- length(.x)
- template <- pm_find_template(values, .default, .missing)
- res <- template[rep(NA_integer_, pm_n)]
- replaced <- rep(FALSE, pm_n)
-
- for (i in seq_along(values)) {
- res <- pm_replace_with(res, .x == vals[i], values[[i]], paste0("Vector ", i))
- replaced[.x == vals[i]] <- TRUE
- }
-
- .default <- pm_validate_recode_default(.default, .x, res, replaced)
- res <- pm_replace_with(res, !replaced & !is.na(.x), .default, "`.default`")
- res <- pm_replace_with(res, is.na(.x), .missing, "`.missing`")
- res
-}
-
-pm_recode.character <- function(.x, ..., .default = NULL, .missing = NULL) {
- .x <- as.character(.x)
- values <- pm_dotdotdot(...)
- val_names <- names(values)
- have_names <- pm_have_name(values)
- if (!all(have_names)) {
- bad <- which(!have_names) + 1L
- stop("Argument", if (length(bad) > 1L) "s", " ", paste(bad, sep = ", "), " must be named, not unnamed.")
- }
-
- pm_n <- length(.x)
- template <- pm_find_template(values, .default, .missing)
- res <- template[rep(NA_integer_, pm_n)]
- replaced <- rep(FALSE, pm_n)
-
- for (nm in val_names) {
- res <- pm_replace_with(res, .x == nm, values[[nm]], paste0("`", nm, "`"))
- replaced[.x == nm] <- TRUE
- }
-
- .default <- pm_validate_recode_default(.default, .x, res, replaced)
- res <- pm_replace_with(res, !replaced & !is.na(.x), .default, "`.default`")
- res <- pm_replace_with(res, is.na(.x), .missing, "`.missing`")
- res
-}
-
-pm_recode.factor <- function(.x, ..., .default = NULL, .missing = NULL) {
- values <- pm_dotdotdot(...)
- if (length(values) == 0) stop("No replacements provided.")
-
- have_names <- pm_have_name(values)
- if (!all(have_names)) {
- bad <- which(!have_names) + 1
- stop(bad, " must be named, not unnamed.")
- }
- if (!is.null(.missing)) {
- stop("`.missing` is not supported for factors.")
- }
-
- pm_n <- length(levels(.x))
- template <- pm_find_template(values, .default, .missing)
- res <- template[rep(NA_integer_, pm_n)]
- replaced <- rep(FALSE, pm_n)
-
- for (nm in names(values)) {
- res <- pm_replace_with(res, levels(.x) == nm, values[[nm]], paste0("`", nm, "`"))
- replaced[levels(.x) == nm] <- TRUE
- }
- .default <- pm_validate_recode_default(.default, .x, res, replaced)
- res <- pm_replace_with(res, !replaced, .default, "`.default`")
-
- if (is.character(res)) {
- levels(.x) <- res
- .x
- } else {
- res[as.integer(.x)]
- }
-}
-
-pm_have_name <- function(x) {
- nms <- names(x)
- if (is.null(nms)) rep(FALSE, length(x)) else !(nms == "" | is.na(nms))
-}
-
-pm_compact <- function(.x) Filter(length, .x)
-
-pm_find_template <- function(values, .default = NULL, .missing = NULL) {
- x <- pm_compact(c(values, .default, .missing))
- if (length(x) == 0L) {
- stop("No replacements provided.")
- }
- x[[1]]
-}
-
-pm_validate_recode_default <- function(default, x, res, replaced) {
- default <- pm_recode_default(x, default, res)
- if (is.null(default) && sum(replaced & !is.na(x)) < length(res[!is.na(x)])) {
- warning(
- "Unreplaced values treated as NA as .x is not compatible. ",
- "Please specify replacements exhaustively or supply .default",
- call. = FALSE
- )
- }
- default
-}
-
-pm_recode_default <- function(x, default, res) {
- if ("grouped_data" %in% class(.data)) {
- pm_recode_default.grouped_data(.data, ...)
- } else {
- pm_recode_default.default(.data, ...)
- }
-}
-
-pm_recode_default.default <- function(x, default, res) {
- same_type <- identical(typeof(x), typeof(res))
- if (is.null(default) && same_type) x else default
-}
-
-pm_recode_default.factor <- function(x, default, res) {
- if (is.null(default)) {
- if ((is.character(res) || is.factor(res)) && is.factor(x)) {
- levels(x)
- } else {
- res[NA_integer_]
- }
- } else {
- default
- }
-}
-
-pm_recode_factor <- function(.x, ..., .default = NULL, .missing = NULL, .ordered = FALSE) {
- recoded <- pm_recode(.x, ..., .default = .default, .missing = .missing)
-
- values <- pm_dotdotdot(...)
- all_levels <- unique(c(values, pm_recode_default(.x, .default, recoded), .missing))
- recoded_levels <- if (is.factor(recoded)) levels(recoded) else unique(recoded)
- levels <- intersect(all_levels, recoded_levels)
-
- factor(recoded, levels, ordered = .ordered)
-}
pm_set_names <- function(object = nm, nm) {
names(object) <- nm
object
@@ -860,35 +664,6 @@ pm_rename_with <- function(.data, .fn, .cols = pm_everything(), ...) {
if (grouped) .data <- pm_set_groups(.data, colnames(.data)[grp_pos])
.data
}
-pm_replace_na <- function(data, replace, ...) {
- if ("grouped_data" %in% class(.data)) {
- pm_replace_na.grouped_data(.data, ...)
- } else {
- pm_replace_na.default(.data, ...)
- }
-}
-
-pm_replace_na.default <- function(data, replace = NA, ...) {
- pm_check_replacement(replace, deparse(substitute(data)))
- data[is.na(data)] <- replace
- data
-}
-
-pm_replace_na.data.frame <- function(data, replace = list(), ...) {
- stopifnot(is.list(replace))
- replace_vars <- intersect(names(replace), names(data))
- for (var in replace_vars) {
- pm_check_replacement(replace[[var]], var)
- data[[var]][is.na(data[[var]])] <- replace[[var]]
- }
- data
-}
-
-pm_check_replacement <- function(x, var) {
- pm_n <- length(x)
- if (pm_n == 1L) return()
- stop("Replacement for `", var, "` is length ", pm_n, ", not length 1")
-}
pm_replace_with <- function(x, i, val, arg_name) {
if (is.null(val)) return(x)
pm_check_length(val, x, arg_name)
@@ -937,20 +712,6 @@ pm_rownames_to_column <- function(.data, var = "rowname") {
rownames(.data) <- NULL
.data[, c(var, setdiff(col_names, var))]
}
-
-pm_select <- function(.data, ...) {
- col_pos <- pm_select_positions(.data, ..., .group_pos = TRUE)
- map_names <- names(col_pos)
- map_names_length <- nchar(map_names)
- if (any(map_names_length == 0L)) {
- no_new_names <- which(map_names_length == 0L)
- map_names[no_new_names] <- colnames(.data)[no_new_names]
- }
- res <- .data[, col_pos, drop = FALSE]
- if (!is.null(map_names) && all(col_pos > 0L)) colnames(res) <- map_names
- if (pm_has_groups(.data)) res <- pm_set_groups(res, pm_get_groups(.data))
- res
-}
pm_starts_with <- function(match, ignore.case = TRUE, vars = pm_peek_vars()) {
grep(pattern = paste0("^", paste0(match, collapse = "|^")), x = vars, ignore.case = ignore.case)
}
@@ -1161,243 +922,18 @@ pm_select_env$get_colnames <- function() colnames(pm_select_env$.data)
pm_select_env$get_nrow <- function() nrow(pm_select_env$.data)
pm_select_env$get_ncol <- function() ncol(pm_select_env$.data)
-pm_slice <- function(.data, ...) {
- pm_check_is_dataframe(.data)
- if ("grouped_data" %in% class(.data)) {
- pm_slice.grouped_data(.data, ...)
- } else {
- pm_slice.default(.data, ...)
+pm_select <- function(.data, ...) {
+ col_pos <- pm_select_positions(.data, ..., .group_pos = TRUE)
+ map_names <- names(col_pos)
+ map_names_length <- nchar(map_names)
+ if (any(map_names_length == 0L)) {
+ no_new_names <- which(map_names_length == 0L)
+ map_names[no_new_names] <- colnames(.data)[no_new_names]
}
-}
-
-pm_slice.data.frame <- function(.data, ...) {
- if (nrow(.data) == 0L) return(.data)
- pos <- pm_slice_positions(.data, ...)
- .data[pos, , drop = FALSE]
-}
-
-pm_slice.grouped_data <- function(.data, ...) {
- pm_apply_grouped_function("pm_slice", .data, drop = TRUE, ...)
-}
-
-
-pm_slice_head <- function(.data, ..., pm_n, prop) {
- if ("grouped_data" %in% class(.data)) {
- pm_slice_head.grouped_data(.data, ...)
- } else {
- pm_slice_head.default(.data, ...)
- }
-}
-
-pm_slice_head.data.frame <- function(.data, ..., pm_n, prop) {
- size <- pm_check_slice_size(pm_n, prop)
- idx <- switch(
- size$type,
- pm_n = function(pm_n) seq2(1, min(size$pm_n, pm_n)),
- prop = function(pm_n) seq2(1, min(size$prop * pm_n, pm_n))
- )
- pm_slice(.data, idx(AMR:::pm_n()))
-}
-
-pm_slice_head.grouped_data <- function(.data, ..., pm_n, prop) {
- pm_apply_grouped_function("pm_slice_head", .data, drop = TRUE, pm_n = pm_n, prop = prop, ...)
-}
-
-pm_slice_tail <- function(.data, ..., pm_n, prop) {
- if ("grouped_data" %in% class(.data)) {
- pm_slice_tail.grouped_data(.data, ...)
- } else {
- pm_slice_tail.default(.data, ...)
- }
-}
-
-pm_slice_tail.data.frame <- function(.data, ..., pm_n, prop) {
- size <- pm_check_slice_size(pm_n, prop)
- idx <- switch(
- size$type,
- pm_n = function(pm_n) seq2(max(pm_n - size$pm_n + 1, 1), pm_n),
- prop = function(pm_n) seq2(max(ceiling(pm_n - size$prop * pm_n) + 1, 1), pm_n)
- )
- pm_slice(.data, idx(AMR:::pm_n()))
-}
-
-pm_slice_tail.grouped_data <- function(.data, ..., pm_n, prop) {
- pm_apply_grouped_function("pm_slice_tail", .data, drop = TRUE, pm_n = pm_n, prop = prop, ...)
-}
-
-pm_slice_min <- function(.data, order_by, ..., pm_n, prop, with_ties = TRUE) {
- if ("grouped_data" %in% class(.data)) {
- pm_slice_min.grouped_data(.data, ...)
- } else {
- pm_slice_min.default(.data, ...)
- }
-}
-
-pm_slice_min.data.frame <- function(.data, order_by, ..., pm_n, prop, with_ties = TRUE) {
- if (missing(order_by)) stop("argument `order_by` is missing, with no default.")
-
- size <- pm_check_slice_size(pm_n, prop)
- idx <- if (isTRUE(with_ties)) {
- switch(
- size$type,
- pm_n = function(x, pm_n) pm_vec_head(order(x), pm_smaller_ranks(x, size$pm_n)),
- prop = function(x, pm_n) pm_vec_head(order(x), pm_smaller_ranks(x, size$prop * pm_n))
- )
- } else {
- switch(
- size$type,
- pm_n = function(x, pm_n) pm_vec_head(order(x), size$pm_n),
- prop = function(x, pm_n) pm_vec_head(order(x), size$prop * pm_n)
- )
- }
- order_by <- .data[, pm_deparse_var(order_by)]
- pm_slice(.data, idx(order_by, AMR:::pm_n()))
-}
-
-pm_slice_min.grouped_data <- function(.data, order_by, ..., pm_n, prop, with_ties = TRUE) {
- pm_eval_env$env <- environment()
- on.exit(rm(list = "env", envir = pm_eval_env), add = TRUE)
- pm_apply_grouped_function(
- "pm_slice_min", .data, drop = TRUE, order_by = order_by, pm_n = pm_n, prop = prop, with_ties = with_ties, ...
- )
-}
-
-pm_slice_max <- function(.data, order_by, ..., pm_n, prop, with_ties = TRUE) {
- if ("grouped_data" %in% class(.data)) {
- pm_slice_max.grouped_data(.data, ...)
- } else {
- pm_slice_max.default(.data, ...)
- }
-}
-
-pm_slice_max.data.frame <- function(.data, order_by, ..., pm_n, prop, with_ties = TRUE) {
- if (missing(order_by)) stop("argument `order_by` is missing, with no default.")
-
- size <- pm_check_slice_size(pm_n, prop)
- idx <- if (isTRUE(with_ties)) {
- switch(
- size$type,
- pm_n = function(x, pm_n) pm_vec_head(order(x, decreasing = TRUE), pm_smaller_ranks(pm_desc(x), size$pm_n)),
- prop = function(x, pm_n) pm_vec_head(order(x, decreasing = TRUE), pm_smaller_ranks(pm_desc(x), size$prop * pm_n))
- )
- } else {
- switch(
- size$type,
- pm_n = function(x, pm_n) pm_vec_head(order(x, decreasing = TRUE), size$pm_n),
- prop = function(x, pm_n) pm_vec_head(order(x, decreasing = TRUE), size$prop * pm_n)
- )
- }
- order_by <- .data[, pm_deparse_var(order_by)]
- pm_slice(.data, idx(order_by, AMR:::pm_n()))
-}
-
-pm_slice_max.grouped_data <- function(.data, order_by, ..., pm_n, prop, with_ties = TRUE) {
- pm_eval_env$env <- environment()
- on.exit(rm(list = "env", envir = pm_eval_env), add = TRUE)
- pm_apply_grouped_function(
- "pm_slice_max", .data, drop = TRUE, order_by = order_by, pm_n = pm_n, prop = prop, with_ties = with_ties, ...
- )
-}
-
-pm_slice_sample <- function(.data, ..., pm_n, prop, weight_by = NULL, replace = FALSE) {
- if ("grouped_data" %in% class(.data)) {
- pm_slice_sample.grouped_data(.data, ...)
- } else {
- pm_slice_sample.default(.data, ...)
- }
-}
-
-pm_slice_sample.data.frame <- function(.data, ..., pm_n, prop, weight_by = NULL, replace = FALSE) {
- size <- pm_check_slice_size(pm_n, prop)
- idx <- switch(
- size$type,
- pm_n = function(x, pm_n) pm_sample_int(pm_n, size$pm_n, replace = replace, wt = x),
- prop = function(x, pm_n) pm_sample_int(pm_n, size$prop * pm_n, replace = replace, wt = x),
- )
- weight_by <- pm_deparse_var(weight_by)
- if (!is.null(weight_by)) weight_by <- .data[, weight_by]
- pm_slice(.data, idx(weight_by, AMR:::pm_n()))
-}
-
-pm_slice_sample.grouped_data <- function(.data, ..., pm_n, prop, weight_by = NULL, replace = FALSE) {
- pm_eval_env$env <- environment()
- on.exit(rm(list = "env", envir = pm_eval_env), add = TRUE)
- pm_apply_grouped_function(
- "pm_slice_sample", .data, drop = TRUE, pm_n = pm_n, prop = prop, weight_by = weight_by, replace = replace, ...
- )
-}
-
-# helpers ----------------------------------------------------------------------
-
-pm_slice_positions <- function(.data, ...) {
- conditions <- pm_dotdotdot(...)
- pm_context$setup(.data)
- on.exit(pm_context$clean(), add = TRUE)
- if (length(conditions) == 0L) return(seq_len(pm_n()))
-
- frame <- parent.frame(2L)
- rows <- lapply(
- conditions,
- function(cond, frame) {
- res <- eval(cond, pm_context$.data, frame)
- if (is.logical(res) && all(is.na(res))) {
- res <- integer()
- } else if (is.numeric(res)) {
- res <- as.integer(res)
- } else if (!is.integer(res)) {
- stop("`pm_slice()` expressions should return indices (positive or negative integers).")
- }
- },
- frame = frame
- )
- rows <- do.call(c, rows)
- if (length(rows) == 0L) {
- # do nothing
- } else if (all(rows >= 0, na.rm = TRUE)) {
- rows <- rows[!is.na(rows) & rows <= pm_n() & rows > 0]
- } else if (all(rows <= 0, na.rm = TRUE)) {
- rows <- setdiff(seq_len(pm_n()), -rows)
- } else {
- stop("`pm_slice()` expressions should return either all positive or all negative.")
- }
- rows
-}
-
-pm_check_slice_size <- function(pm_n, prop) {
- if (missing(pm_n) && missing(prop)) {
- list(type = "pm_n", pm_n = 1L)
- } else if (!missing(pm_n) && missing(prop)) {
- if (!is.numeric(pm_n) || length(pm_n) != 1) {
- stop("`pm_n` must be a single number.")
- }
- if (is.na(pm_n) || pm_n < 0) {
- stop("`pm_n` must be a non-missing positive number.")
- }
-
- list(type = "pm_n", pm_n = pm_n)
- } else if (!missing(prop) && missing(pm_n)) {
- if (!is.numeric(prop) || length(prop) != 1) {
- stop("`prop` must be a single number.")
- }
- if (is.na(prop) || prop < 0) {
- stop("`prop` must be a non-missing positive number.")
- }
- list(type = "prop", prop = prop)
- } else {
- stop("Must supply exactly one of `pm_n` and `prop` arguments.")
- }
-}
-
-pm_sample_int <- function(pm_n, size, replace = FALSE, wt = NULL) {
- if (isTRUE(replace)) {
- sample.int(pm_n, size, prob = wt, replace = TRUE)
- } else {
- sample.int(pm_n, min(size, pm_n), prob = wt)
- }
-}
-
-pm_smaller_ranks <- function(x, y) {
- sum(pm_min_rank(x) <= y, na.rm = TRUE)
+ res <- .data[, col_pos, drop = FALSE]
+ if (!is.null(map_names) && all(col_pos > 0L)) colnames(res) <- map_names
+ if (pm_has_groups(.data)) res <- pm_set_groups(res, pm_get_groups(.data))
+ res
}
pm_summarise <- function(.data, ...) {
pm_check_is_dataframe(.data)
diff --git a/R/ab.R b/R/ab.R
index 1bc1ea9d..1c49e9d6 100755
--- a/R/ab.R
+++ b/R/ab.R
@@ -104,7 +104,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) {
x_bak_clean <- generalise_antibiotic_name(x_bak_clean)
}
- x <- unique(x_bak_clean)
+ x <- unique(x_bak_clean) # this means that every x is in fact generalise_antibiotic_name(x)
x_new <- rep(NA_character_, length(x))
x_unknown <- character(0)
@@ -174,7 +174,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) {
# exact LOINC code
loinc_found <- unlist(lapply(AB_lookup$generalised_loinc,
- function(s) generalise_antibiotic_name(x[i]) %in% s))
+ function(s) x[i] %in% s))
found <- antibiotics$ab[loinc_found == TRUE]
if (length(found) > 0) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
@@ -183,7 +183,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) {
# exact synonym
synonym_found <- unlist(lapply(AB_lookup$generalised_synonyms,
- function(s) generalise_antibiotic_name(x[i]) %in% s))
+ function(s) x[i] %in% s))
found <- antibiotics$ab[synonym_found == TRUE]
if (length(found) > 0) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
@@ -192,7 +192,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) {
# exact abbreviation
abbr_found <- unlist(lapply(AB_lookup$generalised_abbreviations,
- function(s) generalise_antibiotic_name(x[i]) %in% s))
+ function(s) x[i] %in% s))
found <- antibiotics$ab[abbr_found == TRUE]
if (length(found) > 0) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
@@ -244,7 +244,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) {
# and try if any synonym starts with it
synonym_found <- unlist(lapply(AB_lookup$generalised_synonyms,
- function(s) any(generalise_antibiotic_name(s) %like% paste0("^", x_spelling))))
+ function(s) any(s %like% paste0("^", x_spelling))))
found <- antibiotics$ab[synonym_found == TRUE]
if (length(found) > 0) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
@@ -532,7 +532,7 @@ generalise_antibiotic_name <- function(x) {
# remove part between brackets if that's followed by another string
x <- gsub("(.*)+ [(].*[)]", "\\1", x)
# keep only max 1 space
- x <- trimws(gsub(" +", " ", x))
+ x <- trimws2(gsub(" +", " ", x))
# non-character, space or number should be a slash
x <- gsub("[^A-Z0-9 -]", "/", x)
# spaces around non-characters must be removed: amox + clav -> amox/clav
diff --git a/R/bug_drug_combinations.R b/R/bug_drug_combinations.R
index 7feee2c8..72faa7ef 100644
--- a/R/bug_drug_combinations.R
+++ b/R/bug_drug_combinations.R
@@ -199,7 +199,7 @@ format.bug_drug_combinations <- function(x,
select_ab_vars() %pm>%
pm_arrange(ab_group, ab_txt)
y <- y %pm>%
- create_var(ab_group = ifelse(y$ab_group != lag(y$ab_group) | is.na(pm_lag(y$ab_group)), y$ab_group, ""))
+ create_var(ab_group = ifelse(y$ab_group != pm_lag(y$ab_group) | is.na(pm_lag(y$ab_group)), y$ab_group, ""))
if (add_ab_group == FALSE) {
y <- y %pm>%
diff --git a/R/mo.R b/R/mo.R
index 10e3c84f..3cc6b72a 100755
--- a/R/mo.R
+++ b/R/mo.R
@@ -109,10 +109,10 @@
#' 3. The level of uncertainty \eqn{U} needed to get to the result, as stated above (1 to 3);
#' 4. The [Levenshtein distance](https://en.wikipedia.org/wiki/Levenshtein_distance) \eqn{L} is the distance between the user input and all taxonomic full names, with the text length of the user input being the maximum distance. A modified version of the Levenshtein distance \eqn{L'} based on the text length of the full name \eqn{F} is calculated as:
#'
-#' \deqn{L' = F - \frac{0.5 \times L}{F}}{L' = F - (0.5 * L) / F}
+#' \deqn{L' = F - \frac{0.5L}{F}}{L' = (F - 0.5L) / F}
#'
#' The final matching score \eqn{M} is calculated as:
-#' \deqn{M = L' \times \frac{1}{P \times K} * \frac{1}{U}}{M = L' * (1 / (P * K)) * (1 / U)}
+#' \deqn{M = L' \times \frac{1}{P K U} = \frac{F - 0.5L}{F P K U}}{M = L' * (1 / (P * K * U)) = (F - 0.5L) / (F * P * K * U)}
#'
#' All matches are sorted descending on their matching score and for all user input values, the top match will be returned.
#' @inheritSection catalogue_of_life Catalogue of Life
diff --git a/R/mo_matching_score.R b/R/mo_matching_score.R
index 54e87ddf..c7297c21 100755
--- a/R/mo_matching_score.R
+++ b/R/mo_matching_score.R
@@ -32,10 +32,10 @@
#' 3. The level of uncertainty \eqn{U} that is needed to get to a result (1 to 3, see [as.mo()]);
#' 4. The [Levenshtein distance](https://en.wikipedia.org/wiki/Levenshtein_distance) \eqn{L} is the distance between the user input and all taxonomic full names, with the text length of the user input being the maximum distance. A modified version of the Levenshtein distance \eqn{L'} based on the text length of the full name \eqn{F} is calculated as:
#'
-#' \deqn{L' = F - \frac{0.5 \times L}{F}}{L' = F - (0.5 * L) / F}
+#' \deqn{L' = F - \frac{0.5L}{F}}{L' = (F - 0.5L) / F}
#'
#' The final matching score \eqn{M} is calculated as:
-#' \deqn{M = L' \times \frac{1}{P \times K} * \frac{1}{U}}{M = L' * (1 / (P * K)) * (1 / U)}
+#' \deqn{M = L' \times \frac{1}{P K U} = \frac{F - 0.5L}{F P K U}}{M = L' * (1 / (P * K * U)) = (F - 0.5L) / (F * P * K * U)}
#'
#' @export
#' @examples
@@ -55,9 +55,18 @@ mo_matching_score <- function(x, fullname, uncertainty = 1) {
levenshtein[i] <- min(as.double(utils::adist(x[i], fullname[i], ignore.case = FALSE)),
nchar(fullname[i]))
}
- # self-made score between 0 and 1 (for % certainty, so 0 means huge distance, 1 means no distance)
- dist <- (nchar(fullname) - 0.5 * levenshtein) / nchar(fullname)
- prevalence_kingdom_index <- tryCatch(MO_lookup[match(fullname, MO_lookup$fullname), "prevalence_kingdom_index", drop = TRUE],
- error = function(e) rep(1, length(fullname)))
- dist * (1 / prevalence_kingdom_index) * (1 / uncertainty)
+
+ # F = length of fullname
+ var_F <- nchar(fullname)
+ # L = modified Levenshtein distance
+ var_L <- levenshtein
+ # P = Prevalence (1 to 3)
+ var_P <- MO_lookup[match(fullname, MO_lookup$fullname), "prevalence", drop = TRUE]
+ # K = kingdom index (Bacteria = 1, Fungi = 2, Protozoa = 3, Archaea = 4, others = 5)
+ var_K <- MO_lookup[match(fullname, MO_lookup$fullname), "kingdom_index", drop = TRUE]
+ # U = uncertainty level (1 to 3), as per as.mo()
+ var_U <- uncertainty
+
+ # matching score:
+ (var_F - 0.5 * L) / (var_F * var_P * var_K * var_U)
}
diff --git a/R/rsi_calc.R b/R/rsi_calc.R
index 3363137a..237f46a6 100755
--- a/R/rsi_calc.R
+++ b/R/rsi_calc.R
@@ -268,7 +268,7 @@ rsi_calc_df <- function(type, # "proportion", "count" or "both"
res <- do.call(rbind, unname(lapply(grouped, fn, ...)))
if (any(groups %in% colnames(res))) {
class(res) <- c("grouped_data", class(res))
- res <- set_groups(res, groups[groups %in% colnames(res)])
+ res <- pm_set_groups(res, groups[groups %in% colnames(res)])
}
res
}
diff --git a/R/zzz.R b/R/zzz.R
index 75f748cc..8f25e441 100755
--- a/R/zzz.R
+++ b/R/zzz.R
@@ -84,8 +84,6 @@ create_MO_lookup <- function() {
# all the rest
MO_lookup[which(is.na(MO_lookup$kingdom_index)), "kingdom_index"] <- 5
- MO_lookup$prevalence_kingdom_index <- MO_lookup$prevalence * MO_lookup$kingdom_index
-
# use this paste instead of `fullname` to work with Viridans Group Streptococci, etc.
MO_lookup$fullname_lower <- tolower(trimws(paste(MO_lookup$genus,
MO_lookup$species,
diff --git a/data-raw/reproduction_of_poorman.R b/data-raw/reproduction_of_poorman.R
index 1ba5f2f0..b5ec9fc0 100644
--- a/data-raw/reproduction_of_poorman.R
+++ b/data-raw/reproduction_of_poorman.R
@@ -7,9 +7,11 @@ files <- xml2::read_html(paste0("https://github.com/nathaneastwood/poorman/tree/
rvest::html_attr("href")
# get full URLs of all raw R files
-files <- paste0("https://raw.githubusercontent.com", gsub("blob/", "", files[files %like% "/R/.*.R$"]))
+files <- sort(paste0("https://raw.githubusercontent.com", gsub("blob/", "", files[files %like% "/R/.*.R$"])))
# remove files with only pkg specific code
-files <- files[!files %like% "(zzz.R|init.R)"]
+files <- files[!files %like% "(zzz|init)[.]R$"]
+# also, there's a lot of functions we don't use
+files <- files[!files %like% "(slice|glimpse|recode|replace_na|coalesce)[.]R$"]
# add our prepend file, containing info about the source of the data
intro <- readLines("data-raw/poorman_prepend.R")
@@ -68,8 +70,6 @@ contents <- gsub("context", "pm_context", contents, fixed = TRUE)
contents <- gsub("(pm_)+", "pm_", contents)
# special case for pm_distinct(), we need '.keep_all' to work
contents <- gsub("pm_distinct <- function(.data, ..., .keep_all = FALSE)", "pm_distinct <- function(.data, ...)", contents, fixed = TRUE)
-# removes unnecessary calls to package
-contents <- gsub("poorman::", "AMR:::", contents, fixed = TRUE)
# who needs US spelling?
contents <- contents[!grepl("summarize", contents)]
diff --git a/docs/404.html b/docs/404.html
index b623613b..09119032 100644
--- a/docs/404.html
+++ b/docs/404.html
@@ -81,7 +81,7 @@
NEWS.md
-
Note: some changes in this version were suggested by anonymous reviewers from the journal we submitted our manuscipt to. We are those reviewers very grateful for going through our code so thoroughly!
The Levenshtein distance \(L\) is the distance between the user input and all taxonomic full names, with the text length of the user input being the maximum distance. A modified version of the Levenshtein distance \(L'\) based on the text length of the full name \(F\) is calculated as:
$$L' = F - \frac{0.5 \times L}{F}$$
+$$L' = F - \frac{0.5L}{F}$$
The final matching score \(M\) is calculated as: -$$M = L' \times \frac{1}{P \times K} * \frac{1}{U}$$
+$$M = L' \times \frac{1}{P K U} = \frac{F - 0.5L}{F P K U}$$All matches are sorted descending on their matching score and for all user input values, the top match will be returned.
The Levenshtein distance \(L\) is the distance between the user input and all taxonomic full names, with the text length of the user input being the maximum distance. A modified version of the Levenshtein distance \(L'\) based on the text length of the full name \(F\) is calculated as:
$$L' = F - \frac{0.5 \times L}{F}$$
+$$L' = F - \frac{0.5L}{F}$$
The final matching score \(M\) is calculated as: -$$M = L' \times \frac{1}{P \times K} * \frac{1}{U}$$
+$$M = L' \times \frac{1}{P K U} = \frac{F - 0.5L}{F P K U}$$as.mo("E. coli") diff --git a/docs/survey.html b/docs/survey.html index 71a65401..49171f41 100644 --- a/docs/survey.html +++ b/docs/survey.html @@ -81,7 +81,7 @@