diff --git a/.github/workflows/lintr.yaml b/.github/workflows/lintr.yaml index 82bb69ee5..7931193bd 100644 --- a/.github/workflows/lintr.yaml +++ b/.github/workflows/lintr.yaml @@ -62,5 +62,5 @@ jobs: shell: Rscript {0} - name: Lint - run: lintr::lint_package(linters = lintr::with_defaults(line_length_linter = NULL, trailing_whitespace_linter = NULL, object_name_linter = NULL, cyclocomp_linter = NULL, object_usage_linter = NULL, object_length_linter = lintr::object_length_linter(length = 50L)), exclusions = list("R/aa_helper_functions_dplyr.R")) + run: lintr::lint_package(linters = lintr::with_defaults(line_length_linter = NULL, trailing_whitespace_linter = NULL, object_name_linter = NULL, cyclocomp_linter = NULL, object_usage_linter = NULL, object_length_linter = lintr::object_length_linter(length = 50L)), exclusions = list("R/aa_helper_pm_functions.R")) shell: Rscript {0} diff --git a/DESCRIPTION b/DESCRIPTION index e9af8e61c..b8a09a254 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR -Version: 1.3.0.9022 -Date: 2020-09-18 +Version: 1.3.0.9023 +Date: 2020-09-19 Title: Antimicrobial Resistance Analysis Authors@R: c( person(role = c("aut", "cre"), diff --git a/NEWS.md b/NEWS.md index 3c6dacfbe..3205a48c1 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,5 @@ -# AMR 1.3.0.9022 -## Last updated: 18 September 2020 +# AMR 1.3.0.9023 +## Last updated: 19 September 2020 Note: some changes in this version were suggested by anonymous reviewers from the journal we submitted our manuscipt to. We are those reviewers very grateful for going through our code so thoroughly! diff --git a/R/aa_helper_functions.R b/R/aa_helper_functions.R index f21d733af..20881558f 100755 --- a/R/aa_helper_functions.R +++ b/R/aa_helper_functions.R @@ -26,7 +26,7 @@ pm_left_join <- function(x, y, by = NULL, suffix = c(".x", ".y")) { if (is.na(by)) { stop_("no common column found for pm_left_join()") } - join_message(by) + pm_join_message(by) } else if (!is.null(names(by))) { by <- unname(c(names(by), by)) } diff --git a/R/aa_helper_pm_functions.R b/R/aa_helper_pm_functions.R index 50556b9a0..ef9d54a15 100644 --- a/R/aa_helper_pm_functions.R +++ b/R/aa_helper_pm_functions.R @@ -36,7 +36,7 @@ # distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software # is furnished to do so', given that a copyright notice is given in the software. # -# Copyright notice on 18 September 2020, the day this code was downloaded, as found on +# Copyright notice on 19 September 2020, the day this code was downloaded, as found on # https://github.com/nathaneastwood/poorman/blob/52eb6947e0b4430cd588976ed8820013eddf955f/LICENSE: # YEAR: 2020 # COPYRIGHT HOLDER: Nathan Eastwood @@ -67,28 +67,6 @@ pm_between <- function(x, left, right) { if (!is.double(x)) x <- as.numeric(x) x >= as.numeric(left) & x <= as.numeric(right) } -pm_coalesce <- function(...) { - if (missing(..1)) stop("At least one argument must be supplied.") - - vectors <- list(...) - vectors_lens <- unique(lengths(vectors)) - if (length(vectors_lens) > 2L || (length(vectors_lens) == 2L & !1 %in% vectors_lens)) { - stop("Vectors must all be of length 1 and/or pm_n") - } - max_len <- max(vectors_lens) - - len_one <- lengths(vectors) == 1L - vectors[len_one] <- lapply(vectors[len_one], function(x) rep(x, max_len)) - - x <- vectors[[1]] - vectors <- vectors[-1] - - for (i in seq_along(vectors)) { - x_miss <- is.na(x) - x[x_miss] <- vectors[[i]][x_miss] - } - x -} pm_context <- new.env() # Data @@ -288,23 +266,6 @@ pm_filter.grouped_data <- function(.data, ...) { res <- pm_apply_grouped_function("pm_filter", .data, drop = TRUE, ...) res[rows[rows %in% rownames(res)], ] } -pm_glimpse <- function(x, width = getOption("width"), ...) { - if ("grouped_data" %in% class(.data)) { - pm_glimpse.grouped_data(.data, ...) - } else { - pm_glimpse.default(.data, ...) - } -} - -pm_glimpse.default <- function (x, width = getOption("width"), max.level = 3, ...) { - utils::str(x, width = width, max.level = max.level, ...) - invisible(x) -} - -pm_glimpse.data.frame <- function(x, width = getOption("width"), ...) { - utils::str(x, width = width, ...) - invisible(x) -} pm_group_by <- function(.data, ..., .add = FALSE) { pm_check_is_dataframe(.data) pre_groups <- pm_get_groups(.data) @@ -481,6 +442,27 @@ pm_if_else <- function(condition, true, false, missing = NULL) { res } +pm_anti_join <- function(x, y, by = NULL) { + pm_filter_join_worker(x, y, by, type = "anti") +} + +pm_semi_join <- function(x, y, by = NULL) { + pm_filter_join_worker(x, y, by, type = "semi") +} + +pm_filter_join_worker <- function(x, y, by = NULL, type = c("anti", "semi")) { + type <- match.arg(type, choices = c("anti", "semi"), several.ok = FALSE) + if (is.null(by)) { + by <- intersect(names(x), names(y)) + pm_join_message(by) + } + rows <- interaction(x[, by]) %in% interaction(y[, by]) + if (type == "anti") rows <- !rows + res <- x[rows, , drop = FALSE] + rownames(res) <- NULL + res +} + pm_inner_join <- function(x, y, by = NULL, suffix = c(".x", ".y")) { pm_join_worker(x = x, y = y, by = by, suffix = suffix, sort = FALSE) } @@ -520,27 +502,6 @@ pm_join_message <- function(by) { message("Joining, by = \"", by, "\"\n", sep = "") } } - -pm_anti_join <- function(x, y, by = NULL) { - pm_filter_join_worker(x, y, by, type = "anti") -} - -pm_semi_join <- function(x, y, by = NULL) { - pm_filter_join_worker(x, y, by, type = "semi") -} - -pm_filter_join_worker <- function(x, y, by = NULL, type = c("anti", "semi")) { - type <- match.arg(type, choices = c("anti", "semi"), several.ok = FALSE) - if (is.null(by)) { - by <- intersect(names(x), names(y)) - pm_join_message(by) - } - rows <- interaction(x[, by]) %in% interaction(y[, by]) - if (type == "anti") rows <- !rows - res <- x[rows,, drop = FALSE] - rownames(res) <- NULL - res -} pm_lag <- function(x, pm_n = 1L, default = NA) { if (inherits(x, "ts")) stop("`x` must be a vector, not a `ts` object, do you want `stats::pm_lag()`?") if (length(pm_n) != 1L || !is.numeric(pm_n) || pm_n < 0L) stop("`pm_n` must be a nonnegative integer scalar") @@ -630,163 +591,6 @@ pm_pull <- function(.data, var = -1) { } .data[, var] } -pm_recode <- function(.x, ..., .default = NULL, .missing = NULL) { - if ("grouped_data" %in% class(.data)) { - pm_recode.grouped_data(.data, ...) - } else { - pm_recode.default(.data, ...) - } -} - -pm_recode.numeric <- function(.x, ..., .default = NULL, .missing = NULL) { - values <- pm_dotdotdot(...) - - nms <- pm_have_name(values) - if (all(nms)) { - vals <- as.double(names(values)) - } else if (all(!nms)) { - vals <- seq_along(values) - } else { - stop("Either all values must be named, or none must be named.") - } - - pm_n <- length(.x) - template <- pm_find_template(values, .default, .missing) - res <- template[rep(NA_integer_, pm_n)] - replaced <- rep(FALSE, pm_n) - - for (i in seq_along(values)) { - res <- pm_replace_with(res, .x == vals[i], values[[i]], paste0("Vector ", i)) - replaced[.x == vals[i]] <- TRUE - } - - .default <- pm_validate_recode_default(.default, .x, res, replaced) - res <- pm_replace_with(res, !replaced & !is.na(.x), .default, "`.default`") - res <- pm_replace_with(res, is.na(.x), .missing, "`.missing`") - res -} - -pm_recode.character <- function(.x, ..., .default = NULL, .missing = NULL) { - .x <- as.character(.x) - values <- pm_dotdotdot(...) - val_names <- names(values) - have_names <- pm_have_name(values) - if (!all(have_names)) { - bad <- which(!have_names) + 1L - stop("Argument", if (length(bad) > 1L) "s", " ", paste(bad, sep = ", "), " must be named, not unnamed.") - } - - pm_n <- length(.x) - template <- pm_find_template(values, .default, .missing) - res <- template[rep(NA_integer_, pm_n)] - replaced <- rep(FALSE, pm_n) - - for (nm in val_names) { - res <- pm_replace_with(res, .x == nm, values[[nm]], paste0("`", nm, "`")) - replaced[.x == nm] <- TRUE - } - - .default <- pm_validate_recode_default(.default, .x, res, replaced) - res <- pm_replace_with(res, !replaced & !is.na(.x), .default, "`.default`") - res <- pm_replace_with(res, is.na(.x), .missing, "`.missing`") - res -} - -pm_recode.factor <- function(.x, ..., .default = NULL, .missing = NULL) { - values <- pm_dotdotdot(...) - if (length(values) == 0) stop("No replacements provided.") - - have_names <- pm_have_name(values) - if (!all(have_names)) { - bad <- which(!have_names) + 1 - stop(bad, " must be named, not unnamed.") - } - if (!is.null(.missing)) { - stop("`.missing` is not supported for factors.") - } - - pm_n <- length(levels(.x)) - template <- pm_find_template(values, .default, .missing) - res <- template[rep(NA_integer_, pm_n)] - replaced <- rep(FALSE, pm_n) - - for (nm in names(values)) { - res <- pm_replace_with(res, levels(.x) == nm, values[[nm]], paste0("`", nm, "`")) - replaced[levels(.x) == nm] <- TRUE - } - .default <- pm_validate_recode_default(.default, .x, res, replaced) - res <- pm_replace_with(res, !replaced, .default, "`.default`") - - if (is.character(res)) { - levels(.x) <- res - .x - } else { - res[as.integer(.x)] - } -} - -pm_have_name <- function(x) { - nms <- names(x) - if (is.null(nms)) rep(FALSE, length(x)) else !(nms == "" | is.na(nms)) -} - -pm_compact <- function(.x) Filter(length, .x) - -pm_find_template <- function(values, .default = NULL, .missing = NULL) { - x <- pm_compact(c(values, .default, .missing)) - if (length(x) == 0L) { - stop("No replacements provided.") - } - x[[1]] -} - -pm_validate_recode_default <- function(default, x, res, replaced) { - default <- pm_recode_default(x, default, res) - if (is.null(default) && sum(replaced & !is.na(x)) < length(res[!is.na(x)])) { - warning( - "Unreplaced values treated as NA as .x is not compatible. ", - "Please specify replacements exhaustively or supply .default", - call. = FALSE - ) - } - default -} - -pm_recode_default <- function(x, default, res) { - if ("grouped_data" %in% class(.data)) { - pm_recode_default.grouped_data(.data, ...) - } else { - pm_recode_default.default(.data, ...) - } -} - -pm_recode_default.default <- function(x, default, res) { - same_type <- identical(typeof(x), typeof(res)) - if (is.null(default) && same_type) x else default -} - -pm_recode_default.factor <- function(x, default, res) { - if (is.null(default)) { - if ((is.character(res) || is.factor(res)) && is.factor(x)) { - levels(x) - } else { - res[NA_integer_] - } - } else { - default - } -} - -pm_recode_factor <- function(.x, ..., .default = NULL, .missing = NULL, .ordered = FALSE) { - recoded <- pm_recode(.x, ..., .default = .default, .missing = .missing) - - values <- pm_dotdotdot(...) - all_levels <- unique(c(values, pm_recode_default(.x, .default, recoded), .missing)) - recoded_levels <- if (is.factor(recoded)) levels(recoded) else unique(recoded) - levels <- intersect(all_levels, recoded_levels) - - factor(recoded, levels, ordered = .ordered) -} pm_set_names <- function(object = nm, nm) { names(object) <- nm object @@ -860,35 +664,6 @@ pm_rename_with <- function(.data, .fn, .cols = pm_everything(), ...) { if (grouped) .data <- pm_set_groups(.data, colnames(.data)[grp_pos]) .data } -pm_replace_na <- function(data, replace, ...) { - if ("grouped_data" %in% class(.data)) { - pm_replace_na.grouped_data(.data, ...) - } else { - pm_replace_na.default(.data, ...) - } -} - -pm_replace_na.default <- function(data, replace = NA, ...) { - pm_check_replacement(replace, deparse(substitute(data))) - data[is.na(data)] <- replace - data -} - -pm_replace_na.data.frame <- function(data, replace = list(), ...) { - stopifnot(is.list(replace)) - replace_vars <- intersect(names(replace), names(data)) - for (var in replace_vars) { - pm_check_replacement(replace[[var]], var) - data[[var]][is.na(data[[var]])] <- replace[[var]] - } - data -} - -pm_check_replacement <- function(x, var) { - pm_n <- length(x) - if (pm_n == 1L) return() - stop("Replacement for `", var, "` is length ", pm_n, ", not length 1") -} pm_replace_with <- function(x, i, val, arg_name) { if (is.null(val)) return(x) pm_check_length(val, x, arg_name) @@ -937,20 +712,6 @@ pm_rownames_to_column <- function(.data, var = "rowname") { rownames(.data) <- NULL .data[, c(var, setdiff(col_names, var))] } - -pm_select <- function(.data, ...) { - col_pos <- pm_select_positions(.data, ..., .group_pos = TRUE) - map_names <- names(col_pos) - map_names_length <- nchar(map_names) - if (any(map_names_length == 0L)) { - no_new_names <- which(map_names_length == 0L) - map_names[no_new_names] <- colnames(.data)[no_new_names] - } - res <- .data[, col_pos, drop = FALSE] - if (!is.null(map_names) && all(col_pos > 0L)) colnames(res) <- map_names - if (pm_has_groups(.data)) res <- pm_set_groups(res, pm_get_groups(.data)) - res -} pm_starts_with <- function(match, ignore.case = TRUE, vars = pm_peek_vars()) { grep(pattern = paste0("^", paste0(match, collapse = "|^")), x = vars, ignore.case = ignore.case) } @@ -1161,243 +922,18 @@ pm_select_env$get_colnames <- function() colnames(pm_select_env$.data) pm_select_env$get_nrow <- function() nrow(pm_select_env$.data) pm_select_env$get_ncol <- function() ncol(pm_select_env$.data) -pm_slice <- function(.data, ...) { - pm_check_is_dataframe(.data) - if ("grouped_data" %in% class(.data)) { - pm_slice.grouped_data(.data, ...) - } else { - pm_slice.default(.data, ...) +pm_select <- function(.data, ...) { + col_pos <- pm_select_positions(.data, ..., .group_pos = TRUE) + map_names <- names(col_pos) + map_names_length <- nchar(map_names) + if (any(map_names_length == 0L)) { + no_new_names <- which(map_names_length == 0L) + map_names[no_new_names] <- colnames(.data)[no_new_names] } -} - -pm_slice.data.frame <- function(.data, ...) { - if (nrow(.data) == 0L) return(.data) - pos <- pm_slice_positions(.data, ...) - .data[pos, , drop = FALSE] -} - -pm_slice.grouped_data <- function(.data, ...) { - pm_apply_grouped_function("pm_slice", .data, drop = TRUE, ...) -} - - -pm_slice_head <- function(.data, ..., pm_n, prop) { - if ("grouped_data" %in% class(.data)) { - pm_slice_head.grouped_data(.data, ...) - } else { - pm_slice_head.default(.data, ...) - } -} - -pm_slice_head.data.frame <- function(.data, ..., pm_n, prop) { - size <- pm_check_slice_size(pm_n, prop) - idx <- switch( - size$type, - pm_n = function(pm_n) seq2(1, min(size$pm_n, pm_n)), - prop = function(pm_n) seq2(1, min(size$prop * pm_n, pm_n)) - ) - pm_slice(.data, idx(AMR:::pm_n())) -} - -pm_slice_head.grouped_data <- function(.data, ..., pm_n, prop) { - pm_apply_grouped_function("pm_slice_head", .data, drop = TRUE, pm_n = pm_n, prop = prop, ...) -} - -pm_slice_tail <- function(.data, ..., pm_n, prop) { - if ("grouped_data" %in% class(.data)) { - pm_slice_tail.grouped_data(.data, ...) - } else { - pm_slice_tail.default(.data, ...) - } -} - -pm_slice_tail.data.frame <- function(.data, ..., pm_n, prop) { - size <- pm_check_slice_size(pm_n, prop) - idx <- switch( - size$type, - pm_n = function(pm_n) seq2(max(pm_n - size$pm_n + 1, 1), pm_n), - prop = function(pm_n) seq2(max(ceiling(pm_n - size$prop * pm_n) + 1, 1), pm_n) - ) - pm_slice(.data, idx(AMR:::pm_n())) -} - -pm_slice_tail.grouped_data <- function(.data, ..., pm_n, prop) { - pm_apply_grouped_function("pm_slice_tail", .data, drop = TRUE, pm_n = pm_n, prop = prop, ...) -} - -pm_slice_min <- function(.data, order_by, ..., pm_n, prop, with_ties = TRUE) { - if ("grouped_data" %in% class(.data)) { - pm_slice_min.grouped_data(.data, ...) - } else { - pm_slice_min.default(.data, ...) - } -} - -pm_slice_min.data.frame <- function(.data, order_by, ..., pm_n, prop, with_ties = TRUE) { - if (missing(order_by)) stop("argument `order_by` is missing, with no default.") - - size <- pm_check_slice_size(pm_n, prop) - idx <- if (isTRUE(with_ties)) { - switch( - size$type, - pm_n = function(x, pm_n) pm_vec_head(order(x), pm_smaller_ranks(x, size$pm_n)), - prop = function(x, pm_n) pm_vec_head(order(x), pm_smaller_ranks(x, size$prop * pm_n)) - ) - } else { - switch( - size$type, - pm_n = function(x, pm_n) pm_vec_head(order(x), size$pm_n), - prop = function(x, pm_n) pm_vec_head(order(x), size$prop * pm_n) - ) - } - order_by <- .data[, pm_deparse_var(order_by)] - pm_slice(.data, idx(order_by, AMR:::pm_n())) -} - -pm_slice_min.grouped_data <- function(.data, order_by, ..., pm_n, prop, with_ties = TRUE) { - pm_eval_env$env <- environment() - on.exit(rm(list = "env", envir = pm_eval_env), add = TRUE) - pm_apply_grouped_function( - "pm_slice_min", .data, drop = TRUE, order_by = order_by, pm_n = pm_n, prop = prop, with_ties = with_ties, ... - ) -} - -pm_slice_max <- function(.data, order_by, ..., pm_n, prop, with_ties = TRUE) { - if ("grouped_data" %in% class(.data)) { - pm_slice_max.grouped_data(.data, ...) - } else { - pm_slice_max.default(.data, ...) - } -} - -pm_slice_max.data.frame <- function(.data, order_by, ..., pm_n, prop, with_ties = TRUE) { - if (missing(order_by)) stop("argument `order_by` is missing, with no default.") - - size <- pm_check_slice_size(pm_n, prop) - idx <- if (isTRUE(with_ties)) { - switch( - size$type, - pm_n = function(x, pm_n) pm_vec_head(order(x, decreasing = TRUE), pm_smaller_ranks(pm_desc(x), size$pm_n)), - prop = function(x, pm_n) pm_vec_head(order(x, decreasing = TRUE), pm_smaller_ranks(pm_desc(x), size$prop * pm_n)) - ) - } else { - switch( - size$type, - pm_n = function(x, pm_n) pm_vec_head(order(x, decreasing = TRUE), size$pm_n), - prop = function(x, pm_n) pm_vec_head(order(x, decreasing = TRUE), size$prop * pm_n) - ) - } - order_by <- .data[, pm_deparse_var(order_by)] - pm_slice(.data, idx(order_by, AMR:::pm_n())) -} - -pm_slice_max.grouped_data <- function(.data, order_by, ..., pm_n, prop, with_ties = TRUE) { - pm_eval_env$env <- environment() - on.exit(rm(list = "env", envir = pm_eval_env), add = TRUE) - pm_apply_grouped_function( - "pm_slice_max", .data, drop = TRUE, order_by = order_by, pm_n = pm_n, prop = prop, with_ties = with_ties, ... - ) -} - -pm_slice_sample <- function(.data, ..., pm_n, prop, weight_by = NULL, replace = FALSE) { - if ("grouped_data" %in% class(.data)) { - pm_slice_sample.grouped_data(.data, ...) - } else { - pm_slice_sample.default(.data, ...) - } -} - -pm_slice_sample.data.frame <- function(.data, ..., pm_n, prop, weight_by = NULL, replace = FALSE) { - size <- pm_check_slice_size(pm_n, prop) - idx <- switch( - size$type, - pm_n = function(x, pm_n) pm_sample_int(pm_n, size$pm_n, replace = replace, wt = x), - prop = function(x, pm_n) pm_sample_int(pm_n, size$prop * pm_n, replace = replace, wt = x), - ) - weight_by <- pm_deparse_var(weight_by) - if (!is.null(weight_by)) weight_by <- .data[, weight_by] - pm_slice(.data, idx(weight_by, AMR:::pm_n())) -} - -pm_slice_sample.grouped_data <- function(.data, ..., pm_n, prop, weight_by = NULL, replace = FALSE) { - pm_eval_env$env <- environment() - on.exit(rm(list = "env", envir = pm_eval_env), add = TRUE) - pm_apply_grouped_function( - "pm_slice_sample", .data, drop = TRUE, pm_n = pm_n, prop = prop, weight_by = weight_by, replace = replace, ... - ) -} - -# helpers ---------------------------------------------------------------------- - -pm_slice_positions <- function(.data, ...) { - conditions <- pm_dotdotdot(...) - pm_context$setup(.data) - on.exit(pm_context$clean(), add = TRUE) - if (length(conditions) == 0L) return(seq_len(pm_n())) - - frame <- parent.frame(2L) - rows <- lapply( - conditions, - function(cond, frame) { - res <- eval(cond, pm_context$.data, frame) - if (is.logical(res) && all(is.na(res))) { - res <- integer() - } else if (is.numeric(res)) { - res <- as.integer(res) - } else if (!is.integer(res)) { - stop("`pm_slice()` expressions should return indices (positive or negative integers).") - } - }, - frame = frame - ) - rows <- do.call(c, rows) - if (length(rows) == 0L) { - # do nothing - } else if (all(rows >= 0, na.rm = TRUE)) { - rows <- rows[!is.na(rows) & rows <= pm_n() & rows > 0] - } else if (all(rows <= 0, na.rm = TRUE)) { - rows <- setdiff(seq_len(pm_n()), -rows) - } else { - stop("`pm_slice()` expressions should return either all positive or all negative.") - } - rows -} - -pm_check_slice_size <- function(pm_n, prop) { - if (missing(pm_n) && missing(prop)) { - list(type = "pm_n", pm_n = 1L) - } else if (!missing(pm_n) && missing(prop)) { - if (!is.numeric(pm_n) || length(pm_n) != 1) { - stop("`pm_n` must be a single number.") - } - if (is.na(pm_n) || pm_n < 0) { - stop("`pm_n` must be a non-missing positive number.") - } - - list(type = "pm_n", pm_n = pm_n) - } else if (!missing(prop) && missing(pm_n)) { - if (!is.numeric(prop) || length(prop) != 1) { - stop("`prop` must be a single number.") - } - if (is.na(prop) || prop < 0) { - stop("`prop` must be a non-missing positive number.") - } - list(type = "prop", prop = prop) - } else { - stop("Must supply exactly one of `pm_n` and `prop` arguments.") - } -} - -pm_sample_int <- function(pm_n, size, replace = FALSE, wt = NULL) { - if (isTRUE(replace)) { - sample.int(pm_n, size, prob = wt, replace = TRUE) - } else { - sample.int(pm_n, min(size, pm_n), prob = wt) - } -} - -pm_smaller_ranks <- function(x, y) { - sum(pm_min_rank(x) <= y, na.rm = TRUE) + res <- .data[, col_pos, drop = FALSE] + if (!is.null(map_names) && all(col_pos > 0L)) colnames(res) <- map_names + if (pm_has_groups(.data)) res <- pm_set_groups(res, pm_get_groups(.data)) + res } pm_summarise <- function(.data, ...) { pm_check_is_dataframe(.data) diff --git a/R/ab.R b/R/ab.R index 1bc1ea9d4..1c49e9d67 100755 --- a/R/ab.R +++ b/R/ab.R @@ -104,7 +104,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) { x_bak_clean <- generalise_antibiotic_name(x_bak_clean) } - x <- unique(x_bak_clean) + x <- unique(x_bak_clean) # this means that every x is in fact generalise_antibiotic_name(x) x_new <- rep(NA_character_, length(x)) x_unknown <- character(0) @@ -174,7 +174,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) { # exact LOINC code loinc_found <- unlist(lapply(AB_lookup$generalised_loinc, - function(s) generalise_antibiotic_name(x[i]) %in% s)) + function(s) x[i] %in% s)) found <- antibiotics$ab[loinc_found == TRUE] if (length(found) > 0) { x_new[i] <- note_if_more_than_one_found(found, i, from_text) @@ -183,7 +183,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) { # exact synonym synonym_found <- unlist(lapply(AB_lookup$generalised_synonyms, - function(s) generalise_antibiotic_name(x[i]) %in% s)) + function(s) x[i] %in% s)) found <- antibiotics$ab[synonym_found == TRUE] if (length(found) > 0) { x_new[i] <- note_if_more_than_one_found(found, i, from_text) @@ -192,7 +192,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) { # exact abbreviation abbr_found <- unlist(lapply(AB_lookup$generalised_abbreviations, - function(s) generalise_antibiotic_name(x[i]) %in% s)) + function(s) x[i] %in% s)) found <- antibiotics$ab[abbr_found == TRUE] if (length(found) > 0) { x_new[i] <- note_if_more_than_one_found(found, i, from_text) @@ -244,7 +244,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) { # and try if any synonym starts with it synonym_found <- unlist(lapply(AB_lookup$generalised_synonyms, - function(s) any(generalise_antibiotic_name(s) %like% paste0("^", x_spelling)))) + function(s) any(s %like% paste0("^", x_spelling)))) found <- antibiotics$ab[synonym_found == TRUE] if (length(found) > 0) { x_new[i] <- note_if_more_than_one_found(found, i, from_text) @@ -532,7 +532,7 @@ generalise_antibiotic_name <- function(x) { # remove part between brackets if that's followed by another string x <- gsub("(.*)+ [(].*[)]", "\\1", x) # keep only max 1 space - x <- trimws(gsub(" +", " ", x)) + x <- trimws2(gsub(" +", " ", x)) # non-character, space or number should be a slash x <- gsub("[^A-Z0-9 -]", "/", x) # spaces around non-characters must be removed: amox + clav -> amox/clav diff --git a/R/bug_drug_combinations.R b/R/bug_drug_combinations.R index 7feee2c8d..72faa7ef3 100644 --- a/R/bug_drug_combinations.R +++ b/R/bug_drug_combinations.R @@ -199,7 +199,7 @@ format.bug_drug_combinations <- function(x, select_ab_vars() %pm>% pm_arrange(ab_group, ab_txt) y <- y %pm>% - create_var(ab_group = ifelse(y$ab_group != lag(y$ab_group) | is.na(pm_lag(y$ab_group)), y$ab_group, "")) + create_var(ab_group = ifelse(y$ab_group != pm_lag(y$ab_group) | is.na(pm_lag(y$ab_group)), y$ab_group, "")) if (add_ab_group == FALSE) { y <- y %pm>% diff --git a/R/mo.R b/R/mo.R index 10e3c84f8..3cc6b72a5 100755 --- a/R/mo.R +++ b/R/mo.R @@ -109,10 +109,10 @@ #' 3. The level of uncertainty \eqn{U} needed to get to the result, as stated above (1 to 3); #' 4. The [Levenshtein distance](https://en.wikipedia.org/wiki/Levenshtein_distance) \eqn{L} is the distance between the user input and all taxonomic full names, with the text length of the user input being the maximum distance. A modified version of the Levenshtein distance \eqn{L'} based on the text length of the full name \eqn{F} is calculated as: #' -#' \deqn{L' = F - \frac{0.5 \times L}{F}}{L' = F - (0.5 * L) / F} +#' \deqn{L' = F - \frac{0.5L}{F}}{L' = (F - 0.5L) / F} #' #' The final matching score \eqn{M} is calculated as: -#' \deqn{M = L' \times \frac{1}{P \times K} * \frac{1}{U}}{M = L' * (1 / (P * K)) * (1 / U)} +#' \deqn{M = L' \times \frac{1}{P K U} = \frac{F - 0.5L}{F P K U}}{M = L' * (1 / (P * K * U)) = (F - 0.5L) / (F * P * K * U)} #' #' All matches are sorted descending on their matching score and for all user input values, the top match will be returned. #' @inheritSection catalogue_of_life Catalogue of Life diff --git a/R/mo_matching_score.R b/R/mo_matching_score.R index 54e87ddfe..c7297c218 100755 --- a/R/mo_matching_score.R +++ b/R/mo_matching_score.R @@ -32,10 +32,10 @@ #' 3. The level of uncertainty \eqn{U} that is needed to get to a result (1 to 3, see [as.mo()]); #' 4. The [Levenshtein distance](https://en.wikipedia.org/wiki/Levenshtein_distance) \eqn{L} is the distance between the user input and all taxonomic full names, with the text length of the user input being the maximum distance. A modified version of the Levenshtein distance \eqn{L'} based on the text length of the full name \eqn{F} is calculated as: #' -#' \deqn{L' = F - \frac{0.5 \times L}{F}}{L' = F - (0.5 * L) / F} +#' \deqn{L' = F - \frac{0.5L}{F}}{L' = (F - 0.5L) / F} #' #' The final matching score \eqn{M} is calculated as: -#' \deqn{M = L' \times \frac{1}{P \times K} * \frac{1}{U}}{M = L' * (1 / (P * K)) * (1 / U)} +#' \deqn{M = L' \times \frac{1}{P K U} = \frac{F - 0.5L}{F P K U}}{M = L' * (1 / (P * K * U)) = (F - 0.5L) / (F * P * K * U)} #' #' @export #' @examples @@ -55,9 +55,18 @@ mo_matching_score <- function(x, fullname, uncertainty = 1) { levenshtein[i] <- min(as.double(utils::adist(x[i], fullname[i], ignore.case = FALSE)), nchar(fullname[i])) } - # self-made score between 0 and 1 (for % certainty, so 0 means huge distance, 1 means no distance) - dist <- (nchar(fullname) - 0.5 * levenshtein) / nchar(fullname) - prevalence_kingdom_index <- tryCatch(MO_lookup[match(fullname, MO_lookup$fullname), "prevalence_kingdom_index", drop = TRUE], - error = function(e) rep(1, length(fullname))) - dist * (1 / prevalence_kingdom_index) * (1 / uncertainty) + + # F = length of fullname + var_F <- nchar(fullname) + # L = modified Levenshtein distance + var_L <- levenshtein + # P = Prevalence (1 to 3) + var_P <- MO_lookup[match(fullname, MO_lookup$fullname), "prevalence", drop = TRUE] + # K = kingdom index (Bacteria = 1, Fungi = 2, Protozoa = 3, Archaea = 4, others = 5) + var_K <- MO_lookup[match(fullname, MO_lookup$fullname), "kingdom_index", drop = TRUE] + # U = uncertainty level (1 to 3), as per as.mo() + var_U <- uncertainty + + # matching score: + (var_F - 0.5 * L) / (var_F * var_P * var_K * var_U) } diff --git a/R/rsi_calc.R b/R/rsi_calc.R index 3363137ab..237f46a60 100755 --- a/R/rsi_calc.R +++ b/R/rsi_calc.R @@ -268,7 +268,7 @@ rsi_calc_df <- function(type, # "proportion", "count" or "both" res <- do.call(rbind, unname(lapply(grouped, fn, ...))) if (any(groups %in% colnames(res))) { class(res) <- c("grouped_data", class(res)) - res <- set_groups(res, groups[groups %in% colnames(res)]) + res <- pm_set_groups(res, groups[groups %in% colnames(res)]) } res } diff --git a/R/zzz.R b/R/zzz.R index 75f748cc3..8f25e441f 100755 --- a/R/zzz.R +++ b/R/zzz.R @@ -84,8 +84,6 @@ create_MO_lookup <- function() { # all the rest MO_lookup[which(is.na(MO_lookup$kingdom_index)), "kingdom_index"] <- 5 - MO_lookup$prevalence_kingdom_index <- MO_lookup$prevalence * MO_lookup$kingdom_index - # use this paste instead of `fullname` to work with Viridans Group Streptococci, etc. MO_lookup$fullname_lower <- tolower(trimws(paste(MO_lookup$genus, MO_lookup$species, diff --git a/data-raw/reproduction_of_poorman.R b/data-raw/reproduction_of_poorman.R index 1ba5f2f01..b5ec9fc03 100644 --- a/data-raw/reproduction_of_poorman.R +++ b/data-raw/reproduction_of_poorman.R @@ -7,9 +7,11 @@ files <- xml2::read_html(paste0("https://github.com/nathaneastwood/poorman/tree/ rvest::html_attr("href") # get full URLs of all raw R files -files <- paste0("https://raw.githubusercontent.com", gsub("blob/", "", files[files %like% "/R/.*.R$"])) +files <- sort(paste0("https://raw.githubusercontent.com", gsub("blob/", "", files[files %like% "/R/.*.R$"]))) # remove files with only pkg specific code -files <- files[!files %like% "(zzz.R|init.R)"] +files <- files[!files %like% "(zzz|init)[.]R$"] +# also, there's a lot of functions we don't use +files <- files[!files %like% "(slice|glimpse|recode|replace_na|coalesce)[.]R$"] # add our prepend file, containing info about the source of the data intro <- readLines("data-raw/poorman_prepend.R") @@ -68,8 +70,6 @@ contents <- gsub("context", "pm_context", contents, fixed = TRUE) contents <- gsub("(pm_)+", "pm_", contents) # special case for pm_distinct(), we need '.keep_all' to work contents <- gsub("pm_distinct <- function(.data, ..., .keep_all = FALSE)", "pm_distinct <- function(.data, ...)", contents, fixed = TRUE) -# removes unnecessary calls to package -contents <- gsub("poorman::", "AMR:::", contents, fixed = TRUE) # who needs US spelling? contents <- contents[!grepl("summarize", contents)] diff --git a/docs/404.html b/docs/404.html index b623613b4..091190327 100644 --- a/docs/404.html +++ b/docs/404.html @@ -81,7 +81,7 @@ AMR (for R) - 1.3.0.9022 + 1.3.0.9023 diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index 5faa964a5..dc284ceb7 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -81,7 +81,7 @@ AMR (for R) - 1.3.0.9022 + 1.3.0.9023 diff --git a/docs/articles/index.html b/docs/articles/index.html index a9667d597..0aaa5b9a0 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -81,7 +81,7 @@ AMR (for R) - 1.3.0.9022 + 1.3.0.9023 diff --git a/docs/authors.html b/docs/authors.html index f8542b4e2..bacef9da2 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -81,7 +81,7 @@ AMR (for R) - 1.3.0.9022 + 1.3.0.9023 diff --git a/docs/index.html b/docs/index.html index a680feac3..070bf9794 100644 --- a/docs/index.html +++ b/docs/index.html @@ -43,7 +43,7 @@ AMR (for R) - 1.3.0.9022 + 1.3.0.9023 diff --git a/docs/news/index.html b/docs/news/index.html index 0d2584004..ba7f4760f 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -81,7 +81,7 @@ AMR (for R) - 1.3.0.9022 + 1.3.0.9023 @@ -236,13 +236,13 @@ Source: NEWS.md -
-

-AMR 1.3.0.9022 Unreleased +
+

+AMR 1.3.0.9023 Unreleased

-
+

-Last updated: 18 September 2020 +Last updated: 19 September 2020

Note: some changes in this version were suggested by anonymous reviewers from the journal we submitted our manuscipt to. We are those reviewers very grateful for going through our code so thoroughly!

diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index 9251612ed..860aaa8a3 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -2,7 +2,7 @@ pandoc: 2.7.3 pkgdown: 1.5.1.9000 pkgdown_sha: eae56f08694abebf93cdfc0dd8e9ede06d8c815f articles: [] -last_built: 2020-09-18T14:05Z +last_built: 2020-09-19T09:53Z urls: reference: https://msberends.github.io/AMR/reference article: https://msberends.github.io/AMR/articles diff --git a/docs/reference/as.mo.html b/docs/reference/as.mo.html index e8c2f31bf..6f74726b2 100644 --- a/docs/reference/as.mo.html +++ b/docs/reference/as.mo.html @@ -82,7 +82,7 @@ AMR (for R) - 1.3.0.9022 + 1.3.0.9023
@@ -376,9 +376,9 @@
  • The Levenshtein distance \(L\) is the distance between the user input and all taxonomic full names, with the text length of the user input being the maximum distance. A modified version of the Levenshtein distance \(L'\) based on the text length of the full name \(F\) is calculated as:

  • -

    $$L' = F - \frac{0.5 \times L}{F}$$

    +

    $$L' = F - \frac{0.5L}{F}$$

    The final matching score \(M\) is calculated as: -$$M = L' \times \frac{1}{P \times K} * \frac{1}{U}$$

    +$$M = L' \times \frac{1}{P K U} = \frac{F - 0.5L}{F P K U}$$

    All matches are sorted descending on their matching score and for all user input values, the top match will be returned.

    Source

    diff --git a/docs/reference/index.html b/docs/reference/index.html index 8e7cc07b3..79a1c00d7 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -81,7 +81,7 @@ AMR (for R) - 1.3.0.9022 + 1.3.0.9023
    diff --git a/docs/reference/mo_matching_score.html b/docs/reference/mo_matching_score.html index 302a24462..825cb1bb4 100644 --- a/docs/reference/mo_matching_score.html +++ b/docs/reference/mo_matching_score.html @@ -82,7 +82,7 @@ AMR (for R) - 1.3.0.9022 + 1.3.0.9023
    @@ -270,9 +270,9 @@
  • The Levenshtein distance \(L\) is the distance between the user input and all taxonomic full names, with the text length of the user input being the maximum distance. A modified version of the Levenshtein distance \(L'\) based on the text length of the full name \(F\) is calculated as:

  • -

    $$L' = F - \frac{0.5 \times L}{F}$$

    +

    $$L' = F - \frac{0.5L}{F}$$

    The final matching score \(M\) is calculated as: -$$M = L' \times \frac{1}{P \times K} * \frac{1}{U}$$

    +$$M = L' \times \frac{1}{P K U} = \frac{F - 0.5L}{F P K U}$$

    Examples

    as.mo("E. coli")
    diff --git a/docs/survey.html b/docs/survey.html
    index 71a654017..49171f418 100644
    --- a/docs/survey.html
    +++ b/docs/survey.html
    @@ -81,7 +81,7 @@
           
           
             AMR (for R)
    -        1.3.0.9022
    +        1.3.0.9023
           
         
    diff --git a/man/as.mo.Rd b/man/as.mo.Rd index 2db1e728c..8552188be 100644 --- a/man/as.mo.Rd +++ b/man/as.mo.Rd @@ -136,10 +136,10 @@ With ambiguous user input, the returned results are chosen based on their matchi \item The \href{https://en.wikipedia.org/wiki/Levenshtein_distance}{Levenshtein distance} \eqn{L} is the distance between the user input and all taxonomic full names, with the text length of the user input being the maximum distance. A modified version of the Levenshtein distance \eqn{L'} based on the text length of the full name \eqn{F} is calculated as: } -\deqn{L' = F - \frac{0.5 \times L}{F}}{L' = F - (0.5 * L) / F} +\deqn{L' = F - \frac{0.5L}{F}}{L' = (F - 0.5L) / F} The final matching score \eqn{M} is calculated as: -\deqn{M = L' \times \frac{1}{P \times K} * \frac{1}{U}}{M = L' * (1 / (P * K)) * (1 / U)} +\deqn{M = L' \times \frac{1}{P K U} = \frac{F - 0.5L}{F P K U}}{M = L' * (1 / (P * K * U)) = (F - 0.5L) / (F * P * K * U)} All matches are sorted descending on their matching score and for all user input values, the top match will be returned. } diff --git a/man/mo_matching_score.Rd b/man/mo_matching_score.Rd index 67d9d95cb..9eda65e26 100644 --- a/man/mo_matching_score.Rd +++ b/man/mo_matching_score.Rd @@ -25,10 +25,10 @@ The matching score is based on four parameters: \item The \href{https://en.wikipedia.org/wiki/Levenshtein_distance}{Levenshtein distance} \eqn{L} is the distance between the user input and all taxonomic full names, with the text length of the user input being the maximum distance. A modified version of the Levenshtein distance \eqn{L'} based on the text length of the full name \eqn{F} is calculated as: } -\deqn{L' = F - \frac{0.5 \times L}{F}}{L' = F - (0.5 * L) / F} +\deqn{L' = F - \frac{0.5L}{F}}{L' = (F - 0.5L) / F} The final matching score \eqn{M} is calculated as: -\deqn{M = L' \times \frac{1}{P \times K} * \frac{1}{U}}{M = L' * (1 / (P * K)) * (1 / U)} +\deqn{M = L' \times \frac{1}{P K U} = \frac{F - 0.5L}{F P K U}}{M = L' * (1 / (P * K * U)) = (F - 0.5L) / (F * P * K * U)} } \examples{ as.mo("E. coli") diff --git a/tests/testthat.R b/tests/testthat.R index 671ba9e79..a1bf37359 100755 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -23,4 +23,3 @@ library(testthat) library(AMR) test_check("AMR") -unlink("tests/testthat/Rplots.pdf") diff --git a/tests/testthat/test-_all_examples.R b/tests/testthat/test-_all_examples.R index 6997d3db5..d0c3c0bdc 100755 --- a/tests/testthat/test-_all_examples.R +++ b/tests/testthat/test-_all_examples.R @@ -19,19 +19,3 @@ # Visit our website for more info: https://msberends.github.io/AMR. # # ==================================================================== # -# context("All examples") -# -# # run all examples (will take forever) -# exported_functions <- ls("package:AMR") -# -# for (i in seq_len(length(exported_functions))) { -# test_that(paste(exported_functions[i], "works"), { -# skip_on_cran() -# expect_output(suppressWarnings(example(exported_functions[i], -# package = "AMR", -# give.lines = TRUE, -# run.dontrun = TRUE, -# run.donttest = TRUE)), -# label = paste0("Examples of function ", exported_functions[i])) -# }) -# } diff --git a/tests/testthat/test-mic.R b/tests/testthat/test-mic.R index c96985708..f9b689056 100755 --- a/tests/testthat/test-mic.R +++ b/tests/testthat/test-mic.R @@ -38,10 +38,10 @@ test_that("mic works", { expect_warning(as.mic("INVALID VALUE")) - # print plots, should not raise errors - barplot(as.mic(c(1, 2, 4, 8))) - plot(as.mic(c(1, 2, 4, 8))) - print(as.mic(c(1, 2, 4, 8))) + # print plots + expect_success(x <- barplot(as.mic(c(1, 2, 4, 8)))) + expect_success(x <- plot(as.mic(c(1, 2, 4, 8)))) + expect_success(x <- print(as.mic(c(1, 2, 4, 8)))) expect_equal(summary(as.mic(c(2, 8))), structure(c("Class" = "mic", diff --git a/tests/testthat/test-resistance_predict.R b/tests/testthat/test-resistance_predict.R index e66df7af1..01ed43de1 100644 --- a/tests/testthat/test-resistance_predict.R +++ b/tests/testthat/test-resistance_predict.R @@ -35,8 +35,8 @@ test_that("prediction of rsi works", { expect_true(AMX_R[3] < AMX_R[20]) x <- resistance_predict(example_isolates, col_ab = "AMX", year_min = 2010, model = "binomial") - plot(x) - ggplot_rsi_predict(x) + expect_success(y <- plot(x)) + expect_success(y <- ggplot_rsi_predict(x)) expect_error(ggplot_rsi_predict(example_isolates)) library(dplyr) diff --git a/tests/testthat/test-rsi.R b/tests/testthat/test-rsi.R index e1caf561f..5a3169669 100644 --- a/tests/testthat/test-rsi.R +++ b/tests/testthat/test-rsi.R @@ -27,14 +27,14 @@ test_that("rsi works", { expect_true(as.rsi("S") < as.rsi("I")) expect_true(as.rsi("I") < as.rsi("R")) expect_true(is.rsi(as.rsi("S"))) - + # print plots, should not raise errors - barplot(as.rsi(c("S", "I", "R"))) - plot(as.rsi(c("S", "I", "R"))) - print(as.rsi(c("S", "I", "R"))) + expect_success(x <- barplot(as.rsi(c("S", "I", "R")))) + expect_success(x <- plot(as.rsi(c("S", "I", "R")))) + expect_success(x <- print(as.rsi(c("S", "I", "R")))) expect_equal(as.character(as.rsi(c(1:3))), c("S", "I", "R")) - + expect_equal(suppressWarnings(as.logical(as.rsi("INVALID VALUE"))), NA) expect_equal(summary(as.rsi(c("S", "R"))),