diff --git a/DESCRIPTION b/DESCRIPTION index d122f0d14..e9af8e61c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR -Version: 1.3.0.9021 -Date: 2020-09-14 +Version: 1.3.0.9022 +Date: 2020-09-18 Title: Antimicrobial Resistance Analysis Authors@R: c( person(role = c("aut", "cre"), diff --git a/NAMESPACE b/NAMESPACE index d189314fa..7d68359ed 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -165,6 +165,7 @@ export(mo_genus) export(mo_gramstain) export(mo_info) export(mo_kingdom) +export(mo_matching_score) export(mo_name) export(mo_order) export(mo_phylum) diff --git a/NEWS.md b/NEWS.md index 35fafb448..3c6dacfbe 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,5 @@ -# AMR 1.3.0.9021 -## Last updated: 14 September 2020 +# AMR 1.3.0.9022 +## Last updated: 18 September 2020 Note: some changes in this version were suggested by anonymous reviewers from the journal we submitted our manuscipt to. We are those reviewers very grateful for going through our code so thoroughly! @@ -16,6 +16,7 @@ Note: some changes in this version were suggested by anonymous reviewers from th pull(microorganism) #> [1] "Enterococcus casseliflavus" "Enterococcus gallinarum" ``` +* Support for veterinary ATC codes ### Changed * Although advertised that this package should work under R 3.0.0, we still had a dependency on R 3.6.0. This is fixed, meaning that our package should now work under R 3.0.0. @@ -32,6 +33,7 @@ Note: some changes in this version were suggested by anonymous reviewers from th ``` * Big speed improvement for interpreting MIC values and disk zone diameters. When interpreting 5,000 MIC values of two antibiotics (10,000 values in total), our benchmarks showed a total run time going from 80.7-85.1 seconds to 1.8-2.0 seconds. * Added parameter 'add_intrinsic_resistance' (defaults to `FALSE`), that considers intrinsic resistance according to EUCAST + * Fixed a bug where in EUCAST rules the breakpoint for R would be interpreted as ">=" while this should have been "<" * Added intelligent data cleaning to `as.disk()`, so numbers can also be extracted from text and decimal numbers will always be rounded up: ```r as.disk(c("disk zone: 23.4 mm", 23.4)) @@ -39,7 +41,7 @@ Note: some changes in this version were suggested by anonymous reviewers from th #> [1] 24 24 ``` * Improvements for `as.mo()`: - * Any user input value that could mean more than one taxonomic entry is now considered 'uncertain'. Instead of a warning, a message will be thrown and the accompanying `mo_uncertainties()` has been changed completely; it now prints all possible candidates with their matching score. + * A completely new matching score for ambiguous user input, using `mo_matching_score()`. Any user input value that could mean more than one taxonomic entry is now considered 'uncertain'. Instead of a warning, a message will be thrown and the accompanying `mo_uncertainties()` has been changed completely; it now prints all possible candidates with their matching score. * Big speed improvement for already valid microorganism ID. This also means an significant speed improvement for using `mo_*` functions like `mo_name()` on microoganism IDs. * Added parameter `ignore_pattern` to `as.mo()` which can also be given to `mo_*` functions like `mo_name()`, to exclude known non-relevant input from analysing. This can also be set with the option `AMR_ignore_pattern`. * `get_locale()` now uses at default `Sys.getenv("LANG")` or, if `LANG` is not set, `Sys.getlocale()`. This can be overwritten by setting the option `AMR_locale`. @@ -50,6 +52,10 @@ Note: some changes in this version were suggested by anonymous reviewers from th * Added a feature from AMR 1.1.0 and earlier again, but now without other package dependencies: `tibble` printing support for classes ``, ``, ``, `` and ``. When using `tibble`s containing antimicrobial columns (class ``), "S" will print in green, "I" will print in yellow and "R" will print in red. Microbial IDs (class ``) will emphasise on the genus and species, not on the kingdom. * Names of antiviral agents in data set `antivirals` now have a starting capital letter, like it is the case in the `antibiotics` data set * Updated the documentation of the `WHONET` data set to clarify that all patient names are fictitious +* Small `as.ab()` algorithm improvements +* Fix for combining MIC values with raw numbers, i.e. `c(as.mic(2), 2)` previously failed but now returns a valid MIC class +* `ggplot_rsi()` and `geom_rsi()` gained parameters `minimum` and `language`, to influence the internal use of `rsi_df()` +* Added abbreviation "piptazo" to piperacillin/tazobactam (TZP) ### Other * Removed unnecessary references to the `base` package diff --git a/R/aa_helper_functions.R b/R/aa_helper_functions.R index 8a8d0702b..f21d733af 100755 --- a/R/aa_helper_functions.R +++ b/R/aa_helper_functions.R @@ -19,41 +19,12 @@ # Visit our website for more info: https://msberends.github.io/AMR. # # ==================================================================== # -# functions from dplyr, will perhaps become poorman -distinct <- function(.data, ..., .keep_all = FALSE) { - check_is_dataframe(.data) - if ("grouped_data" %in% class(.data)) { - distinct.grouped_data(.data, ..., .keep_all = .keep_all) - } else { - distinct.default(.data, ..., .keep_all = .keep_all) - } -} -distinct.default <- function(.data, ..., .keep_all = FALSE) { - names <- rownames(.data) - rownames(.data) <- NULL - if (length(deparse_dots(...)) == 0) { - selected <- .data - } else { - selected <- select(.data, ...) - } - rows <- as.integer(rownames(unique(selected))) - if (isTRUE(.keep_all)) { - res <- .data[rows, , drop = FALSE] - } else { - res <- selected[rows, , drop = FALSE] - } - rownames(res) <- names[rows] - res -} -distinct.grouped_data <- function(.data, ..., .keep_all = FALSE) { - apply_grouped_function(.data, "distinct", ..., .keep_all = .keep_all) -} # faster implementation of left_join than using merge() by poorman - we use match(): -left_join <- function(x, y, by = NULL, suffix = c(".x", ".y")) { +pm_left_join <- function(x, y, by = NULL, suffix = c(".x", ".y")) { if (is.null(by)) { by <- intersect(names(x), names(y))[1L] if (is.na(by)) { - stop_("no common column found for left_join()") + stop_("no common column found for pm_left_join()") } join_message(by) } else if (!is.null(names(by))) { @@ -77,17 +48,28 @@ left_join <- function(x, y, by = NULL, suffix = c(".x", ".y")) { rownames(merged) <- NULL merged } -filter_join_worker <- function(x, y, by = NULL, type = c("anti", "semi")) { - type <- match.arg(type, choices = c("anti", "semi"), several.ok = FALSE) - if (is.null(by)) { - by <- intersect(names(x), names(y)) - join_message(by) +# pm_filter_join_worker <- function(x, y, by = NULL, type = c("anti", "semi")) { +# type <- match.arg(type, choices = c("anti", "semi"), several.ok = FALSE) +# if (is.null(by)) { +# by <- intersect(names(x), names(y)) +# join_message(by) +# } +# rows <- interaction(x[, by]) %in% interaction(y[, by]) +# if (type == "anti") rows <- !rows +# res <- x[rows, , drop = FALSE] +# rownames(res) <- NULL +# res +# } + +quick_case_when <- function(...) { + vectors <- list(...) + split <- lapply(vectors, function(x) unlist(strsplit(paste(deparse(x), collapse = ""), "~", fixed = TRUE))) + for (i in seq_len(length(vectors))) { + if (eval(parse(text = split[[i]][1]), envir = parent.frame())) { + return(eval(parse(text = split[[i]][2]), envir = parent.frame())) + } } - rows <- interaction(x[, by]) %in% interaction(y[, by]) - if (type == "anti") rows <- !rows - res <- x[rows, , drop = FALSE] - rownames(res) <- NULL - res + return(NA) } # No export, no Rd @@ -165,7 +147,7 @@ search_type_in_df <- function(x, type) { if (any(colnames(x) %like% "^(specimen date|specimen_date|spec_date)")) { # WHONET support found <- sort(colnames(x)[colnames(x) %like% "^(specimen date|specimen_date|spec_date)"])[1] - if (!any(class(pull(x, found)) %in% c("Date", "POSIXct"))) { + if (!any(class(pm_pull(x, found)) %in% c("Date", "POSIXct"))) { stop(font_red(paste0("ERROR: Found column `", font_bold(found), "` to be used as input for `col_", type, "`, but this column contains no valid dates. Transform its values to valid dates first.")), call. = FALSE) @@ -461,7 +443,7 @@ font_stripstyle <- function(x) { gsub("(?:(?:\\x{001b}\\[)|\\x{009b})(?:(?:[0-9]{1,3})?(?:(?:;[0-9]{0,3})*)?[A-M|f-m])|\\x{001b}[A-M]", "", x, perl = TRUE) } -progress_estimated <- function(n = 1, n_min = 0, ...) { +progress_ticker <- function(n = 1, n_min = 0, ...) { if (!interactive() || n < n_min) { pb <- list() pb$tick <- function() { diff --git a/R/aa_helper_functions_dplyr.R b/R/aa_helper_functions_dplyr.R deleted file mode 100644 index 8f2c80c33..000000000 --- a/R/aa_helper_functions_dplyr.R +++ /dev/null @@ -1,775 +0,0 @@ -# ==================================================================== # -# TITLE # -# Antimicrobial Resistance (AMR) Analysis # -# # -# SOURCE # -# https://github.com/msberends/AMR # -# # -# LICENCE # -# (c) 2018-2020 Berends MS, Luz CF et al. # -# # -# This R package is free software; you can freely use and distribute # -# it for both personal and commercial purposes under the terms of the # -# GNU General Public License version 2.0 (GNU GPL-2), as published by # -# the Free Software Foundation. # -# # -# We created this package for both routine data analysis and academic # -# research and it was publicly released in the hope that it will be # -# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. # -# Visit our website for more info: https://msberends.github.io/AMR. # -# ==================================================================== # - -# ------------------------------------------------ -# THIS FILE WAS CREATED AUTOMATICALLY! -# Source file: data-raw/reproduction_of_poorman.R -# ------------------------------------------------ - -# Poorman: a package to replace all dplyr functions with base R so we can lose dependency on dplyr. -# These functions were downloaded from https://github.com/nathaneastwood/poorman, -# from this commit: https://github.com/nathaneastwood/poorman/tree/7d76d77f8f7bc663bf30fb5a161abb49801afa17 -# -# All code below was released under MIT license, that permits 'free of charge, to any person obtaining a -# copy of the software and associated documentation files (the "Software"), to deal in the Software -# without restriction, including without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software -# is furnished to do so', given that a copyright notice is given in the software. -# -# Copyright notice as found on https://github.com/nathaneastwood/poorman/blob/master/LICENSE on 2 May 2020: -# YEAR: 2020 -# COPYRIGHT HOLDER: Nathan Eastwood - -arrange <- function(.data, ...) { - check_is_dataframe(.data) - if ("grouped_data" %in% class(.data)) { - arrange.grouped_data(.data, ...) - } else { - arrange.default(.data, ...) - } -} - -arrange.default <- function(.data, ...) { - rows <- eval.parent(substitute(with(.data, order(...)))) - .data[rows, , drop = FALSE] -} - -arrange.grouped_data <- function(.data, ...) { - apply_grouped_function(.data, "arrange", ...) -} -between <- function(x, left, right) { - if (!is.null(attr(x, "class")) && !inherits(x, c("Date", "POSIXct"))) { - warning("`between()` called on numeric vector with S3 class") - } - if (!is.double(x)) x <- as.numeric(x) - x >= as.numeric(left) & x <= as.numeric(right) -} -count <- function(x, ..., wt = NULL, sort = FALSE, name = NULL) { - groups <- get_groups(x) - if (!missing(...)) x <- group_by(x, ..., .add = TRUE) - wt <- deparse_var(wt) - res <- do.call(tally, list(x, wt, sort, name)) - if (length(groups) > 0L) res <- do.call(group_by, list(res, as.name(groups))) - res -} - -tally <- function(x, wt = NULL, sort = FALSE, name = NULL) { - name <- check_name(x, name) - wt <- deparse_var(wt) - res <- do.call(summarise, set_names(list(x, as.name(tally_n(x, wt))), c(".data", name))) - res <- ungroup(res) - if (isTRUE(sort)) res <- do.call(arrange, list(res, call("desc", as.name(name)))) - rownames(res) <- NULL - res -} - -add_count <- function(x, ..., wt = NULL, sort = FALSE, name = NULL) { - name <- check_name(x, name) - row_names <- rownames(x) - wt <- deparse_var(wt) - if (!missing(...)) x <- group_by(x, ..., .add = TRUE) - res <- do.call(add_tally, list(x, wt, sort, name)) - res[row_names, ] -} - -add_tally <- function(x, wt = NULL, sort = FALSE, name = NULL) { - wt <- deparse_var(wt) - n <- tally_n(x, wt) - name <- check_name(x, name) - res <- do.call(mutate, set_names(list(x, as.name(n)), c(".data", name))) - - if (isTRUE(sort)) { - do.call(arrange, list(res, call("desc", as.name(name)))) - } else { - res - } -} - -tally_n <- function(x, wt) { - if (is.null(wt) && "n" %in% colnames(x)) { - message("Using `n` as weighting variable") - wt <- "n" - } - context$.data <- x - on.exit(rm(list = ".data", envir = context)) - if (is.null(wt)) { - "n()" - } else { - paste0("sum(", wt, ", na.rm = TRUE)") - } -} - -check_name <- function(df, name) { - if (is.null(name)) { - if ("n" %in% colnames(df)) { - stop( - "Column 'n' is already present in output\n", - "* Use `name = \"new_name\"` to pick a new name" - ) - } - return("n") - } - - if (!is.character(name) || length(name) != 1) { - stop("`name` must be a single string") - } - - name -} -desc <- function(x) -xtfrm(x) -select_env <- new.env() - -peek_vars <- function() { - get(".col_names", envir = select_env) -} - -context <- new.env() - -n <- function() { - do.call(nrow, list(quote(.data)), envir = context) -} -filter <- function(.data, ...) { - check_is_dataframe(.data) - if ("grouped_data" %in% class(.data)) { - filter.grouped_data(.data, ...) - } else { - filter.default(.data, ...) - } -} - -filter.default <- function(.data, ...) { - conditions <- paste(deparse_dots(...), collapse = " & ") - context$.data <- .data - on.exit(rm(.data, envir = context)) - .data[do.call(with, list(.data, str2lang(unname(conditions)))), ] -} - -filter.grouped_data <- function(.data, ...) { - rows <- rownames(.data) - res <- apply_grouped_function(.data, "filter", ...) - res[rows[rows %in% rownames(res)], ] -} -group_by <- function(.data, ..., .add = FALSE) { - check_is_dataframe(.data) - pre_groups <- get_groups(.data) - groups <- deparse_dots(...) - if (isTRUE(.add)) groups <- unique(c(pre_groups, groups)) - unknown <- !(groups %in% colnames(.data)) - if (any(unknown)) stop("Invalid groups: ", groups[unknown]) - structure(.data, class = c("grouped_data", class(.data)), groups = groups) -} - -ungroup <- function(x, ...) { - check_is_dataframe(x) - rm_groups <- deparse_dots(...) - groups <- attr(x, "groups") - if (length(rm_groups) == 0L) rm_groups <- groups - attr(x, "groups") <- groups[!(groups %in% rm_groups)] - if (length(attr(x, "groups")) == 0L) { - attr(x, "groups") <- NULL - class(x) <- class(x)[!(class(x) %in% "grouped_data")] - } - x -} - -get_groups <- function(x) { - attr(x, "groups", exact = TRUE) -} - -has_groups <- function(x) { - groups <- get_groups(x) - if (is.null(groups)) FALSE else TRUE -} - -set_groups <- function(x, groups) { - attr(x, "groups") <- groups - x -} - -apply_grouped_function <- function(.data, fn, ...) { - groups <- get_groups(.data) - grouped <- split_into_groups(.data, groups) - res <- do.call(rbind, unname(lapply(grouped, fn, ...))) - if (any(groups %in% colnames(res))) { - class(res) <- c("grouped_data", class(res)) - attr(res, "groups") <- groups[groups %in% colnames(res)] - } - res -} - -split_into_groups <- function(.data, groups) { - class(.data) <- "data.frame" - group_factors <- lapply(groups, function(x, .data) as.factor(.data[, x]), .data) - res <- split(x = .data, f = group_factors) - res -} - -print.grouped_data <- function(x, ..., digits = NULL, quote = FALSE, right = TRUE, row.names = TRUE, max = NULL) { - class(x) <- "data.frame" - print(x, ..., digits = digits, quote = quote, right = right, row.names = row.names, max = max) - cat("\nGroups: ", paste(attr(x, "groups", exact = TRUE), collapse = ", "), "\n\n") -} -if_else <- function(condition, true, false, missing = NULL) { - if (!is.logical(condition)) stop("`condition` must be a logical vector.") - cls_true <- class(true) - cls_false <- class(false) - cls_missing <- class(missing) - if (!identical(cls_true, cls_false)) { - stop("The class of `true` <", class(true), "> is not the same as the class of `false` <", class(false), ">") - } - if (!is.null(missing) && !identical(cls_true, cls_missing)) { - stop("`missing` must be a ", cls_true, " vector, not a ", cls_missing, " vector.") - } - res <- ifelse(condition, true, false) - if (!is.null(missing)) res[is.na(res)] <- missing - attributes(res) <- attributes(true) - res -} - -inner_join <- function(x, y, by = NULL, suffix = c(".x", ".y")) { - join_worker(x = x, y = y, by = by, suffix = suffix, sort = FALSE) -} - -# left_join <- function(x, y, by = NULL, suffix = c(".x", ".y")) { -# join_worker(x = x, y = y, by = by, suffix = suffix, all.x = TRUE) -# } - -right_join <- function(x, y, by = NULL, suffix = c(".x", ".y")) { - join_worker(x = x, y = y, by = by, suffix = suffix, all.y = TRUE) -} - -full_join <- function(x, y, by = NULL, suffix = c(".x", ".y")) { - join_worker(x = x, y = y, by = by, suffix = suffix, all = TRUE) -} - -join_worker <- function(x, y, by = NULL, suffix = c(".x", ".y"), ...) { - x[, ".join_id"] <- seq_len(nrow(x)) - if (is.null(by)) { - by <- intersect(names(x), names(y)) - join_message(by) - merged <- merge(x = x, y = y, by = by, suffixes = suffix, ...)[, union(names(x), names(y))] - } else if (is.null(names(by))) { - merged <- merge(x = x, y = y, by = by, suffixes = suffix, ...) - } else { - merged <- merge(x = x, y = y, by.x = names(by), by.y = by, suffixes = suffix, ...) - } - merged <- merged[order(merged[, ".join_id"]), colnames(merged) != ".join_id"] - rownames(merged) <- NULL - merged -} - -join_message <- function(by) { - if (length(by) > 1L) { - message("Joining, by = c(\"", paste0(by, collapse = "\", \""), "\")\n", sep = "") - } else { - message("Joining, by = \"", by, "\"\n", sep = "") - } -} - -anti_join <- function(x, y, by = NULL) { - filter_join_worker(x, y, by, type = "anti") -} - -semi_join <- function(x, y, by = NULL) { - filter_join_worker(x, y, by, type = "semi") -} - -# filter_join_worker <- function(x, y, by = NULL, type = c("anti", "semi")) { -# type <- match.arg(type, choices = c("anti", "semi"), several.ok = FALSE) -# if (is.null(by)) { -# by <- intersect(names(x), names(y)) -# join_message(by) -# } -# rows <- interaction(x[, by]) %in% interaction(y[, by]) -# if (type == "anti") rows <- !rows -# res <- x[rows, ] -# rownames(res) <- NULL -# res -# } -lag <- function (x, n = 1L, default = NA) { - if (inherits(x, "ts")) stop("`x` must be a vector, not a `ts` object, do you want `stats::lag()`?") - if (length(n) != 1L || !is.numeric(n) || n < 0L) stop("`n` must be a nonnegative integer scalar") - if (n == 0L) return(x) - tryCatch( - storage.mode(default) <- typeof(x), - warning = function(w) { - stop("Cannot convert `default` <", typeof(default), "> to `x` <", typeof(x), ">") - } - ) - xlen <- length(x) - n <- pmin(n, xlen) - res <- c(rep(default, n), x[seq_len(xlen - n)]) - attributes(res) <- attributes(x) - res -} - -lead <- function (x, n = 1L, default = NA) { - if (length(n) != 1L || !is.numeric(n) || n < 0L) stop("n must be a nonnegative integer scalar") - if (n == 0L) return(x) - tryCatch( - storage.mode(default) <- typeof(x), - warning = function(w) { - stop("Cannot convert `default` <", typeof(default), "> to `x` <", typeof(x), ">") - } - ) - xlen <- length(x) - n <- pmin(n, xlen) - res <- c(x[-seq_len(n)], rep(default, n)) - attributes(res) <- attributes(x) - res -} -mutate <- function(.data, ...) { - check_is_dataframe(.data) - if ("grouped_data" %in% class(.data)) { - mutate.grouped_data(.data, ...) - } else { - mutate.default(.data, ...) - } -} - -mutate.default <- function(.data, ...) { - conditions <- deparse_dots(...) - cond_names <- names(conditions) - unnamed <- which(nchar(cond_names) == 0L) - if (is.null(cond_names)) { - names(conditions) <- conditions - } else if (length(unnamed) > 0L) { - names(conditions)[unnamed] <- conditions[unnamed] - } - not_matched <- names(conditions)[!names(conditions) %in% names(.data)] - .data[, not_matched] <- NA - context$.data <- .data - on.exit(rm(.data, envir = context)) - for (i in seq_along(conditions)) { - .data[, names(conditions)[i]] <- do.call(with, list(.data, str2lang(unname(conditions)[i]))) - } - .data -} - -mutate.grouped_data <- function(.data, ...) { - rows <- rownames(.data) - res <- apply_grouped_function(.data, "mutate", ...) - res[rows, ] -} -n_distinct <- function(..., na.rm = FALSE) { - res <- c(...) - if (is.list(res)) return(nrow(unique(as.data.frame(res, stringsAsFactors = FALSE)))) - if (isTRUE(na.rm)) res <- res[!is.na(res)] - length(unique(res)) -} -`%>%` <- function(lhs, rhs) { - lhs <- substitute(lhs) - rhs <- substitute(rhs) - eval(as.call(c(rhs[[1L]], lhs, as.list(rhs[-1L]))), envir = parent.frame()) -} -pull <- function(.data, var = -1) { - var_deparse <- deparse_var(var) - col_names <- colnames(.data) - if (!(var_deparse %in% col_names) & grepl("^[[:digit:]]+L|[[:digit:]]", var_deparse)) { - var <- as.integer(gsub("L", "", var_deparse)) - var <- if_else(var < 1L, rev(col_names)[abs(var)], col_names[var]) - } else if (var_deparse %in% col_names) { - var <- var_deparse - } - .data[, var] -} -relocate <- function(.data, ..., .before = NULL, .after = NULL) { - check_is_dataframe(.data) - data_names <- colnames(.data) - col_pos <- select_positions(.data, ...) - - .before <- deparse_var(.before) - .after <- deparse_var(.after) - has_before <- !is.null(.before) - has_after <- !is.null(.after) - - if (has_before && has_after) { - stop("You must supply only one of `.before` and `.after`") - } else if (has_before) { - where <- min(match(.before, data_names)) - col_pos <- c(setdiff(col_pos, where), where) - } else if (has_after) { - where <- max(match(.after, data_names)) - col_pos <- c(where, setdiff(col_pos, where)) - } else { - where <- 1L - col_pos <- union(col_pos, where) - } - lhs <- setdiff(seq(1L, where - 1L), col_pos) - rhs <- setdiff(seq(where + 1L, ncol(.data)), col_pos) - col_pos <- unique(c(lhs, col_pos, rhs)) - col_pos <- col_pos[col_pos <= length(data_names)] - - res <- .data[col_pos] - if (has_groups(.data)) res <- set_groups(res, get_groups(.data)) - res -} -rename <- function(.data, ...) { - check_is_dataframe(.data) - new_names <- names(deparse_dots(...)) - if (length(new_names) == 0L) { - warning("You didn't give any new names") - return(.data) - } - col_pos <- select_positions(.data, ...) - old_names <- colnames(.data)[col_pos] - new_names_zero <- nchar(new_names) == 0L - if (any(new_names_zero)) { - warning("You didn't provide new names for: ", paste0("`", old_names[new_names_zero], collapse = ", "), "`") - new_names[new_names_zero] <- old_names[new_names_zero] - } - colnames(.data)[col_pos] <- new_names - .data -} -rownames_to_column <- function(.data, var = "rowname") { - check_is_dataframe(.data) - col_names <- colnames(.data) - if (var %in% col_names) stop("Column `", var, "` already exists in `.data`") - .data[, var] <- rownames(.data) - rownames(.data) <- NULL - .data[, c(var, setdiff(col_names, var))] -} - -select <- function(.data, ...) { - map <- names(deparse_dots(...)) - col_pos <- select_positions(.data, ..., group_pos = TRUE) - res <- .data[, col_pos, drop = FALSE] - to_map <- nchar(map) > 0L - colnames(res)[to_map] <- map[to_map] - if (has_groups(.data)) res <- set_groups(res, get_groups(.data)) - res -} -starts_with <- function(match, ignore.case = TRUE, vars = peek_vars()) { - grep(pattern = paste0("^", paste0(match, collapse = "|^")), x = vars, ignore.case = ignore.case) -} - -ends_with <- function(match, ignore.case = TRUE, vars = peek_vars()) { - grep(pattern = paste0(paste0(match, collapse = "$|"), "$"), x = vars, ignore.case = ignore.case) -} - -contains <- function(match, ignore.case = TRUE, vars = peek_vars()) { - matches <- lapply( - match, - function(x) { - if (isTRUE(ignore.case)) { - match_u <- toupper(x) - match_l <- tolower(x) - pos_u <- grep(pattern = match_u, x = toupper(vars), fixed = TRUE) - pos_l <- grep(pattern = match_l, x = tolower(vars), fixed = TRUE) - unique(c(pos_l, pos_u)) - } else { - grep(pattern = x, x = vars, fixed = TRUE) - } - } - ) - unique(matches) -} - -matches <- function(match, ignore.case = TRUE, perl = FALSE, vars = peek_vars()) { - grep(pattern = match, x = vars, ignore.case = ignore.case, perl = perl) -} - -num_range <- function(prefix, range, width = NULL, vars = peek_vars()) { - if (!is.null(width)) { - range <- sprintf(paste0("%0", width, "d"), range) - } - find <- paste0(prefix, range) - if (any(duplicated(vars))) { - stop("Column names must be unique") - } else { - x <- match(find, vars) - x[!is.na(x)] - } -} - -all_of <- function(x, vars = peek_vars()) { - x_ <- !x %in% vars - if (any(x_)) { - which_x_ <- which(x_) - if (length(which_x_) == 1L) { - stop("The column ", x[which_x_], " does not exist.") - } else { - stop("The columns ", paste(x[which_x_], collapse = ", "), " do not exist.") - } - } else { - which(vars %in% x) - } -} - -any_of <- function(x, vars = peek_vars()) { - which(vars %in% x) -} - -everything <- function(vars = peek_vars()) { - seq_along(vars) -} - -last_col <- function(offset = 0L, vars = peek_vars()) { - if (!is_wholenumber(offset)) stop("`offset` must be an integer") - n <- length(vars) - if (offset && n <= offset) { - stop("`offset` must be smaller than the number of `vars`") - } else if (n == 0) { - stop("Can't select last column when `vars` is empty") - } else { - n - offset - } -} -select_positions <- function(.data, ..., group_pos = FALSE) { - cols <- eval(substitute(alist(...))) - data_names <- colnames(.data) - select_env$.col_names <- data_names - on.exit(rm(list = ".col_names", envir = select_env)) - exec_env <- parent.frame(2L) - pos <- unlist(lapply(cols, eval_expr, exec_env = exec_env)) - if (isTRUE(group_pos)) { - groups <- get_groups(.data) - missing_groups <- !(groups %in% cols) - if (any(missing_groups)) { - message("Adding missing grouping variables: `", paste(groups[missing_groups], collapse = "`, `"), "`") - pos <- c(match(groups[missing_groups], data_names), pos) - } - } - unique(pos) -} - -eval_expr <- function(x, exec_env) { - type <- typeof(x) - switch( - type, - "integer" = x, - "double" = as.integer(x), - "character" = select_char(x), - "symbol" = select_symbol(x, exec_env = exec_env), - "language" = eval_call(x), - stop("Expressions of type <", typeof(x), "> cannot be evaluated for use when subsetting.") - ) -} - -select_char <- function(expr) { - pos <- match(expr, select_env$.col_names) - if (is.na(pos)) stop("Column `", expr, "` does not exist") - pos -} - -select_symbol <- function(expr, exec_env) { - res <- try(select_char(as.character(expr)), silent = TRUE) - if (inherits(res, "try-error")) { - res <- tryCatch( - select_char(eval(expr, envir = exec_env)), - error = function(e) stop("Column ", expr, " does not exist.") - ) - } - res -} - -eval_call <- function(x) { - type <- as.character(x[[1]]) - switch( - type, - `:` = select_seq(x), - `!` = select_negate(x), - `-` = select_minus(x), - `c` = select_c(x), - `(` = select_bracket(x), - select_context(x) - ) -} - -select_seq <- function(expr) { - x <- eval_expr(expr[[2]]) - y <- eval_expr(expr[[3]]) - x:y -} - -select_negate <- function(expr) { - x <- if (is_negated_colon(expr)) { - expr <- call(":", expr[[2]][[2]], expr[[2]][[3]][[2]]) - eval_expr(expr) - } else { - eval_expr(expr[[2]]) - } - x * -1L -} - -is_negated_colon <- function(expr) { - expr[[1]] == "!" && length(expr[[2]]) > 1L && expr[[2]][[1]] == ":" && expr[[2]][[3]][[1]] == "!" -} - -select_minus <- function(expr) { - x <- eval_expr(expr[[2]]) - x * -1L -} - -select_c <- function(expr) { - lst_expr <- as.list(expr) - lst_expr[[1]] <- NULL - unlist(lapply(lst_expr, eval_expr)) -} - -select_bracket <- function(expr) { - eval_expr(expr[[2]]) -} - -select_context <- function(expr) { - eval(expr, envir = context$.data) -} -slice <- function(.data, ...) { - check_is_dataframe(.data) - if ("grouped_data" %in% class(.data)) { - slice.grouped_data(.data, ...) - } else { - slice.default(.data, ...) - } -} - -slice.default <- function(.data, ...) { - rows <- c(...) - stopifnot(is.numeric(rows) | is.integer(rows)) - if (all(rows > 0L)) rows <- rows[rows <= nrow(.data)] - .data[rows, ] -} - -slice.grouped_data <- function(.data, ...) { - apply_grouped_function(.data, "slice", ...) -} -summarise <- function(.data, ...) { - check_is_dataframe(.data) - if ("grouped_data" %in% class(.data)) { - summarise.grouped_data(.data, ...) - } else { - summarise.default(.data, ...) - } -} - -summarise.default <- function(.data, ...) { - fns <- vapply(substitute(...()), deparse, NA_character_) - context$.data <- .data - on.exit(rm(.data, envir = context)) - if (has_groups(.data)) { - group <- unique(.data[, get_groups(.data), drop = FALSE]) - if (nrow(group) == 0L) return(NULL) - } - res <- lapply(fns, function(x) do.call(with, list(.data, str2lang(x)))) - res <- as.data.frame(res) - fn_names <- names(fns) - colnames(res) <- if (is.null(fn_names)) fns else fn_names - if (has_groups(.data)) res <- cbind(group, res) - res -} - -summarise.grouped_data <- function(.data, ...) { - groups <- get_groups(.data) - res <- apply_grouped_function(.data, "summarise", ...) - res <- res[do.call(order, lapply(groups, function(x) res[, x])), ] - rownames(res) <- NULL - res -} - -summarize <- summarise -summarize.default <- summarise.default -summarize.grouped_data <- summarise.grouped_data -transmute <- function(.data, ...) { - check_is_dataframe(.data) - if ("grouped_data" %in% class(.data)) { - transmute.grouped_data(.data, ...) - } else { - transmute.default(.data, ...) - } -} - -transmute.default <- function(.data, ...) { - conditions <- deparse_dots(...) - mutated <- mutate(.data, ...) - mutated[, names(conditions), drop = FALSE] -} - -transmute.grouped_data <- function(.data, ...) { - rows <- rownames(.data) - res <- apply_grouped_function(.data, "transmute", ...) - res[rows, ] -} -deparse_dots <- function(...) { - vapply(substitute(...()), deparse, NA_character_) -} - -deparse_var <- function(var) { - sub_var <- eval(substitute(substitute(var)), parent.frame()) - if (is.symbol(sub_var)) var <- as.character(sub_var) - var -} - -check_is_dataframe <- function(.data) { - parent_fn <- all.names(sys.call(-1L), max.names = 1L) - if (!is.data.frame(.data)) stop(parent_fn, " must be given a data.frame") - invisible() -} - -is_wholenumber <- function(x) { - x %% 1L == 0L -} - -set_names <- function(object = nm, nm) { - names(object) <- nm - object -} - -cume_dist <- function(x) { - rank(x, ties.method = "max", na.last = "keep") / sum(!is.na(x)) -} - -dense_rank <- function(x) { - match(x, sort(unique(x))) -} - -min_rank <- function(x) { - rank(x, ties.method = "min", na.last = "keep") -} - -ntile <- function (x = row_number(), n) { - if (!missing(x)) x <- row_number(x) - len <- length(x) - sum(is.na(x)) - n <- as.integer(floor(n)) - if (len == 0L) { - rep(NA_integer_, length(x)) - } else { - n_larger <- as.integer(len %% n) - n_smaller <- as.integer(n - n_larger) - size <- len / n - larger_size <- as.integer(ceiling(size)) - smaller_size <- as.integer(floor(size)) - larger_threshold <- larger_size * n_larger - bins <- if_else( - x <= larger_threshold, - (x + (larger_size - 1L)) / larger_size, - (x + (-larger_threshold + smaller_size - 1L)) / smaller_size + n_larger - ) - as.integer(floor(bins)) - } -} - -percent_rank <- function(x) { - (min_rank(x) - 1) / (sum(!is.na(x)) - 1) -} - -row_number <- function(x) { - if (missing(x)) seq_len(n()) else rank(x, ties.method = "first", na.last = "keep") -} diff --git a/R/aa_helper_pm_functions.R b/R/aa_helper_pm_functions.R new file mode 100644 index 000000000..50556b9a0 --- /dev/null +++ b/R/aa_helper_pm_functions.R @@ -0,0 +1,1589 @@ +# ==================================================================== # +# TITLE # +# Antimicrobial Resistance (AMR) Analysis # +# # +# SOURCE # +# https://github.com/msberends/AMR # +# # +# LICENCE # +# (c) 2018-2020 Berends MS, Luz CF et al. # +# # +# This R package is free software; you can freely use and distribute # +# it for both personal and commercial purposes under the terms of the # +# GNU General Public License version 2.0 (GNU GPL-2), as published by # +# the Free Software Foundation. # +# # +# We created this package for both routine data analysis and academic # +# research and it was publicly released in the hope that it will be # +# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. # +# Visit our website for more info: https://msberends.github.io/AMR. # +# ==================================================================== # + +# ------------------------------------------------ +# THIS FILE WAS CREATED AUTOMATICALLY! +# Source file: data-raw/reproduction_of_poorman.R +# ------------------------------------------------ + +# poorman: a package to replace all dplyr functions with base R so we can lose dependency on dplyr. +# These functions were downloaded from https://github.com/nathaneastwood/poorman, +# from this commit: https://github.com/nathaneastwood/poorman/tree/52eb6947e0b4430cd588976ed8820013eddf955f. +# +# All functions are prefixed with 'pm_' to make it obvious that they are dplyr substitutes. +# +# All code below was released under MIT license, that permits 'free of charge, to any person obtaining a +# copy of the software and associated documentation files (the "Software"), to deal in the Software +# without restriction, including without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +# is furnished to do so', given that a copyright notice is given in the software. +# +# Copyright notice on 18 September 2020, the day this code was downloaded, as found on +# https://github.com/nathaneastwood/poorman/blob/52eb6947e0b4430cd588976ed8820013eddf955f/LICENSE: +# YEAR: 2020 +# COPYRIGHT HOLDER: Nathan Eastwood + +pm_arrange <- function(.data, ...) { + pm_check_is_dataframe(.data) + if ("grouped_data" %in% class(.data)) { + pm_arrange.grouped_data(.data, ...) + } else { + pm_arrange.default(.data, ...) + } +} + +pm_arrange.default <- function(.data, ...) { + pm_context$setup(.data) + on.exit(pm_context$clean(), add = TRUE) + rows <- eval(substitute(order(...)), envir = pm_context$.data) + .data[rows, , drop = FALSE] +} + +pm_arrange.grouped_data <- function(.data, ...) { + pm_apply_grouped_function("pm_arrange", .data, drop = TRUE, ...) +} +pm_between <- function(x, left, right) { + if (!is.null(attr(x, "class")) && !inherits(x, c("Date", "POSIXct"))) { + warning("`pm_between()` called on numeric vector with S3 class") + } + if (!is.double(x)) x <- as.numeric(x) + x >= as.numeric(left) & x <= as.numeric(right) +} +pm_coalesce <- function(...) { + if (missing(..1)) stop("At least one argument must be supplied.") + + vectors <- list(...) + vectors_lens <- unique(lengths(vectors)) + if (length(vectors_lens) > 2L || (length(vectors_lens) == 2L & !1 %in% vectors_lens)) { + stop("Vectors must all be of length 1 and/or pm_n") + } + max_len <- max(vectors_lens) + + len_one <- lengths(vectors) == 1L + vectors[len_one] <- lapply(vectors[len_one], function(x) rep(x, max_len)) + + x <- vectors[[1]] + vectors <- vectors[-1] + + for (i in seq_along(vectors)) { + x_miss <- is.na(x) + x[x_miss] <- vectors[[i]][x_miss] + } + x +} +pm_context <- new.env() + +# Data +pm_context$setup <- function(.data) pm_context$.data <- .data +pm_context$get_data <- function() pm_context$.data +pm_context$get_nrow <- function() nrow(pm_context$.data) +pm_context$get_colnames <- function() colnames(pm_context$.data) +pm_context$clean <- function() rm(list = c(".data"), envir = pm_context) + + +pm_n <- function() { + pm_check_group_pm_context("`pm_n()`") + pm_context$get_nrow() +} + +pm_cur_data <- function() { + pm_check_group_pm_context("`pm_cur_data()`") + data <- pm_context$get_data() + data[, !(colnames(data) %in% pm_get_groups(data)), drop = FALSE] +} + +pm_cur_group <- function() { + pm_check_group_pm_context("`pm_cur_group()`") + data <- pm_context$get_data() + res <- data[1L, pm_get_groups(data), drop = FALSE] + rownames(res) <- NULL + res +} + +pm_cur_group_id <- function() { + pm_check_group_pm_context("`pm_cur_group_id()`") + data <- pm_context$get_data() + res <- data[1L, pm_get_groups(data), drop = FALSE] + details <- pm_get_group_details(data) + details[, ".group_id"] <- seq_len(nrow(details)) + res <- suppressMessages(pm_semi_join(details, res)) + list(res[, ".group_id"]) +} + +pm_cur_group_rows <- function() { + pm_check_group_pm_context("`pm_cur_group_rows()`") + data <- pm_context$get_data() + res <- data[1L, pm_get_groups(data), drop = FALSE] + res <- suppressMessages(pm_semi_join(pm_get_group_details(data), res)) + unlist(res[, ".rows"]) +} + +pm_check_group_pm_context <- function(fn) { + if (is.null(pm_context$.data)) { + stop(fn, " must only be used inside poorman verbs") + } +} +pm_count <- function(x, ..., wt = NULL, sort = FALSE, name = NULL) { + pm_groups <- pm_get_groups(x) + if (!missing(...)) x <- pm_group_by(x, ..., .add = TRUE) + wt <- pm_deparse_var(wt) + res <- do.call(pm_tally, list(x, wt, sort, name)) + if (length(pm_groups) > 0L) res <- do.call(pm_group_by, list(res, as.name(pm_groups))) + res +} + +pm_tally <- function(x, wt = NULL, sort = FALSE, name = NULL) { + name <- pm_check_name(x, name) + wt <- pm_deparse_var(wt) + res <- do.call(pm_summarise, pm_set_names(list(x, pm_tally_n(x, wt)), c(".data", name))) + res <- pm_ungroup(res) + if (isTRUE(sort)) res <- do.call(pm_arrange, list(res, call("pm_desc", as.name(name)))) + rownames(res) <- NULL + res +} + +pm_add_count <- function(x, ..., wt = NULL, sort = FALSE, name = NULL) { + name <- pm_check_name(x, name) + row_names <- rownames(x) + wt <- pm_deparse_var(wt) + if (!missing(...)) x <- pm_group_by(x, ..., .add = TRUE) + res <- do.call(pm_add_tally, list(x, wt, sort, name)) + res[row_names, ] +} + +pm_add_tally <- function(x, wt = NULL, sort = FALSE, name = NULL) { + wt <- pm_deparse_var(wt) + pm_n <- pm_tally_n(x, wt) + name <- pm_check_name(x, name) + res <- do.call(pm_mutate, pm_set_names(list(x, pm_n), c(".data", name))) + + if (isTRUE(sort)) { + do.call(pm_arrange, list(res, call("pm_desc", as.name(name)))) + } else { + res + } +} + +pm_tally_n <- function(x, wt) { + if (is.null(wt) && "pm_n" %in% colnames(x)) { + message("Using `pm_n` as weighting variable") + wt <- "pm_n" + } + pm_context$setup(.data = x) + on.exit(pm_context$clean(), add = TRUE) + if (is.null(wt)) { + call("pm_n") + } else { + call("sum", as.name(wt), na.rm = TRUE) + } +} + +pm_check_name <- function(df, name) { + if (is.null(name)) { + if ("pm_n" %in% colnames(df)) { + stop( + "Column 'pm_n' is already present in output\n", + "* Use `name = \"new_name\"` to pick a new name" + ) + } + return("pm_n") + } + + if (!is.character(name) || length(name) != 1) { + stop("`name` must be a single string") + } + + name +} +pm_desc <- function(x) -xtfrm(x) +pm_distinct <- function(.data, ...) { + pm_check_is_dataframe(.data) + if ("grouped_data" %in% class(.data)) { + pm_distinct.grouped_data(.data, ...) + } else { + pm_distinct.default(.data, ...) + } +} + +pm_distinct.default <- function(.data, ..., .keep_all = FALSE) { + if (ncol(.data) == 0L) return(.data[1, ]) + cols <- pm_deparse_dots(...) + col_names <- names(cols) + col_len <- length(cols) + if (is.null(col_names) && col_len > 0L) names(cols) <- cols + if (col_len == 0L) { + res <- .data + } else { + res <- pm_mutate(.data, ...) + col_names <- names(cols) + res <- if (!is.null(col_names)) { + zero_names <- nchar(col_names) == 0L + if (any(zero_names)) { + names(cols)[zero_names] <- cols[zero_names] + col_names <- names(cols) + } + suppressMessages(pm_select(res, col_names)) + } else { + suppressMessages(pm_select(res, cols)) + } + } + res <- unique(res) + if (isTRUE(.keep_all)) { + res <- cbind(res, .data[rownames(res), setdiff(colnames(.data), colnames(res)), drop = FALSE]) + } + common_cols <- c(intersect(colnames(.data), colnames(res)), setdiff(col_names, colnames(.data))) + if (length(common_cols) > 0L) res[, common_cols, drop = FALSE] else res +} + +pm_distinct.grouped_data <- function(.data, ..., .keep_all = FALSE) { + pm_apply_grouped_function("pm_distinct", .data, drop = TRUE, ..., .keep_all = .keep_all) +} +pm_eval_env <- new.env() +pm_filter <- function(.data, ...) { + pm_check_is_dataframe(.data) + if ("grouped_data" %in% class(.data)) { + pm_filter.grouped_data(.data, ...) + } else { + pm_filter.default(.data, ...) + } +} + +pm_filter.default <- function(.data, ...) { + conditions <- pm_dotdotdot(...) + cond_class <- vapply(conditions, typeof, NA_character_) + if (any(cond_class != "language")) stop("Conditions must be logical vectors") + pm_context$setup(.data) + on.exit(pm_context$clean(), add = TRUE) + pm_eval_env$env <- parent.frame() + on.exit(rm(list = "env", envir = pm_eval_env), add = TRUE) + rows <- lapply( + conditions, + function(cond, frame) eval(cond, pm_context$.data, frame), + frame = pm_eval_env$env + ) + rows <- Reduce("&", rows) + .data[rows & !is.na(rows), ] +} + +pm_filter.grouped_data <- function(.data, ...) { + rows <- rownames(.data) + res <- pm_apply_grouped_function("pm_filter", .data, drop = TRUE, ...) + res[rows[rows %in% rownames(res)], ] +} +pm_glimpse <- function(x, width = getOption("width"), ...) { + if ("grouped_data" %in% class(.data)) { + pm_glimpse.grouped_data(.data, ...) + } else { + pm_glimpse.default(.data, ...) + } +} + +pm_glimpse.default <- function (x, width = getOption("width"), max.level = 3, ...) { + utils::str(x, width = width, max.level = max.level, ...) + invisible(x) +} + +pm_glimpse.data.frame <- function(x, width = getOption("width"), ...) { + utils::str(x, width = width, ...) + invisible(x) +} +pm_group_by <- function(.data, ..., .add = FALSE) { + pm_check_is_dataframe(.data) + pre_groups <- pm_get_groups(.data) + pm_groups <- pm_deparse_dots(...) + if (isTRUE(.add)) pm_groups <- unique(c(pre_groups, pm_groups)) + unknown <- !(pm_groups %in% colnames(.data)) + if (any(unknown)) stop("Invalid pm_groups: ", pm_groups[unknown]) + class(.data) <- c("grouped_data", class(.data)) + pm_set_groups(.data, pm_groups) +} + +pm_ungroup <- function(x, ...) { + pm_check_is_dataframe(x) + rm_groups <- pm_deparse_dots(...) + pm_groups <- pm_get_groups(x) + if (length(rm_groups) == 0L) rm_groups <- pm_groups + x <- pm_set_groups(x, pm_groups[!(pm_groups %in% rm_groups)]) + if (length(attr(x, "pm_groups")) == 0L) { + attr(x, "pm_groups") <- NULL + class(x) <- class(x)[!(class(x) %in% "grouped_data")] + } + x +} + +pm_set_groups <- function(x, pm_groups) { + attr(x, "pm_groups") <- if (is.null(pm_groups) || length(pm_groups) == 0L) { + NULL + } else { + pm_group_data_worker(x, pm_groups) + } + x +} + +pm_get_groups <- function(x) { + pm_groups <- attr(x, "pm_groups", exact = TRUE) + if (is.null(pm_groups)) character(0) else colnames(pm_groups)[!colnames(pm_groups) %in% c(".group_id", ".rows")] +} + +pm_get_group_details <- function(x) { + pm_groups <- attr(x, "pm_groups", exact = TRUE) + if (is.null(pm_groups)) character(0) else pm_groups +} + +pm_has_groups <- function(x) { + pm_groups <- pm_get_groups(x) + if (length(pm_groups) == 0L) FALSE else TRUE +} + +pm_apply_grouped_function <- function(fn, .data, drop = FALSE, ...) { + pm_groups <- pm_get_groups(.data) + grouped <- pm_split_into_groups(.data, pm_groups, drop) + res <- do.call(rbind, unname(lapply(grouped, fn, ...))) + if (any(pm_groups %in% colnames(res))) { + class(res) <- c("grouped_data", class(res)) + res <- pm_set_groups(res, pm_groups[pm_groups %in% colnames(res)]) + } + res +} + +pm_print.grouped_data <- function(x, ..., digits = NULL, quote = FALSE, right = TRUE, row.names = TRUE, max = NULL) { + class(x) <- "data.frame" + print(x, ..., digits = digits, quote = quote, right = right, row.names = row.names, max = max) + cat("\nGroups: ", paste(pm_get_groups(x), collapse = ", "), "\n\n") +} + +pm_group_data <- function(.data) { + if (!pm_has_groups(.data)) return(data.frame(.rows = I(list(seq_len(nrow(.data)))))) + pm_groups <- pm_get_groups(.data) + pm_group_data_worker(.data, pm_groups) +} + +pm_group_data_worker <- function(.data, pm_groups) { + res <- unique(.data[, pm_groups, drop = FALSE]) + class(res) <- "data.frame" + nrow_res <- nrow(res) + rows <- rep(list(NA), nrow_res) + for (i in seq_len(nrow_res)) { + rows[[i]] <- which(interaction(.data[, pm_groups]) %in% interaction(res[i, pm_groups])) + } + res$`.rows` <- rows + res <- res[do.call(order, lapply(pm_groups, function(x) res[, x])), , drop = FALSE] + rownames(res) <- NULL + res +} + +pm_group_rows <- function(.data) { + pm_group_data(.data)[[".rows"]] +} + +pm_group_indices <- function(.data) { + if (!pm_has_groups(.data)) return(rep(1L, nrow(.data))) + pm_groups <- pm_get_groups(.data) + res <- unique(.data[, pm_groups, drop = FALSE]) + res <- res[do.call(order, lapply(pm_groups, function(x) res[, x])), , drop = FALSE] + class(res) <- "data.frame" + nrow_data <- nrow(.data) + rows <- rep(NA, nrow_data) + for (i in seq_len(nrow_data)) { + rows[i] <- which(interaction(res[, pm_groups]) %in% interaction(.data[i, pm_groups])) + } + rows +} + +pm_group_vars <- function(x) { + pm_get_groups(x) +} + +pm_groups <- function(x) { + lapply(pm_get_groups(x), as.symbol) +} + +pm_group_size <- function(x) { + lengths(pm_group_rows(x)) +} + +pm_n_groups <- function(x) { + nrow(pm_group_data(x)) +} +pm_group_split <- function(.data, ..., .keep = TRUE) { + dots_len <- ...length() > 0L + if (pm_has_groups(.data) && isTRUE(dots_len)) { + warning("... is ignored in pm_group_split(), please use pm_group_by(..., .add = TRUE) %pm>% pm_group_split()") + } + if (!pm_has_groups(.data) && isTRUE(dots_len)) { + .data <- pm_group_by(.data, ...) + } + if (!pm_has_groups(.data) && isFALSE(dots_len)) { + return(list(.data)) + } + pm_context$setup(.data) + on.exit(pm_context$clean(), add = TRUE) + pm_groups <- pm_get_groups(.data) + attr(pm_context$.data, "pm_groups") <- NULL + res <- pm_split_into_groups(pm_context$.data, pm_groups) + names(res) <- NULL + if (isFALSE(.keep)) { + res <- lapply(res, function(x) x[, !colnames(x) %in% pm_groups]) + } + any_empty <- unlist(lapply(res, function(x) !(nrow(x) == 0L))) + res[any_empty] +} + +pm_group_keys <- function(.data) { + pm_groups <- pm_get_groups(.data) + pm_context$setup(.data) + res <- pm_context$.data[, pm_context$get_colnames() %in% pm_groups, drop = FALSE] + res <- res[!duplicated(res), , drop = FALSE] + if (nrow(res) == 0L) return(res) + class(res) <- "data.frame" + res <- res[do.call(order, lapply(pm_groups, function(x) res[, x])), , drop = FALSE] + rownames(res) <- NULL + res +} + +pm_split_into_groups <- function(.data, pm_groups, drop = FALSE, ...) { + class(.data) <- "data.frame" + group_factors <- lapply(pm_groups, function(x, .data) as.factor(.data[, x]), .data) + split(x = .data, f = group_factors, drop = drop, ...) +} +pm_if_else <- function(condition, true, false, missing = NULL) { + if (!is.logical(condition)) stop("`condition` must be a logical vector.") + cls_true <- class(true) + cls_false <- class(false) + cls_missing <- class(missing) + if (!identical(cls_true, cls_false)) { + stop("The class of `true` <", class(true), "> is not the same as the class of `false` <", class(false), ">") + } + if (!is.null(missing) && !identical(cls_true, cls_missing)) { + stop("`missing` must be a ", cls_true, " vector, not a ", cls_missing, " vector.") + } + res <- ifelse(condition, true, false) + if (!is.null(missing)) res[is.na(res)] <- missing + attributes(res) <- attributes(true) + res +} + +pm_inner_join <- function(x, y, by = NULL, suffix = c(".x", ".y")) { + pm_join_worker(x = x, y = y, by = by, suffix = suffix, sort = FALSE) +} + +# pm_left_join <- function(x, y, by = NULL, suffix = c(".x", ".y")) { +# pm_join_worker(x = x, y = y, by = by, suffix = suffix, all.x = TRUE) +# } + +pm_right_join <- function(x, y, by = NULL, suffix = c(".x", ".y")) { + pm_join_worker(x = x, y = y, by = by, suffix = suffix, all.y = TRUE) +} + +pm_full_join <- function(x, y, by = NULL, suffix = c(".x", ".y")) { + pm_join_worker(x = x, y = y, by = by, suffix = suffix, all = TRUE) +} + +pm_join_worker <- function(x, y, by = NULL, suffix = c(".x", ".y"), ...) { + x[, ".join_id"] <- seq_len(nrow(x)) + if (is.null(by)) { + by <- intersect(names(x), names(y)) + pm_join_message(by) + merged <- merge(x = x, y = y, by = by, suffixes = suffix, ...)[, union(names(x), names(y))] + } else if (is.null(names(by))) { + merged <- merge(x = x, y = y, by = by, suffixes = suffix, ...) + } else { + merged <- merge(x = x, y = y, by.x = names(by), by.y = by, suffixes = suffix, ...) + } + merged <- merged[order(merged[, ".join_id"]), colnames(merged) != ".join_id"] + rownames(merged) <- NULL + merged +} + +pm_join_message <- function(by) { + if (length(by) > 1L) { + message("Joining, by = c(\"", paste0(by, collapse = "\", \""), "\")\n", sep = "") + } else { + message("Joining, by = \"", by, "\"\n", sep = "") + } +} + +pm_anti_join <- function(x, y, by = NULL) { + pm_filter_join_worker(x, y, by, type = "anti") +} + +pm_semi_join <- function(x, y, by = NULL) { + pm_filter_join_worker(x, y, by, type = "semi") +} + +pm_filter_join_worker <- function(x, y, by = NULL, type = c("anti", "semi")) { + type <- match.arg(type, choices = c("anti", "semi"), several.ok = FALSE) + if (is.null(by)) { + by <- intersect(names(x), names(y)) + pm_join_message(by) + } + rows <- interaction(x[, by]) %in% interaction(y[, by]) + if (type == "anti") rows <- !rows + res <- x[rows,, drop = FALSE] + rownames(res) <- NULL + res +} +pm_lag <- function(x, pm_n = 1L, default = NA) { + if (inherits(x, "ts")) stop("`x` must be a vector, not a `ts` object, do you want `stats::pm_lag()`?") + if (length(pm_n) != 1L || !is.numeric(pm_n) || pm_n < 0L) stop("`pm_n` must be a nonnegative integer scalar") + if (pm_n == 0L) return(x) + tryCatch( + storage.mode(default) <- typeof(x), + warning = function(w) { + stop("Cannot convert `default` <", typeof(default), "> to `x` <", typeof(x), ">") + } + ) + xlen <- length(x) + pm_n <- pmin(pm_n, xlen) + res <- c(rep(default, pm_n), x[seq_len(xlen - pm_n)]) + attributes(res) <- attributes(x) + res +} + +pm_lead <- function(x, pm_n = 1L, default = NA) { + if (length(pm_n) != 1L || !is.numeric(pm_n) || pm_n < 0L) stop("pm_n must be a nonnegative integer scalar") + if (pm_n == 0L) return(x) + tryCatch( + storage.mode(default) <- typeof(x), + warning = function(w) { + stop("Cannot convert `default` <", typeof(default), "> to `x` <", typeof(x), ">") + } + ) + xlen <- length(x) + pm_n <- pmin(pm_n, xlen) + res <- c(x[-seq_len(pm_n)], rep(default, pm_n)) + attributes(res) <- attributes(x) + res +} +pm_mutate <- function(.data, ...) { + pm_check_is_dataframe(.data) + if ("grouped_data" %in% class(.data)) { + pm_mutate.grouped_data(.data, ...) + } else { + pm_mutate.default(.data, ...) + } +} + +pm_mutate.default <- function(.data, ...) { + conditions <- pm_dotdotdot(..., .impute_names = TRUE) + .data[, setdiff(names(conditions), names(.data))] <- NA + pm_context$setup(.data) + on.exit(pm_context$clean(), add = TRUE) + for (i in seq_along(conditions)) { + pm_context$.data[, names(conditions)[i]] <- eval(conditions[[i]], envir = pm_context$.data) + } + pm_context$.data +} + +pm_mutate.grouped_data <- function(.data, ...) { + rows <- rownames(.data) + res <- pm_apply_grouped_function("pm_mutate", .data, drop = TRUE, ...) + res[rows, ] +} +pm_n_distinct <- function(..., na.rm = FALSE) { + res <- c(...) + if (is.list(res)) return(nrow(unique(as.data.frame(res, stringsAsFactors = FALSE)))) + if (isTRUE(na.rm)) res <- res[!is.na(res)] + length(unique(res)) +} +pm_na_if <- function(x, y) { + y_len <- length(y) + x_len <- length(x) + if (!(y_len %in% c(1L, x_len))) stop("`y` must be length ", x_len, " (same as `x`) or 1, not ", y_len) + x[x == y] <- NA + x +} +pm_near <- function(x, y, tol = .Machine$double.eps^0.5) { + abs(x - y) < tol +} +`%pm>%` <- function(lhs, rhs) { + lhs <- substitute(lhs) + rhs <- substitute(rhs) + eval(as.call(c(rhs[[1L]], lhs, as.list(rhs[-1L]))), envir = parent.frame()) +} +pm_pull <- function(.data, var = -1) { + var_deparse <- pm_deparse_var(var) + col_names <- colnames(.data) + if (!(var_deparse %in% col_names) & grepl("^[[:digit:]]+L|[[:digit:]]", var_deparse)) { + var <- as.integer(gsub("L", "", var_deparse)) + var <- pm_if_else(var < 1L, rev(col_names)[abs(var)], col_names[var]) + } else if (var_deparse %in% col_names) { + var <- var_deparse + } + .data[, var] +} +pm_recode <- function(.x, ..., .default = NULL, .missing = NULL) { + if ("grouped_data" %in% class(.data)) { + pm_recode.grouped_data(.data, ...) + } else { + pm_recode.default(.data, ...) + } +} + +pm_recode.numeric <- function(.x, ..., .default = NULL, .missing = NULL) { + values <- pm_dotdotdot(...) + + nms <- pm_have_name(values) + if (all(nms)) { + vals <- as.double(names(values)) + } else if (all(!nms)) { + vals <- seq_along(values) + } else { + stop("Either all values must be named, or none must be named.") + } + + pm_n <- length(.x) + template <- pm_find_template(values, .default, .missing) + res <- template[rep(NA_integer_, pm_n)] + replaced <- rep(FALSE, pm_n) + + for (i in seq_along(values)) { + res <- pm_replace_with(res, .x == vals[i], values[[i]], paste0("Vector ", i)) + replaced[.x == vals[i]] <- TRUE + } + + .default <- pm_validate_recode_default(.default, .x, res, replaced) + res <- pm_replace_with(res, !replaced & !is.na(.x), .default, "`.default`") + res <- pm_replace_with(res, is.na(.x), .missing, "`.missing`") + res +} + +pm_recode.character <- function(.x, ..., .default = NULL, .missing = NULL) { + .x <- as.character(.x) + values <- pm_dotdotdot(...) + val_names <- names(values) + have_names <- pm_have_name(values) + if (!all(have_names)) { + bad <- which(!have_names) + 1L + stop("Argument", if (length(bad) > 1L) "s", " ", paste(bad, sep = ", "), " must be named, not unnamed.") + } + + pm_n <- length(.x) + template <- pm_find_template(values, .default, .missing) + res <- template[rep(NA_integer_, pm_n)] + replaced <- rep(FALSE, pm_n) + + for (nm in val_names) { + res <- pm_replace_with(res, .x == nm, values[[nm]], paste0("`", nm, "`")) + replaced[.x == nm] <- TRUE + } + + .default <- pm_validate_recode_default(.default, .x, res, replaced) + res <- pm_replace_with(res, !replaced & !is.na(.x), .default, "`.default`") + res <- pm_replace_with(res, is.na(.x), .missing, "`.missing`") + res +} + +pm_recode.factor <- function(.x, ..., .default = NULL, .missing = NULL) { + values <- pm_dotdotdot(...) + if (length(values) == 0) stop("No replacements provided.") + + have_names <- pm_have_name(values) + if (!all(have_names)) { + bad <- which(!have_names) + 1 + stop(bad, " must be named, not unnamed.") + } + if (!is.null(.missing)) { + stop("`.missing` is not supported for factors.") + } + + pm_n <- length(levels(.x)) + template <- pm_find_template(values, .default, .missing) + res <- template[rep(NA_integer_, pm_n)] + replaced <- rep(FALSE, pm_n) + + for (nm in names(values)) { + res <- pm_replace_with(res, levels(.x) == nm, values[[nm]], paste0("`", nm, "`")) + replaced[levels(.x) == nm] <- TRUE + } + .default <- pm_validate_recode_default(.default, .x, res, replaced) + res <- pm_replace_with(res, !replaced, .default, "`.default`") + + if (is.character(res)) { + levels(.x) <- res + .x + } else { + res[as.integer(.x)] + } +} + +pm_have_name <- function(x) { + nms <- names(x) + if (is.null(nms)) rep(FALSE, length(x)) else !(nms == "" | is.na(nms)) +} + +pm_compact <- function(.x) Filter(length, .x) + +pm_find_template <- function(values, .default = NULL, .missing = NULL) { + x <- pm_compact(c(values, .default, .missing)) + if (length(x) == 0L) { + stop("No replacements provided.") + } + x[[1]] +} + +pm_validate_recode_default <- function(default, x, res, replaced) { + default <- pm_recode_default(x, default, res) + if (is.null(default) && sum(replaced & !is.na(x)) < length(res[!is.na(x)])) { + warning( + "Unreplaced values treated as NA as .x is not compatible. ", + "Please specify replacements exhaustively or supply .default", + call. = FALSE + ) + } + default +} + +pm_recode_default <- function(x, default, res) { + if ("grouped_data" %in% class(.data)) { + pm_recode_default.grouped_data(.data, ...) + } else { + pm_recode_default.default(.data, ...) + } +} + +pm_recode_default.default <- function(x, default, res) { + same_type <- identical(typeof(x), typeof(res)) + if (is.null(default) && same_type) x else default +} + +pm_recode_default.factor <- function(x, default, res) { + if (is.null(default)) { + if ((is.character(res) || is.factor(res)) && is.factor(x)) { + levels(x) + } else { + res[NA_integer_] + } + } else { + default + } +} + +pm_recode_factor <- function(.x, ..., .default = NULL, .missing = NULL, .ordered = FALSE) { + recoded <- pm_recode(.x, ..., .default = .default, .missing = .missing) + + values <- pm_dotdotdot(...) + all_levels <- unique(c(values, pm_recode_default(.x, .default, recoded), .missing)) + recoded_levels <- if (is.factor(recoded)) levels(recoded) else unique(recoded) + levels <- intersect(all_levels, recoded_levels) + + factor(recoded, levels, ordered = .ordered) +} +pm_set_names <- function(object = nm, nm) { + names(object) <- nm + object +} + +pm_vec_head <- function(x, pm_n = 6L, ...) { + stopifnot(length(pm_n) == 1L) + pm_n <- if (pm_n < 0L) max(length(x) + pm_n, 0L) else min(pm_n, length(x)) + x[seq_len(pm_n)] +} +pm_relocate <- function(.data, ..., .before = NULL, .after = NULL) { + pm_check_is_dataframe(.data) + data_names <- colnames(.data) + col_pos <- pm_select_positions(.data, ...) + + .before <- pm_deparse_var(.before) + .after <- pm_deparse_var(.after) + has_before <- !is.null(.before) + has_after <- !is.null(.after) + + if (has_before && has_after) { + stop("You must supply only one of `.before` and `.after`") + } else if (has_before) { + pm_where <- min(match(.before, data_names)) + col_pos <- c(setdiff(col_pos, pm_where), pm_where) + } else if (has_after) { + pm_where <- max(match(.after, data_names)) + col_pos <- c(pm_where, setdiff(col_pos, pm_where)) + } else { + pm_where <- 1L + col_pos <- union(col_pos, pm_where) + } + lhs <- setdiff(seq(1L, pm_where - 1L), col_pos) + rhs <- setdiff(seq(pm_where + 1L, ncol(.data)), col_pos) + col_pos <- unique(c(lhs, col_pos, rhs)) + col_pos <- col_pos[col_pos <= length(data_names)] + + res <- .data[col_pos] + if (pm_has_groups(.data)) res <- pm_set_groups(res, pm_get_groups(.data)) + res +} +pm_rename <- function(.data, ...) { + pm_check_is_dataframe(.data) + new_names <- names(pm_deparse_dots(...)) + if (length(new_names) == 0L) { + warning("You didn't give any new names") + return(.data) + } + col_pos <- pm_select_positions(.data, ...) + old_names <- colnames(.data)[col_pos] + new_names_zero <- nchar(new_names) == 0L + if (any(new_names_zero)) { + warning("You didn't provide new names for: ", paste0("`", old_names[new_names_zero], collapse = ", "), "`") + new_names[new_names_zero] <- old_names[new_names_zero] + } + colnames(.data)[col_pos] <- new_names + .data +} + +pm_rename_with <- function(.data, .fn, .cols = pm_everything(), ...) { + if (!is.function(.fn)) stop("`", .fn, "` is not a valid function") + grouped <- inherits(.data, "grouped_data") + if (grouped) grp_pos <- which(colnames(.data) %in% pm_group_vars(.data)) + col_pos <- eval(substitute(pm_select_positions(.data, .cols))) + cols <- colnames(.data)[col_pos] + new_cols <- .fn(cols, ...) + if (any(duplicated(new_cols))) { + stop("New names must be unique however `", deparse(substitute(.fn)), "` returns duplicate column names") + } + colnames(.data)[col_pos] <- new_cols + if (grouped) .data <- pm_set_groups(.data, colnames(.data)[grp_pos]) + .data +} +pm_replace_na <- function(data, replace, ...) { + if ("grouped_data" %in% class(.data)) { + pm_replace_na.grouped_data(.data, ...) + } else { + pm_replace_na.default(.data, ...) + } +} + +pm_replace_na.default <- function(data, replace = NA, ...) { + pm_check_replacement(replace, deparse(substitute(data))) + data[is.na(data)] <- replace + data +} + +pm_replace_na.data.frame <- function(data, replace = list(), ...) { + stopifnot(is.list(replace)) + replace_vars <- intersect(names(replace), names(data)) + for (var in replace_vars) { + pm_check_replacement(replace[[var]], var) + data[[var]][is.na(data[[var]])] <- replace[[var]] + } + data +} + +pm_check_replacement <- function(x, var) { + pm_n <- length(x) + if (pm_n == 1L) return() + stop("Replacement for `", var, "` is length ", pm_n, ", not length 1") +} +pm_replace_with <- function(x, i, val, arg_name) { + if (is.null(val)) return(x) + pm_check_length(val, x, arg_name) + pm_check_type(val, x, arg_name) + pm_check_class(val, x, arg_name) + i[is.na(i)] <- FALSE + if (length(val) == 1L) { + x[i] <- val + } + else { + x[i] <- val[i] + } + x +} + +pm_check_length <- function(x, y, arg_name) { + length_x <- length(x) + length_y <- length(y) + if (all(length_x %in% c(1L, length_y))) return() + if (length_y == 1) { + stop(arg_name, " must be length 1, not ", paste(length_x, sep = ", ")) + } else { + stop(arg_name, " must be length ", length_y, " or 1, not ", length_x) + } +} + +pm_check_type <- function(x, y, arg_name) { + x_type <- typeof(x) + y_type <- typeof(y) + if (identical(x_type, y_type)) return() + stop(arg_name, " must be `", y_type, "`, not `", x_type, "`") +} + +pm_check_class <- function(x, y, arg_name) { + if (!is.object(x)) return() + exp_classes <- class(y) + out_classes <- class(x) + if (identical(out_classes, exp_classes)) return() + stop(arg_name, " must have class `", exp_classes, "`, not class `", out_classes, "`") +} +pm_rownames_to_column <- function(.data, var = "rowname") { + pm_check_is_dataframe(.data) + col_names <- colnames(.data) + if (var %in% col_names) stop("Column `", var, "` already exists in `.data`") + .data[, var] <- rownames(.data) + rownames(.data) <- NULL + .data[, c(var, setdiff(col_names, var))] +} + +pm_select <- function(.data, ...) { + col_pos <- pm_select_positions(.data, ..., .group_pos = TRUE) + map_names <- names(col_pos) + map_names_length <- nchar(map_names) + if (any(map_names_length == 0L)) { + no_new_names <- which(map_names_length == 0L) + map_names[no_new_names] <- colnames(.data)[no_new_names] + } + res <- .data[, col_pos, drop = FALSE] + if (!is.null(map_names) && all(col_pos > 0L)) colnames(res) <- map_names + if (pm_has_groups(.data)) res <- pm_set_groups(res, pm_get_groups(.data)) + res +} +pm_starts_with <- function(match, ignore.case = TRUE, vars = pm_peek_vars()) { + grep(pattern = paste0("^", paste0(match, collapse = "|^")), x = vars, ignore.case = ignore.case) +} + +pm_ends_with <- function(match, ignore.case = TRUE, vars = pm_peek_vars()) { + grep(pattern = paste0(paste0(match, collapse = "$|"), "$"), x = vars, ignore.case = ignore.case) +} + +pm_contains <- function(match, ignore.case = TRUE, vars = pm_peek_vars()) { + pm_matches <- lapply( + match, + function(x) { + if (isTRUE(ignore.case)) { + match_u <- toupper(x) + match_l <- tolower(x) + pos_u <- grep(pattern = match_u, x = toupper(vars), fixed = TRUE) + pos_l <- grep(pattern = match_l, x = tolower(vars), fixed = TRUE) + unique(c(pos_l, pos_u)) + } else { + grep(pattern = x, x = vars, fixed = TRUE) + } + } + ) + unique(unlist(pm_matches)) +} + +pm_matches <- function(match, ignore.case = TRUE, perl = FALSE, vars = pm_peek_vars()) { + grep(pattern = match, x = vars, ignore.case = ignore.case, perl = perl) +} + +pm_num_range <- function(prefix, range, width = NULL, vars = pm_peek_vars()) { + if (!is.null(width)) { + range <- sprintf(paste0("%0", width, "d"), range) + } + find <- paste0(prefix, range) + if (any(duplicated(vars))) { + stop("Column names must be unique") + } else { + x <- match(find, vars) + x[!is.na(x)] + } +} + +pm_all_of <- function(x, vars = pm_peek_vars()) { + x_ <- !x %in% vars + if (any(x_)) { + which_x_ <- which(x_) + if (length(which_x_) == 1L) { + stop("The column ", x[which_x_], " does not exist.") + } else { + stop("The columns ", paste(x[which_x_], collapse = ", "), " do not exist.") + } + } else { + which(vars %in% x) + } +} + +pm_any_of <- function(x, vars = pm_peek_vars()) { + which(vars %in% x) +} + +pm_everything <- function(vars = pm_peek_vars()) { + seq_along(vars) +} + +pm_last_col <- function(offset = 0L, vars = pm_peek_vars()) { + if (!pm_is_wholenumber(offset)) stop("`offset` must be an integer") + pm_n <- length(vars) + if (offset && pm_n <= offset) { + stop("`offset` must be smaller than the number of `vars`") + } else if (pm_n == 0) { + stop("Can't pm_select last column when `vars` is empty") + } else { + pm_n - offset + } +} + +pm_peek_vars <- function() { + pm_select_env$get_colnames() +} +pm_select_positions <- function(.data, ..., .group_pos = FALSE) { + cols <- pm_dotdotdot(...) + pm_select_env$setup(.data = .data, calling_frame = parent.frame(2L)) + on.exit(pm_select_env$clean(), add = TRUE) + data_names <- pm_select_env$get_colnames() + pos <- unlist(lapply(cols, pm_eval_expr)) + col_len <- pm_select_env$get_ncol() + if (any(pos > col_len)) { + oor <- pos[which(pos > col_len)] + oor_len <- length(oor) + stop( + "Location", if (oor_len > 1) "s " else " ", pm_collapse_to_sentence(oor), + if (oor_len > 1) " don't " else " doesn't ", "exist. There are only ", col_len, " columns." + ) + } + if (isTRUE(.group_pos)) { + pm_groups <- pm_get_groups(.data) + missing_groups <- !(pm_groups %in% cols) + if (any(missing_groups)) { + sel_missing <- pm_groups[missing_groups] + message("Adding missing grouping variables: `", paste(sel_missing, collapse = "`, `"), "`") + readd <- match(sel_missing, data_names) + if (length(names(cols)) > 0L) names(readd) <- data_names[readd] + pos <- c(readd, pos) + } + } + pos[!duplicated(pos)] +} + +pm_eval_expr <- function(x) { + type <- typeof(x) + switch( + type, + "integer" = x, + "double" = as.integer(x), + "character" = pm_select_char(x), + "symbol" = pm_select_symbol(x), + "language" = pm_eval_call(x), + stop("Expressions of type <", typeof(x), "> cannot be evaluated for use when subsetting.") + ) +} + +pm_select_char <- function(expr) { + pos <- match(expr, pm_select_env$get_colnames()) + if (is.na(pos)) stop("Column `", expr, "` does not exist") + pos +} + +pm_select_symbol <- function(expr) { + expr_name <- as.character(expr) + if (grepl("^is\\.", expr_name) && pm_is_function(expr)) { + stop( + "Predicate functions must be wrapped in `pm_where()`.\n\n", + sprintf(" data %%pm>%% pm_select(pm_where(%s))", expr_name) + ) + } + res <- try(pm_select_char(as.character(expr)), silent = TRUE) + if (inherits(res, "try-error")) { + res <- tryCatch( + unlist(lapply(eval(expr, envir = pm_select_env$calling_frame), pm_eval_expr)), + error = function(e) stop("Column ", expr, " does not exist.") + ) + } + res +} + +pm_eval_call <- function(x) { + type <- as.character(x[[1]]) + switch( + type, + `:` = pm_select_seq(x), + `!` = pm_select_negate(x), + `-` = pm_select_minus(x), + `c` = pm_select_c(x), + `(` = pm_select_bracket(x), + pm_select_pm_context(x) + ) +} + +pm_select_seq <- function(expr) { + x <- pm_eval_expr(expr[[2]]) + y <- pm_eval_expr(expr[[3]]) + x:y +} + +pm_select_negate <- function(expr) { + x <- if (pm_is_negated_colon(expr)) { + expr <- call(":", expr[[2]][[2]], expr[[2]][[3]][[2]]) + pm_eval_expr(expr) + } else { + pm_eval_expr(expr[[2]]) + } + x * -1L +} + +pm_is_negated_colon <- function(expr) { + expr[[1]] == "!" && length(expr[[2]]) > 1L && expr[[2]][[1]] == ":" && expr[[2]][[3]][[1]] == "!" +} + +pm_select_minus <- function(expr) { + x <- pm_eval_expr(expr[[2]]) + x * -1L +} + +pm_select_c <- function(expr) { + lst_expr <- as.list(expr) + lst_expr[[1]] <- NULL + unlist(lapply(lst_expr, pm_eval_expr)) +} + +pm_select_bracket <- function(expr) { + pm_eval_expr(expr[[2]]) +} + +pm_select_pm_context <- function(expr) { + eval(expr, envir = pm_select_env$.data) +} + +pm_select_env <- new.env() +pm_select_env$setup <- function(.data, calling_frame) { + pm_select_env$.data <- .data + pm_select_env$calling_frame <- calling_frame +} +pm_select_env$clean <- function() { + rm(list = c(".data", "calling_frame"), envir = pm_select_env) +} +pm_select_env$get_colnames <- function() colnames(pm_select_env$.data) +pm_select_env$get_nrow <- function() nrow(pm_select_env$.data) +pm_select_env$get_ncol <- function() ncol(pm_select_env$.data) + +pm_slice <- function(.data, ...) { + pm_check_is_dataframe(.data) + if ("grouped_data" %in% class(.data)) { + pm_slice.grouped_data(.data, ...) + } else { + pm_slice.default(.data, ...) + } +} + +pm_slice.data.frame <- function(.data, ...) { + if (nrow(.data) == 0L) return(.data) + pos <- pm_slice_positions(.data, ...) + .data[pos, , drop = FALSE] +} + +pm_slice.grouped_data <- function(.data, ...) { + pm_apply_grouped_function("pm_slice", .data, drop = TRUE, ...) +} + + +pm_slice_head <- function(.data, ..., pm_n, prop) { + if ("grouped_data" %in% class(.data)) { + pm_slice_head.grouped_data(.data, ...) + } else { + pm_slice_head.default(.data, ...) + } +} + +pm_slice_head.data.frame <- function(.data, ..., pm_n, prop) { + size <- pm_check_slice_size(pm_n, prop) + idx <- switch( + size$type, + pm_n = function(pm_n) seq2(1, min(size$pm_n, pm_n)), + prop = function(pm_n) seq2(1, min(size$prop * pm_n, pm_n)) + ) + pm_slice(.data, idx(AMR:::pm_n())) +} + +pm_slice_head.grouped_data <- function(.data, ..., pm_n, prop) { + pm_apply_grouped_function("pm_slice_head", .data, drop = TRUE, pm_n = pm_n, prop = prop, ...) +} + +pm_slice_tail <- function(.data, ..., pm_n, prop) { + if ("grouped_data" %in% class(.data)) { + pm_slice_tail.grouped_data(.data, ...) + } else { + pm_slice_tail.default(.data, ...) + } +} + +pm_slice_tail.data.frame <- function(.data, ..., pm_n, prop) { + size <- pm_check_slice_size(pm_n, prop) + idx <- switch( + size$type, + pm_n = function(pm_n) seq2(max(pm_n - size$pm_n + 1, 1), pm_n), + prop = function(pm_n) seq2(max(ceiling(pm_n - size$prop * pm_n) + 1, 1), pm_n) + ) + pm_slice(.data, idx(AMR:::pm_n())) +} + +pm_slice_tail.grouped_data <- function(.data, ..., pm_n, prop) { + pm_apply_grouped_function("pm_slice_tail", .data, drop = TRUE, pm_n = pm_n, prop = prop, ...) +} + +pm_slice_min <- function(.data, order_by, ..., pm_n, prop, with_ties = TRUE) { + if ("grouped_data" %in% class(.data)) { + pm_slice_min.grouped_data(.data, ...) + } else { + pm_slice_min.default(.data, ...) + } +} + +pm_slice_min.data.frame <- function(.data, order_by, ..., pm_n, prop, with_ties = TRUE) { + if (missing(order_by)) stop("argument `order_by` is missing, with no default.") + + size <- pm_check_slice_size(pm_n, prop) + idx <- if (isTRUE(with_ties)) { + switch( + size$type, + pm_n = function(x, pm_n) pm_vec_head(order(x), pm_smaller_ranks(x, size$pm_n)), + prop = function(x, pm_n) pm_vec_head(order(x), pm_smaller_ranks(x, size$prop * pm_n)) + ) + } else { + switch( + size$type, + pm_n = function(x, pm_n) pm_vec_head(order(x), size$pm_n), + prop = function(x, pm_n) pm_vec_head(order(x), size$prop * pm_n) + ) + } + order_by <- .data[, pm_deparse_var(order_by)] + pm_slice(.data, idx(order_by, AMR:::pm_n())) +} + +pm_slice_min.grouped_data <- function(.data, order_by, ..., pm_n, prop, with_ties = TRUE) { + pm_eval_env$env <- environment() + on.exit(rm(list = "env", envir = pm_eval_env), add = TRUE) + pm_apply_grouped_function( + "pm_slice_min", .data, drop = TRUE, order_by = order_by, pm_n = pm_n, prop = prop, with_ties = with_ties, ... + ) +} + +pm_slice_max <- function(.data, order_by, ..., pm_n, prop, with_ties = TRUE) { + if ("grouped_data" %in% class(.data)) { + pm_slice_max.grouped_data(.data, ...) + } else { + pm_slice_max.default(.data, ...) + } +} + +pm_slice_max.data.frame <- function(.data, order_by, ..., pm_n, prop, with_ties = TRUE) { + if (missing(order_by)) stop("argument `order_by` is missing, with no default.") + + size <- pm_check_slice_size(pm_n, prop) + idx <- if (isTRUE(with_ties)) { + switch( + size$type, + pm_n = function(x, pm_n) pm_vec_head(order(x, decreasing = TRUE), pm_smaller_ranks(pm_desc(x), size$pm_n)), + prop = function(x, pm_n) pm_vec_head(order(x, decreasing = TRUE), pm_smaller_ranks(pm_desc(x), size$prop * pm_n)) + ) + } else { + switch( + size$type, + pm_n = function(x, pm_n) pm_vec_head(order(x, decreasing = TRUE), size$pm_n), + prop = function(x, pm_n) pm_vec_head(order(x, decreasing = TRUE), size$prop * pm_n) + ) + } + order_by <- .data[, pm_deparse_var(order_by)] + pm_slice(.data, idx(order_by, AMR:::pm_n())) +} + +pm_slice_max.grouped_data <- function(.data, order_by, ..., pm_n, prop, with_ties = TRUE) { + pm_eval_env$env <- environment() + on.exit(rm(list = "env", envir = pm_eval_env), add = TRUE) + pm_apply_grouped_function( + "pm_slice_max", .data, drop = TRUE, order_by = order_by, pm_n = pm_n, prop = prop, with_ties = with_ties, ... + ) +} + +pm_slice_sample <- function(.data, ..., pm_n, prop, weight_by = NULL, replace = FALSE) { + if ("grouped_data" %in% class(.data)) { + pm_slice_sample.grouped_data(.data, ...) + } else { + pm_slice_sample.default(.data, ...) + } +} + +pm_slice_sample.data.frame <- function(.data, ..., pm_n, prop, weight_by = NULL, replace = FALSE) { + size <- pm_check_slice_size(pm_n, prop) + idx <- switch( + size$type, + pm_n = function(x, pm_n) pm_sample_int(pm_n, size$pm_n, replace = replace, wt = x), + prop = function(x, pm_n) pm_sample_int(pm_n, size$prop * pm_n, replace = replace, wt = x), + ) + weight_by <- pm_deparse_var(weight_by) + if (!is.null(weight_by)) weight_by <- .data[, weight_by] + pm_slice(.data, idx(weight_by, AMR:::pm_n())) +} + +pm_slice_sample.grouped_data <- function(.data, ..., pm_n, prop, weight_by = NULL, replace = FALSE) { + pm_eval_env$env <- environment() + on.exit(rm(list = "env", envir = pm_eval_env), add = TRUE) + pm_apply_grouped_function( + "pm_slice_sample", .data, drop = TRUE, pm_n = pm_n, prop = prop, weight_by = weight_by, replace = replace, ... + ) +} + +# helpers ---------------------------------------------------------------------- + +pm_slice_positions <- function(.data, ...) { + conditions <- pm_dotdotdot(...) + pm_context$setup(.data) + on.exit(pm_context$clean(), add = TRUE) + if (length(conditions) == 0L) return(seq_len(pm_n())) + + frame <- parent.frame(2L) + rows <- lapply( + conditions, + function(cond, frame) { + res <- eval(cond, pm_context$.data, frame) + if (is.logical(res) && all(is.na(res))) { + res <- integer() + } else if (is.numeric(res)) { + res <- as.integer(res) + } else if (!is.integer(res)) { + stop("`pm_slice()` expressions should return indices (positive or negative integers).") + } + }, + frame = frame + ) + rows <- do.call(c, rows) + if (length(rows) == 0L) { + # do nothing + } else if (all(rows >= 0, na.rm = TRUE)) { + rows <- rows[!is.na(rows) & rows <= pm_n() & rows > 0] + } else if (all(rows <= 0, na.rm = TRUE)) { + rows <- setdiff(seq_len(pm_n()), -rows) + } else { + stop("`pm_slice()` expressions should return either all positive or all negative.") + } + rows +} + +pm_check_slice_size <- function(pm_n, prop) { + if (missing(pm_n) && missing(prop)) { + list(type = "pm_n", pm_n = 1L) + } else if (!missing(pm_n) && missing(prop)) { + if (!is.numeric(pm_n) || length(pm_n) != 1) { + stop("`pm_n` must be a single number.") + } + if (is.na(pm_n) || pm_n < 0) { + stop("`pm_n` must be a non-missing positive number.") + } + + list(type = "pm_n", pm_n = pm_n) + } else if (!missing(prop) && missing(pm_n)) { + if (!is.numeric(prop) || length(prop) != 1) { + stop("`prop` must be a single number.") + } + if (is.na(prop) || prop < 0) { + stop("`prop` must be a non-missing positive number.") + } + list(type = "prop", prop = prop) + } else { + stop("Must supply exactly one of `pm_n` and `prop` arguments.") + } +} + +pm_sample_int <- function(pm_n, size, replace = FALSE, wt = NULL) { + if (isTRUE(replace)) { + sample.int(pm_n, size, prob = wt, replace = TRUE) + } else { + sample.int(pm_n, min(size, pm_n), prob = wt) + } +} + +pm_smaller_ranks <- function(x, y) { + sum(pm_min_rank(x) <= y, na.rm = TRUE) +} +pm_summarise <- function(.data, ...) { + pm_check_is_dataframe(.data) + if ("grouped_data" %in% class(.data)) { + pm_summarise.grouped_data(.data, ...) + } else { + pm_summarise.default(.data, ...) + } +} + +pm_summarise.default <- function(.data, ...) { + fns <- pm_dotdotdot(...) + pm_context$setup(.data) + on.exit(pm_context$clean(), add = TRUE) + pm_groups_exist <- pm_has_groups(pm_context$.data) + if (pm_groups_exist) { + group <- unique(pm_context$.data[, pm_get_groups(pm_context$.data), drop = FALSE]) + } + res <- lapply( + fns, + function(x) { + x_res <- do.call(with, list(pm_context$.data, x)) + if (is.list(x_res)) I(x_res) else x_res + } + ) + res <- as.data.frame(res) + fn_names <- names(fns) + colnames(res) <- if (is.null(fn_names)) fns else fn_names + if (pm_groups_exist) res <- cbind(group, res, row.names = NULL) + res +} + +pm_summarise.grouped_data <- function(.data, ...) { + pm_groups <- pm_get_groups(.data) + res <- pm_apply_grouped_function("pm_summarise", .data, drop = TRUE, ...) + res <- res[do.call(order, lapply(pm_groups, function(x) res[, x])), ] + rownames(res) <- NULL + res +} + +pm_transmute <- function(.data, ...) { + pm_check_is_dataframe(.data) + if ("grouped_data" %in% class(.data)) { + pm_transmute.grouped_data(.data, ...) + } else { + pm_transmute.default(.data, ...) + } +} + +pm_transmute.default <- function(.data, ...) { + conditions <- pm_deparse_dots(...) + mutated <- pm_mutate(.data, ...) + mutated[, names(conditions), drop = FALSE] +} + +pm_transmute.grouped_data <- function(.data, ...) { + rows <- rownames(.data) + res <- pm_apply_grouped_function("pm_transmute", .data, drop = TRUE, ...) + res[rows, ] +} +pm_dotdotdot <- function(..., .impute_names = FALSE) { + dots <- eval(substitute(alist(...))) + if (isTRUE(.impute_names)) { + pm_deparse_dots <- lapply(dots, deparse) + names_dots <- names(dots) + unnamed <- if (is.null(names_dots)) rep(TRUE, length(dots)) else nchar(names_dots) == 0L + names(dots)[unnamed] <- pm_deparse_dots[unnamed] + } + dots +} + +pm_deparse_dots <- function(...) { + vapply(substitute(...()), deparse, NA_character_) +} + +pm_deparse_var <- function(var, frame = if (is.null(pm_eval_env$env)) parent.frame() else pm_eval_env$env) { + sub_var <- eval(substitute(substitute(var)), frame) + if (is.symbol(sub_var)) var <- as.character(sub_var) + var +} + +pm_check_is_dataframe <- function(.data) { + parent_fn <- all.names(sys.call(-1L), max.names = 1L) + if (!is.data.frame(.data)) stop(parent_fn, " must be given a data.frame") + invisible() +} + +pm_is_wholenumber <- function(x) { + x %% 1L == 0L +} + +pm_seq2 <- function (from, to) { + if (length(from) != 1) stop("`from` must be length one") + if (length(to) != 1) stop("`to` must be length one") + if (from > to) integer() else seq.int(from, to) +} + +pm_is_function <- function(x, frame) { + res <- tryCatch( + is.function(x), + warning = function(w) FALSE, + error = function(e) FALSE + ) + if (isTRUE(res)) return(res) + res <- tryCatch( + is.function(eval(x)), + warning = function(w) FALSE, + error = function(e) FALSE + ) + if (isTRUE(res)) return(res) + res <- tryCatch( + is.function(eval(as.symbol(deparse(substitute(x))))), + warning = function(w) FALSE, + error = function(e) FALSE + ) + if (isTRUE(res)) return(res) + FALSE +} + +pm_collapse_to_sentence <- function(x) { + len_x <- length(x) + if (len_x == 0L) { + stop("Length of `x` is 0") + } else if (len_x == 1L) { + as.character(x) + } else if (len_x == 2L) { + paste(x, collapse = " and ") + } else { + paste(paste(x[1:(len_x - 1)], collapse = ", "), x[len_x], sep = " and ") + } +} +pm_where <- function(fn) { + if (!pm_is_function(fn)) { + stop(pm_deparse_var(fn), " is not a valid predicate function.") + } + preds <- unlist(lapply( + pm_select_env$.data, + function(x, fn) { + do.call("fn", list(x)) + }, + fn + )) + if (!is.logical(preds)) stop("`pm_where()` must be used with functions that return `TRUE` or `FALSE`.") + data_cols <- pm_select_env$get_colnames() + cols <- data_cols[preds] + which(data_cols %in% cols) +} + +pm_cume_dist <- function(x) { + rank(x, ties.method = "max", na.last = "keep") / sum(!is.na(x)) +} + +pm_dense_rank <- function(x) { + match(x, sort(unique(x))) +} + +pm_min_rank <- function(x) { + rank(x, ties.method = "min", na.last = "keep") +} + +pm_ntile <- function(x = pm_row_number(), pm_n) { + if (!missing(x)) x <- pm_row_number(x) + len <- length(x) - sum(is.na(x)) + pm_n <- as.integer(floor(pm_n)) + if (len == 0L) { + rep(NA_integer_, length(x)) + } else { + pm_n_larger <- as.integer(len %% pm_n) + pm_n_smaller <- as.integer(pm_n - pm_n_larger) + size <- len / pm_n + larger_size <- as.integer(ceiling(size)) + smaller_size <- as.integer(floor(size)) + larger_threshold <- larger_size * pm_n_larger + bins <- pm_if_else( + x <= larger_threshold, + (x + (larger_size - 1L)) / larger_size, + (x + (-larger_threshold + smaller_size - 1L)) / smaller_size + pm_n_larger + ) + as.integer(floor(bins)) + } +} + +pm_percent_rank <- function(x) { + (pm_min_rank(x) - 1) / (sum(!is.na(x)) - 1) +} + +pm_row_number <- function(x) { + if (missing(x)) seq_len(pm_n()) else rank(x, ties.method = "first", na.last = "keep") +} diff --git a/R/ab.R b/R/ab.R index ae1d0153e..1bc1ea9d4 100755 --- a/R/ab.R +++ b/R/ab.R @@ -47,9 +47,9 @@ #' #' European Commission Public Health PHARMACEUTICALS - COMMUNITY REGISTER: \url{http://ec.europa.eu/health/documents/community-register/html/atc.htm} #' @aliases ab -#' @return Character (vector) with class [`ab`]. Unknown values will return `NA`. +#' @return A [character] [vector] with additional class [`ab`] #' @seealso -#' * [antibiotics] for the dataframe that is being used to determine ATCs +#' * [antibiotics] for the [data.frame] that is being used to determine ATCs #' * [ab_from_text()] for a function to retrieve antimicrobial drugs from clinical text (from health care records) #' @inheritSection AMR Reference data publicly available #' @inheritSection AMR Read more on our website! @@ -101,23 +101,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) { x <- gsub('"', "", x, fixed = TRUE) x_bak_clean <- x if (already_regex == FALSE) { - # remove suffices - x_bak_clean <- gsub("_(MIC|RSI|DIS[CK])$", "", x_bak_clean) - # remove disk concentrations, like LVX_NM -> LVX - x_bak_clean <- gsub("_[A-Z]{2}[0-9_.]{0,3}$", "", x_bak_clean) - # remove part between brackets if that's followed by another string - x_bak_clean <- gsub("(.*)+ [(].*[)]", "\\1", x_bak_clean) - # keep only max 1 space - x_bak_clean <- trimws(gsub(" +", " ", x_bak_clean)) - # non-character, space or number should be a slash - x_bak_clean <- gsub("[^A-Z0-9 -]", "/", x_bak_clean) - # spaces around non-characters must be removed: amox + clav -> amox/clav - x_bak_clean <- gsub("(.*[A-Z0-9]) ([^A-Z0-9].*)", "\\1\\2", x_bak_clean) - x_bak_clean <- gsub("(.*[^A-Z0-9]) ([A-Z0-9].*)", "\\1\\2", x_bak_clean) - # remove hyphen after a starting "co" - x_bak_clean <- gsub("^CO-", "CO", x_bak_clean) - # replace text 'and' with a slash - x_bak_clean <- gsub(" AND ", "/", x_bak_clean) + x_bak_clean <- generalise_antibiotic_name(x_bak_clean) } x <- unique(x_bak_clean) @@ -133,7 +117,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) { } if (initial_search == TRUE) { - progress <- progress_estimated(n = length(x), n_min = ifelse(isTRUE(info), 25, length(x) + 1)) # start if n >= 25 + progress <- progress_ticker(n = length(x), n_min = ifelse(isTRUE(info), 25, length(x) + 1)) # start if n >= 25 on.exit(close(progress)) } @@ -161,7 +145,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) { } # exact name - found <- antibiotics[which(toupper(antibiotics$name) == x[i]), ]$ab + found <- antibiotics[which(AB_lookup$generalised_name == x[i]), ]$ab if (length(found) > 0) { x_new[i] <- found[1L] next @@ -189,8 +173,8 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) { } # exact LOINC code - loinc_found <- unlist(lapply(antibiotics$loinc, - function(s) x[i] %in% s)) + loinc_found <- unlist(lapply(AB_lookup$generalised_loinc, + function(s) generalise_antibiotic_name(x[i]) %in% s)) found <- antibiotics$ab[loinc_found == TRUE] if (length(found) > 0) { x_new[i] <- note_if_more_than_one_found(found, i, from_text) @@ -198,8 +182,8 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) { } # exact synonym - synonym_found <- unlist(lapply(antibiotics$synonyms, - function(s) x[i] %in% toupper(s))) + synonym_found <- unlist(lapply(AB_lookup$generalised_synonyms, + function(s) generalise_antibiotic_name(x[i]) %in% s)) found <- antibiotics$ab[synonym_found == TRUE] if (length(found) > 0) { x_new[i] <- note_if_more_than_one_found(found, i, from_text) @@ -207,8 +191,8 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) { } # exact abbreviation - abbr_found <- unlist(lapply(antibiotics$abbreviations, - function(a) x[i] %in% toupper(a))) + abbr_found <- unlist(lapply(AB_lookup$generalised_abbreviations, + function(s) generalise_antibiotic_name(x[i]) %in% s)) found <- antibiotics$ab[abbr_found == TRUE] if (length(found) > 0) { x_new[i] <- note_if_more_than_one_found(found, i, from_text) @@ -246,21 +230,21 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) { } # try if name starts with it - found <- antibiotics[which(antibiotics$name %like% paste0("^", x_spelling)), ]$ab + found <- antibiotics[which(AB_lookup$generalised_name %like% paste0("^", x_spelling)), ]$ab if (length(found) > 0) { x_new[i] <- note_if_more_than_one_found(found, i, from_text) next } # try if name ends with it - found <- antibiotics[which(antibiotics$name %like% paste0(x_spelling, "$")), ]$ab + found <- antibiotics[which(AB_lookup$generalised_name %like% paste0(x_spelling, "$")), ]$ab if (nchar(x[i]) >= 4 & length(found) > 0) { x_new[i] <- note_if_more_than_one_found(found, i, from_text) next } # and try if any synonym starts with it - synonym_found <- unlist(lapply(antibiotics$synonyms, - function(s) any(s %like% paste0("^", x_spelling)))) + synonym_found <- unlist(lapply(AB_lookup$generalised_synonyms, + function(s) any(generalise_antibiotic_name(s) %like% paste0("^", x_spelling)))) found <- antibiotics$ab[synonym_found == TRUE] if (length(found) > 0) { x_new[i] <- note_if_more_than_one_found(found, i, from_text) @@ -291,7 +275,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) { } # transform back from other languages and try again - x_translated <- paste(lapply(strsplit(x[i], "[^A-Z0-9 ]"), + x_translated <- paste(lapply(strsplit(x[i], "[^A-Z0-9]"), function(y) { for (i in seq_len(length(y))) { y[i] <- ifelse(tolower(y[i]) %in% tolower(translations_file$replacement), @@ -299,7 +283,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) { !isFALSE(translations_file$fixed)), "pattern"], y[i]) } - y + generalise_antibiotic_name(y) })[[1]], collapse = "/") x_translated_guess <- suppressWarnings(as.ab(x_translated, initial_search = FALSE)) @@ -317,7 +301,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) { y_name, y[i]) } - y + generalise_antibiotic_name(y) })[[1]], collapse = "/") x_translated_guess <- suppressWarnings(as.ab(x_translated, initial_search = FALSE)) @@ -449,9 +433,9 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) { call. = FALSE) } - x_result <- data.frame(x = x_bak_clean, stringsAsFactors = FALSE) %>% - left_join(data.frame(x = x, x_new = x_new, stringsAsFactors = FALSE), by = "x") %>% - pull(x_new) + x_result <- data.frame(x = x_bak_clean, stringsAsFactors = FALSE) %pm>% + pm_left_join(data.frame(x = x, x_new = x_new, stringsAsFactors = FALSE), by = "x") %pm>% + pm_pull(x_new) if (length(x_result) == 0) { x_result <- NA_character_ @@ -538,3 +522,25 @@ c.ab <- function(x, ...) { attributes(y) <- attributes(x) class_integrity_check(y, "antimicrobial code", antibiotics$ab) } + +generalise_antibiotic_name <- function(x) { + x <- toupper(x) + # remove suffices + x <- gsub("_(MIC|RSI|DIS[CK])$", "", x) + # remove disk concentrations, like LVX_NM -> LVX + x <- gsub("_[A-Z]{2}[0-9_.]{0,3}$", "", x) + # remove part between brackets if that's followed by another string + x <- gsub("(.*)+ [(].*[)]", "\\1", x) + # keep only max 1 space + x <- trimws(gsub(" +", " ", x)) + # non-character, space or number should be a slash + x <- gsub("[^A-Z0-9 -]", "/", x) + # spaces around non-characters must be removed: amox + clav -> amox/clav + x <- gsub("(.*[A-Z0-9]) ([^A-Z0-9].*)", "\\1\\2", x) + x <- gsub("(.*[^A-Z0-9]) ([A-Z0-9].*)", "\\1\\2", x) + # remove hyphen after a starting "co" + x <- gsub("^CO-", "CO", x) + # replace operators with a space + x <- gsub("(/| AND | WITH | W/|[+]|[-])+", " ", x) + x +} diff --git a/R/ab_from_text.R b/R/ab_from_text.R index c360c203e..1bc196780 100644 --- a/R/ab_from_text.R +++ b/R/ab_from_text.R @@ -47,7 +47,7 @@ #' With using `collapse`, this function will return a [character]:\cr #' `df %>% mutate(abx = ab_from_text(clinical_text, collapse = "|"))` #' @export -#' @return A [list], or a [character] if `collapse` is not `NULL` +#' @return A [list], or a [character] if `collapse` is not `NULL` #' @inheritSection AMR Read more on our website! #' @examples #' # mind the bad spelling of amoxicillin in this line, @@ -97,7 +97,7 @@ ab_from_text <- function(text, text <- tolower(as.character(text)) text_split_all <- strsplit(text, "[ ;.,:\\|]") - progress <- progress_estimated(n = length(text_split_all), n_min = 5) + progress <- progress_ticker(n = length(text_split_all), n_min = 5) on.exit(close(progress)) if (type %like% "(drug|ab|anti)") { diff --git a/R/ab_property.R b/R/ab_property.R index a7afbea25..61cd0309f 100644 --- a/R/ab_property.R +++ b/R/ab_property.R @@ -38,10 +38,10 @@ #' @rdname ab_property #' @name ab_property #' @return -#' - An [`integer`] in case of [ab_cid()] -#' - A named [`list`] in case of [ab_info()] and multiple [ab_synonyms()]/[ab_tradenames()] -#' - A [`double`] in case of [ab_ddd()] -#' - A [`character`] in all other cases +#' - An [integer] in case of [ab_cid()] +#' - A named [list] in case of [ab_info()] and multiple [ab_synonyms()]/[ab_tradenames()] +#' - A [double] in case of [ab_ddd()] +#' - A [character] in all other cases #' @export #' @seealso [antibiotics] #' @inheritSection AMR Reference data publicly available @@ -231,9 +231,9 @@ ab_validate <- function(x, property, ...) { error = function(e) stop(e$message, call. = FALSE)) x_bak <- x if (!all(x %in% antibiotics[, property])) { - x <- data.frame(ab = as.ab(x, ...), stringsAsFactors = FALSE) %>% - left_join(antibiotics, by = "ab") %>% - pull(property) + x <- data.frame(ab = as.ab(x, ...), stringsAsFactors = FALSE) %pm>% + pm_left_join(antibiotics, by = "ab") %pm>% + pm_pull(property) } if (property == "ab") { return(structure(x, class = property)) diff --git a/R/age.R b/R/age.R index 60191f378..9ec75281f 100755 --- a/R/age.R +++ b/R/age.R @@ -135,7 +135,7 @@ age <- function(x, reference = Sys.Date(), exact = FALSE, na.rm = FALSE) { #' filter(mo == as.mo("E. coli")) %>% #' group_by(age_group = age_groups(age)) %>% #' select(age_group, CIP) %>% -#' ggplot_rsi(x = "age_group") +#' ggplot_rsi(x = "age_group", minimum = 0) #' } age_groups <- function(x, split_at = c(12, 25, 55, 75), na.rm = FALSE) { stop_ifnot(is.numeric(x), "`x` must be numeric, not ", paste0(class(x), collapse = "/")) diff --git a/R/atc_online.R b/R/atc_online.R index fcf178bef..d18487ab9 100644 --- a/R/atc_online.R +++ b/R/atc_online.R @@ -21,12 +21,13 @@ #' Get ATC properties from WHOCC website #' +#' Gets data from the WHO to determine properties of an ATC (e.g. an antibiotic), such as the name, defined daily dose (DDD) or standard unit. #' @inheritSection lifecycle Stable lifecycle -#' @description Gets data from the WHO to determine properties of an ATC (e.g. an antibiotic) like name, defined daily dose (DDD) or standard unit. #' @param atc_code a character or character vector with ATC code(s) of antibiotic(s) #' @param property property of an ATC code. Valid values are `"ATC"`, `"Name"`, `"DDD"`, `"U"` (`"unit"`), `"Adm.R"`, `"Note"` and `groups`. For this last option, all hierarchical groups of an ATC code will be returned, see Examples. #' @param administration type of administration when using `property = "Adm.R"`, see Details -#' @param url url of website of the WHO. The sign `%s` can be used as a placeholder for ATC codes. +#' @param url url of website of the WHOCC. The sign `%s` can be used as a placeholder for ATC codes. +#' @param url_vet url of website of the WHOCC for veterinary medicine. The sign `%s` can be used as a placeholder for ATC_vet codes (that all start with "Q"). #' @param ... parameters to pass on to `atc_property` #' @details #' Options for parameter `administration`: @@ -74,7 +75,8 @@ atc_online_property <- function(atc_code, property, administration = "O", - url = "https://www.whocc.no/atc_ddd_index/?code=%s&showdescription=no") { + url = "https://www.whocc.no/atc_ddd_index/?code=%s&showdescription=no", + url_vet = "https://www.whocc.no/atcvet/atcvet_index/?code=%s&showdescription=no") { has_internet <- import_fn("has_internet", "curl") html_attr <- import_fn("html_attr", "rvest") @@ -122,25 +124,31 @@ atc_online_property <- function(atc_code, returnvalue <- rep(NA_character_, length(atc_code)) } - progress <- progress_estimated(n = length(atc_code), 3) + progress <- progress_ticker(n = length(atc_code), 3) on.exit(close(progress)) for (i in seq_len(length(atc_code))) { progress$tick() - - atc_url <- sub("%s", atc_code[i], url, fixed = TRUE) + + if (atc_code[i] %like% "^Q") { + # veterinary drugs, ATC_vet codes start with a "Q" + atc_url <- url_vet + } else { + atc_url <- url + } + atc_url <- sub("%s", atc_code[i], atc_url, fixed = TRUE) if (property == "groups") { - tbl <- read_html(atc_url) %>% - html_node("#content") %>% - html_children() %>% + tbl <- read_html(atc_url) %pm>% + html_node("#content") %pm>% + html_children() %pm>% html_node("a") # get URLS of items - hrefs <- tbl %>% html_attr("href") + hrefs <- tbl %pm>% html_attr("href") # get text of items - texts <- tbl %>% html_text() + texts <- tbl %pm>% html_text() # select only text items where URL like "code=" texts <- texts[grepl("?code=", tolower(hrefs), fixed = TRUE)] # last one is antibiotics, skip it @@ -148,9 +156,9 @@ atc_online_property <- function(atc_code, returnvalue <- c(list(texts), returnvalue) } else { - tbl <- read_html(atc_url) %>% - html_nodes("table") %>% - html_table(header = TRUE) %>% + tbl <- read_html(atc_url) %pm>% + html_nodes("table") %pm>% + html_table(header = TRUE) %pm>% as.data.frame(stringsAsFactors = FALSE) # case insensitive column names diff --git a/R/availability.R b/R/availability.R index 0430e3083..2baed04b4 100644 --- a/R/availability.R +++ b/R/availability.R @@ -23,10 +23,10 @@ #' #' Easy check for data availability of all columns in a data set. This makes it easy to get an idea of which antimicrobial combinations can be used for calculation with e.g. [susceptibility()] and [resistance()]. #' @inheritSection lifecycle Stable lifecycle -#' @param tbl a [`data.frame`] or [`list`] +#' @param tbl a [data.frame] or [list] #' @param width number of characters to present the visual availability, defaults to filling the width of the console -#' @details The function returns a [`data.frame`] with columns `"resistant"` and `"visual_resistance"`. The values in that columns are calculated with [resistance()]. -#' @return [`data.frame`] with column names of `tbl` as row names +#' @details The function returns a [data.frame] with columns `"resistant"` and `"visual_resistance"`. The values in that columns are calculated with [resistance()]. +#' @return [data.frame] with column names of `tbl` as row names #' @inheritSection AMR Read more on our website! #' @export #' @examples diff --git a/R/bug_drug_combinations.R b/R/bug_drug_combinations.R index 672d4b6b6..7feee2c8d 100644 --- a/R/bug_drug_combinations.R +++ b/R/bug_drug_combinations.R @@ -35,7 +35,7 @@ #' @details The function [format()] calculates the resistance per bug-drug combination. Use `combine_IR = FALSE` (default) to test R vs. S+I and `combine_IR = TRUE` to test R+I vs. S. #' @export #' @rdname bug_drug_combinations -#' @return The function [bug_drug_combinations()] returns a [`data.frame`] with columns "mo", "ab", "S", "I", "R" and "total". +#' @return The function [bug_drug_combinations()] returns a [data.frame] with columns "mo", "ab", "S", "I", "R" and "total". #' @source \strong{M39 Analysis and Presentation of Cumulative Antimicrobial Susceptibility Test Data, 4th Edition}, 2014, *Clinical and Laboratory Standards Institute (CLSI)*. . #' @inheritSection AMR Read more on our website! #' @examples @@ -160,32 +160,33 @@ format.bug_drug_combinations <- function(x, .data } - y <- x %>% + y <- x %pm>% create_var(ab = as.ab(x$ab), - ab_txt = give_ab_name(ab = x$ab, format = translate_ab, language = language)) %>% - group_by(ab, ab_txt, mo) %>% - summarise(isolates = sum(isolates, na.rm = TRUE), - total = sum(total, na.rm = TRUE)) %>% - ungroup() + ab_txt = give_ab_name(ab = x$ab, format = translate_ab, language = language)) %pm>% + pm_group_by(ab, ab_txt, mo) %pm>% + pm_summarise(isolates = sum(isolates, na.rm = TRUE), + total = sum(total, na.rm = TRUE)) %pm>% + pm_ungroup() - y <- y %>% + y <- y %pm>% create_var(txt = paste0(percentage(y$isolates / y$total, decimal.mark = decimal.mark, big.mark = big.mark), " (", trimws(format(y$isolates, big.mark = big.mark)), "/", - trimws(format(y$total, big.mark = big.mark)), ")")) %>% - select(ab, ab_txt, mo, txt) %>% - arrange(mo) + trimws(format(y$total, big.mark = big.mark)), ")")) %pm>% + pm_select(ab, ab_txt, mo, txt) %pm>% + pm_arrange(mo) # replace tidyr::pivot_wider() from here for (i in unique(y$mo)) { mo_group <- y[which(y$mo == i), c("ab", "txt")] colnames(mo_group) <- c("ab", i) rownames(mo_group) <- NULL - y <- y %>% - left_join(mo_group, by = "ab") + y <- y %pm>% + pm_left_join(mo_group, by = "ab") } - y <- y %>% - distinct(ab, .keep_all = TRUE) %>% - select(-mo, -txt) %>% + y <<- y + y <- y %pm>% + pm_distinct(ab, .keep_all = TRUE) %pm>% + pm_select(-mo, -txt) %pm>% # replace tidyr::pivot_wider() until here remove_NAs() @@ -193,21 +194,22 @@ format.bug_drug_combinations <- function(x, .data[, c("ab_group", "ab_txt", colnames(.data)[!colnames(.data) %in% c("ab_group", "ab_txt", "ab")])] } - y <- y %>% - create_var(ab_group = ab_group(y$ab, language = language)) %>% - select_ab_vars() %>% - arrange(ab_group, ab_txt) - y <- y %>% - create_var(ab_group = ifelse(y$ab_group != lag(y$ab_group) | is.na(lag(y$ab_group)), y$ab_group, "")) + y <- y %pm>% + create_var(ab_group = ab_group(y$ab, language = language)) %pm>% + select_ab_vars() %pm>% + pm_arrange(ab_group, ab_txt) + y <- y %pm>% + create_var(ab_group = ifelse(y$ab_group != lag(y$ab_group) | is.na(pm_lag(y$ab_group)), y$ab_group, "")) if (add_ab_group == FALSE) { - y <- y %>% - select(-ab_group) %>% - rename("Drug" = ab_txt) + y <- y %pm>% + pm_select(-ab_group) %pm>% + pm_rename("Drug" = ab_txt) colnames(y)[1] <- translate_AMR(colnames(y)[1], language = get_locale(), only_unknown = FALSE) } else { - y <- y %>% rename("Group" = ab_group, - "Drug" = ab_txt) + y <- y %pm>% + pm_rename("Group" = ab_group, + "Drug" = ab_txt) colnames(y)[1:2] <- translate_AMR(colnames(y)[1:2], language = get_locale(), only_unknown = FALSE) } diff --git a/R/catalogue_of_life.R b/R/catalogue_of_life.R index 100b5af54..602ce30b6 100755 --- a/R/catalogue_of_life.R +++ b/R/catalogue_of_life.R @@ -80,7 +80,7 @@ NULL #' This function returns information about the included data from the Catalogue of Life. #' @seealso [microorganisms] #' @details For DSMZ, see [microorganisms]. -#' @return a [`list`], which prints in pretty format +#' @return a [list], which prints in pretty format #' @inheritSection catalogue_of_life Catalogue of Life #' @inheritSection AMR Read more on our website! #' @export @@ -92,12 +92,12 @@ catalogue_of_life_version <- function() { lst <- list(catalogue_of_life = list(version = gsub("{year}", catalogue_of_life$year, catalogue_of_life$version, fixed = TRUE), url = gsub("{year}", catalogue_of_life$year, catalogue_of_life$url_CoL, fixed = TRUE), - n = nrow(filter(microorganisms, source == "CoL"))), + n = nrow(pm_filter(microorganisms, source == "CoL"))), deutsche_sammlung_von_mikroorganismen_und_zellkulturen = list(version = "Prokaryotic Nomenclature Up-to-Date from DSMZ", url = catalogue_of_life$url_DSMZ, yearmonth = catalogue_of_life$yearmonth_DSMZ, - n = nrow(filter(microorganisms, source == "DSMZ"))), + n = nrow(pm_filter(microorganisms, source == "DSMZ"))), total_included = list( n_total_species = nrow(microorganisms), diff --git a/R/count.R b/R/count.R index 068c825b6..967dedeb8 100755 --- a/R/count.R +++ b/R/count.R @@ -37,7 +37,7 @@ #' The function [count_df()] takes any variable from `data` that has an [`rsi`] class (created with [as.rsi()]) and counts the number of S's, I's and R's. It also supports grouped variables. The function [rsi_df()] works exactly like [count_df()], but adds the percentage of S, I and R. #' @inheritSection proportion Combination therapy #' @seealso [`proportion_*`][proportion] to calculate microbial resistance and susceptibility. -#' @return An [`integer`] +#' @return An [integer] #' @rdname count #' @name count #' @export diff --git a/R/data.R b/R/data.R index ba40b4395..b2080bff8 100755 --- a/R/data.R +++ b/R/data.R @@ -23,7 +23,7 @@ #' #' Two data sets containing all antibiotics/antimycotics and antivirals. Use [as.ab()] or one of the [ab_property()] functions to retrieve values from the [antibiotics] data set. Three identifiers are included in this data set: an antibiotic ID (`ab`, primarily used in this package) as defined by WHONET/EARS-Net, an ATC code (`atc`) as defined by the WHO, and a Compound ID (`cid`) as found in PubChem. Other properties in this data set are derived from one or more of these codes. #' @format -#' ### For the [antibiotics] data set: a [`data.frame`] with `r nrow(antibiotics)` observations and `r ncol(antibiotics)` variables: +#' ### For the [antibiotics] data set: a [data.frame] with `r nrow(antibiotics)` observations and `r ncol(antibiotics)` variables: #' - `ab`\cr Antibiotic ID as used in this package (like `AMC`), using the official EARS-Net (European Antimicrobial Resistance Surveillance Network) codes where available #' - `atc`\cr ATC code (Anatomical Therapeutic Chemical) as defined by the WHOCC, like `J01CR02` #' - `cid`\cr Compound ID as found in PubChem @@ -39,7 +39,7 @@ #' - `iv_units`\cr Units of `iv_ddd` #' - `loinc`\cr All LOINC codes (Logical Observation Identifiers Names and Codes) associated with the name of the antimicrobial agent. Use [ab_loinc()] to retrieve them quickly, see [ab_property()]. #' -#' ### For the [antivirals] data set: a [`data.frame`] with `r nrow(antivirals)` observations and `r ncol(antivirals)` variables: +#' ### For the [antivirals] data set: a [data.frame] with `r nrow(antivirals)` observations and `r ncol(antivirals)` variables: #' - `atc`\cr ATC code (Anatomical Therapeutic Chemical) as defined by the WHOCC #' - `cid`\cr Compound ID as found in PubChem #' - `name`\cr Official name as used by WHONET/EARS-Net or the WHO @@ -81,7 +81,7 @@ #' #' A data set containing the microbial taxonomy of six kingdoms from the Catalogue of Life. MO codes can be looked up using [as.mo()]. #' @inheritSection catalogue_of_life Catalogue of Life -#' @format A [`data.frame`] with `r format(nrow(microorganisms), big.mark = ",")` observations and `r ncol(microorganisms)` variables: +#' @format A [data.frame] with `r format(nrow(microorganisms), big.mark = ",")` observations and `r ncol(microorganisms)` variables: #' - `mo`\cr ID of microorganism as used by this package #' - `fullname`\cr Full name, like `"Escherichia coli"` #' - `kingdom`, `phylum`, `class`, `order`, `family`, `genus`, `species`, `subspecies`\cr Taxonomic rank of the microorganism @@ -99,7 +99,7 @@ #' - 1 entry of *Blastocystis* (*Blastocystis hominis*), although it officially does not exist (Noel *et al.* 2005, PMID 15634993) #' - 5 other 'undefined' entries (unknown, unknown Gram negatives, unknown Gram positives, unknown yeast and unknown fungus) #' - 6 families under the Enterobacterales order, according to Adeolu *et al.* (2016, PMID 27620848), that are not (yet) in the Catalogue of Life -#' - `r format(nrow(filter(microorganisms, source == "DSMZ")), big.mark = ",")` species from the DSMZ (Deutsche Sammlung von Mikroorganismen und Zellkulturen) since the DSMZ contain the latest taxonomic information based on recent publications +#' - `r format(nrow(subset(microorganisms, source == "DSMZ")), big.mark = ",")` species from the DSMZ (Deutsche Sammlung von Mikroorganismen und Zellkulturen) since the DSMZ contain the latest taxonomic information based on recent publications #' #' ### Direct download #' This data set is available as 'flat file' for use even without R - you can find the file here: @@ -136,7 +136,7 @@ catalogue_of_life <- list( #' #' A data set containing old (previously valid or accepted) taxonomic names according to the Catalogue of Life. This data set is used internally by [as.mo()]. #' @inheritSection catalogue_of_life Catalogue of Life -#' @format A [`data.frame`] with `r format(nrow(microorganisms.old), big.mark = ",")` observations and `r ncol(microorganisms.old)` variables: +#' @format A [data.frame] with `r format(nrow(microorganisms.old), big.mark = ",")` observations and `r ncol(microorganisms.old)` variables: #' - `fullname`\cr Old full taxonomic name of the microorganism #' - `fullname_new`\cr New full taxonomic name of the microorganism #' - `ref`\cr Author(s) and year of concerning scientific publication @@ -152,7 +152,7 @@ catalogue_of_life <- list( #' Data set with `r format(nrow(microorganisms.codes), big.mark = ",")` common microorganism codes #' #' A data set containing commonly used codes for microorganisms, from laboratory systems and WHONET. Define your own with [set_mo_source()]. They will all be searched when using [as.mo()] and consequently all the [`mo_*`][mo_property()] functions. -#' @format A [`data.frame`] with `r format(nrow(microorganisms.codes), big.mark = ",")` observations and `r ncol(microorganisms.codes)` variables: +#' @format A [data.frame] with `r format(nrow(microorganisms.codes), big.mark = ",")` observations and `r ncol(microorganisms.codes)` variables: #' - `code`\cr Commonly used code of a microorganism #' - `mo`\cr ID of the microorganism in the [microorganisms] data set #' @inheritSection AMR Reference data publicly available @@ -164,7 +164,7 @@ catalogue_of_life <- list( #' Data set with `r format(nrow(example_isolates), big.mark = ",")` example isolates #' #' A data set containing `r format(nrow(example_isolates), big.mark = ",")` microbial isolates with their full antibiograms. The data set reflects reality and can be used to practice AMR analysis. For examples, please read [the tutorial on our website](https://msberends.github.io/AMR/articles/AMR.html). -#' @format A [`data.frame`] with `r format(nrow(example_isolates), big.mark = ",")` observations and `r ncol(example_isolates)` variables: +#' @format A [data.frame] with `r format(nrow(example_isolates), big.mark = ",")` observations and `r ncol(example_isolates)` variables: #' - `date`\cr date of receipt at the laboratory #' - `hospital_id`\cr ID of the hospital, from A to D #' - `ward_icu`\cr logical to determine if ward is an intensive care unit @@ -182,7 +182,7 @@ catalogue_of_life <- list( #' Data set with unclean data #' #' A data set containing `r format(nrow(example_isolates_unclean), big.mark = ",")` microbial isolates that are not cleaned up and consequently not ready for AMR analysis. This data set can be used for practice. -#' @format A [`data.frame`] with `r format(nrow(example_isolates_unclean), big.mark = ",")` observations and `r ncol(example_isolates_unclean)` variables: +#' @format A [data.frame] with `r format(nrow(example_isolates_unclean), big.mark = ",")` observations and `r ncol(example_isolates_unclean)` variables: #' - `patient_id`\cr ID of the patient #' - `date`\cr date of receipt at the laboratory #' - `hospital`\cr ID of the hospital, from A to C @@ -195,7 +195,7 @@ catalogue_of_life <- list( #' Data set with `r format(nrow(WHONET), big.mark = ",")` isolates - WHONET example #' #' This example data set has the exact same structure as an export file from WHONET. Such files can be used with this package, as this example data set shows. The antibiotic results are from our [example_isolates] data set. All patient names are created using online surname generators and are only in place for practice purposes. -#' @format A [`data.frame`] with `r format(nrow(WHONET), big.mark = ",")` observations and `r ncol(WHONET)` variables: +#' @format A [data.frame] with `r format(nrow(WHONET), big.mark = ",")` observations and `r ncol(WHONET)` variables: #' - `Identification number`\cr ID of the sample #' - `Specimen number`\cr ID of the specimen #' - `Organism`\cr Name of the microorganism. Before analysis, you should transform this to a valid microbial class, using [as.mo()]. @@ -229,7 +229,7 @@ catalogue_of_life <- list( #' Data set for R/SI interpretation #' #' Data set to interpret MIC and disk diffusion to R/SI values. Included guidelines are CLSI (`r min(as.integer(gsub("[^0-9]", "", subset(rsi_translation, guideline %like% "CLSI")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(rsi_translation, guideline %like% "CLSI")$guideline)))`) and EUCAST (`r min(as.integer(gsub("[^0-9]", "", subset(rsi_translation, guideline %like% "EUCAST")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(rsi_translation, guideline %like% "EUCAST")$guideline)))`). Use [as.rsi()] to transform MICs or disks measurements to R/SI values. -#' @format A [`data.frame`] with `r format(nrow(rsi_translation), big.mark = ",")` observations and `r ncol(rsi_translation)` variables: +#' @format A [data.frame] with `r format(nrow(rsi_translation), big.mark = ",")` observations and `r ncol(rsi_translation)` variables: #' - `guideline`\cr Name of the guideline #' - `method`\cr Either "MIC" or "DISK" #' - `site`\cr Body site, e.g. "Oral" or "Respiratory" @@ -249,7 +249,7 @@ catalogue_of_life <- list( #' Data set with bacterial intrinsic resistance #' #' Data set containing defined intrinsic resistance by EUCAST of all bug-drug combinations. -#' @format A [`data.frame`] with `r format(nrow(intrinsic_resistant), big.mark = ",")` observations and `r ncol(intrinsic_resistant)` variables: +#' @format A [data.frame] with `r format(nrow(intrinsic_resistant), big.mark = ",")` observations and `r ncol(intrinsic_resistant)` variables: #' - `microorganism`\cr Name of the microorganism #' - `antibiotic`\cr Name of the antibiotic drug #' @details The repository of this `AMR` package contains a file comprising this exact data set: . This file **allows for machine reading EUCAST guidelines about intrinsic resistance**, which is almost impossible with the Excel and PDF files distributed by EUCAST. The file is updated automatically. diff --git a/R/disk.R b/R/disk.R index 54d67fcdd..143e2f48d 100644 --- a/R/disk.R +++ b/R/disk.R @@ -27,7 +27,7 @@ #' @param x vector #' @param na.rm a logical indicating whether missing values should be removed #' @details Interpret disk values as RSI values with [as.rsi()]. It supports guidelines from EUCAST and CLSI. -#' @return An [`integer`] with additional new class [`disk`] +#' @return An [integer] with additional class [`disk`] #' @aliases disk #' @export #' @seealso [as.rsi()] @@ -54,7 +54,7 @@ #' } as.disk <- function(x, na.rm = FALSE) { if (!is.disk(x)) { - x <- x %>% unlist() + x <- x %pm>% unlist() if (na.rm == TRUE) { x <- x[!is.na(x)] } @@ -89,8 +89,8 @@ as.disk <- function(x, na.rm = FALSE) { na_after <- length(x[is.na(x)]) if (na_before != na_after) { - list_missing <- x.bak[is.na(x) & !is.na(x.bak)] %>% - unique() %>% + list_missing <- x.bak[is.na(x) & !is.na(x.bak)] %pm>% + unique() %pm>% sort() list_missing <- paste0('"', list_missing, '"', collapse = ", ") warning(na_after - na_before, " results truncated (", diff --git a/R/eucast_rules.R b/R/eucast_rules.R index cf1db6376..baadef37e 100755 --- a/R/eucast_rules.R +++ b/R/eucast_rules.R @@ -136,7 +136,7 @@ EUCAST_VERSION_EXPERT_RULES <- "3.1, 2016" #' @aliases EUCAST #' @rdname eucast_rules #' @export -#' @return The input of `x`, possibly with edited values of antibiotics. Or, if `verbose = TRUE`, a [`data.frame`] with all original and new values of the affected bug-drug combinations. +#' @return The input of `x`, possibly with edited values of antibiotics. Or, if `verbose = TRUE`, a [data.frame] with all original and new values of the affected bug-drug combinations. #' @source #' - EUCAST Expert Rules. Version 2.0, 2012. \cr #' Leclercq et al. **EUCAST expert rules in antimicrobial susceptibility testing.** *Clin Microbiol Infect.* 2013;19(2):141-60. \cr @@ -442,8 +442,8 @@ eucast_rules <- function(x, warning = function(w) { if (w$message %like% "invalid factor level") { xyz <- sapply(cols, function(col) { - x_original[, col] <<- factor(x = as.character(pull(x_original, col)), levels = c(to, levels(pull(x_original, col)))) - x[, col] <<- factor(x = as.character(pull(x, col)), levels = c(to, levels(pull(x, col)))) + x_original[, col] <<- factor(x = as.character(pm_pull(x_original, col)), levels = c(to, levels(pm_pull(x_original, col)))) + x[, col] <<- factor(x = as.character(pm_pull(x, col)), levels = c(to, levels(pm_pull(x, col)))) invisible() }) x_original[rows, cols] <<- to @@ -492,12 +492,12 @@ eucast_rules <- function(x, rule_name = font_stripstyle(rule[3]), stringsAsFactors = FALSE) colnames(verbose_new) <- c("row", "col", "mo_fullname", "old", "new", "rule", "rule_group", "rule_name") - verbose_new <- verbose_new %>% filter(old != new | is.na(old)) + verbose_new <- verbose_new %pm>% pm_filter(old != new | is.na(old)) # save changes to data set 'verbose_info' verbose_info <<- rbind(verbose_info, verbose_new) # count adds and changes - track_changes$added <- track_changes$added + verbose_new %>% filter(is.na(old)) %>% nrow() - track_changes$changed <- track_changes$changed + verbose_new %>% filter(!is.na(old)) %>% nrow() + track_changes$added <- track_changes$added + verbose_new %pm>% pm_filter(is.na(old)) %pm>% nrow() + track_changes$changed <- track_changes$changed + verbose_new %pm>% pm_filter(!is.na(old)) %pm>% nrow() } # after the applied changes: return list with counts of added and changed return(track_changes) @@ -520,13 +520,13 @@ eucast_rules <- function(x, # save original table, with the new .rowid column x_original.bak <- x # keep only unique rows for MO and ABx - x <- x %>% distinct(`.rowid`, .keep_all = TRUE) + x <- x %pm>% pm_distinct(`.rowid`, .keep_all = TRUE) x_original <- x # join to microorganisms data set x <- as.data.frame(x, stringsAsFactors = FALSE) x[, col_mo] <- as.mo(x[, col_mo, drop = TRUE]) - x <- x %>% + x <- x %pm>% left_join_microorganisms(by = col_mo, suffix = c("_oldcols", "")) x$gramstain <- mo_gramstain(x[, col_mo, drop = TRUE], language = NULL) x$genus_species <- paste(x$genus, x$species) @@ -568,12 +568,12 @@ eucast_rules <- function(x, y[y != "" & y %in% colnames(df)] } get_antibiotic_names <- function(x) { - x <- x %>% - strsplit(",") %>% - unlist() %>% - trimws() %>% - sapply(function(x) if (x %in% antibiotics$ab) ab_name(x, language = NULL, tolower = TRUE) else x) %>% - sort() %>% + x <- x %pm>% + strsplit(",") %pm>% + unlist() %pm>% + trimws() %pm>% + sapply(function(x) if (x %in% antibiotics$ab) ab_name(x, language = NULL, tolower = TRUE) else x) %pm>% + sort() %pm>% paste(collapse = ", ") x <- gsub("_", " ", x, fixed = TRUE) x <- gsub("except CAZ", paste("except", ab_name("CAZ", language = NULL, tolower = TRUE)), x, fixed = TRUE) @@ -856,17 +856,17 @@ eucast_rules <- function(x, wouldve <- "" } - verbose_info <- verbose_info %>% - arrange(row, rule_group, rule_name, col) + verbose_info <- verbose_info %pm>% + pm_arrange(row, rule_group, rule_name, col) cat(paste0("\n", font_grey(strrep("-", options()$width - 1)), "\n")) cat(font_bold(paste("The rules", paste0(wouldve, "affected"), - formatnr(n_distinct(verbose_info$row)), + formatnr(pm_n_distinct(verbose_info$row)), "out of", formatnr(nrow(x_original)), "rows, making a total of", formatnr(nrow(verbose_info)), "edits\n"))) - n_added <- verbose_info %>% filter(is.na(old)) %>% nrow() - n_changed <- verbose_info %>% filter(!is.na(old)) %>% nrow() + n_added <- verbose_info %pm>% pm_filter(is.na(old)) %pm>% nrow() + n_changed <- verbose_info %pm>% pm_filter(!is.na(old)) %pm>% nrow() # print added values ---- if (n_added == 0) { @@ -875,15 +875,15 @@ eucast_rules <- function(x, colour <- font_green # is function } cat(colour(paste0("=> ", wouldve, "added ", - font_bold(formatnr(verbose_info %>% - filter(is.na(old)) %>% + font_bold(formatnr(verbose_info %pm>% + pm_filter(is.na(old)) %pm>% nrow()), "test results"), "\n"))) if (n_added > 0) { - added_summary <- verbose_info %>% - filter(is.na(old)) %>% - group_by(new) %>% - summarise(n = n()) + added_summary <- verbose_info %pm>% + pm_filter(is.na(old)) %pm>% + pm_group_by(new) %pm>% + pm_summarise(n = pm_n()) cat(paste(" -", paste0(formatnr(added_summary$n), " test result", ifelse(added_summary$n > 1, "s", ""), " added as ", added_summary$new), collapse = "\n")) @@ -899,15 +899,15 @@ eucast_rules <- function(x, cat("\n") } cat(colour(paste0("=> ", wouldve, "changed ", - font_bold(formatnr(verbose_info %>% - filter(!is.na(old)) %>% + font_bold(formatnr(verbose_info %pm>% + pm_filter(!is.na(old)) %pm>% nrow()), "test results"), "\n"))) if (n_changed > 0) { - changed_summary <- verbose_info %>% - filter(!is.na(old)) %>% - group_by(old, new) %>% - summarise(n = n()) + changed_summary <- verbose_info %pm>% + pm_filter(!is.na(old)) %pm>% + pm_group_by(old, new) %pm>% + pm_summarise(n = pm_n()) cat(paste(" -", paste0(formatnr(changed_summary$n), " test result", ifelse(changed_summary$n > 1, "s", ""), " changed from ", changed_summary$old, " to ", changed_summary$new), collapse = "\n")) @@ -936,8 +936,8 @@ eucast_rules <- function(x, # reset original attributes x_original <- x_original[, c(col_mo, cols_ab, ".rowid"), drop = FALSE] x_original.bak <- x_original.bak[, setdiff(colnames(x_original.bak), c(col_mo, cols_ab)), drop = FALSE] - x_original.bak <- x_original.bak %>% - left_join(x_original, by = ".rowid") + x_original.bak <- x_original.bak %pm>% + pm_left_join(x_original, by = ".rowid") x_original.bak <- x_original.bak[, old_cols, drop = FALSE] attributes(x_original.bak) <- old_attributes x_original.bak diff --git a/R/filter_ab_class.R b/R/filter_ab_class.R index 2249697e3..11abcc1d1 100644 --- a/R/filter_ab_class.R +++ b/R/filter_ab_class.R @@ -336,14 +336,14 @@ find_ab_group <- function(ab_class) { "macrolide", "tetracycline"), paste0(ab_class, "s"), - antibiotics %>% + antibiotics %pm>% subset(group %like% ab_class | atc_group1 %like% ab_class | - atc_group2 %like% ab_class) %>% - pull(group) %>% - unique() %>% - tolower() %>% - sort() %>% + atc_group2 %like% ab_class) %pm>% + pm_pull(group) %pm>% + unique() %pm>% + tolower() %pm>% + sort() %pm>% paste(collapse = "/") ) } diff --git a/R/first_isolate.R b/R/first_isolate.R index 4403ee20d..ae1ff5961 100755 --- a/R/first_isolate.R +++ b/R/first_isolate.R @@ -23,7 +23,7 @@ #' #' Determine first (weighted) isolates of all microorganisms of every patient per episode and (if needed) per specimen type. #' @inheritSection lifecycle Stable lifecycle -#' @param x a [`data.frame`] containing isolates. +#' @param x a [data.frame] containing isolates. #' @param col_date column name of the result date (or date that is was received on the lab), defaults to the first column of with a date class #' @param col_patient_id column name of the unique IDs of the patients, defaults to the first column that starts with 'patient' or 'patid' (case insensitive) #' @param col_mo column name of the IDs of the microorganisms (see [as.mo()]), defaults to the first column of class [`mo`]. Values will be coerced using [as.mo()]. @@ -147,7 +147,7 @@ first_isolate <- function(x, dots <- unlist(list(...)) if (length(dots) != 0) { # backwards compatibility with old parameters - dots.names <- dots %>% names() + dots.names <- dots %pm>% names() if ("filter_specimen" %in% dots.names) { specimen_group <- dots[which(dots.names == "filter_specimen")] } @@ -269,16 +269,16 @@ first_isolate <- function(x, row.end <- nrow(x) } else { # filtering on specimen and only analyse these rows to save time - x <- x[order(pull(x, col_specimen), + x <- x[order(pm_pull(x, col_specimen), x$newvar_patient_id, x$newvar_genus_species, x$newvar_date), ] rownames(x) <- NULL suppressWarnings( - row.start <- which(x %>% pull(col_specimen) == specimen_group) %>% min(na.rm = TRUE) + row.start <- which(x %pm>% pm_pull(col_specimen) == specimen_group) %pm>% min(na.rm = TRUE) ) suppressWarnings( - row.end <- which(x %>% pull(col_specimen) == specimen_group) %>% max(na.rm = TRUE) + row.end <- which(x %pm>% pm_pull(col_specimen) == specimen_group) %pm>% max(na.rm = TRUE) ) } @@ -319,8 +319,8 @@ first_isolate <- function(x, } # Analysis of first isolate ---- - x$other_pat_or_mo <- ifelse(x$newvar_patient_id == lag(x$newvar_patient_id) & - x$newvar_genus_species == lag(x$newvar_genus_species), + x$other_pat_or_mo <- ifelse(x$newvar_patient_id == pm_lag(x$newvar_patient_id) & + x$newvar_genus_species == pm_lag(x$newvar_genus_species), FALSE, TRUE) x$episode_group <- paste(x$newvar_patient_id, x$newvar_genus_species) @@ -349,13 +349,13 @@ first_isolate <- function(x, type_param <- type x$other_key_ab <- !key_antibiotics_equal(y = x$newvar_key_ab, - z = lag(x$newvar_key_ab), + z = pm_lag(x$newvar_key_ab), type = type_param, ignore_I = ignore_I, points_threshold = points_threshold, info = info) # with key antibiotics - x$newvar_first_isolate <- if_else(x$newvar_row_index_sorted >= row.start & + x$newvar_first_isolate <- pm_if_else(x$newvar_row_index_sorted >= row.start & x$newvar_row_index_sorted <= row.end & x$newvar_genus_species != "" & (x$other_pat_or_mo | x$more_than_episode_ago | x$other_key_ab), @@ -364,7 +364,7 @@ first_isolate <- function(x, } else { # no key antibiotics - x$newvar_first_isolate <- if_else(x$newvar_row_index_sorted >= row.start & + x$newvar_first_isolate <- pm_if_else(x$newvar_row_index_sorted >= row.start & x$newvar_row_index_sorted <= row.end & x$newvar_genus_species != "" & (x$other_pat_or_mo | x$more_than_episode_ago), @@ -413,8 +413,14 @@ first_isolate <- function(x, if (info == TRUE) { n_found <- sum(x$newvar_first_isolate, na.rm = TRUE) - p_found_total <- percentage(n_found / nrow(x[which(!is.na(x$newvar_mo)), , drop = FALSE])) - p_found_scope <- percentage(n_found / scope.size) + p_found_total <- percentage(n_found / nrow(x[which(!is.na(x$newvar_mo)), , drop = FALSE]), digits = 1) + p_found_scope <- percentage(n_found / scope.size, digits = 1) + if (!p_found_total %like% "[.]") { + p_found_total <- gsub("%", ".0%", p_found_total, fixed = TRUE) + } + if (!p_found_scope %like% "[.]") { + p_found_scope <- gsub("%", ".0%", p_found_scope, fixed = TRUE) + } # mark up number of found n_found <- format(n_found, big.mark = big.mark, decimal.mark = decimal.mark) if (p_found_total != p_found_scope) { diff --git a/R/ggplot_rsi.R b/R/ggplot_rsi.R index dcf4e0725..044c23c99 100755 --- a/R/ggplot_rsi.R +++ b/R/ggplot_rsi.R @@ -23,7 +23,7 @@ #' #' Use these functions to create bar plots for antimicrobial resistance analysis. All functions rely on [ggplot2][ggplot2::ggplot()] functions. #' @inheritSection lifecycle Maturing lifecycle -#' @param data a [`data.frame`] with column(s) of class [`rsi`] (see [as.rsi()]) +#' @param data a [data.frame] with column(s) of class [`rsi`] (see [as.rsi()]) #' @param position position adjustment of bars, either `"fill"`, `"stack"` or `"dodge"` #' @param x variable to show on x axis, either `"antibiotic"` (default) or `"interpretation"` or a grouping variable #' @param fill variable to categorise using the plots legend, either `"antibiotic"` (default) or `"interpretation"` or a grouping variable @@ -147,6 +147,7 @@ ggplot_rsi <- function(data, translate_ab = "name", combine_SI = TRUE, combine_IR = FALSE, + minimum = 30, language = get_locale(), nrow = NULL, colours = c(S = "#61a8ff", @@ -194,6 +195,7 @@ ggplot_rsi <- function(data, p <- ggplot2::ggplot(data = data) + geom_rsi(position = position, x = x, fill = fill, translate_ab = translate_ab, + minimum = minimum, language = language, combine_SI = combine_SI, combine_IR = combine_IR, ...) + theme_rsi() @@ -215,6 +217,8 @@ ggplot_rsi <- function(data, p <- p + labels_rsi_count(position = position, x = x, translate_ab = translate_ab, + minimum = minimum, + language = language, combine_SI = combine_SI, combine_IR = combine_IR, datalabels.size = datalabels.size, @@ -240,13 +244,14 @@ geom_rsi <- function(position = NULL, x = c("antibiotic", "interpretation"), fill = "interpretation", translate_ab = "name", + minimum = 30, language = get_locale(), combine_SI = TRUE, combine_IR = FALSE, ...) { stop_ifnot_installed("ggplot2") - stop_if(is.data.frame(position), "`position` is invalid. Did you accidentally use '%>%' instead of '+'?") + stop_if(is.data.frame(position), "`position` is invalid. Did you accidentally use '%pm>%' instead of '+'?") y <- "value" if (missing(position) | is.null(position)) { @@ -280,6 +285,7 @@ geom_rsi <- function(position = NULL, rsi_df(data = x, translate_ab = translate_ab, language = language, + minimum = minimum, combine_SI = combine_SI, combine_IR = combine_IR) }) @@ -365,6 +371,8 @@ theme_rsi <- function() { labels_rsi_count <- function(position = NULL, x = "antibiotic", translate_ab = "name", + minimum = 30, + language = get_locale(), combine_SI = TRUE, combine_IR = FALSE, datalabels.size = 3, @@ -389,12 +397,14 @@ labels_rsi_count <- function(position = NULL, transformed <- rsi_df(data = x, translate_ab = translate_ab, combine_SI = combine_SI, - combine_IR = combine_IR) + combine_IR = combine_IR, + minimum = minimum, + language = language) transformed$gr <- transformed[, x_name, drop = TRUE] - transformed %>% - group_by(gr) %>% - mutate(lbl = paste0("n=", isolates)) %>% - ungroup() %>% - select(-gr) + transformed %pm>% + pm_group_by(gr) %pm>% + pm_mutate(lbl = paste0("n=", isolates)) %pm>% + pm_ungroup() %pm>% + pm_select(-gr) }) } diff --git a/R/guess_ab_col.R b/R/guess_ab_col.R index 55256e2d5..0efa35ef0 100755 --- a/R/guess_ab_col.R +++ b/R/guess_ab_col.R @@ -23,7 +23,7 @@ #' #' This tries to find a column name in a data set based on information from the [antibiotics] data set. Also supports WHONET abbreviations. #' @inheritSection lifecycle Maturing lifecycle -#' @param x a [`data.frame`] +#' @param x a [data.frame] #' @param search_string a text to search `x` for, will be checked with [as.ab()] if this value is not a column in `x` #' @param verbose a logical to indicate whether additional info should be printed #' @details You can look for an antibiotic (trade) name or abbreviation and it will search `x` and the [antibiotics] data set for any column containing a name or code of that antibiotic. **Longer columns names take precendence over shorter column names.** @@ -82,7 +82,7 @@ guess_ab_col <- function(x = NULL, search_string = NULL, verbose = FALSE) { } else { # sort colnames on length - longest first - cols <- colnames(x[, x %>% colnames() %>% nchar() %>% order() %>% rev()]) + cols <- colnames(x[, x %pm>% colnames() %pm>% nchar() %pm>% order() %pm>% rev()]) df_trans <- data.frame(cols = cols, abs = suppressWarnings(as.ab(cols)), stringsAsFactors = FALSE) @@ -147,7 +147,7 @@ get_column_abx <- function(x, names(x) <- df_trans$abcode # add from self-defined dots (...): - # such as get_column_abx(example_isolates %>% rename(thisone = AMX), amox = "thisone") + # such as get_column_abx(example_isolates %pm>% rename(thisone = AMX), amox = "thisone") dots <- list(...) if (length(dots) > 0) { newnames <- suppressWarnings(as.ab(names(dots), info = FALSE)) diff --git a/R/join_microorganisms.R b/R/join_microorganisms.R index 3afd9224d..8224a4670 100755 --- a/R/join_microorganisms.R +++ b/R/join_microorganisms.R @@ -30,7 +30,7 @@ #' @param by a variable to join by - if left empty will search for a column with class [`mo`] (created with [as.mo()]) or will be `"mo"` if that column name exists in `x`, could otherwise be a column name of `x` with values that exist in `microorganisms$mo` (like `by = "bacteria_id"`), or another column in [microorganisms] (but then it should be named, like `by = c("my_genus_species" = "fullname")`) #' @param suffix if there are non-joined duplicate variables in `x` and `y`, these suffixes will be added to the output to disambiguate them. Should be a character vector of length 2. #' @param ... ignored -#' @details **Note:** As opposed to the `join()` functions of `dplyr`, [`character`] vectors are supported and at default existing columns will get a suffix `"2"` and the newly joined columns will not get a suffix. +#' @details **Note:** As opposed to the `join()` functions of `dplyr`, [character] vectors are supported and at default existing columns will get a suffix `"2"` and the newly joined columns will not get a suffix. #' #' These functions rely on [merge()], a base R function to do joins. #' @inheritSection AMR Read more on our website! @@ -61,7 +61,7 @@ inner_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) { x <- checked$x by <- checked$by join <- suppressWarnings( - inner_join(x = x, y = microorganisms, by = by, suffix = suffix, ...) + pm_inner_join(x = x, y = microorganisms, by = by, suffix = suffix, ...) ) if (NROW(join) > NROW(x)) { warning("The newly joined tbl contains ", nrow(join) - nrow(x), " rows more that its original.") @@ -80,7 +80,7 @@ left_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) { x <- checked$x by <- checked$by join <- suppressWarnings( - left_join(x = x, y = microorganisms, by = by, suffix = suffix, ...) + pm_left_join(x = x, y = microorganisms, by = by, suffix = suffix, ...) ) if (NROW(join) > NROW(x)) { warning("The newly joined tbl contains ", nrow(join) - nrow(x), " rows more that its original.") @@ -99,7 +99,7 @@ right_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) { x <- checked$x by <- checked$by join <- suppressWarnings( - right_join(x = x, y = microorganisms, by = by, suffix = suffix, ...) + pm_right_join(x = x, y = microorganisms, by = by, suffix = suffix, ...) ) if (NROW(join) > NROW(x)) { warning("The newly joined tbl contains ", nrow(join) - nrow(x), " rows more that its original.") @@ -118,7 +118,7 @@ full_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) { x <- checked$x by <- checked$by join <- suppressWarnings( - full_join(x = x, y = microorganisms, by = by, suffix = suffix, ...) + pm_full_join(x = x, y = microorganisms, by = by, suffix = suffix, ...) ) if (NROW(join) > NROW(x)) { warning("The newly joined tbl contains ", nrow(join) - nrow(x), " rows more that its original.") @@ -137,7 +137,7 @@ semi_join_microorganisms <- function(x, by = NULL, ...) { x <- checked$x by <- checked$by join <- suppressWarnings( - semi_join(x = x, y = microorganisms, by = by, ...) + pm_semi_join(x = x, y = microorganisms, by = by, ...) ) class(join) <- x_class join @@ -153,7 +153,7 @@ anti_join_microorganisms <- function(x, by = NULL, ...) { x <- checked$x by <- checked$by join <- suppressWarnings( - anti_join(x = x, y = microorganisms, by = by, ...) + pm_anti_join(x = x, y = microorganisms, by = by, ...) ) class(join) <- x_class join diff --git a/R/key_antibiotics.R b/R/key_antibiotics.R index 5acdce79a..22d08f6cc 100755 --- a/R/key_antibiotics.R +++ b/R/key_antibiotics.R @@ -125,7 +125,7 @@ key_antibiotics <- function(x, dots <- unlist(list(...)) if (length(dots) != 0) { # backwards compatibility with old parameters - dots.names <- dots %>% names() + dots.names <- dots %pm>% names() if ("info" %in% dots.names) { warnings <- dots[which(dots.names == "info")] } @@ -162,7 +162,7 @@ key_antibiotics <- function(x, if (!all(col.list %in% colnames(x))) { if (warnings == TRUE) { warning("Some columns do not exist and will be ignored: ", - col.list.bak[!(col.list %in% colnames(x))] %>% toString(), + col.list.bak[!(col.list %in% colnames(x))] %pm>% toString(), ".\nTHIS MAY STRONGLY INFLUENCE THE OUTCOME.", immediate. = TRUE, call. = FALSE) @@ -218,7 +218,7 @@ key_antibiotics <- function(x, x$key_ab <- NA_character_ # Gram + - x$key_ab <- if_else(x$gramstain == "Gram-positive", + x$key_ab <- pm_if_else(x$gramstain == "Gram-positive", tryCatch(apply(X = x[, gram_positive], MARGIN = 1, FUN = function(x) paste(x, collapse = "")), @@ -226,7 +226,7 @@ key_antibiotics <- function(x, x$key_ab) # Gram - - x$key_ab <- if_else(x$gramstain == "Gram-negative", + x$key_ab <- pm_if_else(x$gramstain == "Gram-negative", tryCatch(apply(X = x[, gram_negative], MARGIN = 1, FUN = function(x) paste(x, collapse = "")), @@ -236,7 +236,7 @@ key_antibiotics <- function(x, # format key_abs <- toupper(gsub("[^SIR]", ".", gsub("(NA|NULL)", ".", x$key_ab))) - if (n_distinct(key_abs) == 1) { + if (pm_n_distinct(key_abs) == 1) { warning("No distinct key antibiotics determined.", call. = FALSE) } @@ -266,7 +266,7 @@ key_antibiotics_equal <- function(y, result <- logical(length(x)) if (info_needed == TRUE) { - p <- progress_estimated(length(x)) + p <- progress_ticker(length(x)) on.exit(close(p)) } @@ -315,10 +315,10 @@ key_antibiotics_equal <- function(y, # - S|R <-> R|S is 1 point # use the levels of as.rsi (S = 1, I = 2, R = 3) - suppressWarnings(x_split <- x_split %>% as.rsi() %>% as.double()) - suppressWarnings(y_split <- y_split %>% as.rsi() %>% as.double()) + suppressWarnings(x_split <- x_split %pm>% as.rsi() %pm>% as.double()) + suppressWarnings(y_split <- y_split %pm>% as.rsi() %pm>% as.double()) - points <- (x_split - y_split) %>% abs() %>% sum(na.rm = TRUE) / 2 + points <- (x_split - y_split) %pm>% abs() %pm>% sum(na.rm = TRUE) / 2 result[i] <- points >= points_threshold } else { diff --git a/R/kurtosis.R b/R/kurtosis.R index 542423427..16673aafe 100755 --- a/R/kurtosis.R +++ b/R/kurtosis.R @@ -23,7 +23,7 @@ #' #' @description Kurtosis is a measure of the "tailedness" of the probability distribution of a real-valued random variable. #' @inheritSection lifecycle Questioning lifecycle -#' @param x a vector of values, a [`matrix`] or a [`data.frame`] +#' @param x a vector of values, a [`matrix`] or a [data.frame] #' @param na.rm a logical value indicating whether `NA` values should be stripped before the computation proceeds. #' @seealso [skewness()] #' @rdname kurtosis diff --git a/R/like.R b/R/like.R index de95aaff0..155dbf981 100755 --- a/R/like.R +++ b/R/like.R @@ -24,7 +24,7 @@ #' Convenient wrapper around [grep()] to match a pattern: `x %like% pattern`. It always returns a [`logical`] vector and is always case-insensitive (use `x %like_case% pattern` for case-sensitive matching). Also, `pattern` can be as long as `x` to compare items of each index in both vectors, or they both can have the same length to iterate over all cases. #' @inheritSection lifecycle Stable lifecycle #' @param x a character vector where matches are sought, or an object which can be coerced by [as.character()] to a character vector. -#' @param pattern a character string containing a regular expression (or [`character`] string for `fixed = TRUE`) to be matched in the given character vector. Coerced by [as.character()] to a character string if possible. If a [`character`] vector of length 2 or more is supplied, the first element is used with a warning. +#' @param pattern a character string containing a regular expression (or [character] string for `fixed = TRUE`) to be matched in the given character vector. Coerced by [as.character()] to a character string if possible. If a [character] vector of length 2 or more is supplied, the first element is used with a warning. #' @param ignore.case if `FALSE`, the pattern matching is *case sensitive* and if `TRUE`, case is ignored during matching. #' @return A [`logical`] vector #' @name like diff --git a/R/mdro.R b/R/mdro.R index ded703447..2d64e1653 100755 --- a/R/mdro.R +++ b/R/mdro.R @@ -52,13 +52,13 @@ #' @inheritSection as.rsi Interpretation of R and S/I #' @return #' - CMI 2012 paper - function [mdr_cmi2012()] or [mdro()]:\cr -#' Ordered [`factor`] with levels `Negative` < `Multi-drug-resistant (MDR)` < `Extensively drug-resistant (XDR)` < `Pandrug-resistant (PDR)` +#' Ordered [factor] with levels `Negative` < `Multi-drug-resistant (MDR)` < `Extensively drug-resistant (XDR)` < `Pandrug-resistant (PDR)` #' - TB guideline - function [mdr_tb()] or [`mdro(..., guideline = "TB")`][mdro()]:\cr -#' Ordered [`factor`] with levels `Negative` < `Mono-resistant` < `Poly-resistant` < `Multi-drug-resistant` < `Extensively drug-resistant` +#' Ordered [factor] with levels `Negative` < `Mono-resistant` < `Poly-resistant` < `Multi-drug-resistant` < `Extensively drug-resistant` #' - German guideline - function [mrgn()] or [`mdro(..., guideline = "MRGN")`][mdro()]:\cr -#' Ordered [`factor`] with levels `Negative` < `3MRGN` < `4MRGN` +#' Ordered [factor] with levels `Negative` < `3MRGN` < `4MRGN` #' - Everything else:\cr -#' Ordered [`factor`] with levels `Negative` < `Positive, unconfirmed` < `Positive`. The value `"Positive, unconfirmed"` means that, according to the guideline, it is not entirely sure if the isolate is multi-drug resistant and this should be confirmed with additional (e.g. molecular) tests +#' Ordered [factor] with levels `Negative` < `Positive, unconfirmed` < `Positive`. The value `"Positive, unconfirmed"` means that, according to the guideline, it is not entirely sure if the isolate is multi-drug resistant and this should be confirmed with additional (e.g. molecular) tests #' @rdname mdro #' @aliases MDR XDR PDR BRMO 3MRGN 4MRGN #' @export diff --git a/R/mic.R b/R/mic.R index 1cb55136d..a51fb1b20 100755 --- a/R/mic.R +++ b/R/mic.R @@ -19,15 +19,15 @@ # Visit our website for more info: https://msberends.github.io/AMR. # # ==================================================================== # -#' Transform input to minimum inhibitory concentrations +#' Transform input to minimum inhibitory concentrations (MIC) #' -#' This transforms a vector to a new class [`mic`], which is an ordered [`factor`] with valid minimum inhibitory concentrations (MIC) as levels. Invalid MIC values will be translated as `NA` with a warning. +#' This transforms a vector to a new class [`mic`], which is an ordered [factor] with valid minimum inhibitory concentrations (MIC) as levels. Invalid MIC values will be translated as `NA` with a warning. #' @inheritSection lifecycle Stable lifecycle #' @rdname as.mic #' @param x vector #' @param na.rm a logical indicating whether missing values should be removed #' @details To interpret MIC values as RSI values, use [as.rsi()] on MIC values. It supports guidelines from EUCAST and CLSI. -#' @return Ordered [`factor`] with new class [`mic`] +#' @return Ordered [factor] with additional class [`mic`] #' @aliases mic #' @export #' @seealso [as.rsi()] @@ -55,7 +55,7 @@ as.mic <- function(x, na.rm = FALSE) { if (is.mic(x)) { x } else { - x <- x %>% unlist() + x <- x %pm>% unlist() if (na.rm == TRUE) { x <- x[!is.na(x)] } @@ -109,13 +109,13 @@ as.mic <- function(x, na.rm = FALSE) { c(t(sapply(ops, function(x) paste0(x, c(10:98)[9:98 %% 2 == TRUE])))), c(t(sapply(ops, function(x) paste0(x, sort(c(2 ^ c(7:10), 80 * c(2:12)))))))) - na_before <- x[is.na(x) | x == ""] %>% length() + na_before <- x[is.na(x) | x == ""] %pm>% length() x[!x %in% lvls] <- NA - na_after <- x[is.na(x) | x == ""] %>% length() + na_after <- x[is.na(x) | x == ""] %pm>% length() if (na_before != na_after) { - list_missing <- x.bak[is.na(x) & !is.na(x.bak) & x.bak != ""] %>% - unique() %>% + list_missing <- x.bak[is.na(x) & !is.na(x.bak) & x.bak != ""] %pm>% + unique() %pm>% sort() list_missing <- paste0('"', list_missing, '"', collapse = ", ") warning(na_after - na_before, " results truncated (", @@ -196,15 +196,15 @@ print.mic <- function(x, ...) { #' @noRd summary.mic <- function(object, ...) { x <- object - n_total <- x %>% length() + n_total <- length(x) x <- x[!is.na(x)] - n <- x %>% length() - c( - "Class" = "mic", - "" = n_total - n, - "Min." = sort(x)[1] %>% as.character(), - "Max." = sort(x)[n] %>% as.character() - ) + n <- length(x) + value <- c("Class" = "mic", + "" = n_total - n, + "Min." = as.character(sort(x)[1]), + "Max." = as.character(sort(x)[n])) + class(value) <- c("summaryDefault", "table") + value } #' @method plot mic @@ -283,7 +283,7 @@ barplot.mic <- function(height, #' @export #' @noRd c.mic <- function(x, ...) { - y <- NextMethod() - attributes(y) <- attributes(x) - y + y <- unlist(lapply(list(...), as.character)) + x <- as.character(x) + as.mic(c(x, y)) } diff --git a/R/mo.R b/R/mo.R index 382afef75..10e3c84f8 100755 --- a/R/mo.R +++ b/R/mo.R @@ -23,7 +23,7 @@ #' #' Use this function to determine a valid microorganism ID ([`mo`]). Determination is done using intelligent rules and the complete taxonomic kingdoms Bacteria, Chromista, Protozoa, Archaea and most microbial species from the kingdom Fungi (see Source). The input can be almost anything: a full name (like `"Staphylococcus aureus"`), an abbreviated name (like `"S. aureus"`), an abbreviation known in the field (like `"MRSA"`), or just a genus. Please see *Examples*. #' @inheritSection lifecycle Stable lifecycle -#' @param x a character vector or a [`data.frame`] with one or two columns +#' @param x a character vector or a [data.frame] with one or two columns #' @param Becker a logical to indicate whether *Staphylococci* should be categorised into coagulase-negative *Staphylococci* ("CoNS") and coagulase-positive *Staphylococci* ("CoPS") instead of their own species, according to Karsten Becker *et al.* (1,2). Note that this does not include species that were newly named after these publications, like *S. caeli*. #' #' This excludes *Staphylococcus aureus* at default, use `Becker = "all"` to also categorise *S. aureus* as "CoPS". @@ -31,7 +31,7 @@ #' #' This excludes *Enterococci* at default (who are in group D), use `Lancefield = "all"` to also categorise all *Enterococci* as group D. #' @param allow_uncertain a number between `0` (or `"none"`) and `3` (or `"all"`), or `TRUE` (= `2`) or `FALSE` (= `0`) to indicate whether the input should be checked for less probable results, please see *Details* -#' @param reference_df a [`data.frame`] to be used for extra reference when translating `x` to a valid [`mo`]. See [set_mo_source()] and [get_mo_source()] to automate the usage of your own codes (e.g. used in your analysis or organisation). +#' @param reference_df a [data.frame] to be used for extra reference when translating `x` to a valid [`mo`]. See [set_mo_source()] and [get_mo_source()] to automate the usage of your own codes (e.g. used in your analysis or organisation). #' @param ignore_pattern a regular expression (case-insensitive) of which all matches in `x` must return `NA`. This can be convenient to exclude known non-relevant input and can also be set with the option `AMR_ignore_pattern`, e.g. `options(AMR_ignore_pattern = "(not reported|contaminated flora)")`. #' @param language language to translate text like "no growth", which defaults to the system language (see [get_locale()]) #' @param ... other parameters passed on to functions @@ -69,7 +69,7 @@ #' 2. Taxonomic kingdom: the function starts with determining Bacteria, then Fungi, then Protozoa, then others; #' 3. Breakdown of input values to identify possible matches. #' -#' This will lead to the effect that e.g. `"E. coli"` (a microorganism highly prevalent in humans) will return the microbial ID of *Escherichia coli* and not *Entamoeba coli* (a microorganism less prevalent in humans), although the latter would alphabetically come first. +#' This will lead to the effect that e.g. `"E. coli"` (a microorganism highly prevalent in humans) will return the microbial ID of *Escherichia coli* and not *Entamoeba coli* (a microorganism less prevalent in humans), although the latter would alphabetically come first. #' #' ## Coping with uncertain results #' @@ -87,9 +87,9 @@ #' - `"Fluoroquinolone-resistant Neisseria gonorrhoeae"`. The first word will be stripped, after which the function will try to find a match. A warning will be thrown that the result *Neisseria gonorrhoeae* (``r as.mo("Neisseria gonorrhoeae")``) needs review. #' #' There are three helper functions that can be run after using the [as.mo()] function: -#' - Use [mo_uncertainties()] to get a [`data.frame`] that prints in a pretty format with all taxonomic names that were guessed. The output contains a score that is based on the human pathogenic prevalence and the [Levenshtein distance](https://en.wikipedia.org/wiki/Levenshtein_distance) between the user input and the full taxonomic name. -#' - Use [mo_failures()] to get a [`character`] [`vector`] with all values that could not be coerced to a valid value. -#' - Use [mo_renamed()] to get a [`data.frame`] with all values that could be coerced based on old, previously accepted taxonomic names. +#' - Use [mo_uncertainties()] to get a [data.frame] that prints in a pretty format with all taxonomic names that were guessed. The output contains the matching score for all matches (see *Background on matching score*). +#' - Use [mo_failures()] to get a [character] [vector] with all values that could not be coerced to a valid value. +#' - Use [mo_renamed()] to get a [data.frame] with all values that could be coerced based on old, previously accepted taxonomic names. #' #' ## Microbial prevalence of pathogens in humans #' @@ -100,6 +100,21 @@ #' Group 2 consists of all microorganisms where the taxonomic phylum is Proteobacteria, Firmicutes, Actinobacteria or Sarcomastigophora, or where the taxonomic genus is *Aspergillus*, *Bacteroides*, *Candida*, *Capnocytophaga*, *Chryseobacterium*, *Cryptococcus*, *Elisabethkingia*, *Flavobacterium*, *Fusobacterium*, *Giardia*, *Leptotrichia*, *Mycoplasma*, *Prevotella*, *Rhodotorula*, *Treponema*, *Trichophyton* or *Ureaplasma*. This group consequently contains all less common and rare human pathogens. #' #' Group 3 (least prevalent microorganisms) consists of all other microorganisms. This group contains microorganisms most probably not found in humans. +#' +#' ## Background on matching scores +#' With ambiguous user input, the returned results are chosen based on their matching score using [mo_matching_score()]. This matching score is based on four parameters: +#' +#' 1. The prevalence \eqn{P} is categorised into group 1, 2 and 3 as stated above; +#' 2. A kingdom index \eqn{K} is set as follows: Bacteria = 1, Fungi = 2, Protozoa = 3, Archaea = 4, and all others = 5; +#' 3. The level of uncertainty \eqn{U} needed to get to the result, as stated above (1 to 3); +#' 4. The [Levenshtein distance](https://en.wikipedia.org/wiki/Levenshtein_distance) \eqn{L} is the distance between the user input and all taxonomic full names, with the text length of the user input being the maximum distance. A modified version of the Levenshtein distance \eqn{L'} based on the text length of the full name \eqn{F} is calculated as: +#' +#' \deqn{L' = F - \frac{0.5 \times L}{F}}{L' = F - (0.5 * L) / F} +#' +#' The final matching score \eqn{M} is calculated as: +#' \deqn{M = L' \times \frac{1}{P \times K} * \frac{1}{U}}{M = L' * (1 / (P * K)) * (1 / U)} +#' +#' All matches are sorted descending on their matching score and for all user input values, the top match will be returned. #' @inheritSection catalogue_of_life Catalogue of Life # (source as a section here, so it can be inherited by other man pages:) #' @section Source: @@ -108,8 +123,8 @@ #' 3. Lancefield RC **A serological differentiation of human and other groups of hemolytic streptococci**. 1933. J Exp Med. 57(4): 571–95. #' 4. Catalogue of Life: Annual Checklist (public online taxonomic database), (check included annual version with [catalogue_of_life_version()]). #' @export -#' @return A [`character`] [`vector`] with additional class [`mo`] -#' @seealso [microorganisms] for the [`data.frame`] that is being used to determine ID's. +#' @return A [character] [vector] with additional class [`mo`] +#' @seealso [microorganisms] for the [data.frame] that is being used to determine ID's. #' #' The [mo_property()] functions (like [mo_genus()], [mo_gramstain()]) to get properties based on the returned code. #' @inheritSection AMR Reference data publicly available @@ -218,7 +233,7 @@ as.mo <- function(x, # has valid own reference_df # (data.table not faster here) - reference_df <- reference_df %>% filter(!is.na(mo)) + reference_df <- reference_df %pm>% pm_filter(!is.na(mo)) # keep only first two columns, second must be mo if (colnames(reference_df)[1] == "mo") { reference_df <- reference_df[, c(2, 1)] @@ -231,9 +246,9 @@ as.mo <- function(x, reference_df[] <- lapply(reference_df, as.character) ) suppressWarnings( - y <- data.frame(x = x, stringsAsFactors = FALSE) %>% - left_join(reference_df, by = "x") %>% - pull("mo") + y <- data.frame(x = x, stringsAsFactors = FALSE) %pm>% + pm_left_join(reference_df, by = "x") %pm>% + pm_pull("mo") ) } else if (all(x %in% MO_lookup$mo) @@ -315,7 +330,10 @@ exec_as.mo <- function(x, res_df <- haystack[which(eval(substitute(needle), envir = haystack, enclos = parent.frame())), , drop = FALSE] if (NROW(res_df) > 1 & uncertainty != -1) { # sort the findings on matching score - res_df <- res_df[order(mo_matching_score(input, res_df[, "fullname", drop = TRUE]), decreasing = TRUE), , drop = FALSE] + scores <- mo_matching_score(x = input, + fullname = res_df[, "fullname", drop = TRUE], + uncertainty = uncertainty) + res_df <- res_df[order(scores, decreasing = TRUE), , drop = FALSE] } res <- as.character(res_df[, column, drop = TRUE]) if (length(res) == 0) { @@ -402,7 +420,7 @@ exec_as.mo <- function(x, if (!is.null(reference_df)) { mo_source_isvalid(reference_df) - reference_df <- reference_df %>% filter(!is.na(mo)) + reference_df <- reference_df %pm>% pm_filter(!is.na(mo)) # keep only first two columns, second must be mo if (colnames(reference_df)[1] == "mo") { reference_df <- reference_df[, c(2, 1)] @@ -580,7 +598,7 @@ exec_as.mo <- function(x, } if (initial_search == TRUE) { - progress <- progress_estimated(n = length(x), n_min = 25) # start if n >= 25 + progress <- progress_ticker(n = length(x), n_min = 25) # start if n >= 25 on.exit(close(progress)) } @@ -955,9 +973,9 @@ exec_as.mo <- function(x, if (nchar(g.x_backup_without_spp) <= 6) { x_length <- nchar(g.x_backup_without_spp) x_split <- paste0("^", - g.x_backup_without_spp %>% substr(1, x_length / 2), + g.x_backup_without_spp %pm>% substr(1, x_length / 2), ".* ", - g.x_backup_without_spp %>% substr((x_length / 2) + 1, x_length)) + g.x_backup_without_spp %pm>% substr((x_length / 2) + 1, x_length)) found <- lookup(fullname_lower %like_case% x_split, haystack = data_to_check) if (!is.na(found)) { @@ -1149,7 +1167,7 @@ exec_as.mo <- function(x, if (isTRUE(debug)) { cat(font_bold("\n[ UNCERTAINTY LEVEL", now_checks_for_uncertainty_level, "] (6) try to strip off half an element from end and check the remains\n")) } - x_strip <- a.x_backup %>% strsplit("[ .]") %>% unlist() + x_strip <- a.x_backup %pm>% strsplit("[ .]") %pm>% unlist() if (length(x_strip) > 1) { for (i in seq_len(length(x_strip) - 1)) { lastword <- x_strip[length(x_strip) - i + 1] @@ -1232,7 +1250,7 @@ exec_as.mo <- function(x, if (isTRUE(debug)) { cat(font_bold("\n[ UNCERTAINTY LEVEL", now_checks_for_uncertainty_level, "] (9) try to strip off one element from start and check the remains (only allow >= 2-part name outcome)\n")) } - x_strip <- a.x_backup %>% strsplit("[ .]") %>% unlist() + x_strip <- a.x_backup %pm>% strsplit("[ .]") %pm>% unlist() if (length(x_strip) > 1 & nchar(g.x_backup_without_spp) >= 6) { for (i in 2:(length(x_strip))) { x_strip_collapsed <- paste(x_strip[i:length(x_strip)], collapse = " ") @@ -1267,7 +1285,7 @@ exec_as.mo <- function(x, if (isTRUE(debug)) { cat(font_bold("\n[ UNCERTAINTY LEVEL", now_checks_for_uncertainty_level, "] (10) try to strip off one element from start and check the remains (any text size)\n")) } - x_strip <- a.x_backup %>% strsplit("[ .]") %>% unlist() + x_strip <- a.x_backup %pm>% strsplit("[ .]") %pm>% unlist() if (length(x_strip) > 1 & nchar(g.x_backup_without_spp) >= 6) { for (i in 2:(length(x_strip))) { x_strip_collapsed <- paste(x_strip[i:length(x_strip)], collapse = " ") @@ -1398,16 +1416,16 @@ exec_as.mo <- function(x, if (length(failures) > 0 & initial_search == TRUE) { options(mo_failures = sort(unique(failures))) plural <- c("value", "it", "was") - if (n_distinct(failures) > 1) { + if (pm_n_distinct(failures) > 1) { plural <- c("values", "them", "were") } x_input_clean <- trimws2(x_input) total_failures <- length(x_input_clean[as.character(x_input_clean) %in% as.character(failures) & !x_input %in% c(NA, NULL, NaN)]) total_n <- length(x_input[!x_input %in% c(NA, NULL, NaN)]) - msg <- paste0(nr2char(n_distinct(failures)), " unique ", plural[1], + msg <- paste0(nr2char(pm_n_distinct(failures)), " unique ", plural[1], " (covering ", percentage(total_failures / total_n), ") could not be coerced and ", plural[3], " considered 'unknown'") - if (n_distinct(failures) <= 10) { + if (pm_n_distinct(failures) <= 10) { msg <- paste0(msg, ": ", paste('"', unique(failures), '"', sep = "", collapse = ", ")) } msg <- paste0(msg, @@ -1421,7 +1439,7 @@ exec_as.mo <- function(x, } # handling uncertainties ---- if (NROW(uncertainties) > 0 & initial_search == TRUE) { - uncertainties <- as.list(distinct(uncertainties, input, .keep_all = TRUE)) + uncertainties <- as.list(pm_distinct(uncertainties, input, .keep_all = TRUE)) options(mo_uncertainties = uncertainties) plural <- c("", "it", "was") @@ -1633,8 +1651,8 @@ freq.mo <- function(x, ...) { decimal.mark = "."), " (", percentage(sum(grams == "Gram-positive", na.rm = TRUE) / length(grams), digits = digits), ")"), - `No. of genera` = n_distinct(mo_genus(x_noNA, language = NULL)), - `No. of species` = n_distinct(paste(mo_genus(x_noNA, language = NULL), + `No. of genera` = pm_n_distinct(mo_genus(x_noNA, language = NULL)), + `No. of species` = pm_n_distinct(paste(mo_genus(x_noNA, language = NULL), mo_species(x_noNA, language = NULL))))) } @@ -1662,7 +1680,7 @@ summary.mo <- function(object, ...) { top_3 <- top[order(-top$n), 1][1:3] value <- c("Class" = "mo", "" = length(x[is.na(x)]), - "Unique" = n_distinct(x[!is.na(x)]), + "Unique" = pm_n_distinct(x[!is.na(x)]), "#1" = top_3[1], "#2" = top_3[2], "#3" = top_3[3]) @@ -1752,14 +1770,16 @@ print.mo_uncertainties <- function(x, ...) { if (NROW(x) == 0) { return(NULL) } - cat(font_blue(strwrap(c("Scores are based on human pathogenic prevalence and the resemblance between the input and the full taxonomic name. Furthermore, an indication is given about the likelihood of the match - the more transformations are needed for coercion, the more unlikely the result.")), collapse = "\n")) + cat(font_blue(strwrap(c("Matching scores are based on human pathogenic prevalence and the resemblance between the input and the full taxonomic name. Furthermore, an indication is given about the probability of the match - the more transformations are needed for coercion, the more improbable the result.")), collapse = "\n")) cat("\n") msg <- "" for (i in seq_len(nrow(x))) { if (x[i, ]$candidates != "") { candidates <- unlist(strsplit(x[i, ]$candidates, ", ", fixed = TRUE)) - scores <- mo_matching_score(x[i, ]$input, candidates) * (1 / x[i, ]$uncertainty) + scores <- mo_matching_score(x = x[i, ]$input, + fullname = candidates, + uncertainty = x[i, ]$uncertainty) # sort on descending scores candidates <- candidates[order(1 - scores)] n_candidates <- length(candidates) @@ -1768,23 +1788,26 @@ print.mo_uncertainties <- function(x, ...) { candidates <- paste(candidates, collapse = ", ") # align with input after arrow candidates <- paste0("\n", strrep(" ", nchar(x[i, ]$input) + 6), - "Less likely", ifelse(n_candidates == 25, " (max 25)", ""), ": ", candidates) + "Also matched", ifelse(n_candidates == 25, " (max 25)", ""), ": ", candidates) } else { candidates <- "" } if (x[i, ]$uncertainty == 1) { - uncertainty_interpretation <- font_green("* VERY LIKELY *") + uncertainty_interpretation <- font_green("* MOST PROBABLE *") } else if (x[i, ]$uncertainty == 1) { - uncertainty_interpretation <- font_yellow("* LIKELY *") + uncertainty_interpretation <- font_yellow("* PROBABLE *") } else { - uncertainty_interpretation <- font_red("* UNLIKELY *") + uncertainty_interpretation <- font_red("* IMPROBABLE *") } msg <- paste(msg, paste0('"', x[i, ]$input, '" -> ', paste0(font_bold(font_italic(x[i, ]$fullname)), ifelse(!is.na(x[i, ]$renamed_to), paste(", renamed to", font_italic(x[i, ]$renamed_to)), ""), " (", x[i, ]$mo, - ", score: ", trimws(percentage(mo_matching_score(x[i, ]$input, x[i, ]$fullname) * (1 / x[i, ]$uncertainty), digits = 1)), + ", matching score = ", trimws(percentage(mo_matching_score(x = x[i, ]$input, + fullname = x[i, ]$fullname, + uncertainty = x[i, ]$uncertainty), + digits = 1)), ") "), uncertainty_interpretation, candidates), @@ -1800,7 +1823,7 @@ mo_renamed <- function() { if (is.null(items)) { items <- data.frame() } else { - items <- distinct(items, old_name, .keep_all = TRUE) + items <- pm_distinct(items, old_name, .keep_all = TRUE) } structure(.Data = items, class = c("mo_renamed", "data.frame")) @@ -1872,27 +1895,6 @@ load_mo_failures_uncertainties_renamed <- function(metadata) { options("mo_renamed" = metadata$renamed) } -mo_matching_score <- function(input, fullname) { - # fullname is always a taxonomically valid full name - levenshtein <- double(length = length(input)) - if (length(fullname) == 1) { - fullname <- rep(fullname, length(input)) - } - if (length(input) == 1) { - input <- rep(input, length(fullname)) - } - for (i in seq_len(length(input))) { - # determine Levenshtein distance, but maximise to nchar of fullname - levenshtein[i] <- min(as.double(utils::adist(input[i], fullname[i], ignore.case = FALSE)), - nchar(fullname[i])) - } - # self-made score between 0 and 1 (for % certainty, so 0 means huge distance, 1 means no distance) - dist <- (nchar(fullname) - 0.5 * levenshtein) / nchar(fullname) - index_in_MO_lookup <- tryCatch((nrow(MO_lookup) - match(fullname, MO_lookup$fullname)) / nrow(MO_lookup), - error = function(e) rep(1, length(fullname))) - (0.25 * dist) + (0.75 * index_in_MO_lookup) -} - trimws2 <- function(x) { trimws(gsub("[\\s]+", " ", x, perl = TRUE)) } @@ -1903,13 +1905,13 @@ parse_and_convert <- function(x) { if (NCOL(x) > 2) { stop("a maximum of two columns is allowed", call. = FALSE) } else if (NCOL(x) == 2) { - # support Tidyverse selection like: df %>% select(colA, colB) + # support Tidyverse selection like: df %pm>% select(colA, colB) # paste these columns together x <- as.data.frame(x, stringsAsFactors = FALSE) colnames(x) <- c("A", "B") x <- paste(x$A, x$B) } else { - # support Tidyverse selection like: df %>% select(colA) + # support Tidyverse selection like: df %pm>% select(colA) x <- as.data.frame(x, stringsAsFactors = FALSE)[[1]] } } @@ -1950,8 +1952,8 @@ replace_ignore_pattern <- function(x, ignore_pattern) { } left_join_MO_lookup <- function(x, ...) { - left_join(x = x, y = MO_lookup, ...) + pm_left_join(x = x, y = MO_lookup, ...) } left_join_MO.old_lookup <- function(x, ...) { - left_join(x = x, y = MO.old_lookup, ...) + pm_left_join(x = x, y = MO.old_lookup, ...) } diff --git a/R/mo_matching_score.R b/R/mo_matching_score.R new file mode 100755 index 000000000..54e87ddfe --- /dev/null +++ b/R/mo_matching_score.R @@ -0,0 +1,63 @@ +# ==================================================================== # +# TITLE # +# Antimicrobial Resistance (AMR) Analysis # +# # +# SOURCE # +# https://github.com/msberends/AMR # +# # +# LICENCE # +# (c) 2018-2020 Berends MS, Luz CF et al. # +# # +# This R package is free software; you can freely use and distribute # +# it for both personal and commercial purposes under the terms of the # +# GNU General Public License version 2.0 (GNU GPL-2), as published by # +# the Free Software Foundation. # +# # +# We created this package for both routine data analysis and academic # +# research and it was publicly released in the hope that it will be # +# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. # +# Visit our website for more info: https://msberends.github.io/AMR. # +# ==================================================================== # + +#' Calculate the matching score for microorganisms +#' +#' This helper function is used by [as.mo()] to determine the most probable match of taxonomic records, based on user input. +#' @param x Any user input value(s) +#' @param fullname A full taxonomic name, that exists in [`microorganisms$fullname`][microorganisms] +#' @param uncertainty The level of uncertainty set in [as.mo()], see `allow_uncertain` in that function (here, it defaults to 1, but is automatically determined in [as.mo()] based on the number of transformations needed to get to a result) +#' @details The matching score is based on four parameters: +#' +#' 1. A human pathogenic prevalence \eqn{P}, that is categorised into group 1, 2 and 3 (see [as.mo()]); +#' 2. A kingdom index \eqn{K} is set as follows: Bacteria = 1, Fungi = 2, Protozoa = 3, Archaea = 4, and all others = 5; +#' 3. The level of uncertainty \eqn{U} that is needed to get to a result (1 to 3, see [as.mo()]); +#' 4. The [Levenshtein distance](https://en.wikipedia.org/wiki/Levenshtein_distance) \eqn{L} is the distance between the user input and all taxonomic full names, with the text length of the user input being the maximum distance. A modified version of the Levenshtein distance \eqn{L'} based on the text length of the full name \eqn{F} is calculated as: +#' +#' \deqn{L' = F - \frac{0.5 \times L}{F}}{L' = F - (0.5 * L) / F} +#' +#' The final matching score \eqn{M} is calculated as: +#' \deqn{M = L' \times \frac{1}{P \times K} * \frac{1}{U}}{M = L' * (1 / (P * K)) * (1 / U)} +#' +#' @export +#' @examples +#' as.mo("E. coli") +#' mo_uncertainties() +mo_matching_score <- function(x, fullname, uncertainty = 1) { + # fullname is always a taxonomically valid full name + levenshtein <- double(length = length(x)) + if (length(fullname) == 1) { + fullname <- rep(fullname, length(x)) + } + if (length(x) == 1) { + x <- rep(x, length(fullname)) + } + for (i in seq_len(length(x))) { + # determine Levenshtein distance, but maximise to nchar of fullname + levenshtein[i] <- min(as.double(utils::adist(x[i], fullname[i], ignore.case = FALSE)), + nchar(fullname[i])) + } + # self-made score between 0 and 1 (for % certainty, so 0 means huge distance, 1 means no distance) + dist <- (nchar(fullname) - 0.5 * levenshtein) / nchar(fullname) + prevalence_kingdom_index <- tryCatch(MO_lookup[match(fullname, MO_lookup$fullname), "prevalence_kingdom_index", drop = TRUE], + error = function(e) rep(1, length(fullname))) + dist * (1 / prevalence_kingdom_index) * (1 / uncertainty) +} diff --git a/R/mo_property.R b/R/mo_property.R index 2e6e2e437..c96027f61 100755 --- a/R/mo_property.R +++ b/R/mo_property.R @@ -47,11 +47,11 @@ #' @rdname mo_property #' @name mo_property #' @return -#' - An [`integer`] in case of [mo_year()] -#' - A [`list`] in case of [mo_taxonomy()] and [mo_info()] -#' - A named [`character`] in case of [mo_url()] -#' - A [`double`] in case of [mo_snomed()] -#' - A [`character`] in all other cases +#' - An [integer] in case of [mo_year()] +#' - A [list] in case of [mo_taxonomy()] and [mo_info()] +#' - A named [character] in case of [mo_url()] +#' - A [double] in case of [mo_snomed()] +#' - A [character] in all other cases #' @export #' @seealso [microorganisms] #' @inheritSection AMR Reference data publicly available @@ -378,8 +378,8 @@ mo_url <- function(x, open = FALSE, language = get_locale(), ...) { mo_names <- mo_name(mo) metadata <- get_mo_failures_uncertainties_renamed() - df <- data.frame(mo, stringsAsFactors = FALSE) %>% - left_join(select(microorganisms, mo, source, species_id), by = "mo") + df <- data.frame(mo, stringsAsFactors = FALSE) %pm>% + pm_left_join(pm_select(microorganisms, mo, source, species_id), by = "mo") df$url <- ifelse(df$source == "CoL", paste0(catalogue_of_life$url_CoL, "details/species/id/", df$species_id, "/"), ifelse(df$source == "DSMZ", diff --git a/R/mo_source.R b/R/mo_source.R index e9582e3d8..6c4f74132 100644 --- a/R/mo_source.R +++ b/R/mo_source.R @@ -29,11 +29,13 @@ #' @rdname mo_source #' @name mo_source #' @aliases set_mo_source get_mo_source -#' @details The reference file can be a text file seperated with commas (CSV) or tabs or pipes, an Excel file (either 'xls' or 'xlsx' format) or an R object file (extension '.rds'). To use an Excel file, you need to have the `readxl` package installed. +#' @details The reference file can be a text file separated with commas (CSV) or tabs or pipes, an Excel file (either 'xls' or 'xlsx' format) or an R object file (extension '.rds'). To use an Excel file, you will need to have the `readxl` package installed. #' -#' [set_mo_source()] will check the file for validity: it must be a [`data.frame`], must have a column named `"mo"` which contains values from [`microorganisms$mo`][microorganisms] and must have a reference column with your own defined values. If all tests pass, [set_mo_source()] will read the file into R and export it to `"~/.mo_source.rds"`. This compressed data file will then be used at default for MO determination (function [as.mo()] and consequently all `mo_*` functions like [mo_genus()] and [mo_gramstain()]). The location of the original file will be saved as option with `options(mo_source = path)`. Its timestamp will be saved with `options(mo_source_datetime = ...)`. -#' -#' [get_mo_source()] will return the data set by reading `"~/.mo_source.rds"` with [readRDS()]. If the original file has changed (the file defined with `path`), it will call [set_mo_source()] to update the data file automatically. +#' [set_mo_source()] will check the file for validity: it must be a [data.frame], must have a column named `"mo"` which contains values from [`microorganisms$mo`][microorganisms] and must have a reference column with your own defined values. If all tests pass, [set_mo_source()] will read the file into R and export it to `"~/.mo_source.rds"` after the user **specifically confirms and allows** that this file will be created. For this reason, this function only works in interactive sessions. +#' +#' The created compressed data file `"~/.mo_source.rds"` will be used at default for MO determination (function [as.mo()] and consequently all `mo_*` functions like [mo_genus()] and [mo_gramstain()]). The location of the original file will be saved as an R option with `options(mo_source = path)`. Its timestamp will be saved with `options(mo_source_datetime = ...)`. +#' +#' The function [get_mo_source()] will return the data set by reading `"~/.mo_source.rds"` with [readRDS()]. If the original file has changed (by checking the aforementioned options `mo_source` and `mo_source_datetime`), it will call [set_mo_source()] to update the data file automatically. #' #' Reading an Excel file (`.xlsx`) with only one row has a size of 8-9 kB. The compressed file created with [set_mo_source()] will then have a size of 0.1 kB and can be read by [get_mo_source()] in only a couple of microseconds (millionths of a second). #' diff --git a/R/pca.R b/R/pca.R index 9860b399e..79ad9453b 100755 --- a/R/pca.R +++ b/R/pca.R @@ -78,7 +78,7 @@ pca <- function(x, error = function(e) stop(e$message, call. = FALSE)) if (length(new_list[[i]]) == 1) { if (is.character(new_list[[i]]) & new_list[[i]] %in% colnames(x)) { - # this is to support quoted variables: df %>% pca("mycol1", "mycol2") + # this is to support quoted variables: df %pm>% pca("mycol1", "mycol2") new_list[[i]] <- x[, new_list[[i]]] } else { # remove item - it's a parameter like `center` @@ -102,7 +102,7 @@ pca <- function(x, x <- cbind(x.bak[, sapply(x.bak, function(y) !is.numeric(y) & !all(is.na(y))), drop = FALSE], x) } - x <- ungroup(x) # would otherwise select the grouping vars + x <- pm_ungroup(x) # would otherwise select the grouping vars x <- x[rowSums(is.na(x)) == 0, ] # remove columns containing NAs pca_data <- x[, which(sapply(x, function(x) is.numeric(x)))] diff --git a/R/proportion.R b/R/proportion.R index 868d1d799..b794013d6 100755 --- a/R/proportion.R +++ b/R/proportion.R @@ -29,7 +29,7 @@ #' @param minimum the minimum allowed number of available (tested) isolates. Any isolate count lower than `minimum` will return `NA` with a warning. The default number of `30` isolates is advised by the Clinical and Laboratory Standards Institute (CLSI) as best practice, see Source. #' @param as_percent a logical to indicate whether the output must be returned as a hundred fold with % sign (a character). A value of `0.123456` will then be returned as `"12.3%"`. #' @param only_all_tested (for combination therapies, i.e. using more than one variable for `...`): a logical to indicate that isolates must be tested for all antibiotics, see section *Combination therapy* below -#' @param data a [`data.frame`] containing columns with class [`rsi`] (see [as.rsi()]) +#' @param data a [data.frame] containing columns with class [`rsi`] (see [as.rsi()]) #' @param translate_ab a column name of the [antibiotics] data set to translate the antibiotic abbreviations to, using [ab_property()]. Use a value #' @inheritParams ab_property #' @param combine_SI a logical to indicate whether all values of S and I must be merged into one, so the output only consists of S+I vs. R (susceptible vs. resistant). This used to be the parameter `combine_IR`, but this now follows the redefinition by EUCAST about the interpretion of I (increased exposure) in 2019, see section 'Interpretation of S, I and R' below. Default is `TRUE`. @@ -79,7 +79,7 @@ #' Using `only_all_tested` has no impact when only using one antibiotic as input. #' @source **M39 Analysis and Presentation of Cumulative Antimicrobial Susceptibility Test Data, 4th Edition**, 2014, *Clinical and Laboratory Standards Institute (CLSI)*. . #' @seealso [AMR::count()] to count resistant and susceptible isolates. -#' @return A [`double`] or, when `as_percent = TRUE`, a [`character`]. +#' @return A [double] or, when `as_percent = TRUE`, a [character]. #' @rdname proportion #' @aliases portion #' @name proportion diff --git a/R/resistance_predict.R b/R/resistance_predict.R index c21487611..544aa7650 100755 --- a/R/resistance_predict.R +++ b/R/resistance_predict.R @@ -43,7 +43,7 @@ #' - `"binomial"` or `"binom"` or `"logit"`: a generalised linear regression model with binomial distribution #' - `"loglin"` or `"poisson"`: a generalised log-linear regression model with poisson distribution #' - `"lin"` or `"linear"`: a linear regression model -#' @return A [`data.frame`] with extra class [`resistance_predict`] with columns: +#' @return A [data.frame] with extra class [`resistance_predict`] with columns: #' - `year` #' - `value`, the same as `estimated` when `preserve_measurements = FALSE`, and a combination of `observed` and `estimated` otherwise #' - `se_min`, the lower bound of the standard error with a minimum of `0` (so the standard error will never go below 0%) @@ -134,7 +134,7 @@ resistance_predict <- function(x, dots <- unlist(list(...)) if (length(dots) != 0) { # backwards compatibility with old parameters - dots.names <- dots %>% names() + dots.names <- dots %pm>% names() if ("tbl" %in% dots.names) { x <- dots[which(dots.names == "tbl")] } @@ -264,8 +264,8 @@ resistance_predict <- function(x, observations = df$R + df$S, observed = df$R / (df$R + df$S), stringsAsFactors = FALSE) - df_prediction <- df_prediction %>% - left_join(df_observations, by = "year") + df_prediction <- df_prediction %pm>% + pm_left_join(df_observations, by = "year") df_prediction$estimated <- df_prediction$value if (preserve_measurements == TRUE) { diff --git a/R/rsi.R b/R/rsi.R index 245d6d5e3..40d7afe1a 100755 --- a/R/rsi.R +++ b/R/rsi.R @@ -215,8 +215,9 @@ is.rsi.eligible <- function(x, threshold = 0.05) { as.rsi.default <- function(x, ...) { if (is.rsi(x)) { x - } else if (identical(levels(x), c("S", "I", "R"))) { - structure(x, class = c("rsi", "ordered", "factor")) + } else if (all(is.na(x)) || identical(levels(x), c("S", "I", "R"))) { + structure(.Data = factor(x, levels = c("S", "I", "R"), ordered = TRUE), + class = c("rsi", "ordered", "factor")) } else if (inherits(x, "integer") & all(x %in% c(1:3, NA))) { x[x == 1] <- "S" x[x == 2] <- "I" @@ -263,8 +264,8 @@ as.rsi.default <- function(x, ...) { if (!isFALSE(list(...)$warn)) { # so as.rsi(..., warn = FALSE) will never throw a warning if (na_before != na_after) { - list_missing <- x.bak[is.na(x) & !is.na(x.bak) & x.bak != ""] %>% - unique() %>% + list_missing <- x.bak[is.na(x) & !is.na(x.bak) & x.bak != ""] %pm>% + unique() %pm>% sort() list_missing <- paste0('"', list_missing, '"', collapse = ", ") warning(na_after - na_before, " results truncated (", @@ -324,7 +325,7 @@ as.rsi.mic <- function(x, mo_coerced <- suppressWarnings(as.mo(mo)) guideline_coerced <- get_guideline(guideline) if (is.na(ab_coerced)) { - message(font_red(paste0("Unknown drug: `", font_bold(ab), "`. Rename this column to a drug name or code, and check the output with as.ab()."))) + message(font_red(paste0("Returning NAs for unknown drug: `", font_bold(ab), "`. Rename this column to a drug name or code, and check the output with as.ab()."))) return(as.rsi(rep(NA, length(x)))) } if (length(mo_coerced) == 1) { @@ -394,7 +395,7 @@ as.rsi.disk <- function(x, mo_coerced <- suppressWarnings(as.mo(mo)) guideline_coerced <- get_guideline(guideline) if (is.na(ab_coerced)) { - message(font_red(paste0("Unknown drug: `", font_bold(ab), "`. Rename this column to a drug name or code, and check the output with as.ab()."))) + message(font_red(paste0("Returning NAs for unknown drug: `", font_bold(ab), "`. Rename this column to a drug name or code, and check the output with as.ab()."))) return(as.rsi(rep(NA, length(x)))) } if (length(mo_coerced) == 1) { @@ -509,15 +510,15 @@ as.rsi.data.frame <- function(x, for (i in seq_len(length(ab_cols))) { if (types[i] == "mic") { - x[, ab_cols[i]] <- as.rsi.mic(x = x %>% pull(ab_cols[i]), - mo = x %>% pull(col_mo), + x[, ab_cols[i]] <- as.rsi.mic(x = x %pm>% pm_pull(ab_cols[i]), + mo = x %pm>% pm_pull(col_mo), ab = ab_cols[i], guideline = guideline, uti = uti, conserve_capped_values = conserve_capped_values) } else if (types[i] == "disk") { - x[, ab_cols[i]] <- as.rsi.disk(x = x %>% pull(ab_cols[i]), - mo = x %>% pull(col_mo), + x[, ab_cols[i]] <- as.rsi.disk(x = x %pm>% pm_pull(ab_cols[i]), + mo = x %pm>% pm_pull(col_mo), ab = ab_cols[i], guideline = guideline, uti = uti) @@ -554,6 +555,8 @@ exec_as.rsi <- function(method, conserve_capped_values, add_intrinsic_resistance) { + metadata_mo <- get_mo_failures_uncertainties_renamed() + x_bak <- data.frame(x_mo = paste0(x, mo)) df <- unique(data.frame(x, mo), stringsAsFactors = FALSE) x <- df$x @@ -582,7 +585,7 @@ exec_as.rsi <- function(method, new_rsi <- rep(NA_character_, length(x)) ab_param <- ab - trans <- rsi_translation %>% + trans <- rsi_translation %pm>% subset(guideline == guideline_coerced & method == method_param & ab == ab_param) trans$lookup <- paste(trans$mo, trans$ab) @@ -614,7 +617,7 @@ exec_as.rsi <- function(method, } } - get_record <- trans %>% + get_record <- trans %pm>% # no subsetting to UTI for now subset(lookup %in% c(lookup_mo[i], lookup_genus[i], @@ -625,14 +628,14 @@ exec_as.rsi <- function(method, lookup_other[i])) if (isTRUE(uti[i])) { - get_record <- get_record %>% + get_record <- get_record %pm>% # be as specific as possible (i.e. prefer species over genus): - # desc(uti) = TRUE on top and FALSE on bottom - arrange(desc(uti), desc(nchar(mo))) # 'uti' is a column in data set 'rsi_translation' + # pm_desc(uti) = TRUE on top and FALSE on bottom + pm_arrange(pm_desc(uti), pm_desc(nchar(mo))) # 'uti' is a column in data set 'rsi_translation' } else { - get_record <- get_record %>% - filter(uti == FALSE) %>% # 'uti' is a column in rsi_translation - arrange(desc(nchar(mo))) + get_record <- get_record %pm>% + pm_filter(uti == FALSE) %pm>% # 'uti' is a column in rsi_translation + pm_arrange(pm_desc(nchar(mo))) } get_record <- get_record[1L, ] @@ -643,29 +646,43 @@ exec_as.rsi <- function(method, mic_input <- x[i] mic_S <- as.mic(get_record$breakpoint_S) mic_R <- as.mic(get_record$breakpoint_R) - new_rsi[i] <- ifelse(isTRUE(conserve_capped_values) & mic_input %like% "^<[0-9]", "S", - ifelse(isTRUE(conserve_capped_values) & mic_input %like% "^>[0-9]", "R", - ifelse(isTRUE(which(levels(mic_input) == mic_input) <= which(levels(mic_S) == mic_S)), "S", - ifelse(isTRUE(which(levels(mic_input) == mic_input) >= which(levels(mic_R) == mic_R)), "R", - ifelse(!is.na(get_record$breakpoint_S) & !is.na(get_record$breakpoint_R), "I", - NA_character_))))) + new_rsi[i] <- quick_case_when(isTRUE(conserve_capped_values) & mic_input %like% "^<[0-9]" ~ "S", + isTRUE(conserve_capped_values) & mic_input %like% "^>[0-9]" ~ "R", + # start interpreting: EUCAST uses <= S and > R, CLSI uses <=S and >= R + isTRUE(which(levels(mic_input) == mic_input) <= which(levels(mic_S) == mic_S)) ~ "S", + guideline_coerced %like% "ECUAST" & + isTRUE(which(levels(mic_input) == mic_input) > which(levels(mic_R) == mic_R)) ~ "R", + guideline_coerced %like% "CLSI" & + isTRUE(which(levels(mic_input) == mic_input) >= which(levels(mic_R) == mic_R)) ~ "R", + # return "I" when not match the bottom or top + !is.na(get_record$breakpoint_S) & !is.na(get_record$breakpoint_R) ~ "I", + # and NA otherwise + TRUE ~ NA_character_) } else if (method == "disk") { - new_rsi[i] <- ifelse(isTRUE(as.double(x[i]) >= as.double(get_record$breakpoint_S)), "S", - ifelse(isTRUE(as.double(x[i]) <= as.double(get_record$breakpoint_R)), "R", - ifelse(!is.na(get_record$breakpoint_S) & !is.na(get_record$breakpoint_R), "I", - NA_character_))) + new_rsi[i] <- quick_case_when(isTRUE(as.double(x[i]) >= as.double(get_record$breakpoint_S)) ~ "S", + # start interpreting: EUCAST uses >= S and < R, CLSI uses >=S and <= R + guideline_coerced %like% "ECUAST" & + isTRUE(as.double(x[i]) < as.double(get_record$breakpoint_R)) ~ "R", + guideline_coerced %like% "CLSI" & + isTRUE(as.double(x[i]) <= as.double(get_record$breakpoint_R)) ~ "R", + # return "I" when not match the bottom or top + !is.na(get_record$breakpoint_S) & !is.na(get_record$breakpoint_R) ~ "I", + # and NA otherwise + TRUE ~ NA_character_) } } } - new_rsi <- x_bak %>% - left_join(data.frame(x_mo = paste0(df$x, df$mo), new_rsi), by = "x_mo") %>% - pull(new_rsi) + new_rsi <- x_bak %pm>% + pm_left_join(data.frame(x_mo = paste0(df$x, df$mo), new_rsi), by = "x_mo") %pm>% + pm_pull(new_rsi) if (warned == FALSE) { message(font_green("OK.")) } + load_mo_failures_uncertainties_renamed(metadata_mo) + structure(.Data = factor(new_rsi, levels = c("S", "I", "R"), ordered = TRUE), class = c("rsi", "ordered", "factor")) } @@ -781,7 +798,7 @@ plot.rsi <- function(x, # don't use as.rsi() here, it will confuse plot() data$x <- factor(data$x, levels = c("S", "I", "R"), ordered = TRUE) - ymax <- if_else(max(data$s) > 95, 105, 100) + ymax <- pm_if_else(max(data$s) > 95, 105, 100) # get plot() generic; this was moved from the 'graphics' pkg to the 'base' pkg in R 4.0.0 if (as.integer(R.Version()$major) >= 4) { @@ -799,7 +816,7 @@ plot.rsi <- function(x, axes = axes, ...) # x axis - axis(side = 1, at = 1:n_distinct(data$x), labels = levels(data$x), lwd = 0) + axis(side = 1, at = 1:pm_n_distinct(data$x), labels = levels(data$x), lwd = 0) # y axis, 0-100% axis(side = 2, at = seq(0, 100, 5)) diff --git a/R/rsi_calc.R b/R/rsi_calc.R index e7089016a..3363137ab 100755 --- a/R/rsi_calc.R +++ b/R/rsi_calc.R @@ -54,7 +54,7 @@ rsi_calc <- function(..., ndots <- length(dots) if (is.data.frame(dots_df)) { - # data.frame passed with other columns, like: example_isolates %>% proportion_S(AMC, GEN) + # data.frame passed with other columns, like: example_isolates %pm>% proportion_S(AMC, GEN) dots <- as.character(dots) # remove first element, it's the data.frame @@ -64,12 +64,12 @@ rsi_calc <- function(..., dots <- dots[2:length(dots)] } if (length(dots) == 0 | all(dots == "df")) { - # for complete data.frames, like example_isolates %>% select(AMC, GEN) %>% proportion_S() + # for complete data.frames, like example_isolates %pm>% select(AMC, GEN) %pm>% proportion_S() # and the old rsi function, which has "df" as name of the first parameter x <- dots_df } else { # get dots that are in column names already, and the ones that will be once evaluated using dots_df or global env - # this is to support susceptibility(example_isolates, AMC, dplyr::all_of(some_vector_with_AB_names)) + # this is to support susceptibility(example_isolates, AMC, any_of(some_vector_with_AB_names)) dots <- c(dots[dots %in% colnames(dots_df)], eval(parse(text = dots[!dots %in% colnames(dots_df)]), envir = dots_df, enclos = globalenv())) dots_not_exist <- dots[!dots %in% colnames(dots_df)] @@ -77,14 +77,14 @@ rsi_calc <- function(..., x <- dots_df[, dots, drop = FALSE] } } else if (ndots == 1) { - # only 1 variable passed (can also be data.frame), like: proportion_S(example_isolates$AMC) and example_isolates$AMC %>% proportion_S() + # only 1 variable passed (can also be data.frame), like: proportion_S(example_isolates$AMC) and example_isolates$AMC %pm>% proportion_S() x <- dots_df } else { # multiple variables passed without pipe, like: proportion_S(example_isolates$AMC, example_isolates$GEN) x <- NULL try(x <- as.data.frame(dots, stringsAsFactors = FALSE), silent = TRUE) if (is.null(x)) { - # support for example_isolates %>% group_by(hospital_id) %>% summarise(amox = susceptibility(GEN, AMX)) + # support for example_isolates %pm>% group_by(hospital_id) %pm>% summarise(amox = susceptibility(GEN, AMX)) x <- as.data.frame(list(...), stringsAsFactors = FALSE) } } @@ -138,7 +138,7 @@ rsi_calc <- function(..., } if (print_warning == TRUE) { - warning("Increase speed by transforming to class on beforehand: your_data %>% mutate_if(is.rsi.eligible, as.rsi)", + warning("Increase speed by transforming to class on beforehand: your_data %pm>% mutate_if(is.rsi.eligible, as.rsi)", call. = FALSE) } @@ -187,9 +187,9 @@ rsi_calc_df <- function(type, # "proportion", "count" or "both" translate_ab <- get_translate_ab(translate_ab) # select only groups and antibiotics - if (has_groups(data)) { + if (pm_has_groups(data)) { data_has_groups <- TRUE - groups <- setdiff(names(get_groups(data)), ".rows") # get_groups is from poorman.R + groups <- setdiff(names(pm_get_group_details(data)), ".rows") data <- data[, c(groups, colnames(data)[sapply(data, is.rsi)]), drop = FALSE] } else { data_has_groups <- FALSE @@ -261,13 +261,14 @@ rsi_calc_df <- function(type, # "proportion", "count" or "both" out } - # support dplyr groups - apply_group <- function(.data, fn, groups, ...) { - grouped <- split(x = .data, f = lapply(groups, function(x, .data) as.factor(.data[, x]), .data)) + # based on pm_apply_grouped_function + apply_group <- function(.data, fn, groups, drop = FALSE, ...) { + #groups <- get_groups(.data) + grouped <- pm_split_into_groups(.data, groups, drop) res <- do.call(rbind, unname(lapply(grouped, fn, ...))) if (any(groups %in% colnames(res))) { class(res) <- c("grouped_data", class(res)) - attr(res, "groups") <- groups[groups %in% colnames(res)] + res <- set_groups(res, groups[groups %in% colnames(res)]) } res } @@ -291,7 +292,7 @@ rsi_calc_df <- function(type, # "proportion", "count" or "both" if (data_has_groups) { # ordering by the groups and two more: "antibiotic" and "interpretation" - out <- ungroup(out[do.call("order", out[, seq_len(length(groups) + 2)]), ]) + out <- pm_ungroup(out[do.call("order", out[, seq_len(length(groups) + 2)]), ]) } else { out <- out[order(out$antibiotic, out$interpretation), ] } diff --git a/R/skewness.R b/R/skewness.R index 78b50cb92..2ba2b6d83 100755 --- a/R/skewness.R +++ b/R/skewness.R @@ -25,7 +25,7 @@ #' #' When negative: the left tail is longer; the mass of the distribution is concentrated on the right of the figure. When positive: the right tail is longer; the mass of the distribution is concentrated on the left of the figure. #' @inheritSection lifecycle Questioning lifecycle -#' @param x a vector of values, a [`matrix`] or a [`data.frame`] +#' @param x a vector of values, a [`matrix`] or a [data.frame] #' @param na.rm a logical value indicating whether `NA` values should be stripped before the computation proceeds. #' @seealso [kurtosis()] #' @rdname skewness diff --git a/R/zzz.R b/R/zzz.R index c51a4c58a..75f748cc3 100755 --- a/R/zzz.R +++ b/R/zzz.R @@ -20,6 +20,10 @@ # ==================================================================== # .onLoad <- function(libname, pkgname) { + assign(x = "AB_lookup", + value = create_AB_lookup(), + envir = asNamespace("AMR")) + assign(x = "MO_lookup", value = create_MO_lookup(), envir = asNamespace("AMR")) @@ -60,14 +64,27 @@ "\n[ prevent his notice with suppressPackageStartupMessages(library(AMR)) or use options(AMR_silentstart = TRUE) ]") } +create_AB_lookup <- function() { + AB_lookup <- AMR::antibiotics + AB_lookup$generalised_name <- generalise_antibiotic_name(AB_lookup$name) + AB_lookup$generalised_synonyms <- lapply(AB_lookup$synonyms, generalise_antibiotic_name) + AB_lookup$generalised_abbreviations <- lapply(AB_lookup$abbreviations, generalise_antibiotic_name) + AB_lookup$generalised_loinc <- lapply(AB_lookup$loinc, generalise_antibiotic_name) + AB_lookup +} + create_MO_lookup <- function() { MO_lookup <- AMR::microorganisms - MO_lookup$kingdom_index <- 99 + MO_lookup$kingdom_index <- NA_real_ MO_lookup[which(MO_lookup$kingdom == "Bacteria" | MO_lookup$mo == "UNKNOWN"), "kingdom_index"] <- 1 MO_lookup[which(MO_lookup$kingdom == "Fungi"), "kingdom_index"] <- 2 MO_lookup[which(MO_lookup$kingdom == "Protozoa"), "kingdom_index"] <- 3 MO_lookup[which(MO_lookup$kingdom == "Archaea"), "kingdom_index"] <- 4 + # all the rest + MO_lookup[which(is.na(MO_lookup$kingdom_index)), "kingdom_index"] <- 5 + + MO_lookup$prevalence_kingdom_index <- MO_lookup$prevalence * MO_lookup$kingdom_index # use this paste instead of `fullname` to work with Viridans Group Streptococci, etc. MO_lookup$fullname_lower <- tolower(trimws(paste(MO_lookup$genus, diff --git a/_pkgdown.yml b/_pkgdown.yml index 125002b9c..6f6165949 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -165,14 +165,15 @@ reference: them may also be suitable for your analysis. Especially the 'like' function can be useful: `if (x %like% y) {...}`. contents: - - "`get_locale`" - - "`like`" - "`age_groups`" - "`age`" - - "`join`" - "`availability`" - - "`pca`" + - "`get_locale`" - "`ggplot_pca`" + - "`join`" + - "`like`" + - "`mo_matching_score" + - "`pca`" - title: "Other: statistical tests" desc: > diff --git a/data-raw/ab.md5 b/data-raw/ab.md5 index 6b825550a..310f9a244 100644 --- a/data-raw/ab.md5 +++ b/data-raw/ab.md5 @@ -1 +1 @@ -37a7be09e34b5ec657a4bad94f45f355 +8f2f30e72f9d7c318439abe04073bece diff --git a/data-raw/antibiotics.dta b/data-raw/antibiotics.dta index c5e259bac..30df160bc 100644 Binary files a/data-raw/antibiotics.dta and b/data-raw/antibiotics.dta differ diff --git a/data-raw/antibiotics.rds b/data-raw/antibiotics.rds index 83b76dee7..f6fb626a9 100644 Binary files a/data-raw/antibiotics.rds and b/data-raw/antibiotics.rds differ diff --git a/data-raw/antibiotics.sas b/data-raw/antibiotics.sas index 0c24fa7f0..68531fa15 100644 Binary files a/data-raw/antibiotics.sas and b/data-raw/antibiotics.sas differ diff --git a/data-raw/antibiotics.sav b/data-raw/antibiotics.sav index 529bd5e1b..df3f2eae3 100644 Binary files a/data-raw/antibiotics.sav and b/data-raw/antibiotics.sav differ diff --git a/data-raw/antibiotics.txt b/data-raw/antibiotics.txt index 59d55a0d3..5062d566b 100644 --- a/data-raw/antibiotics.txt +++ b/data-raw/antibiotics.txt @@ -7,8 +7,8 @@ "AMK" "J01GB06" 37768 "Amikacin" "Aminoglycosides" "Aminoglycoside antibacterials" "Other aminoglycosides" "c(\"ak\", \"ami\", \"amik\", \"amk\", \"an\")" "c(\"amicacin\", \"amikacillin\", \"amikacin\", \"amikacin base\", \"amikacin dihydrate\", \"amikacin sulfate\", \"amikacina\", \"amikacine\", \"amikacinum\", \"amikavet\", \"amikin\", \"amiklin\", \"amikozit\", \"amukin\", \"arikace\", \"briclin\", \"lukadin\", \"mikavir\", \"pierami\", \"potentox\")" 1 "g" "c(\"13546-7\", \"15098-7\", \"17798-0\", \"31097-9\", \"31098-7\", \"31099-5\", \"3319-1\", \"3320-9\", \"3321-7\", \"35669-1\", \"50802-8\", \"50803-6\", \"56628-1\", \"59378-0\", \"80972-3\")" "AKF" "Amikacin/fosfomycin" "Aminoglycosides" "" "" "" "AMX" "J01CA04" 33613 "Amoxicillin" "Beta-lactams/penicillins" "Beta-lactam antibacterials, penicillins" "Penicillins with extended spectrum" "c(\"ac\", \"amox\", \"amx\")" "c(\"actimoxi\", \"amoclen\", \"amolin\", \"amopen\", \"amopenixin\", \"amoxibiotic\", \"amoxicaps\", \"amoxicilina\", \"amoxicillin\", \"amoxicilline\", \"amoxicillinum\", \"amoxiden\", \"amoxil\", \"amoxivet\", \"amoxy\", \"amoxycillin\", \"anemolin\", \"aspenil\", \"biomox\", \"bristamox\", \"cemoxin\", \"clamoxyl\", \"delacillin\", \"dispermox\", \"efpenix\", \"flemoxin\", \"hiconcil\", \"histocillin\", \"hydroxyampicillin\", \"ibiamox\", \"imacillin\", \"lamoxy\", \"metafarma capsules\", \"metifarma capsules\", \"moxacin\", \"moxatag\", \"ospamox\", \"pamoxicillin\", -\"piramox\", \"robamox\", \"sawamox pm\", \"tolodina\", \"unicillin\", \"utimox\", \"vetramox\")" 1 "g" 1 "g" "c(\"16365-9\", \"25274-2\", \"3344-9\", \"80133-2\")" -"AMC" "J01CR02" 23665637 "Amoxicillin/clavulanic acid" "Beta-lactams/penicillins" "Beta-lactam antibacterials, penicillins" "Combinations of penicillins, incl. beta-lactamase inhibitors" "c(\"a/c\", \"amcl\", \"aml\", \"aug\", \"xl\")" "c(\"amocla\", \"amoclan\", \"amoclav\", \"amoxsiklav\", \"augmentan\", \"augmentin\", \"augmentin xr\", \"augmentine\", \"auspilic\", \"clamentin\", \"clamobit\", \"clavamox\", \"clavinex\", \"clavoxilin plus\", \"clavulin\", \"clavumox\", \"coamoxiclav\", \"eumetinex\", \"kmoxilin\", \"spectramox\", \"spektramox\", \"viaclav\", \"xiclav\")" 1 "g" 3 "g" "character(0)" +\"piramox\", \"robamox\", \"sawamox pm\", \"tolodina\", \"unicillin\", \"utimox\", \"vetramox\")" 1.5 "g" 1 "g" "c(\"16365-9\", \"25274-2\", \"3344-9\", \"80133-2\")" +"AMC" "J01CR02" 23665637 "Amoxicillin/clavulanic acid" "Beta-lactams/penicillins" "Beta-lactam antibacterials, penicillins" "Combinations of penicillins, incl. beta-lactamase inhibitors" "c(\"a/c\", \"amcl\", \"aml\", \"aug\", \"xl\")" "c(\"amocla\", \"amoclan\", \"amoclav\", \"amoxsiklav\", \"augmentan\", \"augmentin\", \"augmentin xr\", \"augmentine\", \"auspilic\", \"clamentin\", \"clamobit\", \"clavamox\", \"clavinex\", \"clavoxilin plus\", \"clavulin\", \"clavumox\", \"coamoxiclav\", \"eumetinex\", \"kmoxilin\", \"spectramox\", \"spektramox\", \"viaclav\", \"xiclav\")" 1.5 "g" 3 "g" "character(0)" "AXS" 465441 "Amoxicillin/sulbactam" "Beta-lactams/penicillins" "" "" "" "AMB" "J02AA01" 5280965 "Amphotericin B" "Antifungals/antimycotics" "Antimycotics for systemic use" "Antibiotics" "c(\"amfb\", \"amph\")" "c(\"abelcet\", \"abelecet\", \"ambisome\", \"amfotericina b\", \"amphocin\", \"amphomoronal\", \"amphortericin b\", \"amphotec\", \"amphotericin\", \"amphotericin b\", \"amphotericine b\", \"amphotericinum b\", \"amphozone\", \"anfotericine b\", \"fungilin\", \"fungisome\", \"fungisone\", \"fungizone\", \"halizon\")" 35 "mg" "c(\"16370-9\", \"3353-0\", \"3354-8\", \"40707-2\", \"40757-7\", \"49859-2\")" "AMH" "Amphotericin B-high" "Aminoglycosides" "c(\"amfo b high\", \"amhl\", \"ampho b high\", \"amphotericin high\")" "" "" @@ -43,7 +43,7 @@ "BDP" "J01EA02" 68760 "Brodimoprim" "Trimethoprims" "Sulfonamides and trimethoprim" "Trimethoprim and derivatives" "" "c(\"brodimoprim\", \"brodimoprima\", \"brodimoprime\", \"brodimoprimum\", \"bromdimoprim\", \"hyprim\", \"unitrim\")" 0.2 "g" "character(0)" "BUT" 47472 "Butoconazole" "Antifungals/antimycotics" "" "c(\"butaconazole\", \"butoconazol\", \"butoconazole\", \"butoconazolum\", \"compositenstarke\", \"dahlin\", \"femstat\", \"gynofort\", \"polyfructosanum\")" "character(0)" "CDZ" "J01DD09" 44242317 "Cadazolid" "Cephalosporins (3rd gen.)" "Other beta-lactam antibacterials" "Third-generation cephalosporins" "" "cadazolid" 2 "g" "character(0)" -"CLA" "J04AA03" "Calcium aminosalicylate" "Antimycobacterials" "Drugs for treatment of tuberculosis" "Aminosalicylic acid and derivatives" "" "" "" +"CLA" "J04AA03" "Calcium aminosalicylate" "Antimycobacterials" "Drugs for treatment of tuberculosis" "Aminosalicylic acid and derivatives" "" "" 15 "" "CAP" "J04AB30" 135565060 "Capreomycin" "Antimycobacterials" "Drugs for treatment of tuberculosis" "Antibiotics" "c(\"\", \"capr\")" "" 1 "g" "" "CRB" "J01CA03" 20824 "Carbenicillin" "Beta-lactams/penicillins" "Beta-lactam antibacterials, penicillins" "Penicillins with extended spectrum" "c(\"bar\", \"carb\", \"cb\")" "c(\"anabactyl\", \"carbenicilina\", \"carbenicillin\", \"carbenicillina\", \"carbenicilline\", \"carbenicillinum\", \"geopen\", \"pyopen\")" 12 "g" "3434-8" "CRN" "J01CA05" 93184 "Carindacillin" "Beta-lactams/penicillins" "Beta-lactam antibacterials, penicillins" "Penicillins with extended spectrum" "" "c(\"carindacilina\", \"carindacillin\", \"carindacilline\", \"carindacillinum\")" 4 "g" "character(0)" @@ -100,7 +100,7 @@ "CDC" "Cefpodoxime/clavulanic acid" "Cephalosporins (3rd gen.)" "c(\"\", \"cecl\")" "" "" "CPR" "J01DC10" 5281006 "Cefprozil" "Cephalosporins (2nd gen.)" "Other beta-lactam antibacterials" "Second-generation cephalosporins" "c(\"cpr\", \"cpz\", \"fp\")" "c(\"arzimol\", \"brisoral\", \"cefprozil\", \"cefprozil anhydrous\", \"cefprozil hydrate\", \"cefprozilo\", \"cefprozilum\", \"cefzil\", \"cronocef\", \"procef\", \"serozil\")" 1 "g" "character(0)" "CEQ" 5464355 "Cefquinome" "Cephalosporins (4th gen.)" "" "c(\"cefquinoma\", \"cefquinome\", \"cefquinomum\", \"cobactan\")" "character(0)" -"CRD" "J01DB11" 5284529 "Cefroxadine" "Cephalosporins (1st gen.)" "Other beta-lactam antibacterials" "First-generation cephalosporins" "" "c(\"cefroxadine\", \"cefroxadino\", \"cefroxadinum\")" "character(0)" +"CRD" "J01DB11" 5284529 "Cefroxadine" "Cephalosporins (1st gen.)" "Other beta-lactam antibacterials" "First-generation cephalosporins" "" "c(\"cefroxadine\", \"cefroxadino\", \"cefroxadinum\")" 2.1 "character(0)" "CFS" "J01DD03" 656575 "Cefsulodin" "Cephalosporins (3rd gen.)" "Other beta-lactam antibacterials" "Third-generation cephalosporins" "c(\"cfsl\", \"cfsu\")" "c(\"cefsulodin\", \"cefsulodine\", \"cefsulodino\", \"cefsulodinum\")" 4 "g" "c(\"131-3\", \"25242-9\")" "CSU" 68718 "Cefsumide" "Cephalosporins (unclassified gen.)" "" "c(\"cefsumide\", \"cefsumido\", \"cefsumidum\")" "character(0)" "CPT" "J01DI02" 56841980 "Ceftaroline" "Cephalosporins (5th gen.)" "c(\"\", \"cfro\")" "c(\"teflaro\", \"zinforo\")" "character(0)" @@ -163,7 +163,7 @@ \"slphadione\", \"sulfadione\", \"sulfona\", \"sulfone ucb\", \"sulfonyldianiline\", \"sulphadione\", \"sulphonyldianiline\", \"sumicure s\", \"tarimyl\", \"udolac\", \"wln: zr dswr dz\")" 50 "mg" "9747-7" "DAP" "J01XX09" 16134395 "Daptomycin" "Other antibacterials" "Other antibacterials" "Other antibacterials" "c(\"dap\", \"dapt\")" "c(\"cidecin\", \"cubicin\", \"dapcin\", \"daptomicina\", \"daptomycine\", \"daptomycinum\")" 0.28 "g" "character(0)" "DFX" 487101 "Delafloxacin" "Quinolones" "" "c(\"baxdela\", \"delafloxacin\", \"delafloxacinum\")" "character(0)" -"DLM" "J04AK06" 6480466 "Delamanid" "Antimycobacterials" "Drugs for treatment of tuberculosis" "Other drugs for treatment of tuberculosis" "dela" "c(\"delamanid\", \"deltyba\")" "character(0)" +"DLM" "J04AK06" 6480466 "Delamanid" "Antimycobacterials" "Drugs for treatment of tuberculosis" "Other drugs for treatment of tuberculosis" "dela" "c(\"delamanid\", \"deltyba\")" 0.2 "character(0)" "DEM" "J01AA01" 54680690 "Demeclocycline" "Tetracyclines" "Tetracyclines" "Tetracyclines" "" "c(\"bioterciclin\", \"clortetrin\", \"deganol\", \"demeclociclina\", \"demeclocycline\", \"demeclocyclinum\", \"demeclor\", \"demetraclin\", \"diuciclin\", \"elkamicina\", \"ledermycin\", \"mexocine\", \"novotriclina\", \"perciclina\", \"sumaclina\")" 0.6 "g" "c(\"10982-7\", \"29494-2\")" "DKB" "J01GB09" 470999 "Dibekacin" "Aminoglycosides" "Aminoglycoside antibacterials" "Other aminoglycosides" "" "c(\"debecacin\", \"dibekacin\", \"dibekacin sulfate\", \"dibekacina\", \"dibekacine\", \"dibekacinum\", \"dideoxykanamycin b\", \"kappati\", \"orbicin\", \"panamicin\")" 0.14 "g" "character(0)" "DIC" "J01CF01" 18381 "Dicloxacillin" "Beta-lactams/penicillins" "Beta-lactam antibacterials, penicillins" "Beta-lactamase resistant penicillins" "c(\"\", \"dicl\")" "c(\"dichloroxacillin\", \"diclossacillina\", \"dicloxaciclin\", \"dicloxacilin\", \"dicloxacilina\", \"dicloxacillin\", \"dicloxacillin sodium\", \"dicloxacillina\", \"dicloxacilline\", \"dicloxacillinum\", \"dicloxacycline\", \"dycill\", \"dynapen\", \"maclicine\", \"nm|| dicloxacillin\", \"pathocil\")" 2 "g" 2 "g" "c(\"10984-3\", \"16769-2\", \"25252-8\")" @@ -188,7 +188,7 @@ "ETI1" "J04AD03" 2761171 "Ethionamide" "Antimycobacterials" "Drugs for treatment of tuberculosis" "Thiocarbamide derivatives" "ethi" "c(\"aethionamidum\", \"aetina\", \"aetiva\", \"amidazin\", \"amidazine\", \"ethatyl\", \"ethimide\", \"ethina\", \"ethinamide\", \"ethionamide\", \"ethionamidum\", \"ethioniamide\", \"ethylisothiamide\", \"ethyonomide\", \"etimid\", \"etiocidan\", \"etionamid\", \"etionamida\", \"etionamide\", \"etioniamid\", \"etionid\", \"etionizin\", \"etionizina\", \"etionizine\", \"fatoliamid\", \"iridocin\", \"iridocin bayer\", \"iridozin\", \"isothin\", \"isotiamida\", \"itiocide\", \"nicotion\", \"nisotin\", \"nizotin\", \"rigenicid\", \"sertinon\", \"teberus\", \"thianid\", \"thianide\", \"thioamide\", \"thiodine\", \"thiomid\", \"thioniden\", \"tianid\", \"tiomid\", \"trecator\", \"trecator sc\", \"trekator\", \"trescatyl\", \"trescazide\", \"tubenamide\", \"tubermin\", \"tuberoid\", \"tuberoson\")" 0.75 "g" "16845-0" "ETO" 6034 "Ethopabate" "Other antibacterials" "" "c(\"amprol plus\", \"ethopabat\", \"ethopabate\", \"ethyl pabate\")" "character(0)" -"FAR" "J01DI03" 65894 "Faropenem" "Other antibacterials" "" "c(\"faropenem\", \"faropenem sodium\", \"fropenem\", \"fropenum sodium\")" "character(0)" +"FAR" "J01DI03" 65894 "Faropenem" "Other antibacterials" "" "c(\"faropenem\", \"faropenem sodium\", \"fropenem\", \"fropenum sodium\")" 0.75 "character(0)" "FDX" 10034073 "Fidaxomicin" "Other antibacterials" "" "c(\"dificid\", \"dificlir\", \"difimicin\", \"fidaxomicin\", \"lipiarmycin\", \"tiacumicin b\")" "character(0)" "FIN" 11567473 "Finafloxacin" "Quinolones" "" "finafloxacin" "character(0)" "FLA" 46783781 "Flavomycin" "Other antibacterials" "" "moenomycin complex" "character(0)" @@ -210,7 +210,7 @@ "GAM" 59364992 "Gamithromycin" "Macrolides/lincosamides" "" "gamithromycin" "character(0)" "GRN" 124093 "Garenoxacin" "Quinolones" "" "c(\"ganefloxacin\", \"garenfloxacin\", \"garenoxacin\")" "character(0)" "GAT" "J01MA16" 5379 "Gatifloxacin" "Quinolones" "Quinolone antibacterials" "Fluoroquinolones" "c(\"\", \"gati\")" "c(\"gatiflo\", \"gatifloxacin\", \"gatifloxacine\", \"gatifloxcin\", \"gatilox\", \"gatiquin\", \"gatispan\", \"tequin\", \"tequin and zymar\", \"zymaxid\")" 0.4 "g" 0.4 "g" "character(0)" -"GEM" "J01MA15" 9571107 "Gemifloxacin" "Quinolones" "Quinolone antibacterials" "Fluoroquinolones" "" "c(\"factiv\", \"factive\", \"gemifioxacin\", \"gemifloxacin\", \"gemifloxacine\", \"gemifloxacino\", \"gemifloxacinum\")" "character(0)" +"GEM" "J01MA15" 9571107 "Gemifloxacin" "Quinolones" "Quinolone antibacterials" "Fluoroquinolones" "" "c(\"factiv\", \"factive\", \"gemifioxacin\", \"gemifloxacin\", \"gemifloxacine\", \"gemifloxacino\", \"gemifloxacinum\")" 0.32 "character(0)" "GEN" "J01GB03" 3467 "Gentamicin" "Aminoglycosides" "Aminoglycoside antibacterials" "Other aminoglycosides" "c(\"cn\", \"gen\", \"gent\", \"gm\")" "c(\"apogen\", \"centicin\", \"cidomycin\", \"garasol\", \"genoptic liquifilm\", \"genoptic s.o.p.\", \"gentacycol\", \"gentafair\", \"gentak\", \"gentamar\", \"gentamcin sulfate\", \"gentamicin\", \"gentamicina\", \"gentamicine\", \"gentamicins\", \"gentamicinum\", \"gentamycin\", \"gentamycins\", \"gentamycinum\", \"gentavet\", \"gentocin\", \"jenamicin\", \"lyramycin\", \"oksitselanim\", \"refobacin\", \"refobacin tm\", \"septigen\", \"uromycine\")" 0.24 "g" "c(\"13561-6\", \"13562-4\", \"15106-8\", \"22746-2\", \"22747-0\", \"31091-2\", \"31092-0\", \"31093-8\", \"35668-3\", \"3663-2\", \"3664-0\", \"3665-7\", \"39082-3\", \"47109-4\", \"59379-8\", \"80971-5\", \"88111-0\")" "GEH" "Gentamicin-high" "Aminoglycosides" "c(\"gehl\", \"genta high\", \"gentamicin high\")" "" "" "GEP" 25101874 "Gepotidacin" "Other antibacterials" "" "gepotidacin" "character(0)" @@ -250,7 +250,7 @@ "LIN" "J01FF02" 3000540 "Lincomycin" "Macrolides/lincosamides" "Macrolides, lincosamides and streptogramins" "Lincosamides" "linc" "c(\"cillimycin\", \"jiemycin\", \"lincolcina\", \"lincolnensin\", \"lincomicina\", \"lincomycin\", \"lincomycin a\", \"lincomycine\", \"lincomycinum\")" 1.8 "g" 1.8 "g" "87597-1" "LNZ" "J01XX08" 441401 "Linezolid" "Other antibacterials" "Other antibacterials" "Other antibacterials" "c(\"line\", \"lnz\", \"lz\", \"lzd\")" "c(\"linezlid\", \"linezoid\", \"linezolid\", \"linezolide\", \"linezolidum\", \"zivoxid\", \"zyvoxa\", \"zyvoxam\", \"zyvoxid\")" 1.2 "g" 1.2 "g" "c(\"34202-2\", \"80609-1\")" "LFE" "Linoprist-flopristin" "Other antibacterials" "" "" "" -"LOM" "J01MA07" 3948 "Lomefloxacin" "Quinolones" "Quinolone antibacterials" "Fluoroquinolones" "c(\"lmf\", \"lom\", \"lome\")" "c(\"lomefloxacin\", \"lomefloxacine\", \"lomefloxacino\", \"lomefloxacinum\", \"maxaquin\")" "character(0)" +"LOM" "J01MA07" 3948 "Lomefloxacin" "Quinolones" "Quinolone antibacterials" "Fluoroquinolones" "c(\"lmf\", \"lom\", \"lome\")" "c(\"lomefloxacin\", \"lomefloxacine\", \"lomefloxacino\", \"lomefloxacinum\", \"maxaquin\")" 0.4 "character(0)" "LOR" "J01DC08" 5284585 "Loracarbef" "Cephalosporins (2nd gen.)" "Other beta-lactam antibacterials" "Second-generation cephalosporins" "c(\"\", \"lora\")" "c(\"anhydrous loracarbef\", \"lorabid\", \"loracarbef\", \"loracarbefum\", \"lorbef\", \"loribid\")" 0.6 "g" "character(0)" "LYM" "J01AA04" 54707177 "Lymecycline" "Tetracyclines" "Tetracyclines" "Tetracyclines" "" "c(\"biovetin\", \"chlortetracyclin\", \"ciclisin\", \"ciclolysal\", \"infaciclina\", \"limeciclina\", \"lisinbiotic\", \"lymecyclin\", \"lymecycline\", \"lymecyclinum\", \"mucomycin\", \"ntetracycline\", \"tetralisal\", \"tetralysal\", \"vebicyclysal\")" 0.6 "g" 0.6 "g" "character(0)" "MNA" "J01XX06" 1292 "Mandelic acid" "Other antibacterials" "Other antibacterials" "Other antibacterials" "" "c(\"acido mandelico\", \"almond acid\", \"amygdalic acid\", \"benzoglycolic acid\", \"hydroxyacetic acid\", \"kyselina mandlova\", \"mandelic acid\", \"paramandelic acid\", \"phenylglycolic acid\", \"uromaline\")" 12 "g" "character(0)" @@ -277,7 +277,7 @@ "MIF" "J02AX05" 477468 "Micafungin" "Antifungals/antimycotics" "Antimycotics for systemic use" "Other antimycotics for systemic use" "c(\"\", \"mica\")" "c(\"micafungin\", \"mycamine\")" 0.1 "g" "58418-5" "MCZ" "J02AB01" 4189 "Miconazole" "Antifungals/antimycotics" "Antimycotics for systemic use" "Imidazole derivatives" "mico" "c(\"aflorix\", \"albistat\", \"andergin\", \"brentan\", \"conofite\", \"dactarin\", \"daktarin\", \"daktarin iv\", \"florid\", \"lotrimin af\", \"micantin\", \"miconasil nitrate\", \"miconazol\", \"miconazole\", \"miconazole base\", \"miconazolo\", \"miconazolum\", \"micozole\", \"minostate\", \"monista\", \"monistat\", \"monistat iv\", \"oravig\", \"vusion\", \"zimybase\", \"zimycan\")" 1 "g" "17278-3" "MCR" 3037206 "Micronomicin" "Aminoglycosides" "" "c(\"gentamicin c\", \"micromycin\", \"micronomicin\", \"micronomicina\", \"micronomicine\", \"micronomicinum\", \"sagamicin\", \"santemycin\")" "character(0)" -"MID" "J01FA03" 5282169 "Midecamycin" "Macrolides/lincosamides" "Macrolides, lincosamides and streptogramins" "Macrolides" "" "c(\"aboren\", \"espinomycin a\", \"macropen\", \"madecacine\", \"medemycin\", \"midecamicina\", \"midecamycin\", \"midecamycin a\", \"midecamycine\", \"midecamycinum\", \"midecin\", \"momicine\", \"mydecamycin\", \"myoxam\", \"normicina\", \"rubimycin\", \"turimycin p\")" 1 "g" "character(0)" +"MID" "J01FA03" 5282169 "Midecamycin" "Macrolides/lincosamides" "Macrolides, lincosamides and streptogramins" "Macrolides" "" "c(\"aboren\", \"espinomycin a\", \"macropen\", \"madecacine\", \"medemycin\", \"midecamicina\", \"midecamycin\", \"midecamycin a\", \"midecamycine\", \"midecamycinum\", \"midecin\", \"momicine\", \"mydecamycin\", \"myoxam\", \"normicina\", \"rubimycin\", \"turimycin p\")" 1.2 1 "g" "character(0)" "MIL" 37614 "Miloxacin" "Quinolones" "" "c(\"miloxacin\", \"miloxacine\", \"miloxacino\", \"miloxacinum\")" "character(0)" "MNO" "J01AA08" 54675783 "Minocycline" "Tetracyclines" "Tetracyclines" "Tetracyclines" "c(\"mc\", \"mh\", \"mi\", \"min\", \"mino\", \"mn\", \"mno\")" "c(\"akamin\", \"aknemin\", \"borymycin\", \"dynacin\", \"klinomycin\", \"minociclina\", \"minocin\", \"minocline\", \"minocyclin\", \"minocycline\", \"minocyclinum\", \"minocyn\", \"minoderm\", \"minomycin\", \"sebomin\", \"solodyn\", \"vectrin\")" 0.2 "g" 0.2 "g" "c(\"34606-4\", \"3822-4\", \"49757-8\")" "MCM" "J01FA11" 5282188 "Miocamycin" "Macrolides/lincosamides" "Macrolides, lincosamides and streptogramins" "Macrolides" "" "c(\"acecamycin\", \"macroral\", \"midecamycin acetate\", \"miocamen\", \"miocamycine\", \"miokamycin\", \"myocamicin\", \"ponsinomycin\")" 1.2 "g" "character(0)" @@ -343,7 +343,7 @@ "PPA" "J01MB04" 4831 "Pipemidic acid" "Quinolones" "Quinolone antibacterials" "Other quinolones" "c(\"pipz\", \"pizu\")" "c(\"acide pipemidique\", \"acido pipemidico\", \"acidum pipemidicum\", \"deblaston\", \"dolcol\", \"pipedac\", \"pipemid\", \"pipemidic\", \"pipemidic acid\", \"pipemidicacid\", \"pipram\", \"uromidin\")" 0.8 "g" "character(0)" "PIP" "J01CA12" 43672 "Piperacillin" "Beta-lactams/penicillins" "Beta-lactam antibacterials, penicillins" "Penicillins with extended spectrum" "c(\"pi\", \"pip\", \"pipc\", \"pipe\", \"pp\")" "c(\"isipen\", \"pentcillin\", \"peperacillin\", \"peracin\", \"piperacilina\", \"piperacillin\", \"piperacillin na\", \"piperacillin sodium\", \"piperacilline\", \"piperacillinum\", \"pipercillin\", \"pipracil\", \"pipril\")" 14 "g" "c(\"25268-4\", \"3972-7\")" "PIS" "Piperacillin/sulbactam" "Beta-lactams/penicillins" "" "" "" -"TZP" "J01CR05" 461573 "Piperacillin/tazobactam" "Beta-lactams/penicillins" "Beta-lactam antibacterials, penicillins" "Combinations of penicillins, incl. beta-lactamase inhibitors" "c(\"p/t\", \"piptaz\", \"pita\", \"pt\", \"ptc\", \"ptz\", \"tzp\")" "c(\"\", \"tazocel\", \"tazocillin\", \"tazocin\", \"zosyn\")" 14 "g" "character(0)" +"TZP" "J01CR05" 461573 "Piperacillin/tazobactam" "Beta-lactams/penicillins" "Beta-lactam antibacterials, penicillins" "Combinations of penicillins, incl. beta-lactamase inhibitors" "c(\"p/t\", \"piptaz\", \"piptazo\", \"pita\", \"pt\", \"ptc\", \"ptz\", \"tzp\")" "c(\"\", \"tazocel\", \"tazocillin\", \"tazocin\", \"zosyn\")" 14 "g" "character(0)" "PRC" 71978 "Piridicillin" "Beta-lactams/penicillins" "" "piridicillin" "character(0)" "PRL" 157385 "Pirlimycin" "Other antibacterials" "" "c(\"pirlimycin\", \"pirlimycina\", \"pirlimycine\", \"pirlimycinum\", \"pirsue\")" "character(0)" "PIR" "J01MB03" 4855 "Piromidic acid" "Quinolones" "Quinolone antibacterials" "Other quinolones" "" "c(\"acide piromidique\", \"acido piromidico\", \"acidum piromidicum\", \"actrun c\", \"bactramyl\", \"enterol\", \"gastrurol\", \"panacid\", \"pirodal\", \"piromidic acid\", \"pyrido\", \"reelon\", \"septural\", \"urisept\", \"uropir\", \"zaomeal\")" 2 "g" "character(0)" @@ -369,7 +369,7 @@ "RAM" 16132338 "Ramoplanin" "Glycopeptides" "" "ramoplanin" "character(0)" "RZM" 10993211 "Razupenem" "Carbapenems" "" "razupenem" "character(0)" "RTP" "A07AA11" 6918462 "Retapamulin" "Other antibacterials" "Intestinal antiinfectives" "Antibiotics" "" "c(\"altabax\", \"altargo\", \"retapamulin\")" 0.6 "g" "character(0)" -"RBC" "J02AC05" 44631912 "Ribociclib" "Antifungals/antimycotics" "Antimycotics for systemic use" "Triazole derivatives" "ribo" "c(\"kisqali\", \"ribociclib\")" "character(0)" +"RBC" "J02AC05" 44631912 "Ribociclib" "Antifungals/antimycotics" "Antimycotics for systemic use" "Triazole derivatives" "ribo" "c(\"kisqali\", \"ribociclib\")" 0.2 "character(0)" "RST" "J01GB10" 33042 "Ribostamycin" "Aminoglycosides" "Aminoglycoside antibacterials" "Other aminoglycosides" "" "c(\"dekamycin iv\", \"hetangmycin\", \"ribastamin\", \"ribostamicina\", \"ribostamycin\", \"ribostamycine\", \"ribostamycinum\", \"vistamycin\", \"xylostatin\")" 1 "g" "character(0)" "RID1" 16659285 "Ridinilazole" "Other antibacterials" "" "ridinilazole" "character(0)" "RIB" "J04AB04" 135398743 "Rifabutin" "Antimycobacterials" "Drugs for treatment of tuberculosis" "Antibiotics" "rifb" "c(\"alfacid\", \"ansamicin\", \"ansamycin\", \"ansatipin\", \"ansatipine\", \"mycobutin\", \"rifabutin\", \"rifabutina\", \"rifabutine\", \"rifabutinum\")" 0.15 "g" "24032-5" @@ -392,7 +392,7 @@ "SAR" 56208 "Sarafloxacin" "Quinolones" "" "c(\"difloxacine\", \"difloxacino\", \"difloxacinum\", \"saraflox\", \"sarafloxacin\", \"sarafloxacine\", \"sarafloxacino\", \"sarafloxacinum\")" "character(0)" "SRX" 9933415 "Sarmoxicillin" "Beta-lactams/penicillins" "" "sarmoxicillin" "character(0)" "SEC" 71815 "Secnidazole" "Other antibacterials" "" "c(\"flagentyl\", \"secnidal\", \"secnidazol\", \"secnidazole\", \"secnidazolum\", \"secnil\", \"sindose\", \"solosec\")" "character(0)" -"SMF" "J04AK05" "Simvastatin/fenofibrate" "Antimycobacterials" "Drugs for treatment of tuberculosis" "Other drugs for treatment of tuberculosis" "simv" "" "" +"SMF" "J04AK05" "Simvastatin/fenofibrate" "Antimycobacterials" "Drugs for treatment of tuberculosis" "Other drugs for treatment of tuberculosis" "simv" "" 86 "" "SIS" "J01GB08" 36119 "Sisomicin" "Aminoglycosides" "Aminoglycoside antibacterials" "Other aminoglycosides" "siso" "c(\"rickamicin\", \"salvamina\", \"siseptin sulfate\", \"sisomicin\", \"sisomicin sulfate\", \"sisomicina\", \"sisomicine\", \"sisomicinum\", \"sisomin\", \"sisomycin\", \"sissomicin\", \"sizomycin\")" 0.24 "g" "character(0)" "SIT" 461399 "Sitafloxacin" "Quinolones" "" "c(\"gracevit\", \"sitafloxacinisomer\")" "character(0)" "SDA" "J04AA02" 2724368 "Sodium aminosalicylate" "Antimycobacterials" "Drugs for treatment of tuberculosis" "Aminosalicylic acid and derivatives" "" "c(\"bactylan\", \"decapasil\", \"lepasen\", \"monopas\", \"nippas\", \"p.a.s. sodium\", \"pamisyl sodium\", \"parasal sodium\", \"pas sodium\", \"pasade\", \"pasnal\", \"passodico\", \"salvis\", \"sanipirol\", \"sodiopas\", \"sodium p.a.s\", \"sodium pas\", \"teebacin\", \"tubersan\")" 14 "g" 14 "g" "character(0)" @@ -458,7 +458,7 @@ "TLP" 163307 "Talmetoprim" "Other antibacterials" "" "talmetoprim" "character(0)" "TAZ" "J01CG02" 123630 "Tazobactam" "Beta-lactams/penicillins" "Beta-lactam antibacterials, penicillins" "Beta-lactamase inhibitors" "tazo" "c(\"tazobactam\", \"tazobactam acid\", \"tazobactamum\", \"tazobactum\")" "character(0)" "TBP" 9800194 "Tebipenem" "Carbapenems" "" "" "" -"TZD" "J01XX11" 11234049 "Tedizolid" "Other antibacterials" "Other antibacterials" "Other antibacterials" "tedi" "c(\"tedizolid\", \"torezolid\")" "character(0)" +"TZD" "J01XX11" 11234049 "Tedizolid" "Other antibacterials" "Other antibacterials" "Other antibacterials" "tedi" "c(\"tedizolid\", \"torezolid\")" 0.2 "character(0)" "TEC" "J01XA02" 16131923 "Teicoplanin" "Glycopeptides" "Other antibacterials" "Glycopeptide antibacterials" "c(\"tec\", \"tei\", \"teic\", \"tp\", \"tpl\", \"tpn\")" "c(\"targocid\", \"tecoplanina\", \"tecoplanine\", \"tecoplaninum\", \"teichomycin\", \"teicoplanina\", \"teicoplanine\", \"teicoplaninum\")" 0.4 "g" "c(\"25534-9\", \"25535-6\", \"34378-0\", \"34379-8\", \"4043-6\", \"80968-1\")" "TCM" "Teicoplanin-macromethod" "Glycopeptides" "" "" "" "TLV" "J01XA03" 3081362 "Telavancin" "Glycopeptides" "Other antibacterials" "Glycopeptide antibacterials" "tela" "c(\"telavancin\", \"vibativ\")" "character(0)" diff --git a/data-raw/antibiotics.xlsx b/data-raw/antibiotics.xlsx index 5ea53f199..11bb4ab4b 100644 Binary files a/data-raw/antibiotics.xlsx and b/data-raw/antibiotics.xlsx differ diff --git a/data-raw/poorman_prepend.R b/data-raw/poorman_prepend.R index 41181ea82..74d0fa407 100644 --- a/data-raw/poorman_prepend.R +++ b/data-raw/poorman_prepend.R @@ -24,9 +24,11 @@ # Source file: data-raw/reproduction_of_poorman.R # ------------------------------------------------ -# Poorman: a package to replace all dplyr functions with base R so we can lose dependency on dplyr. +# poorman: a package to replace all dplyr functions with base R so we can lose dependency on dplyr. # These functions were downloaded from https://github.com/nathaneastwood/poorman, -# from this commit: https://github.com/nathaneastwood/poorman/tree/{commit} +# from this commit: https://github.com/nathaneastwood/poorman/tree/{commit}. +# +# All functions are prefixed with 'pm_' to make it obvious that they are dplyr substitutes. # # All code below was released under MIT license, that permits 'free of charge, to any person obtaining a # copy of the software and associated documentation files (the "Software"), to deal in the Software @@ -34,7 +36,5 @@ # distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software # is furnished to do so', given that a copyright notice is given in the software. # -# Copyright notice as found on https://github.com/nathaneastwood/poorman/blob/master/LICENSE on 2 May 2020: -# YEAR: 2020 -# COPYRIGHT HOLDER: Nathan Eastwood - +# Copyright notice on {date}, the day this code was downloaded, as found on +# https://github.com/nathaneastwood/poorman/blob/{commit}/LICENSE: diff --git a/data-raw/reproduction_of_antibiotics.R b/data-raw/reproduction_of_antibiotics.R index 9dc72c364..dc07099d6 100644 --- a/data-raw/reproduction_of_antibiotics.R +++ b/data-raw/reproduction_of_antibiotics.R @@ -492,6 +492,8 @@ antibiotics[which(antibiotics$ab == "VOR"), "abbreviations"][[1]] <- list(c(anti antibiotics[which(antibiotics$ab == "FOS"), "synonyms"][[1]] <- list(sort(c(antibiotics[which(antibiotics$ab == "FOS"), "synonyms"][[1]], "Monuril"))) antibiotics[which(antibiotics$ab == "FOS"), "synonyms"][[1]] <- list(sort(c(antibiotics[which(antibiotics$ab == "FOS"), "synonyms"][[1]], "Monurol"))) +antibiotics[which(antibiotics$ab == "TZP"), "abbreviations"][[1]] <- list(sort(c(antibiotics[which(antibiotics$ab == "TZP"), "abbreviations"][[1]], "piptazo"))) + antibiotics <- antibiotics %>% mutate(ab = as.character(ab)) %>% rbind(antibiotics %>% @@ -612,5 +614,5 @@ for (i in 1:nrow(antibiotics)) { # REFER TO data-raw/loinc.R FOR ADDING LOINC CODES -usethis::use_data(antibiotics, overwrite = TRUE) +usethis::use_data(antibiotics, overwrite = TRUE, version = 2) rm(antibiotics) diff --git a/data-raw/reproduction_of_poorman.R b/data-raw/reproduction_of_poorman.R index f204d53dd..1ba5f2f01 100644 --- a/data-raw/reproduction_of_poorman.R +++ b/data-raw/reproduction_of_poorman.R @@ -1,38 +1,77 @@ # get complete filenames of all R files in the GitHub repository of nathaneastwood/poorman -commit <- "7d76d77f8f7bc663bf30fb5a161abb49801afa17" +commit <- "52eb6947e0b4430cd588976ed8820013eddf955f" -files <- xml2::read_html(paste0("https://github.com/nathaneastwood/poorman/tree/", commit, "/R")) %>% - rvest::html_nodes("table") %>% - rvest::html_table() -files <- files[[1]][,"Name"] +files <- xml2::read_html(paste0("https://github.com/nathaneastwood/poorman/tree/", commit, "/R")) %>% + rvest::html_nodes("a") %>% + rvest::html_attr("href") +# get full URLs of all raw R files +files <- paste0("https://raw.githubusercontent.com", gsub("blob/", "", files[files %like% "/R/.*.R$"])) # remove files with only pkg specific code -files <- files[!files %in% c("zzz.R", "init.R")] -files <- paste0("https://raw.githubusercontent.com/nathaneastwood/poorman/", commit, "/R/", - files[grepl("[.]R$", files)]) +files <- files[!files %like% "(zzz.R|init.R)"] # add our prepend file, containing info about the source of the data -files <- c("data-raw/poorman_prepend.R", files) +intro <- readLines("data-raw/poorman_prepend.R") +# copyright info: +copyright <- paste0("# ", readLines("https://raw.githubusercontent.com/nathaneastwood/poorman/master/LICENSE")) # read all contents to a character vector contents <- character(0) sapply(files, function(file) { + message("reading ", basename(file)) contents <<- c(contents, readLines(file)) invisible() }) +contents <- c(intro, + copyright, + "", + contents) # remove lines starting with "#'" and NULL and write to file contents <- contents[!grepl("^(#'|NULL|\"_PACKAGE)", contents)] # now make it independent on UseMethod, since we will not export these functions + contents <- gsub('UseMethod[(]"(.*?)"[)]', 'if ("grouped_data" %in% class(.data)) {||| \\1.grouped_data(.data, ...)||| } else {||| \\1.default(.data, ...)||| }', paste(contents, collapse = "|||"), - perl = TRUE) %>% + perl = TRUE) %>% # add commit to intro part - gsub("{commit}", commit, ., fixed = TRUE) %>% - strsplit(split = "|||", fixed = TRUE) %>% - unlist() + gsub("{commit}", commit, ., fixed = TRUE) %>% + # add date to intro part + gsub("{date}", format(Sys.Date(), "%e %B %Y"), ., fixed = TRUE) %>% + strsplit(split = "|||", fixed = TRUE) %>% + unlist() %>% + # add "pm_" as prefix to all functions + gsub("^([a-z_.]+) <- function", "pm_\\1 <- function", .) -writeLines(contents, "R/aa_helper_functions_dplyr.R") +# now get all those pm_* functions to replace all untransformed function name calls as well +new_pm_names <- sort(gsub("pm_(.*?) <-.*", "\\1", contents[grepl("^pm_", contents)])) +for (i in seq_len(length(new_pm_names))) { + contents <- gsub(paste0("([^a-z._])", new_pm_names[i], "([^a-z._])"), paste0("\\1pm_", new_pm_names[i], "\\2"), contents) + # starting with a space or a straight bracket or an opening parenthesis, ending with nothing or a non-character or a closing parenthesis + contents <- gsub(paste0("( |\\[|\\()", new_pm_names[i], "($|[^a-z]|\\))"), paste0("\\1pm_", new_pm_names[i], "\\2"), contents) +} + +# replace %>% with %pm>% +contents <- gsub("%>%", "%pm>%", contents, fixed = TRUE) +# fix for new lines, since n() also existed +contents <- gsub("\\pm_n", "\\n", contents, fixed = TRUE) +# prefix other functions also with "pm_" +contents <- gsub("^([a-z_]+)(\\$|)", "pm_\\1\\2", contents) +# prefix environments +contents <- gsub("eval_env", "pm_eval_env", contents, fixed = TRUE) +contents <- gsub("select_env", "pm_select_env", contents, fixed = TRUE) +contents <- gsub("context", "pm_context", contents, fixed = TRUE) +# now some items are overprefixed +contents <- gsub("(pm_)+", "pm_", contents) +# special case for pm_distinct(), we need '.keep_all' to work +contents <- gsub("pm_distinct <- function(.data, ..., .keep_all = FALSE)", "pm_distinct <- function(.data, ...)", contents, fixed = TRUE) +# removes unnecessary calls to package +contents <- gsub("poorman::", "AMR:::", contents, fixed = TRUE) + +# who needs US spelling? +contents <- contents[!grepl("summarize", contents)] + +writeLines(contents, "R/aa_helper_pm_functions.R") diff --git a/data/antibiotics.rda b/data/antibiotics.rda index eeab73eec..d27bc1129 100755 Binary files a/data/antibiotics.rda and b/data/antibiotics.rda differ diff --git a/docs/404.html b/docs/404.html index d652206d4..b623613b4 100644 --- a/docs/404.html +++ b/docs/404.html @@ -81,7 +81,7 @@ AMR (for R) - 1.3.0.9021 + 1.3.0.9022 diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index 418732a7a..5faa964a5 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -81,7 +81,7 @@ AMR (for R) - 1.3.0.9021 + 1.3.0.9022 diff --git a/docs/articles/index.html b/docs/articles/index.html index 64ac1ce4c..a9667d597 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -81,7 +81,7 @@ AMR (for R) - 1.3.0.9021 + 1.3.0.9022 diff --git a/docs/authors.html b/docs/authors.html index 14d4d3e42..f8542b4e2 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -81,7 +81,7 @@ AMR (for R) - 1.3.0.9021 + 1.3.0.9022 diff --git a/docs/index.html b/docs/index.html index b42350ed5..a680feac3 100644 --- a/docs/index.html +++ b/docs/index.html @@ -43,7 +43,7 @@ AMR (for R) - 1.3.0.9021 + 1.3.0.9022 diff --git a/docs/news/index.html b/docs/news/index.html index 86ee004a3..0d2584004 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -81,7 +81,7 @@ AMR (for R) - 1.3.0.9021 + 1.3.0.9022 @@ -236,13 +236,13 @@ Source: NEWS.md -
-

-AMR 1.3.0.9021 Unreleased +
+

+AMR 1.3.0.9022 Unreleased

-
+

-Last updated: 14 September 2020 +Last updated: 18 September 2020

Note: some changes in this version were suggested by anonymous reviewers from the journal we submitted our manuscipt to. We are those reviewers very grateful for going through our code so thoroughly!

@@ -262,6 +262,7 @@ #> [1] "Enterococcus casseliflavus" "Enterococcus gallinarum"
+
  • Support for veterinary ATC codes

  • @@ -286,6 +287,7 @@
  • Big speed improvement for interpreting MIC values and disk zone diameters. When interpreting 5,000 MIC values of two antibiotics (10,000 values in total), our benchmarks showed a total run time going from 80.7-85.1 seconds to 1.8-2.0 seconds.

  • Added parameter ‘add_intrinsic_resistance’ (defaults to FALSE), that considers intrinsic resistance according to EUCAST

  • +
  • Fixed a bug where in EUCAST rules the breakpoint for R would be interpreted as “>=” while this should have been “<”

  • @@ -299,7 +301,7 @@
  • Improvements for as.mo():

      -
    • Any user input value that could mean more than one taxonomic entry is now considered ‘uncertain’. Instead of a warning, a message will be thrown and the accompanying mo_uncertainties() has been changed completely; it now prints all possible candidates with their matching score.
    • +
    • A completely new matching score for ambiguous user input, using mo_matching_score(). Any user input value that could mean more than one taxonomic entry is now considered ‘uncertain’. Instead of a warning, a message will be thrown and the accompanying mo_uncertainties() has been changed completely; it now prints all possible candidates with their matching score.
    • Big speed improvement for already valid microorganism ID. This also means an significant speed improvement for using mo_* functions like mo_name() on microoganism IDs.
    • Added parameter ignore_pattern to as.mo() which can also be given to mo_* functions like mo_name(), to exclude known non-relevant input from analysing. This can also be set with the option AMR_ignore_pattern.
    @@ -312,6 +314,10 @@
  • Added a feature from AMR 1.1.0 and earlier again, but now without other package dependencies: tibble printing support for classes <rsi>, <mic>, <disk>, <ab> and <mo>. When using tibbles containing antimicrobial columns (class <rsi>), “S” will print in green, “I” will print in yellow and “R” will print in red. Microbial IDs (class <mo>) will emphasise on the genus and species, not on the kingdom.

  • Names of antiviral agents in data set antivirals now have a starting capital letter, like it is the case in the antibiotics data set

  • Updated the documentation of the WHONET data set to clarify that all patient names are fictitious

  • +
  • Small as.ab() algorithm improvements

  • +
  • Fix for combining MIC values with raw numbers, i.e. c(as.mic(2), 2) previously failed but now returns a valid MIC class

  • +
  • ggplot_rsi() and geom_rsi() gained parameters minimum and language, to influence the internal use of rsi_df()

  • +
  • Added abbreviation “piptazo” to piperacillin/tazobactam (TZP)

  • diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index cd255e126..9251612ed 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -2,7 +2,7 @@ pandoc: 2.7.3 pkgdown: 1.5.1.9000 pkgdown_sha: eae56f08694abebf93cdfc0dd8e9ede06d8c815f articles: [] -last_built: 2020-09-14T18:40Z +last_built: 2020-09-18T14:05Z urls: reference: https://msberends.github.io/AMR/reference article: https://msberends.github.io/AMR/articles diff --git a/docs/reference/WHONET.html b/docs/reference/WHONET.html index afe7137b4..97623f0c6 100644 --- a/docs/reference/WHONET.html +++ b/docs/reference/WHONET.html @@ -50,7 +50,7 @@ - + @@ -82,7 +82,7 @@ AMR (for R) - 1.3.0.9016 + 1.3.0.9022
    @@ -247,7 +247,7 @@

    Format

    -

    A data.frame with 500 observations and 53 variables:

      +

      A data.frame with 500 observations and 53 variables:

      • Identification number
        ID of the sample

      • Specimen number
        ID of the specimen

      • Organism
        Name of the microorganism. Before analysis, you should transform this to a valid microbial class, using as.mo().

      • diff --git a/docs/reference/ab_from_text.html b/docs/reference/ab_from_text.html index 4d221bedd..befc71c14 100644 --- a/docs/reference/ab_from_text.html +++ b/docs/reference/ab_from_text.html @@ -50,7 +50,7 @@ - + @@ -82,7 +82,7 @@ AMR (for R) - 1.3.0.9015 + 1.3.0.9022
    @@ -282,7 +282,7 @@

    Value

    -

    A list, or a character if collapse is not NULL

    +

    A list, or a character if collapse is not NULL

    Details

    This function is also internally used by as.ab(), although it then only searches for the first drug name and will throw a note if more drug names could have been returned.

    Parameter type

    @@ -345,7 +345,8 @@ The lifecycle of this function is maturing< type = "admin", collapse = "|")) -} +} +
    @@ -309,10 +309,10 @@
      -
    • An integer in case of ab_cid()

    • -
    • A named list in case of ab_info() and multiple ab_synonyms()/ab_tradenames()

    • -
    • A double in case of ab_ddd()

    • -
    • A character in all other cases

    • +
    • An integer in case of ab_cid()

    • +
    • A named list in case of ab_info() and multiple ab_synonyms()/ab_tradenames()

    • +
    • A double in case of ab_ddd()

    • +
    • A character in all other cases

    Details

    diff --git a/docs/reference/age_groups.html b/docs/reference/age_groups.html index 0e57cdddf..da23d90c2 100644 --- a/docs/reference/age_groups.html +++ b/docs/reference/age_groups.html @@ -50,7 +50,7 @@ - + @@ -82,7 +82,7 @@ AMR (for R) - 1.3.0.9015 + 1.3.0.9022

    @@ -323,8 +323,9 @@ The lifecycle of this function is stablefilter(mo == as.mo("E. coli")) %>% group_by(age_group = age_groups(age)) %>% select(age_group, CIP) %>% - ggplot_rsi(x = "age_group") -} + ggplot_rsi(x = "age_group", minimum = 0) +} + @@ -250,7 +250,7 @@

    Format

    -

    For the antibiotics data set: a data.frame with 456 observations and 14 variables:

    +

    For the antibiotics data set: a data.frame with 456 observations and 14 variables:

    • ab
      Antibiotic ID as used in this package (like AMC), using the official EARS-Net (European Antimicrobial Resistance Surveillance Network) codes where available

    • @@ -270,7 +270,7 @@
    -

    For the antivirals data set: a data.frame with 102 observations and 9 variables:

    +

    For the antivirals data set: a data.frame with 102 observations and 9 variables:

    • atc
      ATC code (Anatomical Therapeutic Chemical) as defined by the WHOCC

    • diff --git a/docs/reference/as.ab.html b/docs/reference/as.ab.html index fd79c0516..9c0450b52 100644 --- a/docs/reference/as.ab.html +++ b/docs/reference/as.ab.html @@ -50,7 +50,7 @@ - + @@ -82,7 +82,7 @@ AMR (for R) - 1.3.0.9016 + 1.3.0.9022 @@ -269,7 +269,7 @@

      Value

      -

      Character (vector) with class ab. Unknown values will return NA.

      +

      A character vector with additional class ab

      Details

      All entries in the antibiotics data set have three different identifiers: a human readable EARS-Net code (column ab, used by ECDC and WHONET), an ATC code (column atc, used by WHO), and a CID code (column cid, Compound ID, used by PubChem). The data set contains more than 5,000 official brand names from many different countries, as found in PubChem.

      @@ -317,7 +317,7 @@ This package contains all ~550 antibiotic, antimycotic and antiviral dru
        -
      • antibiotics for the dataframe that is being used to determine ATCs

      • +
      • antibiotics for the data.frame that is being used to determine ATCs

      • ab_from_text() for a function to retrieve antimicrobial drugs from clinical text (from health care records)

      diff --git a/docs/reference/as.disk.html b/docs/reference/as.disk.html index 3c03271dd..519606210 100644 --- a/docs/reference/as.disk.html +++ b/docs/reference/as.disk.html @@ -50,7 +50,7 @@ - + @@ -82,7 +82,7 @@ AMR (for R) - 1.3.0.9016 + 1.3.0.9022 @@ -261,7 +261,7 @@

      Value

      -

      An integer with additional new class disk

      +

      An integer with additional class disk

      Details

      Interpret disk values as RSI values with as.rsi(). It supports guidelines from EUCAST and CLSI.

      diff --git a/docs/reference/as.mic.html b/docs/reference/as.mic.html index 2a923713e..db86f533b 100644 --- a/docs/reference/as.mic.html +++ b/docs/reference/as.mic.html @@ -6,7 +6,7 @@ -Transform input to minimum inhibitory concentrations — as.mic • AMR (for R) +Transform input to minimum inhibitory concentrations (MIC) — as.mic • AMR (for R) @@ -48,9 +48,9 @@ - + - + @@ -82,7 +82,7 @@ AMR (for R) - 1.3.0.9016 + 1.3.0.9022 @@ -233,13 +233,13 @@
      -

      This transforms a vector to a new class mic, which is an ordered factor with valid minimum inhibitory concentrations (MIC) as levels. Invalid MIC values will be translated as NA with a warning.

      +

      This transforms a vector to a new class mic, which is an ordered factor with valid minimum inhibitory concentrations (MIC) as levels. Invalid MIC values will be translated as NA with a warning.

      as.mic(x, na.rm = FALSE)
      @@ -261,7 +261,7 @@
       
           

      Value

      -

      Ordered factor with new class mic

      +

      Ordered factor with additional class mic

      Details

      To interpret MIC values as RSI values, use as.rsi() on MIC values. It supports guidelines from EUCAST and CLSI.

      diff --git a/docs/reference/as.mo.html b/docs/reference/as.mo.html index 174c4d889..e8c2f31bf 100644 --- a/docs/reference/as.mo.html +++ b/docs/reference/as.mo.html @@ -50,7 +50,7 @@ - + @@ -82,7 +82,7 @@ AMR (for R) - 1.3.0.9018 + 1.3.0.9022
      @@ -266,7 +266,7 @@ x -

      a character vector or a data.frame with one or two columns

      +

      a character vector or a data.frame with one or two columns

      Becker @@ -284,7 +284,7 @@ reference_df -

      a data.frame to be used for extra reference when translating x to a valid mo. See set_mo_source() and get_mo_source() to automate the usage of your own codes (e.g. used in your analysis or organisation).

      +

      a data.frame to be used for extra reference when translating x to a valid mo. See set_mo_source() and get_mo_source() to automate the usage of your own codes (e.g. used in your analysis or organisation).

      ignore_pattern @@ -302,7 +302,7 @@

      Value

      -

      A character vector with additional class mo

      +

      A character vector with additional class mo

      Details

      @@ -352,9 +352,9 @@

    There are three helper functions that can be run after using the as.mo() function:

      -
    • Use mo_uncertainties() to get a data.frame that prints in a pretty format with all taxonomic names that were guessed. The output contains a score that is based on the human pathogenic prevalence and the Levenshtein distance between the user input and the full taxonomic name.

    • -
    • Use mo_failures() to get a character vector with all values that could not be coerced to a valid value.

    • -
    • Use mo_renamed() to get a data.frame with all values that could be coerced based on old, previously accepted taxonomic names.

    • +
    • Use mo_uncertainties() to get a data.frame that prints in a pretty format with all taxonomic names that were guessed. The output contains the matching score for all matches (see Background on matching score).

    • +
    • Use mo_failures() to get a character vector with all values that could not be coerced to a valid value.

    • +
    • Use mo_renamed() to get a data.frame with all values that could be coerced based on old, previously accepted taxonomic names.

    @@ -366,6 +366,21 @@

    Group 2 consists of all microorganisms where the taxonomic phylum is Proteobacteria, Firmicutes, Actinobacteria or Sarcomastigophora, or where the taxonomic genus is Aspergillus, Bacteroides, Candida, Capnocytophaga, Chryseobacterium, Cryptococcus, Elisabethkingia, Flavobacterium, Fusobacterium, Giardia, Leptotrichia, Mycoplasma, Prevotella, Rhodotorula, Treponema, Trichophyton or Ureaplasma. This group consequently contains all less common and rare human pathogens.

    Group 3 (least prevalent microorganisms) consists of all other microorganisms. This group contains microorganisms most probably not found in humans.

    +

    Background on matching scores

    + + +

    With ambiguous user input, the returned results are chosen based on their matching score using mo_matching_score(). This matching score is based on four parameters:

      +
    1. The prevalence \(P\) is categorised into group 1, 2 and 3 as stated above;

    2. +
    3. A kingdom index \(K\) is set as follows: Bacteria = 1, Fungi = 2, Protozoa = 3, Archaea = 4, and all others = 5;

    4. +
    5. The level of uncertainty \(U\) needed to get to the result, as stated above (1 to 3);

    6. +
    7. The Levenshtein distance \(L\) is the distance between the user input and all taxonomic full names, with the text length of the user input being the maximum distance. A modified version of the Levenshtein distance \(L'\) based on the text length of the full name \(F\) is calculated as:

    8. +
    + +

    $$L' = F - \frac{0.5 \times L}{F}$$

    +

    The final matching score \(M\) is calculated as: +$$M = L' \times \frac{1}{P \times K} * \frac{1}{U}$$

    +

    All matches are sorted descending on their matching score and for all user input values, the top match will be returned.

    +

    Source

    @@ -403,7 +418,7 @@ This package contains the complete taxonomic tree of almost all microorganisms (

    On our website https://msberends.github.io/AMR you can find a comprehensive tutorial about how to conduct AMR analysis, the complete documentation of all functions (which reads a lot easier than here in R) and an example analysis using WHONET data. As we would like to better understand the backgrounds and needs of our users, please participate in our survey!

    See also

    -

    microorganisms for the data.frame that is being used to determine ID's.

    +

    microorganisms for the data.frame that is being used to determine ID's.

    The mo_property() functions (like mo_genus(), mo_gramstain()) to get properties based on the returned code.

    Examples

    diff --git a/docs/reference/atc_online.html b/docs/reference/atc_online.html index 780df8c32..3ee3485cf 100644 --- a/docs/reference/atc_online.html +++ b/docs/reference/atc_online.html @@ -49,8 +49,8 @@ - - + + @@ -82,7 +82,7 @@ AMR (for R) - 1.3.0.9016 + 1.3.0.9022
    @@ -239,14 +239,15 @@
    -

    Gets data from the WHO to determine properties of an ATC (e.g. an antibiotic) like name, defined daily dose (DDD) or standard unit.

    +

    Gets data from the WHO to determine properties of an ATC (e.g. an antibiotic), such as the name, defined daily dose (DDD) or standard unit.

    atc_online_property(
       atc_code,
       property,
       administration = "O",
    -  url = "https://www.whocc.no/atc_ddd_index/?code=%s&showdescription=no"
    +  url = "https://www.whocc.no/atc_ddd_index/?code=%s&showdescription=no",
    +  url_vet = "https://www.whocc.no/atcvet/atcvet_index/?code=%s&showdescription=no"
     )
     
     atc_online_groups(atc_code, ...)
    @@ -270,7 +271,11 @@
         
         
           url
    -      

    url of website of the WHO. The sign %s can be used as a placeholder for ATC codes.

    +

    url of website of the WHOCC. The sign %s can be used as a placeholder for ATC codes.

    + + + url_vet +

    url of website of the WHOCC for veterinary medicine. The sign %s can be used as a placeholder for ATC_vet codes (that all start with "Q").

    ... diff --git a/docs/reference/availability.html b/docs/reference/availability.html index 70168339a..c32ec4a8f 100644 --- a/docs/reference/availability.html +++ b/docs/reference/availability.html @@ -50,7 +50,7 @@ - + @@ -82,7 +82,7 @@ AMR (for R) - 1.3.0.9015 + 1.3.0.9022 @@ -249,7 +249,7 @@ tbl -

    a data.frame or list

    +

    a data.frame or list

    width @@ -259,10 +259,10 @@

    Value

    -

    data.frame with column names of tbl as row names

    +

    data.frame with column names of tbl as row names

    Details

    -

    The function returns a data.frame with columns "resistant" and "visual_resistance". The values in that columns are calculated with resistance().

    +

    The function returns a data.frame with columns "resistant" and "visual_resistance". The values in that columns are calculated with resistance().

    Stable lifecycle

    @@ -291,7 +291,8 @@ The lifecycle of this function is stablefilter(mo == as.mo("E. coli")) %>% select_if(is.rsi) %>% availability() -}
    +} + @@ -324,7 +324,7 @@

    M39 Analysis and Presentation of Cumulative Antimicrobial Susceptibility Test Data, 4th Edition, 2014, Clinical and Laboratory Standards Institute (CLSI). https://clsi.org/standards/products/microbiology/documents/m39/.

    Value

    -

    The function bug_drug_combinations() returns a data.frame with columns "mo", "ab", "S", "I", "R" and "total".

    +

    The function bug_drug_combinations() returns a data.frame with columns "mo", "ab", "S", "I", "R" and "total".

    Details

    The function format() calculates the resistance per bug-drug combination. Use combine_IR = FALSE (default) to test R vs. S+I and combine_IR = TRUE to test R+I vs. S.

    diff --git a/docs/reference/catalogue_of_life_version.html b/docs/reference/catalogue_of_life_version.html index 1f80b3861..fe19997a1 100644 --- a/docs/reference/catalogue_of_life_version.html +++ b/docs/reference/catalogue_of_life_version.html @@ -50,7 +50,7 @@ - + @@ -82,7 +82,7 @@ AMR (for R) - 1.3.0.9015 + 1.3.0.9022 @@ -247,7 +247,7 @@

    Value

    -

    a list, which prints in pretty format

    +

    a list, which prints in pretty format

    Details

    For DSMZ, see microorganisms.

    diff --git a/docs/reference/count.html b/docs/reference/count.html index a10357cd6..b8852153c 100644 --- a/docs/reference/count.html +++ b/docs/reference/count.html @@ -51,7 +51,7 @@ - + @@ -83,7 +83,7 @@ count_resistant() should be used to count resistant isolates, count_susceptible( AMR (for R) - 1.3.0.9015 + 1.3.0.9022 @@ -283,7 +283,7 @@ count_resistant() should be used to count resistant isolates, count_susceptible( data -

    a data.frame containing columns with class rsi (see as.rsi())

    +

    a data.frame containing columns with class rsi (see as.rsi())

    translate_ab @@ -305,7 +305,7 @@ count_resistant() should be used to count resistant isolates, count_susceptible(

    Value

    -

    An integer

    +

    An integer

    Details

    These functions are meant to count isolates. Use the resistance()/susceptibility() functions to calculate microbial resistance/susceptibility.

    @@ -356,10 +356,12 @@ A microorganism is categorised as Susceptible, Increased exposure when

    Please note that, in combination therapies, for only_all_tested = TRUE applies that:

        count_S()    +   count_I()    +   count_R()    = count_all()
    -  proportion_S() + proportion_I() + proportion_R() = 1
    + proportion_S() + proportion_I() + proportion_R() = 1 +

    and that, in combination therapies, for only_all_tested = FALSE applies that:

        count_S()    +   count_I()    +   count_R()    >= count_all()
    -  proportion_S() + proportion_I() + proportion_R() >= 1
    + proportion_S() + proportion_I() + proportion_R() >= 1 +

    Using only_all_tested has no impact when only using one antibiotic as input.

    Read more on our website!

    @@ -430,7 +432,8 @@ A microorganism is categorised as Susceptible, Increased exposure when select(hospital_id, AMX, CIP) %>% group_by(hospital_id) %>% count_df(translate = FALSE) -} +} + @@ -297,7 +297,7 @@ Leclercq et al. EUCAST expert rules in antimicrobial susceptibility test

    Value

    -

    The input of x, possibly with edited values of antibiotics. Or, if verbose = TRUE, a data.frame with all original and new values of the affected bug-drug combinations.

    +

    The input of x, possibly with edited values of antibiotics. Or, if verbose = TRUE, a data.frame with all original and new values of the affected bug-drug combinations.

    Details

    Note: This function does not translate MIC values to RSI values. Use as.rsi() for that.
    @@ -451,7 +451,8 @@ The lifecycle of this function is maturing< # do not apply EUCAST rules, but rather get a data.frame # with 18 rows, containing all details about the transformations: c <- eucast_rules(a, verbose = TRUE) -# } +# } +

    @@ -247,7 +247,7 @@

    Format

    -

    A data.frame with 2,000 observations and 49 variables:

      +

      A data.frame with 2,000 observations and 49 variables:

      • date
        date of receipt at the laboratory

      • hospital_id
        ID of the hospital, from A to D

      • ward_icu
        logical to determine if ward is an intensive care unit

      • diff --git a/docs/reference/example_isolates_unclean.html b/docs/reference/example_isolates_unclean.html index e26bfb64e..4a295b347 100644 --- a/docs/reference/example_isolates_unclean.html +++ b/docs/reference/example_isolates_unclean.html @@ -50,7 +50,7 @@ - + @@ -82,7 +82,7 @@ AMR (for R) - 1.3.0.9015 + 1.3.0.9022 @@ -247,7 +247,7 @@

        Format

        -

        A data.frame with 3,000 observations and 8 variables:

          +

          A data.frame with 3,000 observations and 8 variables:

          • patient_id
            ID of the patient

          • date
            date of receipt at the laboratory

          • hospital
            ID of the hospital, from A to C

          • diff --git a/docs/reference/first_isolate.html b/docs/reference/first_isolate.html index a6e0557c1..8528d5f11 100644 --- a/docs/reference/first_isolate.html +++ b/docs/reference/first_isolate.html @@ -50,7 +50,7 @@ - + @@ -82,7 +82,7 @@ AMR (for R) - 1.3.0.9015 + 1.3.0.9022 @@ -285,7 +285,7 @@ x -

            a data.frame containing isolates.

            +

            a data.frame containing isolates.

            col_date @@ -369,14 +369,16 @@

            WHY THIS IS SO IMPORTANT
            To conduct an analysis of antimicrobial resistance, you should only include the first isolate of every patient per episode (ref). If you would not do this, you could easily get an overestimate or underestimate of the resistance of an antibiotic. Imagine that a patient was admitted with an MRSA and that it was found in 5 different blood cultures the following week. The resistance percentage of oxacillin of all S. aureus isolates would be overestimated, because you included this MRSA more than once. It would be selection bias.

            All isolates with a microbial ID of NA will be excluded as first isolate.

            -

            The functions filter_first_isolate() and filter_first_weighted_isolate() are helper functions to quickly filter on first isolates. The function filter_first_isolate() is essentially equal to one of:

             x %>% filter(first_isolate(., ...))
            +

            The functions filter_first_isolate() and filter_first_weighted_isolate() are helper functions to quickly filter on first isolates. The function filter_first_isolate() is essentially equal to one of:

             x %>% filter(first_isolate(., ...))
            +

            The function filter_first_weighted_isolate() is essentially equal to:

             x %>%
                mutate(keyab = key_antibiotics(.)) %>%
                mutate(only_weighted_firsts = first_isolate(x,
                                                            col_keyantibiotics = "keyab", ...)) %>%
                filter(only_weighted_firsts == TRUE) %>%
            -   select(-only_weighted_firsts, -keyab)
            + select(-only_weighted_firsts, -keyab) +

            Key antibiotics

            @@ -452,7 +454,8 @@ The lifecycle of this function is stablex$first_isolate_weighed <- first_isolate(x, col_keyantibiotics = 'keyab') x$first_blood_isolate <- first_isolate(x, specimen_group = "Blood") -} +} + @@ -253,6 +253,7 @@ translate_ab = "name", combine_SI = TRUE, combine_IR = FALSE, + minimum = 30, language = get_locale(), nrow = NULL, colours = c(S = "#61a8ff", SI = "#61a8ff", I = "#61f7ff", IR = "#ff6961", R = @@ -273,6 +274,7 @@ x = c("antibiotic", "interpretation"), fill = "interpretation", translate_ab = "name", + minimum = 30, language = get_locale(), combine_SI = TRUE, combine_IR = FALSE, @@ -294,6 +296,8 @@ position = NULL, x = "antibiotic", translate_ab = "name", + minimum = 30, + language = get_locale(), combine_SI = TRUE, combine_IR = FALSE, datalabels.size = 3, @@ -305,7 +309,7 @@ data -

            a data.frame with column(s) of class rsi (see as.rsi())

            +

            a data.frame with column(s) of class rsi (see as.rsi())

            position @@ -343,6 +347,10 @@ combine_IR

            a logical to indicate whether all values of I and R must be merged into one, so the output only consists of S vs. I+R (susceptible vs. non-susceptible). This is outdated, see parameter combine_SI.

            + + minimum +

            the minimum allowed number of available (tested) isolates. Any isolate count lower than minimum will return NA with a warning. The default number of 30 isolates is advised by the Clinical and Laboratory Standards Institute (CLSI) as best practice, see Source.

            + language

            language of the returned text, defaults to system language (see get_locale()) and can also be set with getOption("AMR_locale"). Use language = NULL or language = "" to prevent translation.

            @@ -492,7 +500,8 @@ The lifecycle of this function is maturing< title = "AMR of Anti-UTI Drugs Per Hospital", x.title = "Hospital", datalabels = FALSE) -} +} + @@ -249,7 +249,7 @@ x -

            a data.frame

            +

            a data.frame

            search_string @@ -306,7 +306,8 @@ The lifecycle of this function is maturing< df <- data.frame(AMP_ED2 = "S", AMP_ED20 = "S") guess_ab_col(df, "ampicillin") -# [1] "AMP_ED20" +# [1] "AMP_ED20" + @@ -431,7 +431,7 @@

            as.mic() is.mic()

            -

            Transform input to minimum inhibitory concentrations

            +

            Transform input to minimum inhibitory concentrations (MIC)

            @@ -544,18 +544,6 @@ - -

            get_locale()

            - -

            Translate strings from AMR package

            - - - -

            like() `%like%` `%like_case%`

            - -

            Pattern Matching

            - -

            age_groups()

            @@ -568,12 +556,6 @@

            Age in years of individuals

            - -

            inner_join_microorganisms() left_join_microorganisms() right_join_microorganisms() full_join_microorganisms() semi_join_microorganisms() anti_join_microorganisms()

            - -

            Join microorganisms to a data set

            - -

            availability()

            @@ -581,15 +563,33 @@ -

            pca()

            +

            get_locale()

            -

            Principal Component Analysis (for AMR)

            +

            Translate strings from AMR package

            ggplot_pca()

            PCA biplot with ggplot2

            + + + +

            inner_join_microorganisms() left_join_microorganisms() right_join_microorganisms() full_join_microorganisms() semi_join_microorganisms() anti_join_microorganisms()

            + +

            Join microorganisms to a data set

            + + + +

            like() `%like%` `%like_case%`

            + +

            Pattern Matching

            + + + +

            pca()

            + +

            Principal Component Analysis (for AMR)

            diff --git a/docs/reference/intrinsic_resistant.html b/docs/reference/intrinsic_resistant.html index 1af42371e..52abb7c89 100644 --- a/docs/reference/intrinsic_resistant.html +++ b/docs/reference/intrinsic_resistant.html @@ -50,7 +50,7 @@ - + @@ -82,7 +82,7 @@ AMR (for R) - 1.3.0.9015 + 1.3.0.9022 @@ -247,7 +247,7 @@

            Format

            -

            A data.frame with 49,462 observations and 2 variables:

              +

              A data.frame with 49,462 observations and 2 variables:

              • microorganism
                Name of the microorganism

              • antibiotic
                Name of the antibiotic drug

              @@ -273,7 +273,8 @@ filter(antibiotic == "Vancomycin", microorganism %like% "Enterococcus") %>% pull(microorganism) # [1] "Enterococcus casseliflavus" "Enterococcus gallinarum" -} +} + @@ -277,7 +277,7 @@

              Details

              -

              Note: As opposed to the join() functions of dplyr, character vectors are supported and at default existing columns will get a suffix "2" and the newly joined columns will not get a suffix.

              +

              Note: As opposed to the join() functions of dplyr, character vectors are supported and at default existing columns will get a suffix "2" and the newly joined columns will not get a suffix.

              These functions rely on merge(), a base R function to do joins.

              Stable lifecycle

              @@ -309,7 +309,8 @@ The lifecycle of this function is stablecolnames(df) df_joined <- left_join_microorganisms(df, "bacteria") colnames(df_joined) -} +} + @@ -258,7 +258,7 @@ x -

              a vector of values, a matrix or a data.frame

              +

              a vector of values, a matrix or a data.frame

              na.rm diff --git a/docs/reference/like.html b/docs/reference/like.html index e7122de85..e5b474c47 100644 --- a/docs/reference/like.html +++ b/docs/reference/like.html @@ -50,7 +50,7 @@ - + @@ -82,7 +82,7 @@ AMR (for R) - 1.3.0.9016 + 1.3.0.9022 @@ -257,7 +257,7 @@ pattern -

              a character string containing a regular expression (or character string for fixed = TRUE) to be matched in the given character vector. Coerced by as.character() to a character string if possible. If a character vector of length 2 or more is supplied, the first element is used with a warning.

              +

              a character string containing a regular expression (or character string for fixed = TRUE) to be matched in the given character vector. Coerced by as.character() to a character string if possible. If a character vector of length 2 or more is supplied, the first element is used with a warning.

              ignore.case diff --git a/docs/reference/mdro.html b/docs/reference/mdro.html index 6fff058ef..65aa0845a 100644 --- a/docs/reference/mdro.html +++ b/docs/reference/mdro.html @@ -50,7 +50,7 @@ - + @@ -82,7 +82,7 @@ AMR (for R) - 1.3.0.9015 + 1.3.0.9022 @@ -308,13 +308,13 @@
              • CMI 2012 paper - function mdr_cmi2012() or mdro():
                -Ordered factor with levels Negative < Multi-drug-resistant (MDR) < Extensively drug-resistant (XDR) < Pandrug-resistant (PDR)

              • +Ordered factor with levels Negative < Multi-drug-resistant (MDR) < Extensively drug-resistant (XDR) < Pandrug-resistant (PDR)

              • TB guideline - function mdr_tb() or mdro(..., guideline = "TB"):
                -Ordered factor with levels Negative < Mono-resistant < Poly-resistant < Multi-drug-resistant < Extensively drug-resistant

              • +Ordered factor with levels Negative < Mono-resistant < Poly-resistant < Multi-drug-resistant < Extensively drug-resistant

              • German guideline - function mrgn() or mdro(..., guideline = "MRGN"):
                -Ordered factor with levels Negative < 3MRGN < 4MRGN

              • +Ordered factor with levels Negative < 3MRGN < 4MRGN

              • Everything else:
                -Ordered factor with levels Negative < Positive, unconfirmed < Positive. The value "Positive, unconfirmed" means that, according to the guideline, it is not entirely sure if the isolate is multi-drug resistant and this should be confirmed with additional (e.g. molecular) tests

              • +Ordered factor with levels Negative < Positive, unconfirmed < Positive. The value "Positive, unconfirmed" means that, according to the guideline, it is not entirely sure if the isolate is multi-drug resistant and this should be confirmed with additional (e.g. molecular) tests

              Details

              @@ -459,7 +459,8 @@ A microorganism is categorised as Susceptible, Increased exposure when mutate(EUCAST = eucast_exceptional_phenotypes(.), BRMO = brmo(.), MRGN = mrgn(.)) -} +} + @@ -247,7 +247,7 @@

              Format

              -

              A data.frame with 5,583 observations and 2 variables:

                +

                A data.frame with 5,583 observations and 2 variables:

                • code
                  Commonly used code of a microorganism

                • mo
                  ID of the microorganism in the microorganisms data set

                diff --git a/docs/reference/microorganisms.html b/docs/reference/microorganisms.html index 1f2b3b880..4039b3015 100644 --- a/docs/reference/microorganisms.html +++ b/docs/reference/microorganisms.html @@ -50,7 +50,7 @@ - + @@ -82,7 +82,7 @@ AMR (for R) - 1.3.0.9015 + 1.3.0.9022 @@ -247,7 +247,7 @@

                Format

                -

                A data.frame with 67,151 observations and 16 variables:

                  +

                  A data.frame with 67,151 observations and 16 variables:

                  • mo
                    ID of microorganism as used by this package

                  • fullname
                    Full name, like "Escherichia coli"

                  • kingdom, phylum, class, order, family, genus, species, subspecies
                    Taxonomic rank of the microorganism

                  • diff --git a/docs/reference/microorganisms.old.html b/docs/reference/microorganisms.old.html index 7f2aaa679..2169e0e00 100644 --- a/docs/reference/microorganisms.old.html +++ b/docs/reference/microorganisms.old.html @@ -50,7 +50,7 @@ - + @@ -82,7 +82,7 @@ AMR (for R) - 1.3.0.9015 + 1.3.0.9022 @@ -247,7 +247,7 @@

                    Format

                    -

                    A data.frame with 12,708 observations and 4 variables:

                      +

                      A data.frame with 12,708 observations and 4 variables:

                      • fullname
                        Old full taxonomic name of the microorganism

                      • fullname_new
                        New full taxonomic name of the microorganism

                      • ref
                        Author(s) and year of concerning scientific publication

                      • diff --git a/docs/reference/mo_matching_score.html b/docs/reference/mo_matching_score.html new file mode 100644 index 000000000..302a24462 --- /dev/null +++ b/docs/reference/mo_matching_score.html @@ -0,0 +1,308 @@ + + + + + + + + +Calculate the matching score for microorganisms — mo_matching_score • AMR (for R) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
                        +
                        + + + + +
                        + +
                        +
                        + + +
                        +

                        This helper function is used by as.mo() to determine the most probable match of taxonomic records, based on user input.

                        +
                        + +
                        mo_matching_score(x, fullname, uncertainty = 1)
                        + +

                        Arguments

                        + + + + + + + + + + + + + + +
                        x

                        Any user input value(s)

                        fullname

                        A full taxonomic name, that exists in microorganisms$fullname

                        uncertainty

                        The level of uncertainty set in as.mo(), see allow_uncertain in that function (here, it defaults to 1, but is automatically determined in as.mo() based on the number of transformations needed to get to a result)

                        + +

                        Details

                        + +

                        The matching score is based on four parameters:

                          +
                        1. A human pathogenic prevalence \(P\), that is categorised into group 1, 2 and 3 (see as.mo());

                        2. +
                        3. A kingdom index \(K\) is set as follows: Bacteria = 1, Fungi = 2, Protozoa = 3, Archaea = 4, and all others = 5;

                        4. +
                        5. The level of uncertainty \(U\) that is needed to get to a result (1 to 3, see as.mo());

                        6. +
                        7. The Levenshtein distance \(L\) is the distance between the user input and all taxonomic full names, with the text length of the user input being the maximum distance. A modified version of the Levenshtein distance \(L'\) based on the text length of the full name \(F\) is calculated as:

                        8. +
                        + +

                        $$L' = F - \frac{0.5 \times L}{F}$$

                        +

                        The final matching score \(M\) is calculated as: +$$M = L' \times \frac{1}{P \times K} * \frac{1}{U}$$

                        + +

                        Examples

                        +
                        as.mo("E. coli")
                        +mo_uncertainties()
                        +
                        +
                        + +
                        + + + +
                        + + + + + + + + diff --git a/docs/reference/mo_property.html b/docs/reference/mo_property.html index 8ac22bbbd..086d480ea 100644 --- a/docs/reference/mo_property.html +++ b/docs/reference/mo_property.html @@ -50,7 +50,7 @@ - + @@ -82,7 +82,7 @@ AMR (for R) - 1.3.0.9018 + 1.3.0.9022 @@ -319,11 +319,11 @@
                          -
                        • An integer in case of mo_year()

                        • -
                        • A list in case of mo_taxonomy() and mo_info()

                        • -
                        • A named character in case of mo_url()

                        • -
                        • A double in case of mo_snomed()

                        • -
                        • A character in all other cases

                        • +
                        • An integer in case of mo_year()

                        • +
                        • A list in case of mo_taxonomy() and mo_info()

                        • +
                        • A named character in case of mo_url()

                        • +
                        • A double in case of mo_snomed()

                        • +
                        • A character in all other cases

                        Details

                        diff --git a/docs/reference/mo_source.html b/docs/reference/mo_source.html index 1efc0d96a..2d801d620 100644 --- a/docs/reference/mo_source.html +++ b/docs/reference/mo_source.html @@ -51,7 +51,7 @@ - + @@ -83,7 +83,7 @@ This is the fastest way to have your organisation (or analysis) specific codes p AMR (for R) - 1.3.0.9015 + 1.3.0.9022 @@ -259,9 +259,10 @@ This is the fastest way to have your organisation (or analysis) specific codes p

                        Details

                        -

                        The reference file can be a text file seperated with commas (CSV) or tabs or pipes, an Excel file (either 'xls' or 'xlsx' format) or an R object file (extension '.rds'). To use an Excel file, you need to have the readxl package installed.

                        -

                        set_mo_source() will check the file for validity: it must be a data.frame, must have a column named "mo" which contains values from microorganisms$mo and must have a reference column with your own defined values. If all tests pass, set_mo_source() will read the file into R and export it to "~/.mo_source.rds". This compressed data file will then be used at default for MO determination (function as.mo() and consequently all mo_* functions like mo_genus() and mo_gramstain()). The location of the original file will be saved as option with options(mo_source = path). Its timestamp will be saved with options(mo_source_datetime = ...).

                        -

                        get_mo_source() will return the data set by reading "~/.mo_source.rds" with readRDS(). If the original file has changed (the file defined with path), it will call set_mo_source() to update the data file automatically.

                        +

                        The reference file can be a text file separated with commas (CSV) or tabs or pipes, an Excel file (either 'xls' or 'xlsx' format) or an R object file (extension '.rds'). To use an Excel file, you will need to have the readxl package installed.

                        +

                        set_mo_source() will check the file for validity: it must be a data.frame, must have a column named "mo" which contains values from microorganisms$mo and must have a reference column with your own defined values. If all tests pass, set_mo_source() will read the file into R and export it to "~/.mo_source.rds" after the user specifically confirms and allows that this file will be created. For this reason, this function only works in interactive sessions.

                        +

                        The created compressed data file "~/.mo_source.rds" will be used at default for MO determination (function as.mo() and consequently all mo_* functions like mo_genus() and mo_gramstain()). The location of the original file will be saved as an R option with options(mo_source = path). Its timestamp will be saved with options(mo_source_datetime = ...).

                        +

                        The function get_mo_source() will return the data set by reading "~/.mo_source.rds" with readRDS(). If the original file has changed (by checking the aforementioned options mo_source and mo_source_datetime), it will call set_mo_source() to update the data file automatically.

                        Reading an Excel file (.xlsx) with only one row has a size of 8-9 kB. The compressed file created with set_mo_source() will then have a size of 0.1 kB and can be read by get_mo_source() in only a couple of microseconds (millionths of a second).

                        How to setup

                        @@ -278,7 +279,8 @@ This is the fastest way to have your organisation (or analysis) specific codes p

                        We save it as "home/me/ourcodes.xlsx". Now we have to set it as a source:

                        set_mo_source("home/me/ourcodes.xlsx")
                         #> NOTE: Created mo_source file '~/.mo_source.rds' from 'home/me/ourcodes.xlsx'
                        -#>       (columns "Organisation XYZ" and "mo")
                        +#> (columns "Organisation XYZ" and "mo") +

                        It has now created a file "~/.mo_source.rds" with the contents of our Excel file. Only the first column with foreign values and the 'mo' column will be kept when creating the RDS file.

                        And now we can use it in our functions:

                        as.mo("lab_mo_ecoli")
                        @@ -289,7 +291,8 @@ This is the fastest way to have your organisation (or analysis) specific codes p
                         
                         # other input values still work too
                         as.mo(c("Escherichia coli", "E. coli", "lab_mo_ecoli"))
                        -#> [1] B_ESCHR_COLI B_ESCHR_COLI B_ESCHR_COLI
                        +#> [1] B_ESCHR_COLI B_ESCHR_COLI B_ESCHR_COLI +

                        If we edit the Excel file by, let's say, adding row 4 like this:

                          |         A          |       B      |
                         --|--------------------|--------------|
                        @@ -306,10 +309,12 @@ This is the fastest way to have your organisation (or analysis) specific codes p
                         #> [1] B_ESCHR_COLI
                         
                         mo_genus("lab_Staph_aureus")
                        -#> [1] "Staphylococcus"
                        +#> [1] "Staphylococcus" +

                        To delete the reference data file, just use "", NULL or FALSE as input for set_mo_source():

                        set_mo_source(NULL)
                        -# Removed mo_source file '~/.mo_source.rds'.
                        +# Removed mo_source file '~/.mo_source.rds'. +

                        If the original Excel file is moved or deleted, the mo_source file will be removed upon the next use of as.mo(). If the mo_source file is manually deleted (i.e. without using set_mo_source()), the references to the mo_source file will be removed upon the next use of as.mo().

                        Stable lifecycle

                        diff --git a/docs/reference/plot.html b/docs/reference/plot.html index 1b626cd7a..de16fc1dd 100644 --- a/docs/reference/plot.html +++ b/docs/reference/plot.html @@ -50,7 +50,7 @@ - + @@ -82,7 +82,7 @@ AMR (for R) - 1.3.0.9016 + 1.3.0.9022 diff --git a/docs/reference/proportion.html b/docs/reference/proportion.html index 357269270..f7efc04d9 100644 --- a/docs/reference/proportion.html +++ b/docs/reference/proportion.html @@ -51,7 +51,7 @@ - + @@ -83,7 +83,7 @@ resistance() should be used to calculate resistance, susceptibility() should be AMR (for R) - 1.3.0.9015 + 1.3.0.9022 @@ -299,7 +299,7 @@ resistance() should be used to calculate resistance, susceptibility() should be data -

                        a data.frame containing columns with class rsi (see as.rsi())

                        +

                        a data.frame containing columns with class rsi (see as.rsi())

                        translate_ab @@ -324,7 +324,7 @@ resistance() should be used to calculate resistance, susceptibility() should be

                        M39 Analysis and Presentation of Cumulative Antimicrobial Susceptibility Test Data, 4th Edition, 2014, Clinical and Laboratory Standards Institute (CLSI). https://clsi.org/standards/products/microbiology/documents/m39/.

                        Value

                        -

                        A double or, when as_percent = TRUE, a character.

                        +

                        A double or, when as_percent = TRUE, a character.

                        Details

                        The function resistance() is equal to the function proportion_R(). The function susceptibility() is equal to the function proportion_SI().

                        @@ -354,10 +354,12 @@ resistance() should be used to calculate resistance, susceptibility() should be

                        Please note that, in combination therapies, for only_all_tested = TRUE applies that:

                            count_S()    +   count_I()    +   count_R()    = count_all()
                        -  proportion_S() + proportion_I() + proportion_R() = 1
                        + proportion_S() + proportion_I() + proportion_R() = 1 +

                        and that, in combination therapies, for only_all_tested = FALSE applies that:

                            count_S()    +   count_I()    +   count_R()    >= count_all()
                        -  proportion_S() + proportion_I() + proportion_R() >= 1
                        + proportion_S() + proportion_I() + proportion_R() >= 1 +

                        Using only_all_tested has no impact when only using one antibiotic as input.

                        Stable lifecycle

                        @@ -470,7 +472,8 @@ A microorganism is categorised as Susceptible, Increased exposure when genus == "Helicobacter") %>% summarise(p = susceptibility(AMX, MTR), # amoxicillin with metronidazole n = count_all(AMX, MTR)) -} +} + @@ -273,7 +273,7 @@ ) # S3 method for resistance_predict -plot(x, main = paste("Resistance Prediction of", x_name), ...) +plot(x, main = paste("Resistance Prediction of", x_name), ...) ggplot_rsi_predict( x, @@ -287,7 +287,7 @@ x -

                        a data.frame containing isolates.

                        +

                        a data.frame containing isolates.

                        col_ab @@ -345,7 +345,7 @@

                        Value

                        -

                        A data.frame with extra class resistance_predict with columns:

                          +

                          A data.frame with extra class resistance_predict with columns:

                          • year

                          • value, the same as estimated when preserve_measurements = FALSE, and a combination of observed and estimated otherwise

                          • se_min, the lower bound of the standard error with a minimum of 0 (so the standard error will never go below 0%)

                          • @@ -399,7 +399,7 @@ A microorganism is categorised as Susceptible, Increased exposure when col_ab = "AMX", year_min = 2010, model = "binomial") -plot(x) +plot(x) if (require("ggplot2")) { ggplot_rsi_predict(x) } @@ -410,7 +410,7 @@ A microorganism is categorised as Susceptible, Increased exposure when filter_first_isolate() %>% filter(mo_genus(mo) == "Staphylococcus") %>% resistance_predict("PEN", model = "binomial") - plot(x) + plot(x) # get the model from the object mymodel <- attributes(x)$model @@ -445,7 +445,8 @@ A microorganism is categorised as Susceptible, Increased exposure when y = "%R", x = "Year") + theme_minimal(base_size = 13) -} +} + @@ -247,7 +247,7 @@

                            Format

                            -

                            A data.frame with 18,650 observations and 10 variables:

                              +

                              A data.frame with 18,650 observations and 10 variables:

                              • guideline
                                Name of the guideline

                              • method
                                Either "MIC" or "DISK"

                              • site
                                Body site, e.g. "Oral" or "Respiratory"

                              • diff --git a/docs/reference/skewness.html b/docs/reference/skewness.html index b74686d98..d766b4d44 100644 --- a/docs/reference/skewness.html +++ b/docs/reference/skewness.html @@ -51,7 +51,7 @@ - + @@ -83,7 +83,7 @@ When negative: the left tail is longer; the mass of the distribution is concentr AMR (for R) - 1.3.0.9015 + 1.3.0.9022 @@ -260,7 +260,7 @@ When negative: the left tail is longer; the mass of the distribution is concentr x -

                                a vector of values, a matrix or a data.frame

                                +

                                a vector of values, a matrix or a data.frame

                                na.rm diff --git a/docs/reference/translate.html b/docs/reference/translate.html index 8c9167e75..76af823f7 100644 --- a/docs/reference/translate.html +++ b/docs/reference/translate.html @@ -82,7 +82,7 @@ AMR (for R) - 1.3.0.9020 + 1.3.0.9022 diff --git a/docs/sitemap.xml b/docs/sitemap.xml index a6844cfc8..e21019553 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -123,6 +123,9 @@ https://msberends.github.io/AMR/reference/microorganisms.old.html + + https://msberends.github.io/AMR/reference/mo_matching_score.html + https://msberends.github.io/AMR/reference/mo_property.html diff --git a/docs/survey.html b/docs/survey.html index dcfd68113..71a654017 100644 --- a/docs/survey.html +++ b/docs/survey.html @@ -81,7 +81,7 @@ AMR (for R) - 1.3.0.9021 + 1.3.0.9022 diff --git a/man/WHONET.Rd b/man/WHONET.Rd index ce4104d83..05e9dd73a 100644 --- a/man/WHONET.Rd +++ b/man/WHONET.Rd @@ -5,7 +5,7 @@ \alias{WHONET} \title{Data set with 500 isolates - WHONET example} \format{ -A \code{\link{data.frame}} with 500 observations and 53 variables: +A \link{data.frame} with 500 observations and 53 variables: \itemize{ \item \verb{Identification number}\cr ID of the sample \item \verb{Specimen number}\cr ID of the specimen diff --git a/man/ab_from_text.Rd b/man/ab_from_text.Rd index 569c01af2..0d935d597 100644 --- a/man/ab_from_text.Rd +++ b/man/ab_from_text.Rd @@ -27,7 +27,7 @@ ab_from_text( \item{...}{parameters passed on to \code{\link[=as.ab]{as.ab()}}} } \value{ -A \link{list}, or a \link{character} if \code{collapse} is not \code{NULL} +A \link{list}, or a \link{character} if \code{collapse} is not \code{NULL} } \description{ Use this function on e.g. clinical texts from health care records. It returns a \link{list} with all antimicrobial drugs, doses and forms of administration found in the texts. diff --git a/man/ab_property.Rd b/man/ab_property.Rd index e31194420..0800a4656 100644 --- a/man/ab_property.Rd +++ b/man/ab_property.Rd @@ -62,10 +62,10 @@ ab_property(x, property = "name", language = get_locale(), ...) } \value{ \itemize{ -\item An \code{\link{integer}} in case of \code{\link[=ab_cid]{ab_cid()}} -\item A named \code{\link{list}} in case of \code{\link[=ab_info]{ab_info()}} and multiple \code{\link[=ab_synonyms]{ab_synonyms()}}/\code{\link[=ab_tradenames]{ab_tradenames()}} -\item A \code{\link{double}} in case of \code{\link[=ab_ddd]{ab_ddd()}} -\item A \code{\link{character}} in all other cases +\item An \link{integer} in case of \code{\link[=ab_cid]{ab_cid()}} +\item A named \link{list} in case of \code{\link[=ab_info]{ab_info()}} and multiple \code{\link[=ab_synonyms]{ab_synonyms()}}/\code{\link[=ab_tradenames]{ab_tradenames()}} +\item A \link{double} in case of \code{\link[=ab_ddd]{ab_ddd()}} +\item A \link{character} in all other cases } } \description{ diff --git a/man/age_groups.Rd b/man/age_groups.Rd index 555a0cb5a..3eea88e04 100644 --- a/man/age_groups.Rd +++ b/man/age_groups.Rd @@ -76,7 +76,7 @@ example_isolates \%>\% filter(mo == as.mo("E. coli")) \%>\% group_by(age_group = age_groups(age)) \%>\% select(age_group, CIP) \%>\% - ggplot_rsi(x = "age_group") + ggplot_rsi(x = "age_group", minimum = 0) } } \seealso{ diff --git a/man/antibiotics.Rd b/man/antibiotics.Rd index 53d37d6b3..8341e7b29 100644 --- a/man/antibiotics.Rd +++ b/man/antibiotics.Rd @@ -6,7 +6,7 @@ \alias{antivirals} \title{Data sets with 558 antimicrobials} \format{ -\subsection{For the \link{antibiotics} data set: a \code{\link{data.frame}} with 456 observations and 14 variables:}{ +\subsection{For the \link{antibiotics} data set: a \link{data.frame} with 456 observations and 14 variables:}{ \itemize{ \item \code{ab}\cr Antibiotic ID as used in this package (like \code{AMC}), using the official EARS-Net (European Antimicrobial Resistance Surveillance Network) codes where available \item \code{atc}\cr ATC code (Anatomical Therapeutic Chemical) as defined by the WHOCC, like \code{J01CR02} @@ -25,7 +25,7 @@ } } -\subsection{For the \link{antivirals} data set: a \code{\link{data.frame}} with 102 observations and 9 variables:}{ +\subsection{For the \link{antivirals} data set: a \link{data.frame} with 102 observations and 9 variables:}{ \itemize{ \item \code{atc}\cr ATC code (Anatomical Therapeutic Chemical) as defined by the WHOCC \item \code{cid}\cr Compound ID as found in PubChem diff --git a/man/as.ab.Rd b/man/as.ab.Rd index 98ef6213b..1b1a6bb66 100644 --- a/man/as.ab.Rd +++ b/man/as.ab.Rd @@ -20,7 +20,7 @@ is.ab(x) \item{...}{arguments passed on to internal functions} } \value{ -Character (vector) with class \code{\link{ab}}. Unknown values will return \code{NA}. +A \link{character} \link{vector} with additional class \code{\link{ab}} } \description{ Use this function to determine the antibiotic code of one or more antibiotics. The data set \link{antibiotics} will be searched for abbreviations, official names and synonyms (brand names). @@ -101,7 +101,7 @@ ab_name("eryt") # "Erythromycin" } \seealso{ \itemize{ -\item \link{antibiotics} for the dataframe that is being used to determine ATCs +\item \link{antibiotics} for the \link{data.frame} that is being used to determine ATCs \item \code{\link[=ab_from_text]{ab_from_text()}} for a function to retrieve antimicrobial drugs from clinical text (from health care records) } } diff --git a/man/as.disk.Rd b/man/as.disk.Rd index 23bc498c4..9a4562e52 100644 --- a/man/as.disk.Rd +++ b/man/as.disk.Rd @@ -16,7 +16,7 @@ is.disk(x) \item{na.rm}{a logical indicating whether missing values should be removed} } \value{ -An \code{\link{integer}} with additional new class \code{\link{disk}} +An \link{integer} with additional class \code{\link{disk}} } \description{ This transforms a vector to a new class \code{\link{disk}}, which is a disk diffusion growth zone size (around an antibiotic disk) in millimetres between 6 and 50. diff --git a/man/as.mic.Rd b/man/as.mic.Rd index 35a42364a..6f9b06cb7 100755 --- a/man/as.mic.Rd +++ b/man/as.mic.Rd @@ -4,7 +4,7 @@ \alias{as.mic} \alias{mic} \alias{is.mic} -\title{Transform input to minimum inhibitory concentrations} +\title{Transform input to minimum inhibitory concentrations (MIC)} \usage{ as.mic(x, na.rm = FALSE) @@ -16,10 +16,10 @@ is.mic(x) \item{na.rm}{a logical indicating whether missing values should be removed} } \value{ -Ordered \code{\link{factor}} with new class \code{\link{mic}} +Ordered \link{factor} with additional class \code{\link{mic}} } \description{ -This transforms a vector to a new class \code{\link{mic}}, which is an ordered \code{\link{factor}} with valid minimum inhibitory concentrations (MIC) as levels. Invalid MIC values will be translated as \code{NA} with a warning. +This transforms a vector to a new class \code{\link{mic}}, which is an ordered \link{factor} with valid minimum inhibitory concentrations (MIC) as levels. Invalid MIC values will be translated as \code{NA} with a warning. } \details{ To interpret MIC values as RSI values, use \code{\link[=as.rsi]{as.rsi()}} on MIC values. It supports guidelines from EUCAST and CLSI. diff --git a/man/as.mo.Rd b/man/as.mo.Rd index cd371ca58..2db1e728c 100644 --- a/man/as.mo.Rd +++ b/man/as.mo.Rd @@ -29,7 +29,7 @@ mo_uncertainties() mo_renamed() } \arguments{ -\item{x}{a character vector or a \code{\link{data.frame}} with one or two columns} +\item{x}{a character vector or a \link{data.frame} with one or two columns} \item{Becker}{a logical to indicate whether \emph{Staphylococci} should be categorised into coagulase-negative \emph{Staphylococci} ("CoNS") and coagulase-positive \emph{Staphylococci} ("CoPS") instead of their own species, according to Karsten Becker \emph{et al.} (1,2). Note that this does not include species that were newly named after these publications, like \emph{S. caeli}. @@ -41,7 +41,7 @@ This excludes \emph{Enterococci} at default (who are in group D), use \code{Lanc \item{allow_uncertain}{a number between \code{0} (or \code{"none"}) and \code{3} (or \code{"all"}), or \code{TRUE} (= \code{2}) or \code{FALSE} (= \code{0}) to indicate whether the input should be checked for less probable results, please see \emph{Details}} -\item{reference_df}{a \code{\link{data.frame}} to be used for extra reference when translating \code{x} to a valid \code{\link{mo}}. See \code{\link[=set_mo_source]{set_mo_source()}} and \code{\link[=get_mo_source]{get_mo_source()}} to automate the usage of your own codes (e.g. used in your analysis or organisation).} +\item{reference_df}{a \link{data.frame} to be used for extra reference when translating \code{x} to a valid \code{\link{mo}}. See \code{\link[=set_mo_source]{set_mo_source()}} and \code{\link[=get_mo_source]{get_mo_source()}} to automate the usage of your own codes (e.g. used in your analysis or organisation).} \item{ignore_pattern}{a regular expression (case-insensitive) of which all matches in \code{x} must return \code{NA}. This can be convenient to exclude known non-relevant input and can also be set with the option \code{AMR_ignore_pattern}, e.g. \code{options(AMR_ignore_pattern = "(not reported|contaminated flora)")}.} @@ -50,7 +50,7 @@ This excludes \emph{Enterococci} at default (who are in group D), use \code{Lanc \item{...}{other parameters passed on to functions} } \value{ -A \code{\link{character}} \code{\link{vector}} with additional class \code{\link{mo}} +A \link{character} \link{vector} with additional class \code{\link{mo}} } \description{ Use this function to determine a valid microorganism ID (\code{\link{mo}}). Determination is done using intelligent rules and the complete taxonomic kingdoms Bacteria, Chromista, Protozoa, Archaea and most microbial species from the kingdom Fungi (see Source). The input can be almost anything: a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), an abbreviation known in the field (like \code{"MRSA"}), or just a genus. Please see \emph{Examples}. @@ -109,9 +109,9 @@ With the default setting (\code{allow_uncertain = TRUE}, level 2), below example There are three helper functions that can be run after using the \code{\link[=as.mo]{as.mo()}} function: \itemize{ -\item Use \code{\link[=mo_uncertainties]{mo_uncertainties()}} to get a \code{\link{data.frame}} that prints in a pretty format with all taxonomic names that were guessed. The output contains a score that is based on the human pathogenic prevalence and the \href{https://en.wikipedia.org/wiki/Levenshtein_distance}{Levenshtein distance} between the user input and the full taxonomic name. -\item Use \code{\link[=mo_failures]{mo_failures()}} to get a \code{\link{character}} \code{\link{vector}} with all values that could not be coerced to a valid value. -\item Use \code{\link[=mo_renamed]{mo_renamed()}} to get a \code{\link{data.frame}} with all values that could be coerced based on old, previously accepted taxonomic names. +\item Use \code{\link[=mo_uncertainties]{mo_uncertainties()}} to get a \link{data.frame} that prints in a pretty format with all taxonomic names that were guessed. The output contains the matching score for all matches (see \emph{Background on matching score}). +\item Use \code{\link[=mo_failures]{mo_failures()}} to get a \link{character} \link{vector} with all values that could not be coerced to a valid value. +\item Use \code{\link[=mo_renamed]{mo_renamed()}} to get a \link{data.frame} with all values that could be coerced based on old, previously accepted taxonomic names. } } @@ -125,6 +125,24 @@ Group 2 consists of all microorganisms where the taxonomic phylum is Proteobacte Group 3 (least prevalent microorganisms) consists of all other microorganisms. This group contains microorganisms most probably not found in humans. } + +\subsection{Background on matching scores}{ + +With ambiguous user input, the returned results are chosen based on their matching score using \code{\link[=mo_matching_score]{mo_matching_score()}}. This matching score is based on four parameters: +\enumerate{ +\item The prevalence \eqn{P} is categorised into group 1, 2 and 3 as stated above; +\item A kingdom index \eqn{K} is set as follows: Bacteria = 1, Fungi = 2, Protozoa = 3, Archaea = 4, and all others = 5; +\item The level of uncertainty \eqn{U} needed to get to the result, as stated above (1 to 3); +\item The \href{https://en.wikipedia.org/wiki/Levenshtein_distance}{Levenshtein distance} \eqn{L} is the distance between the user input and all taxonomic full names, with the text length of the user input being the maximum distance. A modified version of the Levenshtein distance \eqn{L'} based on the text length of the full name \eqn{F} is calculated as: +} + +\deqn{L' = F - \frac{0.5 \times L}{F}}{L' = F - (0.5 * L) / F} + +The final matching score \eqn{M} is calculated as: +\deqn{M = L' \times \frac{1}{P \times K} * \frac{1}{U}}{M = L' * (1 / (P * K)) * (1 / U)} + +All matches are sorted descending on their matching score and for all user input values, the top match will be returned. +} } \section{Source}{ @@ -220,7 +238,7 @@ df <- df \%>\% } } \seealso{ -\link{microorganisms} for the \code{\link{data.frame}} that is being used to determine ID's. +\link{microorganisms} for the \link{data.frame} that is being used to determine ID's. The \code{\link[=mo_property]{mo_property()}} functions (like \code{\link[=mo_genus]{mo_genus()}}, \code{\link[=mo_gramstain]{mo_gramstain()}}) to get properties based on the returned code. } diff --git a/man/atc_online.Rd b/man/atc_online.Rd index 2abb65184..707b9d0d1 100644 --- a/man/atc_online.Rd +++ b/man/atc_online.Rd @@ -13,7 +13,8 @@ atc_online_property( atc_code, property, administration = "O", - url = "https://www.whocc.no/atc_ddd_index/?code=\%s&showdescription=no" + url = "https://www.whocc.no/atc_ddd_index/?code=\%s&showdescription=no", + url_vet = "https://www.whocc.no/atcvet/atcvet_index/?code=\%s&showdescription=no" ) atc_online_groups(atc_code, ...) @@ -27,12 +28,14 @@ atc_online_ddd(atc_code, ...) \item{administration}{type of administration when using \code{property = "Adm.R"}, see Details} -\item{url}{url of website of the WHO. The sign \verb{\%s} can be used as a placeholder for ATC codes.} +\item{url}{url of website of the WHOCC. The sign \verb{\%s} can be used as a placeholder for ATC codes.} + +\item{url_vet}{url of website of the WHOCC for veterinary medicine. The sign \verb{\%s} can be used as a placeholder for ATC_vet codes (that all start with "Q").} \item{...}{parameters to pass on to \code{atc_property}} } \description{ -Gets data from the WHO to determine properties of an ATC (e.g. an antibiotic) like name, defined daily dose (DDD) or standard unit. +Gets data from the WHO to determine properties of an ATC (e.g. an antibiotic), such as the name, defined daily dose (DDD) or standard unit. } \details{ Options for parameter \code{administration}: diff --git a/man/availability.Rd b/man/availability.Rd index 0004d6cee..17087d307 100644 --- a/man/availability.Rd +++ b/man/availability.Rd @@ -7,18 +7,18 @@ availability(tbl, width = NULL) } \arguments{ -\item{tbl}{a \code{\link{data.frame}} or \code{\link{list}}} +\item{tbl}{a \link{data.frame} or \link{list}} \item{width}{number of characters to present the visual availability, defaults to filling the width of the console} } \value{ -\code{\link{data.frame}} with column names of \code{tbl} as row names +\link{data.frame} with column names of \code{tbl} as row names } \description{ Easy check for data availability of all columns in a data set. This makes it easy to get an idea of which antimicrobial combinations can be used for calculation with e.g. \code{\link[=susceptibility]{susceptibility()}} and \code{\link[=resistance]{resistance()}}. } \details{ -The function returns a \code{\link{data.frame}} with columns \code{"resistant"} and \code{"visual_resistance"}. The values in that columns are calculated with \code{\link[=resistance]{resistance()}}. +The function returns a \link{data.frame} with columns \code{"resistant"} and \code{"visual_resistance"}. The values in that columns are calculated with \code{\link[=resistance]{resistance()}}. } \section{Stable lifecycle}{ diff --git a/man/bug_drug_combinations.Rd b/man/bug_drug_combinations.Rd index c504c5472..d52027363 100644 --- a/man/bug_drug_combinations.Rd +++ b/man/bug_drug_combinations.Rd @@ -55,7 +55,7 @@ bug_drug_combinations(x, col_mo = NULL, FUN = mo_shortname, ...) decimal point.} } \value{ -The function \code{\link[=bug_drug_combinations]{bug_drug_combinations()}} returns a \code{\link{data.frame}} with columns "mo", "ab", "S", "I", "R" and "total". +The function \code{\link[=bug_drug_combinations]{bug_drug_combinations()}} returns a \link{data.frame} with columns "mo", "ab", "S", "I", "R" and "total". } \description{ Determine antimicrobial resistance (AMR) of all bug-drug combinations in your data set where at least 30 (default) isolates are available per species. Use \code{\link[=format]{format()}} on the result to prettify it to a publicable/printable format, see Examples. diff --git a/man/catalogue_of_life_version.Rd b/man/catalogue_of_life_version.Rd index 9e048dcf1..94e55d62c 100644 --- a/man/catalogue_of_life_version.Rd +++ b/man/catalogue_of_life_version.Rd @@ -7,7 +7,7 @@ catalogue_of_life_version() } \value{ -a \code{\link{list}}, which prints in pretty format +a \link{list}, which prints in pretty format } \description{ This function returns information about the included data from the Catalogue of Life. diff --git a/man/count.Rd b/man/count.Rd index 51def1bec..ba172386b 100644 --- a/man/count.Rd +++ b/man/count.Rd @@ -45,7 +45,7 @@ count_df( \item{only_all_tested}{(for combination therapies, i.e. using more than one variable for \code{...}): a logical to indicate that isolates must be tested for all antibiotics, see section \emph{Combination therapy} below} -\item{data}{a \code{\link{data.frame}} containing columns with class \code{\link{rsi}} (see \code{\link[=as.rsi]{as.rsi()}})} +\item{data}{a \link{data.frame} containing columns with class \code{\link{rsi}} (see \code{\link[=as.rsi]{as.rsi()}})} \item{translate_ab}{a column name of the \link{antibiotics} data set to translate the antibiotic abbreviations to, using \code{\link[=ab_property]{ab_property()}}. Use a value} @@ -56,7 +56,7 @@ count_df( \item{combine_IR}{a logical to indicate whether all values of I and R must be merged into one, so the output only consists of S vs. I+R (susceptible vs. non-susceptible). This is outdated, see parameter \code{combine_SI}.} } \value{ -An \code{\link{integer}} +An \link{integer} } \description{ These functions can be used to count resistant/susceptible microbial isolates. All functions support quasiquotation with pipes, can be used in \code{summarise()} from the \code{dplyr} package and also support grouped variables, please see \emph{Examples}. diff --git a/man/eucast_rules.Rd b/man/eucast_rules.Rd index a75178bc8..ac4da9b48 100644 --- a/man/eucast_rules.Rd +++ b/man/eucast_rules.Rd @@ -39,7 +39,7 @@ eucast_rules( \item{...}{column name of an antibiotic, please see section \emph{Antibiotics} below} } \value{ -The input of \code{x}, possibly with edited values of antibiotics. Or, if \code{verbose = TRUE}, a \code{\link{data.frame}} with all original and new values of the affected bug-drug combinations. +The input of \code{x}, possibly with edited values of antibiotics. Or, if \code{verbose = TRUE}, a \link{data.frame} with all original and new values of the affected bug-drug combinations. } \description{ Apply susceptibility rules as defined by the European Committee on Antimicrobial Susceptibility Testing (EUCAST, \url{http://eucast.org}), see \emph{Source}. This includes (1) expert rules and intrinsic resistance and (2) inferred resistance as defined in their breakpoint tables. diff --git a/man/example_isolates.Rd b/man/example_isolates.Rd index ac9511c20..c5d005f12 100644 --- a/man/example_isolates.Rd +++ b/man/example_isolates.Rd @@ -5,7 +5,7 @@ \alias{example_isolates} \title{Data set with 2,000 example isolates} \format{ -A \code{\link{data.frame}} with 2,000 observations and 49 variables: +A \link{data.frame} with 2,000 observations and 49 variables: \itemize{ \item \code{date}\cr date of receipt at the laboratory \item \code{hospital_id}\cr ID of the hospital, from A to D diff --git a/man/example_isolates_unclean.Rd b/man/example_isolates_unclean.Rd index 674419d5a..7b6fb63fe 100644 --- a/man/example_isolates_unclean.Rd +++ b/man/example_isolates_unclean.Rd @@ -5,7 +5,7 @@ \alias{example_isolates_unclean} \title{Data set with unclean data} \format{ -A \code{\link{data.frame}} with 3,000 observations and 8 variables: +A \link{data.frame} with 3,000 observations and 8 variables: \itemize{ \item \code{patient_id}\cr ID of the patient \item \code{date}\cr date of receipt at the laboratory diff --git a/man/first_isolate.Rd b/man/first_isolate.Rd index b6ddde4fc..ffead95d0 100755 --- a/man/first_isolate.Rd +++ b/man/first_isolate.Rd @@ -50,7 +50,7 @@ filter_first_weighted_isolate( ) } \arguments{ -\item{x}{a \code{\link{data.frame}} containing isolates.} +\item{x}{a \link{data.frame} containing isolates.} \item{col_date}{column name of the result date (or date that is was received on the lab), defaults to the first column of with a date class} diff --git a/man/ggplot_rsi.Rd b/man/ggplot_rsi.Rd index 074592708..a6b7a4755 100644 --- a/man/ggplot_rsi.Rd +++ b/man/ggplot_rsi.Rd @@ -21,6 +21,7 @@ ggplot_rsi( translate_ab = "name", combine_SI = TRUE, combine_IR = FALSE, + minimum = 30, language = get_locale(), nrow = NULL, colours = c(S = "#61a8ff", SI = "#61a8ff", I = "#61f7ff", IR = "#ff6961", R = @@ -41,6 +42,7 @@ geom_rsi( x = c("antibiotic", "interpretation"), fill = "interpretation", translate_ab = "name", + minimum = 30, language = get_locale(), combine_SI = TRUE, combine_IR = FALSE, @@ -62,6 +64,8 @@ labels_rsi_count( position = NULL, x = "antibiotic", translate_ab = "name", + minimum = 30, + language = get_locale(), combine_SI = TRUE, combine_IR = FALSE, datalabels.size = 3, @@ -69,7 +73,7 @@ labels_rsi_count( ) } \arguments{ -\item{data}{a \code{\link{data.frame}} with column(s) of class \code{\link{rsi}} (see \code{\link[=as.rsi]{as.rsi()}})} +\item{data}{a \link{data.frame} with column(s) of class \code{\link{rsi}} (see \code{\link[=as.rsi]{as.rsi()}})} \item{position}{position adjustment of bars, either \code{"fill"}, \code{"stack"} or \code{"dodge"}} @@ -89,6 +93,8 @@ labels_rsi_count( \item{combine_IR}{a logical to indicate whether all values of I and R must be merged into one, so the output only consists of S vs. I+R (susceptible vs. non-susceptible). This is outdated, see parameter \code{combine_SI}.} +\item{minimum}{the minimum allowed number of available (tested) isolates. Any isolate count lower than \code{minimum} will return \code{NA} with a warning. The default number of \code{30} isolates is advised by the Clinical and Laboratory Standards Institute (CLSI) as best practice, see Source.} + \item{language}{language of the returned text, defaults to system language (see \code{\link[=get_locale]{get_locale()}}) and can also be set with \code{getOption("AMR_locale")}. Use \code{language = NULL} or \code{language = ""} to prevent translation.} \item{nrow}{(when using \code{facet}) number of rows} diff --git a/man/guess_ab_col.Rd b/man/guess_ab_col.Rd index 87df0c527..87c567396 100644 --- a/man/guess_ab_col.Rd +++ b/man/guess_ab_col.Rd @@ -7,7 +7,7 @@ guess_ab_col(x = NULL, search_string = NULL, verbose = FALSE) } \arguments{ -\item{x}{a \code{\link{data.frame}}} +\item{x}{a \link{data.frame}} \item{search_string}{a text to search \code{x} for, will be checked with \code{\link[=as.ab]{as.ab()}} if this value is not a column in \code{x}} diff --git a/man/intrinsic_resistant.Rd b/man/intrinsic_resistant.Rd index 5bb12634c..18914cb50 100644 --- a/man/intrinsic_resistant.Rd +++ b/man/intrinsic_resistant.Rd @@ -5,7 +5,7 @@ \alias{intrinsic_resistant} \title{Data set with bacterial intrinsic resistance} \format{ -A \code{\link{data.frame}} with 49,462 observations and 2 variables: +A \link{data.frame} with 49,462 observations and 2 variables: \itemize{ \item \code{microorganism}\cr Name of the microorganism \item \code{antibiotic}\cr Name of the antibiotic drug diff --git a/man/join.Rd b/man/join.Rd index 93812b009..c642c3d48 100755 --- a/man/join.Rd +++ b/man/join.Rd @@ -36,7 +36,7 @@ anti_join_microorganisms(x, by = NULL, ...) Join the data set \link{microorganisms} easily to an existing table or character vector. } \details{ -\strong{Note:} As opposed to the \code{join()} functions of \code{dplyr}, \code{\link{character}} vectors are supported and at default existing columns will get a suffix \code{"2"} and the newly joined columns will not get a suffix. +\strong{Note:} As opposed to the \code{join()} functions of \code{dplyr}, \link{character} vectors are supported and at default existing columns will get a suffix \code{"2"} and the newly joined columns will not get a suffix. These functions rely on \code{\link[=merge]{merge()}}, a base R function to do joins. } diff --git a/man/kurtosis.Rd b/man/kurtosis.Rd index 815025d38..60328e5dd 100644 --- a/man/kurtosis.Rd +++ b/man/kurtosis.Rd @@ -16,7 +16,7 @@ kurtosis(x, na.rm = FALSE) \method{kurtosis}{data.frame}(x, na.rm = FALSE) } \arguments{ -\item{x}{a vector of values, a \code{\link{matrix}} or a \code{\link{data.frame}}} +\item{x}{a vector of values, a \code{\link{matrix}} or a \link{data.frame}} \item{na.rm}{a logical value indicating whether \code{NA} values should be stripped before the computation proceeds.} } diff --git a/man/like.Rd b/man/like.Rd index 24be196b8..30ff857a7 100755 --- a/man/like.Rd +++ b/man/like.Rd @@ -18,7 +18,7 @@ x \%like_case\% pattern \arguments{ \item{x}{a character vector where matches are sought, or an object which can be coerced by \code{\link[=as.character]{as.character()}} to a character vector.} -\item{pattern}{a character string containing a regular expression (or \code{\link{character}} string for \code{fixed = TRUE}) to be matched in the given character vector. Coerced by \code{\link[=as.character]{as.character()}} to a character string if possible. If a \code{\link{character}} vector of length 2 or more is supplied, the first element is used with a warning.} +\item{pattern}{a character string containing a regular expression (or \link{character} string for \code{fixed = TRUE}) to be matched in the given character vector. Coerced by \code{\link[=as.character]{as.character()}} to a character string if possible. If a \link{character} vector of length 2 or more is supplied, the first element is used with a warning.} \item{ignore.case}{if \code{FALSE}, the pattern matching is \emph{case sensitive} and if \code{TRUE}, case is ignored during matching.} } diff --git a/man/mdro.Rd b/man/mdro.Rd index f6d29f134..83844577d 100644 --- a/man/mdro.Rd +++ b/man/mdro.Rd @@ -59,13 +59,13 @@ eucast_exceptional_phenotypes(x, guideline = "EUCAST", ...) \value{ \itemize{ \item CMI 2012 paper - function \code{\link[=mdr_cmi2012]{mdr_cmi2012()}} or \code{\link[=mdro]{mdro()}}:\cr -Ordered \code{\link{factor}} with levels \code{Negative} < \code{Multi-drug-resistant (MDR)} < \verb{Extensively drug-resistant (XDR)} < \code{Pandrug-resistant (PDR)} +Ordered \link{factor} with levels \code{Negative} < \code{Multi-drug-resistant (MDR)} < \verb{Extensively drug-resistant (XDR)} < \code{Pandrug-resistant (PDR)} \item TB guideline - function \code{\link[=mdr_tb]{mdr_tb()}} or \code{\link[=mdro]{mdro(..., guideline = "TB")}}:\cr -Ordered \code{\link{factor}} with levels \code{Negative} < \code{Mono-resistant} < \code{Poly-resistant} < \code{Multi-drug-resistant} < \verb{Extensively drug-resistant} +Ordered \link{factor} with levels \code{Negative} < \code{Mono-resistant} < \code{Poly-resistant} < \code{Multi-drug-resistant} < \verb{Extensively drug-resistant} \item German guideline - function \code{\link[=mrgn]{mrgn()}} or \code{\link[=mdro]{mdro(..., guideline = "MRGN")}}:\cr -Ordered \code{\link{factor}} with levels \code{Negative} < \verb{3MRGN} < \verb{4MRGN} +Ordered \link{factor} with levels \code{Negative} < \verb{3MRGN} < \verb{4MRGN} \item Everything else:\cr -Ordered \code{\link{factor}} with levels \code{Negative} < \verb{Positive, unconfirmed} < \code{Positive}. The value \code{"Positive, unconfirmed"} means that, according to the guideline, it is not entirely sure if the isolate is multi-drug resistant and this should be confirmed with additional (e.g. molecular) tests +Ordered \link{factor} with levels \code{Negative} < \verb{Positive, unconfirmed} < \code{Positive}. The value \code{"Positive, unconfirmed"} means that, according to the guideline, it is not entirely sure if the isolate is multi-drug resistant and this should be confirmed with additional (e.g. molecular) tests } } \description{ diff --git a/man/microorganisms.Rd b/man/microorganisms.Rd index a18c77bda..fbf6a6397 100755 --- a/man/microorganisms.Rd +++ b/man/microorganisms.Rd @@ -5,7 +5,7 @@ \alias{microorganisms} \title{Data set with 67,151 microorganisms} \format{ -A \code{\link{data.frame}} with 67,151 observations and 16 variables: +A \link{data.frame} with 67,151 observations and 16 variables: \itemize{ \item \code{mo}\cr ID of microorganism as used by this package \item \code{fullname}\cr Full name, like \code{"Escherichia coli"} diff --git a/man/microorganisms.codes.Rd b/man/microorganisms.codes.Rd index 4ffcce2c6..ded0029f3 100644 --- a/man/microorganisms.codes.Rd +++ b/man/microorganisms.codes.Rd @@ -5,7 +5,7 @@ \alias{microorganisms.codes} \title{Data set with 5,583 common microorganism codes} \format{ -A \code{\link{data.frame}} with 5,583 observations and 2 variables: +A \link{data.frame} with 5,583 observations and 2 variables: \itemize{ \item \code{code}\cr Commonly used code of a microorganism \item \code{mo}\cr ID of the microorganism in the \link{microorganisms} data set diff --git a/man/microorganisms.old.Rd b/man/microorganisms.old.Rd index d1a24b350..8305f76c6 100644 --- a/man/microorganisms.old.Rd +++ b/man/microorganisms.old.Rd @@ -5,7 +5,7 @@ \alias{microorganisms.old} \title{Data set with previously accepted taxonomic names} \format{ -A \code{\link{data.frame}} with 12,708 observations and 4 variables: +A \link{data.frame} with 12,708 observations and 4 variables: \itemize{ \item \code{fullname}\cr Old full taxonomic name of the microorganism \item \code{fullname_new}\cr New full taxonomic name of the microorganism diff --git a/man/mo_matching_score.Rd b/man/mo_matching_score.Rd new file mode 100644 index 000000000..67d9d95cb --- /dev/null +++ b/man/mo_matching_score.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mo_matching_score.R +\name{mo_matching_score} +\alias{mo_matching_score} +\title{Calculate the matching score for microorganisms} +\usage{ +mo_matching_score(x, fullname, uncertainty = 1) +} +\arguments{ +\item{x}{Any user input value(s)} + +\item{fullname}{A full taxonomic name, that exists in \code{\link[=microorganisms]{microorganisms$fullname}}} + +\item{uncertainty}{The level of uncertainty set in \code{\link[=as.mo]{as.mo()}}, see \code{allow_uncertain} in that function (here, it defaults to 1, but is automatically determined in \code{\link[=as.mo]{as.mo()}} based on the number of transformations needed to get to a result)} +} +\description{ +This helper function is used by \code{\link[=as.mo]{as.mo()}} to determine the most probable match of taxonomic records, based on user input. +} +\details{ +The matching score is based on four parameters: +\enumerate{ +\item A human pathogenic prevalence \eqn{P}, that is categorised into group 1, 2 and 3 (see \code{\link[=as.mo]{as.mo()}}); +\item A kingdom index \eqn{K} is set as follows: Bacteria = 1, Fungi = 2, Protozoa = 3, Archaea = 4, and all others = 5; +\item The level of uncertainty \eqn{U} that is needed to get to a result (1 to 3, see \code{\link[=as.mo]{as.mo()}}); +\item The \href{https://en.wikipedia.org/wiki/Levenshtein_distance}{Levenshtein distance} \eqn{L} is the distance between the user input and all taxonomic full names, with the text length of the user input being the maximum distance. A modified version of the Levenshtein distance \eqn{L'} based on the text length of the full name \eqn{F} is calculated as: +} + +\deqn{L' = F - \frac{0.5 \times L}{F}}{L' = F - (0.5 * L) / F} + +The final matching score \eqn{M} is calculated as: +\deqn{M = L' \times \frac{1}{P \times K} * \frac{1}{U}}{M = L' * (1 / (P * K)) * (1 / U)} +} +\examples{ +as.mo("E. coli") +mo_uncertainties() +} diff --git a/man/mo_property.Rd b/man/mo_property.Rd index e32ce85e1..cfa7443c8 100644 --- a/man/mo_property.Rd +++ b/man/mo_property.Rd @@ -88,11 +88,11 @@ mo_property(x, property = "fullname", language = get_locale(), ...) } \value{ \itemize{ -\item An \code{\link{integer}} in case of \code{\link[=mo_year]{mo_year()}} -\item A \code{\link{list}} in case of \code{\link[=mo_taxonomy]{mo_taxonomy()}} and \code{\link[=mo_info]{mo_info()}} -\item A named \code{\link{character}} in case of \code{\link[=mo_url]{mo_url()}} -\item A \code{\link{double}} in case of \code{\link[=mo_snomed]{mo_snomed()}} -\item A \code{\link{character}} in all other cases +\item An \link{integer} in case of \code{\link[=mo_year]{mo_year()}} +\item A \link{list} in case of \code{\link[=mo_taxonomy]{mo_taxonomy()}} and \code{\link[=mo_info]{mo_info()}} +\item A named \link{character} in case of \code{\link[=mo_url]{mo_url()}} +\item A \link{double} in case of \code{\link[=mo_snomed]{mo_snomed()}} +\item A \link{character} in all other cases } } \description{ diff --git a/man/mo_source.Rd b/man/mo_source.Rd index 95f28f614..d1edabaaa 100644 --- a/man/mo_source.Rd +++ b/man/mo_source.Rd @@ -19,11 +19,13 @@ These functions can be used to predefine your own reference to be used in \code{ This is \strong{the fastest way} to have your organisation (or analysis) specific codes picked up and translated by this package. } \details{ -The reference file can be a text file seperated with commas (CSV) or tabs or pipes, an Excel file (either 'xls' or 'xlsx' format) or an R object file (extension '.rds'). To use an Excel file, you need to have the \code{readxl} package installed. +The reference file can be a text file separated with commas (CSV) or tabs or pipes, an Excel file (either 'xls' or 'xlsx' format) or an R object file (extension '.rds'). To use an Excel file, you will need to have the \code{readxl} package installed. -\code{\link[=set_mo_source]{set_mo_source()}} will check the file for validity: it must be a \code{\link{data.frame}}, must have a column named \code{"mo"} which contains values from \code{\link[=microorganisms]{microorganisms$mo}} and must have a reference column with your own defined values. If all tests pass, \code{\link[=set_mo_source]{set_mo_source()}} will read the file into R and export it to \code{"~/.mo_source.rds"}. This compressed data file will then be used at default for MO determination (function \code{\link[=as.mo]{as.mo()}} and consequently all \verb{mo_*} functions like \code{\link[=mo_genus]{mo_genus()}} and \code{\link[=mo_gramstain]{mo_gramstain()}}). The location of the original file will be saved as option with \code{options(mo_source = path)}. Its timestamp will be saved with \code{options(mo_source_datetime = ...)}. +\code{\link[=set_mo_source]{set_mo_source()}} will check the file for validity: it must be a \link{data.frame}, must have a column named \code{"mo"} which contains values from \code{\link[=microorganisms]{microorganisms$mo}} and must have a reference column with your own defined values. If all tests pass, \code{\link[=set_mo_source]{set_mo_source()}} will read the file into R and export it to \code{"~/.mo_source.rds"} after the user \strong{specifically confirms and allows} that this file will be created. For this reason, this function only works in interactive sessions. -\code{\link[=get_mo_source]{get_mo_source()}} will return the data set by reading \code{"~/.mo_source.rds"} with \code{\link[=readRDS]{readRDS()}}. If the original file has changed (the file defined with \code{path}), it will call \code{\link[=set_mo_source]{set_mo_source()}} to update the data file automatically. +The created compressed data file \code{"~/.mo_source.rds"} will be used at default for MO determination (function \code{\link[=as.mo]{as.mo()}} and consequently all \verb{mo_*} functions like \code{\link[=mo_genus]{mo_genus()}} and \code{\link[=mo_gramstain]{mo_gramstain()}}). The location of the original file will be saved as an R option with \code{options(mo_source = path)}. Its timestamp will be saved with \code{options(mo_source_datetime = ...)}. + +The function \code{\link[=get_mo_source]{get_mo_source()}} will return the data set by reading \code{"~/.mo_source.rds"} with \code{\link[=readRDS]{readRDS()}}. If the original file has changed (by checking the aforementioned options \code{mo_source} and \code{mo_source_datetime}), it will call \code{\link[=set_mo_source]{set_mo_source()}} to update the data file automatically. Reading an Excel file (\code{.xlsx}) with only one row has a size of 8-9 kB. The compressed file created with \code{\link[=set_mo_source]{set_mo_source()}} will then have a size of 0.1 kB and can be read by \code{\link[=get_mo_source]{get_mo_source()}} in only a couple of microseconds (millionths of a second). } diff --git a/man/proportion.Rd b/man/proportion.Rd index a6cfaa005..7bb61c266 100644 --- a/man/proportion.Rd +++ b/man/proportion.Rd @@ -60,7 +60,7 @@ rsi_df( \item{only_all_tested}{(for combination therapies, i.e. using more than one variable for \code{...}): a logical to indicate that isolates must be tested for all antibiotics, see section \emph{Combination therapy} below} -\item{data}{a \code{\link{data.frame}} containing columns with class \code{\link{rsi}} (see \code{\link[=as.rsi]{as.rsi()}})} +\item{data}{a \link{data.frame} containing columns with class \code{\link{rsi}} (see \code{\link[=as.rsi]{as.rsi()}})} \item{translate_ab}{a column name of the \link{antibiotics} data set to translate the antibiotic abbreviations to, using \code{\link[=ab_property]{ab_property()}}. Use a value} @@ -71,7 +71,7 @@ rsi_df( \item{combine_IR}{a logical to indicate whether all values of I and R must be merged into one, so the output only consists of S vs. I+R (susceptible vs. non-susceptible). This is outdated, see parameter \code{combine_SI}.} } \value{ -A \code{\link{double}} or, when \code{as_percent = TRUE}, a \code{\link{character}}. +A \link{double} or, when \code{as_percent = TRUE}, a \link{character}. } \description{ These functions can be used to calculate the (co-)resistance or susceptibility of microbial isolates (i.e. percentage of S, SI, I, IR or R). All functions support quasiquotation with pipes, can be used in \code{summarise()} from the \code{dplyr} package and also support grouped variables, please see \emph{Examples}. diff --git a/man/resistance_predict.Rd b/man/resistance_predict.Rd index beb368bba..651490aed 100644 --- a/man/resistance_predict.Rd +++ b/man/resistance_predict.Rd @@ -47,7 +47,7 @@ ggplot_rsi_predict( ) } \arguments{ -\item{x}{a \code{\link{data.frame}} containing isolates.} +\item{x}{a \link{data.frame} containing isolates.} \item{col_ab}{column name of \code{x} containing antimicrobial interpretations (\code{"R"}, \code{"I"} and \code{"S"})} @@ -76,7 +76,7 @@ ggplot_rsi_predict( \item{ribbon}{a logical to indicate whether a ribbon should be shown (default) or error bars} } \value{ -A \code{\link{data.frame}} with extra class \code{\link{resistance_predict}} with columns: +A \link{data.frame} with extra class \code{\link{resistance_predict}} with columns: \itemize{ \item \code{year} \item \code{value}, the same as \code{estimated} when \code{preserve_measurements = FALSE}, and a combination of \code{observed} and \code{estimated} otherwise diff --git a/man/rsi_translation.Rd b/man/rsi_translation.Rd index fc775b903..36f40bb8a 100644 --- a/man/rsi_translation.Rd +++ b/man/rsi_translation.Rd @@ -5,7 +5,7 @@ \alias{rsi_translation} \title{Data set for R/SI interpretation} \format{ -A \code{\link{data.frame}} with 18,650 observations and 10 variables: +A \link{data.frame} with 18,650 observations and 10 variables: \itemize{ \item \code{guideline}\cr Name of the guideline \item \code{method}\cr Either "MIC" or "DISK" diff --git a/man/skewness.Rd b/man/skewness.Rd index ca24255ac..08c0b0773 100644 --- a/man/skewness.Rd +++ b/man/skewness.Rd @@ -16,7 +16,7 @@ skewness(x, na.rm = FALSE) \method{skewness}{data.frame}(x, na.rm = FALSE) } \arguments{ -\item{x}{a vector of values, a \code{\link{matrix}} or a \code{\link{data.frame}}} +\item{x}{a vector of values, a \code{\link{matrix}} or a \link{data.frame}} \item{na.rm}{a logical value indicating whether \code{NA} values should be stripped before the computation proceeds.} } diff --git a/tests/testthat/test-_all_examples.R b/tests/testthat/test-_all_examples.R new file mode 100755 index 000000000..6997d3db5 --- /dev/null +++ b/tests/testthat/test-_all_examples.R @@ -0,0 +1,37 @@ +# ==================================================================== # +# TITLE # +# Antimicrobial Resistance (AMR) Analysis # +# # +# SOURCE # +# https://github.com/msberends/AMR # +# # +# LICENCE # +# (c) 2018-2020 Berends MS, Luz CF et al. # +# # +# This R package is free software; you can freely use and distribute # +# it for both personal and commercial purposes under the terms of the # +# GNU General Public License version 2.0 (GNU GPL-2), as published by # +# the Free Software Foundation. # +# # +# We created this package for both routine data analysis and academic # +# research and it was publicly released in the hope that it will be # +# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. # +# Visit our website for more info: https://msberends.github.io/AMR. # +# ==================================================================== # + +# context("All examples") +# +# # run all examples (will take forever) +# exported_functions <- ls("package:AMR") +# +# for (i in seq_len(length(exported_functions))) { +# test_that(paste(exported_functions[i], "works"), { +# skip_on_cran() +# expect_output(suppressWarnings(example(exported_functions[i], +# package = "AMR", +# give.lines = TRUE, +# run.dontrun = TRUE, +# run.donttest = TRUE)), +# label = paste0("Examples of function ", exported_functions[i])) +# }) +# } diff --git a/tests/testthat/test-ab.R b/tests/testthat/test-ab.R index a3ce9556b..9549e40c2 100755 --- a/tests/testthat/test-ab.R +++ b/tests/testthat/test-ab.R @@ -37,7 +37,7 @@ test_that("as.ab works", { rep("ERY", 10)) expect_identical(class(as.ab("amox")), c("ab", "character")) - expect_identical(class(pull(antibiotics, ab)), c("ab", "character")) + expect_identical(class(antibiotics$ab), c("ab", "character")) expect_true(is.ab(as.ab("amox"))) expect_output(print(as.ab("amox"))) expect_output(print(data.frame(a = as.ab("amox")))) diff --git a/tests/testthat/test-ab_property.R b/tests/testthat/test-ab_property.R index 8ca75e0cb..1a5857323 100644 --- a/tests/testthat/test-ab_property.R +++ b/tests/testthat/test-ab_property.R @@ -41,7 +41,7 @@ test_that("ab_property works", { expect_identical(ab_name(21319, language = NULL), "Flucloxacillin") expect_identical(ab_name("J01CF05", language = NULL), "Flucloxacillin") - expect_identical(ab_ddd("AMX", "oral"), 1) + expect_identical(ab_ddd("AMX", "oral"), 1.5) expect_identical(ab_ddd("AMX", "oral", units = TRUE), "g") expect_identical(ab_ddd("AMX", "iv"), 1) expect_identical(ab_ddd("AMX", "iv", units = TRUE), "g") diff --git a/tests/testthat/test-mic.R b/tests/testthat/test-mic.R index 9b0fac82c..c96985708 100755 --- a/tests/testthat/test-mic.R +++ b/tests/testthat/test-mic.R @@ -42,9 +42,10 @@ test_that("mic works", { barplot(as.mic(c(1, 2, 4, 8))) plot(as.mic(c(1, 2, 4, 8))) print(as.mic(c(1, 2, 4, 8))) - - expect_equal(summary(as.mic(c(2, 8))), c("Class" = "mic", - "" = "0", - "Min." = "2", - "Max." = "8")) + + expect_equal(summary(as.mic(c(2, 8))), + structure(c("Class" = "mic", + "" = "0", + "Min." = "2", + "Max." = "8"), class = c("summaryDefault", "table"))) }) diff --git a/tests/testthat/test-rsi.R b/tests/testthat/test-rsi.R index 2dc37a0cf..e1caf561f 100644 --- a/tests/testthat/test-rsi.R +++ b/tests/testthat/test-rsi.R @@ -81,7 +81,7 @@ test_that("mic2rsi works", { mo = "B_STRPT_PNMN", ab = "AMX", guideline = "EUCAST")), - "R") + "I") expect_true(example_isolates %>% mutate(amox_mic = as.mic(2)) %>%