1
0
mirror of https://github.com/msberends/AMR.git synced 2025-07-08 23:21:56 +02:00

(v1.3.0.9022) mo_matching_score(), poorman update, as.rsi() fix

This commit is contained in:
2020-09-18 16:05:53 +02:00
parent 89401ede9f
commit 4e40e42011
138 changed files with 2923 additions and 1472 deletions

View File

@ -19,41 +19,12 @@
# Visit our website for more info: https://msberends.github.io/AMR. #
# ==================================================================== #
# functions from dplyr, will perhaps become poorman
distinct <- function(.data, ..., .keep_all = FALSE) {
check_is_dataframe(.data)
if ("grouped_data" %in% class(.data)) {
distinct.grouped_data(.data, ..., .keep_all = .keep_all)
} else {
distinct.default(.data, ..., .keep_all = .keep_all)
}
}
distinct.default <- function(.data, ..., .keep_all = FALSE) {
names <- rownames(.data)
rownames(.data) <- NULL
if (length(deparse_dots(...)) == 0) {
selected <- .data
} else {
selected <- select(.data, ...)
}
rows <- as.integer(rownames(unique(selected)))
if (isTRUE(.keep_all)) {
res <- .data[rows, , drop = FALSE]
} else {
res <- selected[rows, , drop = FALSE]
}
rownames(res) <- names[rows]
res
}
distinct.grouped_data <- function(.data, ..., .keep_all = FALSE) {
apply_grouped_function(.data, "distinct", ..., .keep_all = .keep_all)
}
# faster implementation of left_join than using merge() by poorman - we use match():
left_join <- function(x, y, by = NULL, suffix = c(".x", ".y")) {
pm_left_join <- function(x, y, by = NULL, suffix = c(".x", ".y")) {
if (is.null(by)) {
by <- intersect(names(x), names(y))[1L]
if (is.na(by)) {
stop_("no common column found for left_join()")
stop_("no common column found for pm_left_join()")
}
join_message(by)
} else if (!is.null(names(by))) {
@ -77,17 +48,28 @@ left_join <- function(x, y, by = NULL, suffix = c(".x", ".y")) {
rownames(merged) <- NULL
merged
}
filter_join_worker <- function(x, y, by = NULL, type = c("anti", "semi")) {
type <- match.arg(type, choices = c("anti", "semi"), several.ok = FALSE)
if (is.null(by)) {
by <- intersect(names(x), names(y))
join_message(by)
# pm_filter_join_worker <- function(x, y, by = NULL, type = c("anti", "semi")) {
# type <- match.arg(type, choices = c("anti", "semi"), several.ok = FALSE)
# if (is.null(by)) {
# by <- intersect(names(x), names(y))
# join_message(by)
# }
# rows <- interaction(x[, by]) %in% interaction(y[, by])
# if (type == "anti") rows <- !rows
# res <- x[rows, , drop = FALSE]
# rownames(res) <- NULL
# res
# }
quick_case_when <- function(...) {
vectors <- list(...)
split <- lapply(vectors, function(x) unlist(strsplit(paste(deparse(x), collapse = ""), "~", fixed = TRUE)))
for (i in seq_len(length(vectors))) {
if (eval(parse(text = split[[i]][1]), envir = parent.frame())) {
return(eval(parse(text = split[[i]][2]), envir = parent.frame()))
}
}
rows <- interaction(x[, by]) %in% interaction(y[, by])
if (type == "anti") rows <- !rows
res <- x[rows, , drop = FALSE]
rownames(res) <- NULL
res
return(NA)
}
# No export, no Rd
@ -165,7 +147,7 @@ search_type_in_df <- function(x, type) {
if (any(colnames(x) %like% "^(specimen date|specimen_date|spec_date)")) {
# WHONET support
found <- sort(colnames(x)[colnames(x) %like% "^(specimen date|specimen_date|spec_date)"])[1]
if (!any(class(pull(x, found)) %in% c("Date", "POSIXct"))) {
if (!any(class(pm_pull(x, found)) %in% c("Date", "POSIXct"))) {
stop(font_red(paste0("ERROR: Found column `", font_bold(found), "` to be used as input for `col_", type,
"`, but this column contains no valid dates. Transform its values to valid dates first.")),
call. = FALSE)
@ -461,7 +443,7 @@ font_stripstyle <- function(x) {
gsub("(?:(?:\\x{001b}\\[)|\\x{009b})(?:(?:[0-9]{1,3})?(?:(?:;[0-9]{0,3})*)?[A-M|f-m])|\\x{001b}[A-M]", "", x, perl = TRUE)
}
progress_estimated <- function(n = 1, n_min = 0, ...) {
progress_ticker <- function(n = 1, n_min = 0, ...) {
if (!interactive() || n < n_min) {
pb <- list()
pb$tick <- function() {

View File

@ -1,775 +0,0 @@
# ==================================================================== #
# TITLE #
# Antimicrobial Resistance (AMR) Analysis #
# #
# SOURCE #
# https://github.com/msberends/AMR #
# #
# LICENCE #
# (c) 2018-2020 Berends MS, Luz CF et al. #
# #
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
# the Free Software Foundation. #
# #
# We created this package for both routine data analysis and academic #
# research and it was publicly released in the hope that it will be #
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
# Visit our website for more info: https://msberends.github.io/AMR. #
# ==================================================================== #
# ------------------------------------------------
# THIS FILE WAS CREATED AUTOMATICALLY!
# Source file: data-raw/reproduction_of_poorman.R
# ------------------------------------------------
# Poorman: a package to replace all dplyr functions with base R so we can lose dependency on dplyr.
# These functions were downloaded from https://github.com/nathaneastwood/poorman,
# from this commit: https://github.com/nathaneastwood/poorman/tree/7d76d77f8f7bc663bf30fb5a161abb49801afa17
#
# All code below was released under MIT license, that permits 'free of charge, to any person obtaining a
# copy of the software and associated documentation files (the "Software"), to deal in the Software
# without restriction, including without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
# is furnished to do so', given that a copyright notice is given in the software.
#
# Copyright notice as found on https://github.com/nathaneastwood/poorman/blob/master/LICENSE on 2 May 2020:
# YEAR: 2020
# COPYRIGHT HOLDER: Nathan Eastwood
arrange <- function(.data, ...) {
check_is_dataframe(.data)
if ("grouped_data" %in% class(.data)) {
arrange.grouped_data(.data, ...)
} else {
arrange.default(.data, ...)
}
}
arrange.default <- function(.data, ...) {
rows <- eval.parent(substitute(with(.data, order(...))))
.data[rows, , drop = FALSE]
}
arrange.grouped_data <- function(.data, ...) {
apply_grouped_function(.data, "arrange", ...)
}
between <- function(x, left, right) {
if (!is.null(attr(x, "class")) && !inherits(x, c("Date", "POSIXct"))) {
warning("`between()` called on numeric vector with S3 class")
}
if (!is.double(x)) x <- as.numeric(x)
x >= as.numeric(left) & x <= as.numeric(right)
}
count <- function(x, ..., wt = NULL, sort = FALSE, name = NULL) {
groups <- get_groups(x)
if (!missing(...)) x <- group_by(x, ..., .add = TRUE)
wt <- deparse_var(wt)
res <- do.call(tally, list(x, wt, sort, name))
if (length(groups) > 0L) res <- do.call(group_by, list(res, as.name(groups)))
res
}
tally <- function(x, wt = NULL, sort = FALSE, name = NULL) {
name <- check_name(x, name)
wt <- deparse_var(wt)
res <- do.call(summarise, set_names(list(x, as.name(tally_n(x, wt))), c(".data", name)))
res <- ungroup(res)
if (isTRUE(sort)) res <- do.call(arrange, list(res, call("desc", as.name(name))))
rownames(res) <- NULL
res
}
add_count <- function(x, ..., wt = NULL, sort = FALSE, name = NULL) {
name <- check_name(x, name)
row_names <- rownames(x)
wt <- deparse_var(wt)
if (!missing(...)) x <- group_by(x, ..., .add = TRUE)
res <- do.call(add_tally, list(x, wt, sort, name))
res[row_names, ]
}
add_tally <- function(x, wt = NULL, sort = FALSE, name = NULL) {
wt <- deparse_var(wt)
n <- tally_n(x, wt)
name <- check_name(x, name)
res <- do.call(mutate, set_names(list(x, as.name(n)), c(".data", name)))
if (isTRUE(sort)) {
do.call(arrange, list(res, call("desc", as.name(name))))
} else {
res
}
}
tally_n <- function(x, wt) {
if (is.null(wt) && "n" %in% colnames(x)) {
message("Using `n` as weighting variable")
wt <- "n"
}
context$.data <- x
on.exit(rm(list = ".data", envir = context))
if (is.null(wt)) {
"n()"
} else {
paste0("sum(", wt, ", na.rm = TRUE)")
}
}
check_name <- function(df, name) {
if (is.null(name)) {
if ("n" %in% colnames(df)) {
stop(
"Column 'n' is already present in output\n",
"* Use `name = \"new_name\"` to pick a new name"
)
}
return("n")
}
if (!is.character(name) || length(name) != 1) {
stop("`name` must be a single string")
}
name
}
desc <- function(x) -xtfrm(x)
select_env <- new.env()
peek_vars <- function() {
get(".col_names", envir = select_env)
}
context <- new.env()
n <- function() {
do.call(nrow, list(quote(.data)), envir = context)
}
filter <- function(.data, ...) {
check_is_dataframe(.data)
if ("grouped_data" %in% class(.data)) {
filter.grouped_data(.data, ...)
} else {
filter.default(.data, ...)
}
}
filter.default <- function(.data, ...) {
conditions <- paste(deparse_dots(...), collapse = " & ")
context$.data <- .data
on.exit(rm(.data, envir = context))
.data[do.call(with, list(.data, str2lang(unname(conditions)))), ]
}
filter.grouped_data <- function(.data, ...) {
rows <- rownames(.data)
res <- apply_grouped_function(.data, "filter", ...)
res[rows[rows %in% rownames(res)], ]
}
group_by <- function(.data, ..., .add = FALSE) {
check_is_dataframe(.data)
pre_groups <- get_groups(.data)
groups <- deparse_dots(...)
if (isTRUE(.add)) groups <- unique(c(pre_groups, groups))
unknown <- !(groups %in% colnames(.data))
if (any(unknown)) stop("Invalid groups: ", groups[unknown])
structure(.data, class = c("grouped_data", class(.data)), groups = groups)
}
ungroup <- function(x, ...) {
check_is_dataframe(x)
rm_groups <- deparse_dots(...)
groups <- attr(x, "groups")
if (length(rm_groups) == 0L) rm_groups <- groups
attr(x, "groups") <- groups[!(groups %in% rm_groups)]
if (length(attr(x, "groups")) == 0L) {
attr(x, "groups") <- NULL
class(x) <- class(x)[!(class(x) %in% "grouped_data")]
}
x
}
get_groups <- function(x) {
attr(x, "groups", exact = TRUE)
}
has_groups <- function(x) {
groups <- get_groups(x)
if (is.null(groups)) FALSE else TRUE
}
set_groups <- function(x, groups) {
attr(x, "groups") <- groups
x
}
apply_grouped_function <- function(.data, fn, ...) {
groups <- get_groups(.data)
grouped <- split_into_groups(.data, groups)
res <- do.call(rbind, unname(lapply(grouped, fn, ...)))
if (any(groups %in% colnames(res))) {
class(res) <- c("grouped_data", class(res))
attr(res, "groups") <- groups[groups %in% colnames(res)]
}
res
}
split_into_groups <- function(.data, groups) {
class(.data) <- "data.frame"
group_factors <- lapply(groups, function(x, .data) as.factor(.data[, x]), .data)
res <- split(x = .data, f = group_factors)
res
}
print.grouped_data <- function(x, ..., digits = NULL, quote = FALSE, right = TRUE, row.names = TRUE, max = NULL) {
class(x) <- "data.frame"
print(x, ..., digits = digits, quote = quote, right = right, row.names = row.names, max = max)
cat("\nGroups: ", paste(attr(x, "groups", exact = TRUE), collapse = ", "), "\n\n")
}
if_else <- function(condition, true, false, missing = NULL) {
if (!is.logical(condition)) stop("`condition` must be a logical vector.")
cls_true <- class(true)
cls_false <- class(false)
cls_missing <- class(missing)
if (!identical(cls_true, cls_false)) {
stop("The class of `true` <", class(true), "> is not the same as the class of `false` <", class(false), ">")
}
if (!is.null(missing) && !identical(cls_true, cls_missing)) {
stop("`missing` must be a ", cls_true, " vector, not a ", cls_missing, " vector.")
}
res <- ifelse(condition, true, false)
if (!is.null(missing)) res[is.na(res)] <- missing
attributes(res) <- attributes(true)
res
}
inner_join <- function(x, y, by = NULL, suffix = c(".x", ".y")) {
join_worker(x = x, y = y, by = by, suffix = suffix, sort = FALSE)
}
# left_join <- function(x, y, by = NULL, suffix = c(".x", ".y")) {
# join_worker(x = x, y = y, by = by, suffix = suffix, all.x = TRUE)
# }
right_join <- function(x, y, by = NULL, suffix = c(".x", ".y")) {
join_worker(x = x, y = y, by = by, suffix = suffix, all.y = TRUE)
}
full_join <- function(x, y, by = NULL, suffix = c(".x", ".y")) {
join_worker(x = x, y = y, by = by, suffix = suffix, all = TRUE)
}
join_worker <- function(x, y, by = NULL, suffix = c(".x", ".y"), ...) {
x[, ".join_id"] <- seq_len(nrow(x))
if (is.null(by)) {
by <- intersect(names(x), names(y))
join_message(by)
merged <- merge(x = x, y = y, by = by, suffixes = suffix, ...)[, union(names(x), names(y))]
} else if (is.null(names(by))) {
merged <- merge(x = x, y = y, by = by, suffixes = suffix, ...)
} else {
merged <- merge(x = x, y = y, by.x = names(by), by.y = by, suffixes = suffix, ...)
}
merged <- merged[order(merged[, ".join_id"]), colnames(merged) != ".join_id"]
rownames(merged) <- NULL
merged
}
join_message <- function(by) {
if (length(by) > 1L) {
message("Joining, by = c(\"", paste0(by, collapse = "\", \""), "\")\n", sep = "")
} else {
message("Joining, by = \"", by, "\"\n", sep = "")
}
}
anti_join <- function(x, y, by = NULL) {
filter_join_worker(x, y, by, type = "anti")
}
semi_join <- function(x, y, by = NULL) {
filter_join_worker(x, y, by, type = "semi")
}
# filter_join_worker <- function(x, y, by = NULL, type = c("anti", "semi")) {
# type <- match.arg(type, choices = c("anti", "semi"), several.ok = FALSE)
# if (is.null(by)) {
# by <- intersect(names(x), names(y))
# join_message(by)
# }
# rows <- interaction(x[, by]) %in% interaction(y[, by])
# if (type == "anti") rows <- !rows
# res <- x[rows, ]
# rownames(res) <- NULL
# res
# }
lag <- function (x, n = 1L, default = NA) {
if (inherits(x, "ts")) stop("`x` must be a vector, not a `ts` object, do you want `stats::lag()`?")
if (length(n) != 1L || !is.numeric(n) || n < 0L) stop("`n` must be a nonnegative integer scalar")
if (n == 0L) return(x)
tryCatch(
storage.mode(default) <- typeof(x),
warning = function(w) {
stop("Cannot convert `default` <", typeof(default), "> to `x` <", typeof(x), ">")
}
)
xlen <- length(x)
n <- pmin(n, xlen)
res <- c(rep(default, n), x[seq_len(xlen - n)])
attributes(res) <- attributes(x)
res
}
lead <- function (x, n = 1L, default = NA) {
if (length(n) != 1L || !is.numeric(n) || n < 0L) stop("n must be a nonnegative integer scalar")
if (n == 0L) return(x)
tryCatch(
storage.mode(default) <- typeof(x),
warning = function(w) {
stop("Cannot convert `default` <", typeof(default), "> to `x` <", typeof(x), ">")
}
)
xlen <- length(x)
n <- pmin(n, xlen)
res <- c(x[-seq_len(n)], rep(default, n))
attributes(res) <- attributes(x)
res
}
mutate <- function(.data, ...) {
check_is_dataframe(.data)
if ("grouped_data" %in% class(.data)) {
mutate.grouped_data(.data, ...)
} else {
mutate.default(.data, ...)
}
}
mutate.default <- function(.data, ...) {
conditions <- deparse_dots(...)
cond_names <- names(conditions)
unnamed <- which(nchar(cond_names) == 0L)
if (is.null(cond_names)) {
names(conditions) <- conditions
} else if (length(unnamed) > 0L) {
names(conditions)[unnamed] <- conditions[unnamed]
}
not_matched <- names(conditions)[!names(conditions) %in% names(.data)]
.data[, not_matched] <- NA
context$.data <- .data
on.exit(rm(.data, envir = context))
for (i in seq_along(conditions)) {
.data[, names(conditions)[i]] <- do.call(with, list(.data, str2lang(unname(conditions)[i])))
}
.data
}
mutate.grouped_data <- function(.data, ...) {
rows <- rownames(.data)
res <- apply_grouped_function(.data, "mutate", ...)
res[rows, ]
}
n_distinct <- function(..., na.rm = FALSE) {
res <- c(...)
if (is.list(res)) return(nrow(unique(as.data.frame(res, stringsAsFactors = FALSE))))
if (isTRUE(na.rm)) res <- res[!is.na(res)]
length(unique(res))
}
`%>%` <- function(lhs, rhs) {
lhs <- substitute(lhs)
rhs <- substitute(rhs)
eval(as.call(c(rhs[[1L]], lhs, as.list(rhs[-1L]))), envir = parent.frame())
}
pull <- function(.data, var = -1) {
var_deparse <- deparse_var(var)
col_names <- colnames(.data)
if (!(var_deparse %in% col_names) & grepl("^[[:digit:]]+L|[[:digit:]]", var_deparse)) {
var <- as.integer(gsub("L", "", var_deparse))
var <- if_else(var < 1L, rev(col_names)[abs(var)], col_names[var])
} else if (var_deparse %in% col_names) {
var <- var_deparse
}
.data[, var]
}
relocate <- function(.data, ..., .before = NULL, .after = NULL) {
check_is_dataframe(.data)
data_names <- colnames(.data)
col_pos <- select_positions(.data, ...)
.before <- deparse_var(.before)
.after <- deparse_var(.after)
has_before <- !is.null(.before)
has_after <- !is.null(.after)
if (has_before && has_after) {
stop("You must supply only one of `.before` and `.after`")
} else if (has_before) {
where <- min(match(.before, data_names))
col_pos <- c(setdiff(col_pos, where), where)
} else if (has_after) {
where <- max(match(.after, data_names))
col_pos <- c(where, setdiff(col_pos, where))
} else {
where <- 1L
col_pos <- union(col_pos, where)
}
lhs <- setdiff(seq(1L, where - 1L), col_pos)
rhs <- setdiff(seq(where + 1L, ncol(.data)), col_pos)
col_pos <- unique(c(lhs, col_pos, rhs))
col_pos <- col_pos[col_pos <= length(data_names)]
res <- .data[col_pos]
if (has_groups(.data)) res <- set_groups(res, get_groups(.data))
res
}
rename <- function(.data, ...) {
check_is_dataframe(.data)
new_names <- names(deparse_dots(...))
if (length(new_names) == 0L) {
warning("You didn't give any new names")
return(.data)
}
col_pos <- select_positions(.data, ...)
old_names <- colnames(.data)[col_pos]
new_names_zero <- nchar(new_names) == 0L
if (any(new_names_zero)) {
warning("You didn't provide new names for: ", paste0("`", old_names[new_names_zero], collapse = ", "), "`")
new_names[new_names_zero] <- old_names[new_names_zero]
}
colnames(.data)[col_pos] <- new_names
.data
}
rownames_to_column <- function(.data, var = "rowname") {
check_is_dataframe(.data)
col_names <- colnames(.data)
if (var %in% col_names) stop("Column `", var, "` already exists in `.data`")
.data[, var] <- rownames(.data)
rownames(.data) <- NULL
.data[, c(var, setdiff(col_names, var))]
}
select <- function(.data, ...) {
map <- names(deparse_dots(...))
col_pos <- select_positions(.data, ..., group_pos = TRUE)
res <- .data[, col_pos, drop = FALSE]
to_map <- nchar(map) > 0L
colnames(res)[to_map] <- map[to_map]
if (has_groups(.data)) res <- set_groups(res, get_groups(.data))
res
}
starts_with <- function(match, ignore.case = TRUE, vars = peek_vars()) {
grep(pattern = paste0("^", paste0(match, collapse = "|^")), x = vars, ignore.case = ignore.case)
}
ends_with <- function(match, ignore.case = TRUE, vars = peek_vars()) {
grep(pattern = paste0(paste0(match, collapse = "$|"), "$"), x = vars, ignore.case = ignore.case)
}
contains <- function(match, ignore.case = TRUE, vars = peek_vars()) {
matches <- lapply(
match,
function(x) {
if (isTRUE(ignore.case)) {
match_u <- toupper(x)
match_l <- tolower(x)
pos_u <- grep(pattern = match_u, x = toupper(vars), fixed = TRUE)
pos_l <- grep(pattern = match_l, x = tolower(vars), fixed = TRUE)
unique(c(pos_l, pos_u))
} else {
grep(pattern = x, x = vars, fixed = TRUE)
}
}
)
unique(matches)
}
matches <- function(match, ignore.case = TRUE, perl = FALSE, vars = peek_vars()) {
grep(pattern = match, x = vars, ignore.case = ignore.case, perl = perl)
}
num_range <- function(prefix, range, width = NULL, vars = peek_vars()) {
if (!is.null(width)) {
range <- sprintf(paste0("%0", width, "d"), range)
}
find <- paste0(prefix, range)
if (any(duplicated(vars))) {
stop("Column names must be unique")
} else {
x <- match(find, vars)
x[!is.na(x)]
}
}
all_of <- function(x, vars = peek_vars()) {
x_ <- !x %in% vars
if (any(x_)) {
which_x_ <- which(x_)
if (length(which_x_) == 1L) {
stop("The column ", x[which_x_], " does not exist.")
} else {
stop("The columns ", paste(x[which_x_], collapse = ", "), " do not exist.")
}
} else {
which(vars %in% x)
}
}
any_of <- function(x, vars = peek_vars()) {
which(vars %in% x)
}
everything <- function(vars = peek_vars()) {
seq_along(vars)
}
last_col <- function(offset = 0L, vars = peek_vars()) {
if (!is_wholenumber(offset)) stop("`offset` must be an integer")
n <- length(vars)
if (offset && n <= offset) {
stop("`offset` must be smaller than the number of `vars`")
} else if (n == 0) {
stop("Can't select last column when `vars` is empty")
} else {
n - offset
}
}
select_positions <- function(.data, ..., group_pos = FALSE) {
cols <- eval(substitute(alist(...)))
data_names <- colnames(.data)
select_env$.col_names <- data_names
on.exit(rm(list = ".col_names", envir = select_env))
exec_env <- parent.frame(2L)
pos <- unlist(lapply(cols, eval_expr, exec_env = exec_env))
if (isTRUE(group_pos)) {
groups <- get_groups(.data)
missing_groups <- !(groups %in% cols)
if (any(missing_groups)) {
message("Adding missing grouping variables: `", paste(groups[missing_groups], collapse = "`, `"), "`")
pos <- c(match(groups[missing_groups], data_names), pos)
}
}
unique(pos)
}
eval_expr <- function(x, exec_env) {
type <- typeof(x)
switch(
type,
"integer" = x,
"double" = as.integer(x),
"character" = select_char(x),
"symbol" = select_symbol(x, exec_env = exec_env),
"language" = eval_call(x),
stop("Expressions of type <", typeof(x), "> cannot be evaluated for use when subsetting.")
)
}
select_char <- function(expr) {
pos <- match(expr, select_env$.col_names)
if (is.na(pos)) stop("Column `", expr, "` does not exist")
pos
}
select_symbol <- function(expr, exec_env) {
res <- try(select_char(as.character(expr)), silent = TRUE)
if (inherits(res, "try-error")) {
res <- tryCatch(
select_char(eval(expr, envir = exec_env)),
error = function(e) stop("Column ", expr, " does not exist.")
)
}
res
}
eval_call <- function(x) {
type <- as.character(x[[1]])
switch(
type,
`:` = select_seq(x),
`!` = select_negate(x),
`-` = select_minus(x),
`c` = select_c(x),
`(` = select_bracket(x),
select_context(x)
)
}
select_seq <- function(expr) {
x <- eval_expr(expr[[2]])
y <- eval_expr(expr[[3]])
x:y
}
select_negate <- function(expr) {
x <- if (is_negated_colon(expr)) {
expr <- call(":", expr[[2]][[2]], expr[[2]][[3]][[2]])
eval_expr(expr)
} else {
eval_expr(expr[[2]])
}
x * -1L
}
is_negated_colon <- function(expr) {
expr[[1]] == "!" && length(expr[[2]]) > 1L && expr[[2]][[1]] == ":" && expr[[2]][[3]][[1]] == "!"
}
select_minus <- function(expr) {
x <- eval_expr(expr[[2]])
x * -1L
}
select_c <- function(expr) {
lst_expr <- as.list(expr)
lst_expr[[1]] <- NULL
unlist(lapply(lst_expr, eval_expr))
}
select_bracket <- function(expr) {
eval_expr(expr[[2]])
}
select_context <- function(expr) {
eval(expr, envir = context$.data)
}
slice <- function(.data, ...) {
check_is_dataframe(.data)
if ("grouped_data" %in% class(.data)) {
slice.grouped_data(.data, ...)
} else {
slice.default(.data, ...)
}
}
slice.default <- function(.data, ...) {
rows <- c(...)
stopifnot(is.numeric(rows) | is.integer(rows))
if (all(rows > 0L)) rows <- rows[rows <= nrow(.data)]
.data[rows, ]
}
slice.grouped_data <- function(.data, ...) {
apply_grouped_function(.data, "slice", ...)
}
summarise <- function(.data, ...) {
check_is_dataframe(.data)
if ("grouped_data" %in% class(.data)) {
summarise.grouped_data(.data, ...)
} else {
summarise.default(.data, ...)
}
}
summarise.default <- function(.data, ...) {
fns <- vapply(substitute(...()), deparse, NA_character_)
context$.data <- .data
on.exit(rm(.data, envir = context))
if (has_groups(.data)) {
group <- unique(.data[, get_groups(.data), drop = FALSE])
if (nrow(group) == 0L) return(NULL)
}
res <- lapply(fns, function(x) do.call(with, list(.data, str2lang(x))))
res <- as.data.frame(res)
fn_names <- names(fns)
colnames(res) <- if (is.null(fn_names)) fns else fn_names
if (has_groups(.data)) res <- cbind(group, res)
res
}
summarise.grouped_data <- function(.data, ...) {
groups <- get_groups(.data)
res <- apply_grouped_function(.data, "summarise", ...)
res <- res[do.call(order, lapply(groups, function(x) res[, x])), ]
rownames(res) <- NULL
res
}
summarize <- summarise
summarize.default <- summarise.default
summarize.grouped_data <- summarise.grouped_data
transmute <- function(.data, ...) {
check_is_dataframe(.data)
if ("grouped_data" %in% class(.data)) {
transmute.grouped_data(.data, ...)
} else {
transmute.default(.data, ...)
}
}
transmute.default <- function(.data, ...) {
conditions <- deparse_dots(...)
mutated <- mutate(.data, ...)
mutated[, names(conditions), drop = FALSE]
}
transmute.grouped_data <- function(.data, ...) {
rows <- rownames(.data)
res <- apply_grouped_function(.data, "transmute", ...)
res[rows, ]
}
deparse_dots <- function(...) {
vapply(substitute(...()), deparse, NA_character_)
}
deparse_var <- function(var) {
sub_var <- eval(substitute(substitute(var)), parent.frame())
if (is.symbol(sub_var)) var <- as.character(sub_var)
var
}
check_is_dataframe <- function(.data) {
parent_fn <- all.names(sys.call(-1L), max.names = 1L)
if (!is.data.frame(.data)) stop(parent_fn, " must be given a data.frame")
invisible()
}
is_wholenumber <- function(x) {
x %% 1L == 0L
}
set_names <- function(object = nm, nm) {
names(object) <- nm
object
}
cume_dist <- function(x) {
rank(x, ties.method = "max", na.last = "keep") / sum(!is.na(x))
}
dense_rank <- function(x) {
match(x, sort(unique(x)))
}
min_rank <- function(x) {
rank(x, ties.method = "min", na.last = "keep")
}
ntile <- function (x = row_number(), n) {
if (!missing(x)) x <- row_number(x)
len <- length(x) - sum(is.na(x))
n <- as.integer(floor(n))
if (len == 0L) {
rep(NA_integer_, length(x))
} else {
n_larger <- as.integer(len %% n)
n_smaller <- as.integer(n - n_larger)
size <- len / n
larger_size <- as.integer(ceiling(size))
smaller_size <- as.integer(floor(size))
larger_threshold <- larger_size * n_larger
bins <- if_else(
x <= larger_threshold,
(x + (larger_size - 1L)) / larger_size,
(x + (-larger_threshold + smaller_size - 1L)) / smaller_size + n_larger
)
as.integer(floor(bins))
}
}
percent_rank <- function(x) {
(min_rank(x) - 1) / (sum(!is.na(x)) - 1)
}
row_number <- function(x) {
if (missing(x)) seq_len(n()) else rank(x, ties.method = "first", na.last = "keep")
}

1589
R/aa_helper_pm_functions.R Normal file

File diff suppressed because it is too large Load Diff

80
R/ab.R
View File

@ -47,9 +47,9 @@
#'
#' European Commission Public Health PHARMACEUTICALS - COMMUNITY REGISTER: \url{http://ec.europa.eu/health/documents/community-register/html/atc.htm}
#' @aliases ab
#' @return Character (vector) with class [`ab`]. Unknown values will return `NA`.
#' @return A [character] [vector] with additional class [`ab`]
#' @seealso
#' * [antibiotics] for the dataframe that is being used to determine ATCs
#' * [antibiotics] for the [data.frame] that is being used to determine ATCs
#' * [ab_from_text()] for a function to retrieve antimicrobial drugs from clinical text (from health care records)
#' @inheritSection AMR Reference data publicly available
#' @inheritSection AMR Read more on our website!
@ -101,23 +101,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) {
x <- gsub('"', "", x, fixed = TRUE)
x_bak_clean <- x
if (already_regex == FALSE) {
# remove suffices
x_bak_clean <- gsub("_(MIC|RSI|DIS[CK])$", "", x_bak_clean)
# remove disk concentrations, like LVX_NM -> LVX
x_bak_clean <- gsub("_[A-Z]{2}[0-9_.]{0,3}$", "", x_bak_clean)
# remove part between brackets if that's followed by another string
x_bak_clean <- gsub("(.*)+ [(].*[)]", "\\1", x_bak_clean)
# keep only max 1 space
x_bak_clean <- trimws(gsub(" +", " ", x_bak_clean))
# non-character, space or number should be a slash
x_bak_clean <- gsub("[^A-Z0-9 -]", "/", x_bak_clean)
# spaces around non-characters must be removed: amox + clav -> amox/clav
x_bak_clean <- gsub("(.*[A-Z0-9]) ([^A-Z0-9].*)", "\\1\\2", x_bak_clean)
x_bak_clean <- gsub("(.*[^A-Z0-9]) ([A-Z0-9].*)", "\\1\\2", x_bak_clean)
# remove hyphen after a starting "co"
x_bak_clean <- gsub("^CO-", "CO", x_bak_clean)
# replace text 'and' with a slash
x_bak_clean <- gsub(" AND ", "/", x_bak_clean)
x_bak_clean <- generalise_antibiotic_name(x_bak_clean)
}
x <- unique(x_bak_clean)
@ -133,7 +117,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) {
}
if (initial_search == TRUE) {
progress <- progress_estimated(n = length(x), n_min = ifelse(isTRUE(info), 25, length(x) + 1)) # start if n >= 25
progress <- progress_ticker(n = length(x), n_min = ifelse(isTRUE(info), 25, length(x) + 1)) # start if n >= 25
on.exit(close(progress))
}
@ -161,7 +145,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) {
}
# exact name
found <- antibiotics[which(toupper(antibiotics$name) == x[i]), ]$ab
found <- antibiotics[which(AB_lookup$generalised_name == x[i]), ]$ab
if (length(found) > 0) {
x_new[i] <- found[1L]
next
@ -189,8 +173,8 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) {
}
# exact LOINC code
loinc_found <- unlist(lapply(antibiotics$loinc,
function(s) x[i] %in% s))
loinc_found <- unlist(lapply(AB_lookup$generalised_loinc,
function(s) generalise_antibiotic_name(x[i]) %in% s))
found <- antibiotics$ab[loinc_found == TRUE]
if (length(found) > 0) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
@ -198,8 +182,8 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) {
}
# exact synonym
synonym_found <- unlist(lapply(antibiotics$synonyms,
function(s) x[i] %in% toupper(s)))
synonym_found <- unlist(lapply(AB_lookup$generalised_synonyms,
function(s) generalise_antibiotic_name(x[i]) %in% s))
found <- antibiotics$ab[synonym_found == TRUE]
if (length(found) > 0) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
@ -207,8 +191,8 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) {
}
# exact abbreviation
abbr_found <- unlist(lapply(antibiotics$abbreviations,
function(a) x[i] %in% toupper(a)))
abbr_found <- unlist(lapply(AB_lookup$generalised_abbreviations,
function(s) generalise_antibiotic_name(x[i]) %in% s))
found <- antibiotics$ab[abbr_found == TRUE]
if (length(found) > 0) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
@ -246,21 +230,21 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) {
}
# try if name starts with it
found <- antibiotics[which(antibiotics$name %like% paste0("^", x_spelling)), ]$ab
found <- antibiotics[which(AB_lookup$generalised_name %like% paste0("^", x_spelling)), ]$ab
if (length(found) > 0) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
next
}
# try if name ends with it
found <- antibiotics[which(antibiotics$name %like% paste0(x_spelling, "$")), ]$ab
found <- antibiotics[which(AB_lookup$generalised_name %like% paste0(x_spelling, "$")), ]$ab
if (nchar(x[i]) >= 4 & length(found) > 0) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
next
}
# and try if any synonym starts with it
synonym_found <- unlist(lapply(antibiotics$synonyms,
function(s) any(s %like% paste0("^", x_spelling))))
synonym_found <- unlist(lapply(AB_lookup$generalised_synonyms,
function(s) any(generalise_antibiotic_name(s) %like% paste0("^", x_spelling))))
found <- antibiotics$ab[synonym_found == TRUE]
if (length(found) > 0) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
@ -291,7 +275,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) {
}
# transform back from other languages and try again
x_translated <- paste(lapply(strsplit(x[i], "[^A-Z0-9 ]"),
x_translated <- paste(lapply(strsplit(x[i], "[^A-Z0-9]"),
function(y) {
for (i in seq_len(length(y))) {
y[i] <- ifelse(tolower(y[i]) %in% tolower(translations_file$replacement),
@ -299,7 +283,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) {
!isFALSE(translations_file$fixed)), "pattern"],
y[i])
}
y
generalise_antibiotic_name(y)
})[[1]],
collapse = "/")
x_translated_guess <- suppressWarnings(as.ab(x_translated, initial_search = FALSE))
@ -317,7 +301,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) {
y_name,
y[i])
}
y
generalise_antibiotic_name(y)
})[[1]],
collapse = "/")
x_translated_guess <- suppressWarnings(as.ab(x_translated, initial_search = FALSE))
@ -449,9 +433,9 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) {
call. = FALSE)
}
x_result <- data.frame(x = x_bak_clean, stringsAsFactors = FALSE) %>%
left_join(data.frame(x = x, x_new = x_new, stringsAsFactors = FALSE), by = "x") %>%
pull(x_new)
x_result <- data.frame(x = x_bak_clean, stringsAsFactors = FALSE) %pm>%
pm_left_join(data.frame(x = x, x_new = x_new, stringsAsFactors = FALSE), by = "x") %pm>%
pm_pull(x_new)
if (length(x_result) == 0) {
x_result <- NA_character_
@ -538,3 +522,25 @@ c.ab <- function(x, ...) {
attributes(y) <- attributes(x)
class_integrity_check(y, "antimicrobial code", antibiotics$ab)
}
generalise_antibiotic_name <- function(x) {
x <- toupper(x)
# remove suffices
x <- gsub("_(MIC|RSI|DIS[CK])$", "", x)
# remove disk concentrations, like LVX_NM -> LVX
x <- gsub("_[A-Z]{2}[0-9_.]{0,3}$", "", x)
# remove part between brackets if that's followed by another string
x <- gsub("(.*)+ [(].*[)]", "\\1", x)
# keep only max 1 space
x <- trimws(gsub(" +", " ", x))
# non-character, space or number should be a slash
x <- gsub("[^A-Z0-9 -]", "/", x)
# spaces around non-characters must be removed: amox + clav -> amox/clav
x <- gsub("(.*[A-Z0-9]) ([^A-Z0-9].*)", "\\1\\2", x)
x <- gsub("(.*[^A-Z0-9]) ([A-Z0-9].*)", "\\1\\2", x)
# remove hyphen after a starting "co"
x <- gsub("^CO-", "CO", x)
# replace operators with a space
x <- gsub("(/| AND | WITH | W/|[+]|[-])+", " ", x)
x
}

View File

@ -47,7 +47,7 @@
#' With using `collapse`, this function will return a [character]:\cr
#' `df %>% mutate(abx = ab_from_text(clinical_text, collapse = "|"))`
#' @export
#' @return A [list], or a [character] if `collapse` is not `NULL`
#' @return A [list], or a [character] if `collapse` is not `NULL`
#' @inheritSection AMR Read more on our website!
#' @examples
#' # mind the bad spelling of amoxicillin in this line,
@ -97,7 +97,7 @@ ab_from_text <- function(text,
text <- tolower(as.character(text))
text_split_all <- strsplit(text, "[ ;.,:\\|]")
progress <- progress_estimated(n = length(text_split_all), n_min = 5)
progress <- progress_ticker(n = length(text_split_all), n_min = 5)
on.exit(close(progress))
if (type %like% "(drug|ab|anti)") {

View File

@ -38,10 +38,10 @@
#' @rdname ab_property
#' @name ab_property
#' @return
#' - An [`integer`] in case of [ab_cid()]
#' - A named [`list`] in case of [ab_info()] and multiple [ab_synonyms()]/[ab_tradenames()]
#' - A [`double`] in case of [ab_ddd()]
#' - A [`character`] in all other cases
#' - An [integer] in case of [ab_cid()]
#' - A named [list] in case of [ab_info()] and multiple [ab_synonyms()]/[ab_tradenames()]
#' - A [double] in case of [ab_ddd()]
#' - A [character] in all other cases
#' @export
#' @seealso [antibiotics]
#' @inheritSection AMR Reference data publicly available
@ -231,9 +231,9 @@ ab_validate <- function(x, property, ...) {
error = function(e) stop(e$message, call. = FALSE))
x_bak <- x
if (!all(x %in% antibiotics[, property])) {
x <- data.frame(ab = as.ab(x, ...), stringsAsFactors = FALSE) %>%
left_join(antibiotics, by = "ab") %>%
pull(property)
x <- data.frame(ab = as.ab(x, ...), stringsAsFactors = FALSE) %pm>%
pm_left_join(antibiotics, by = "ab") %pm>%
pm_pull(property)
}
if (property == "ab") {
return(structure(x, class = property))

View File

@ -135,7 +135,7 @@ age <- function(x, reference = Sys.Date(), exact = FALSE, na.rm = FALSE) {
#' filter(mo == as.mo("E. coli")) %>%
#' group_by(age_group = age_groups(age)) %>%
#' select(age_group, CIP) %>%
#' ggplot_rsi(x = "age_group")
#' ggplot_rsi(x = "age_group", minimum = 0)
#' }
age_groups <- function(x, split_at = c(12, 25, 55, 75), na.rm = FALSE) {
stop_ifnot(is.numeric(x), "`x` must be numeric, not ", paste0(class(x), collapse = "/"))

View File

@ -21,12 +21,13 @@
#' Get ATC properties from WHOCC website
#'
#' Gets data from the WHO to determine properties of an ATC (e.g. an antibiotic), such as the name, defined daily dose (DDD) or standard unit.
#' @inheritSection lifecycle Stable lifecycle
#' @description Gets data from the WHO to determine properties of an ATC (e.g. an antibiotic) like name, defined daily dose (DDD) or standard unit.
#' @param atc_code a character or character vector with ATC code(s) of antibiotic(s)
#' @param property property of an ATC code. Valid values are `"ATC"`, `"Name"`, `"DDD"`, `"U"` (`"unit"`), `"Adm.R"`, `"Note"` and `groups`. For this last option, all hierarchical groups of an ATC code will be returned, see Examples.
#' @param administration type of administration when using `property = "Adm.R"`, see Details
#' @param url url of website of the WHO. The sign `%s` can be used as a placeholder for ATC codes.
#' @param url url of website of the WHOCC. The sign `%s` can be used as a placeholder for ATC codes.
#' @param url_vet url of website of the WHOCC for veterinary medicine. The sign `%s` can be used as a placeholder for ATC_vet codes (that all start with "Q").
#' @param ... parameters to pass on to `atc_property`
#' @details
#' Options for parameter `administration`:
@ -74,7 +75,8 @@
atc_online_property <- function(atc_code,
property,
administration = "O",
url = "https://www.whocc.no/atc_ddd_index/?code=%s&showdescription=no") {
url = "https://www.whocc.no/atc_ddd_index/?code=%s&showdescription=no",
url_vet = "https://www.whocc.no/atcvet/atcvet_index/?code=%s&showdescription=no") {
has_internet <- import_fn("has_internet", "curl")
html_attr <- import_fn("html_attr", "rvest")
@ -122,25 +124,31 @@ atc_online_property <- function(atc_code,
returnvalue <- rep(NA_character_, length(atc_code))
}
progress <- progress_estimated(n = length(atc_code), 3)
progress <- progress_ticker(n = length(atc_code), 3)
on.exit(close(progress))
for (i in seq_len(length(atc_code))) {
progress$tick()
atc_url <- sub("%s", atc_code[i], url, fixed = TRUE)
if (atc_code[i] %like% "^Q") {
# veterinary drugs, ATC_vet codes start with a "Q"
atc_url <- url_vet
} else {
atc_url <- url
}
atc_url <- sub("%s", atc_code[i], atc_url, fixed = TRUE)
if (property == "groups") {
tbl <- read_html(atc_url) %>%
html_node("#content") %>%
html_children() %>%
tbl <- read_html(atc_url) %pm>%
html_node("#content") %pm>%
html_children() %pm>%
html_node("a")
# get URLS of items
hrefs <- tbl %>% html_attr("href")
hrefs <- tbl %pm>% html_attr("href")
# get text of items
texts <- tbl %>% html_text()
texts <- tbl %pm>% html_text()
# select only text items where URL like "code="
texts <- texts[grepl("?code=", tolower(hrefs), fixed = TRUE)]
# last one is antibiotics, skip it
@ -148,9 +156,9 @@ atc_online_property <- function(atc_code,
returnvalue <- c(list(texts), returnvalue)
} else {
tbl <- read_html(atc_url) %>%
html_nodes("table") %>%
html_table(header = TRUE) %>%
tbl <- read_html(atc_url) %pm>%
html_nodes("table") %pm>%
html_table(header = TRUE) %pm>%
as.data.frame(stringsAsFactors = FALSE)
# case insensitive column names

View File

@ -23,10 +23,10 @@
#'
#' Easy check for data availability of all columns in a data set. This makes it easy to get an idea of which antimicrobial combinations can be used for calculation with e.g. [susceptibility()] and [resistance()].
#' @inheritSection lifecycle Stable lifecycle
#' @param tbl a [`data.frame`] or [`list`]
#' @param tbl a [data.frame] or [list]
#' @param width number of characters to present the visual availability, defaults to filling the width of the console
#' @details The function returns a [`data.frame`] with columns `"resistant"` and `"visual_resistance"`. The values in that columns are calculated with [resistance()].
#' @return [`data.frame`] with column names of `tbl` as row names
#' @details The function returns a [data.frame] with columns `"resistant"` and `"visual_resistance"`. The values in that columns are calculated with [resistance()].
#' @return [data.frame] with column names of `tbl` as row names
#' @inheritSection AMR Read more on our website!
#' @export
#' @examples

View File

@ -35,7 +35,7 @@
#' @details The function [format()] calculates the resistance per bug-drug combination. Use `combine_IR = FALSE` (default) to test R vs. S+I and `combine_IR = TRUE` to test R+I vs. S.
#' @export
#' @rdname bug_drug_combinations
#' @return The function [bug_drug_combinations()] returns a [`data.frame`] with columns "mo", "ab", "S", "I", "R" and "total".
#' @return The function [bug_drug_combinations()] returns a [data.frame] with columns "mo", "ab", "S", "I", "R" and "total".
#' @source \strong{M39 Analysis and Presentation of Cumulative Antimicrobial Susceptibility Test Data, 4th Edition}, 2014, *Clinical and Laboratory Standards Institute (CLSI)*. <https://clsi.org/standards/products/microbiology/documents/m39/>.
#' @inheritSection AMR Read more on our website!
#' @examples
@ -160,32 +160,33 @@ format.bug_drug_combinations <- function(x,
.data
}
y <- x %>%
y <- x %pm>%
create_var(ab = as.ab(x$ab),
ab_txt = give_ab_name(ab = x$ab, format = translate_ab, language = language)) %>%
group_by(ab, ab_txt, mo) %>%
summarise(isolates = sum(isolates, na.rm = TRUE),
total = sum(total, na.rm = TRUE)) %>%
ungroup()
ab_txt = give_ab_name(ab = x$ab, format = translate_ab, language = language)) %pm>%
pm_group_by(ab, ab_txt, mo) %pm>%
pm_summarise(isolates = sum(isolates, na.rm = TRUE),
total = sum(total, na.rm = TRUE)) %pm>%
pm_ungroup()
y <- y %>%
y <- y %pm>%
create_var(txt = paste0(percentage(y$isolates / y$total, decimal.mark = decimal.mark, big.mark = big.mark),
" (", trimws(format(y$isolates, big.mark = big.mark)), "/",
trimws(format(y$total, big.mark = big.mark)), ")")) %>%
select(ab, ab_txt, mo, txt) %>%
arrange(mo)
trimws(format(y$total, big.mark = big.mark)), ")")) %pm>%
pm_select(ab, ab_txt, mo, txt) %pm>%
pm_arrange(mo)
# replace tidyr::pivot_wider() from here
for (i in unique(y$mo)) {
mo_group <- y[which(y$mo == i), c("ab", "txt")]
colnames(mo_group) <- c("ab", i)
rownames(mo_group) <- NULL
y <- y %>%
left_join(mo_group, by = "ab")
y <- y %pm>%
pm_left_join(mo_group, by = "ab")
}
y <- y %>%
distinct(ab, .keep_all = TRUE) %>%
select(-mo, -txt) %>%
y <<- y
y <- y %pm>%
pm_distinct(ab, .keep_all = TRUE) %pm>%
pm_select(-mo, -txt) %pm>%
# replace tidyr::pivot_wider() until here
remove_NAs()
@ -193,21 +194,22 @@ format.bug_drug_combinations <- function(x,
.data[, c("ab_group", "ab_txt", colnames(.data)[!colnames(.data) %in% c("ab_group", "ab_txt", "ab")])]
}
y <- y %>%
create_var(ab_group = ab_group(y$ab, language = language)) %>%
select_ab_vars() %>%
arrange(ab_group, ab_txt)
y <- y %>%
create_var(ab_group = ifelse(y$ab_group != lag(y$ab_group) | is.na(lag(y$ab_group)), y$ab_group, ""))
y <- y %pm>%
create_var(ab_group = ab_group(y$ab, language = language)) %pm>%
select_ab_vars() %pm>%
pm_arrange(ab_group, ab_txt)
y <- y %pm>%
create_var(ab_group = ifelse(y$ab_group != lag(y$ab_group) | is.na(pm_lag(y$ab_group)), y$ab_group, ""))
if (add_ab_group == FALSE) {
y <- y %>%
select(-ab_group) %>%
rename("Drug" = ab_txt)
y <- y %pm>%
pm_select(-ab_group) %pm>%
pm_rename("Drug" = ab_txt)
colnames(y)[1] <- translate_AMR(colnames(y)[1], language = get_locale(), only_unknown = FALSE)
} else {
y <- y %>% rename("Group" = ab_group,
"Drug" = ab_txt)
y <- y %pm>%
pm_rename("Group" = ab_group,
"Drug" = ab_txt)
colnames(y)[1:2] <- translate_AMR(colnames(y)[1:2], language = get_locale(), only_unknown = FALSE)
}

View File

@ -80,7 +80,7 @@ NULL
#' This function returns information about the included data from the Catalogue of Life.
#' @seealso [microorganisms]
#' @details For DSMZ, see [microorganisms].
#' @return a [`list`], which prints in pretty format
#' @return a [list], which prints in pretty format
#' @inheritSection catalogue_of_life Catalogue of Life
#' @inheritSection AMR Read more on our website!
#' @export
@ -92,12 +92,12 @@ catalogue_of_life_version <- function() {
lst <- list(catalogue_of_life =
list(version = gsub("{year}", catalogue_of_life$year, catalogue_of_life$version, fixed = TRUE),
url = gsub("{year}", catalogue_of_life$year, catalogue_of_life$url_CoL, fixed = TRUE),
n = nrow(filter(microorganisms, source == "CoL"))),
n = nrow(pm_filter(microorganisms, source == "CoL"))),
deutsche_sammlung_von_mikroorganismen_und_zellkulturen =
list(version = "Prokaryotic Nomenclature Up-to-Date from DSMZ",
url = catalogue_of_life$url_DSMZ,
yearmonth = catalogue_of_life$yearmonth_DSMZ,
n = nrow(filter(microorganisms, source == "DSMZ"))),
n = nrow(pm_filter(microorganisms, source == "DSMZ"))),
total_included =
list(
n_total_species = nrow(microorganisms),

View File

@ -37,7 +37,7 @@
#' The function [count_df()] takes any variable from `data` that has an [`rsi`] class (created with [as.rsi()]) and counts the number of S's, I's and R's. It also supports grouped variables. The function [rsi_df()] works exactly like [count_df()], but adds the percentage of S, I and R.
#' @inheritSection proportion Combination therapy
#' @seealso [`proportion_*`][proportion] to calculate microbial resistance and susceptibility.
#' @return An [`integer`]
#' @return An [integer]
#' @rdname count
#' @name count
#' @export

View File

@ -23,7 +23,7 @@
#'
#' Two data sets containing all antibiotics/antimycotics and antivirals. Use [as.ab()] or one of the [ab_property()] functions to retrieve values from the [antibiotics] data set. Three identifiers are included in this data set: an antibiotic ID (`ab`, primarily used in this package) as defined by WHONET/EARS-Net, an ATC code (`atc`) as defined by the WHO, and a Compound ID (`cid`) as found in PubChem. Other properties in this data set are derived from one or more of these codes.
#' @format
#' ### For the [antibiotics] data set: a [`data.frame`] with `r nrow(antibiotics)` observations and `r ncol(antibiotics)` variables:
#' ### For the [antibiotics] data set: a [data.frame] with `r nrow(antibiotics)` observations and `r ncol(antibiotics)` variables:
#' - `ab`\cr Antibiotic ID as used in this package (like `AMC`), using the official EARS-Net (European Antimicrobial Resistance Surveillance Network) codes where available
#' - `atc`\cr ATC code (Anatomical Therapeutic Chemical) as defined by the WHOCC, like `J01CR02`
#' - `cid`\cr Compound ID as found in PubChem
@ -39,7 +39,7 @@
#' - `iv_units`\cr Units of `iv_ddd`
#' - `loinc`\cr All LOINC codes (Logical Observation Identifiers Names and Codes) associated with the name of the antimicrobial agent. Use [ab_loinc()] to retrieve them quickly, see [ab_property()].
#'
#' ### For the [antivirals] data set: a [`data.frame`] with `r nrow(antivirals)` observations and `r ncol(antivirals)` variables:
#' ### For the [antivirals] data set: a [data.frame] with `r nrow(antivirals)` observations and `r ncol(antivirals)` variables:
#' - `atc`\cr ATC code (Anatomical Therapeutic Chemical) as defined by the WHOCC
#' - `cid`\cr Compound ID as found in PubChem
#' - `name`\cr Official name as used by WHONET/EARS-Net or the WHO
@ -81,7 +81,7 @@
#'
#' A data set containing the microbial taxonomy of six kingdoms from the Catalogue of Life. MO codes can be looked up using [as.mo()].
#' @inheritSection catalogue_of_life Catalogue of Life
#' @format A [`data.frame`] with `r format(nrow(microorganisms), big.mark = ",")` observations and `r ncol(microorganisms)` variables:
#' @format A [data.frame] with `r format(nrow(microorganisms), big.mark = ",")` observations and `r ncol(microorganisms)` variables:
#' - `mo`\cr ID of microorganism as used by this package
#' - `fullname`\cr Full name, like `"Escherichia coli"`
#' - `kingdom`, `phylum`, `class`, `order`, `family`, `genus`, `species`, `subspecies`\cr Taxonomic rank of the microorganism
@ -99,7 +99,7 @@
#' - 1 entry of *Blastocystis* (*Blastocystis hominis*), although it officially does not exist (Noel *et al.* 2005, PMID 15634993)
#' - 5 other 'undefined' entries (unknown, unknown Gram negatives, unknown Gram positives, unknown yeast and unknown fungus)
#' - 6 families under the Enterobacterales order, according to Adeolu *et al.* (2016, PMID 27620848), that are not (yet) in the Catalogue of Life
#' - `r format(nrow(filter(microorganisms, source == "DSMZ")), big.mark = ",")` species from the DSMZ (Deutsche Sammlung von Mikroorganismen und Zellkulturen) since the DSMZ contain the latest taxonomic information based on recent publications
#' - `r format(nrow(subset(microorganisms, source == "DSMZ")), big.mark = ",")` species from the DSMZ (Deutsche Sammlung von Mikroorganismen und Zellkulturen) since the DSMZ contain the latest taxonomic information based on recent publications
#'
#' ### Direct download
#' This data set is available as 'flat file' for use even without R - you can find the file here:
@ -136,7 +136,7 @@ catalogue_of_life <- list(
#'
#' A data set containing old (previously valid or accepted) taxonomic names according to the Catalogue of Life. This data set is used internally by [as.mo()].
#' @inheritSection catalogue_of_life Catalogue of Life
#' @format A [`data.frame`] with `r format(nrow(microorganisms.old), big.mark = ",")` observations and `r ncol(microorganisms.old)` variables:
#' @format A [data.frame] with `r format(nrow(microorganisms.old), big.mark = ",")` observations and `r ncol(microorganisms.old)` variables:
#' - `fullname`\cr Old full taxonomic name of the microorganism
#' - `fullname_new`\cr New full taxonomic name of the microorganism
#' - `ref`\cr Author(s) and year of concerning scientific publication
@ -152,7 +152,7 @@ catalogue_of_life <- list(
#' Data set with `r format(nrow(microorganisms.codes), big.mark = ",")` common microorganism codes
#'
#' A data set containing commonly used codes for microorganisms, from laboratory systems and WHONET. Define your own with [set_mo_source()]. They will all be searched when using [as.mo()] and consequently all the [`mo_*`][mo_property()] functions.
#' @format A [`data.frame`] with `r format(nrow(microorganisms.codes), big.mark = ",")` observations and `r ncol(microorganisms.codes)` variables:
#' @format A [data.frame] with `r format(nrow(microorganisms.codes), big.mark = ",")` observations and `r ncol(microorganisms.codes)` variables:
#' - `code`\cr Commonly used code of a microorganism
#' - `mo`\cr ID of the microorganism in the [microorganisms] data set
#' @inheritSection AMR Reference data publicly available
@ -164,7 +164,7 @@ catalogue_of_life <- list(
#' Data set with `r format(nrow(example_isolates), big.mark = ",")` example isolates
#'
#' A data set containing `r format(nrow(example_isolates), big.mark = ",")` microbial isolates with their full antibiograms. The data set reflects reality and can be used to practice AMR analysis. For examples, please read [the tutorial on our website](https://msberends.github.io/AMR/articles/AMR.html).
#' @format A [`data.frame`] with `r format(nrow(example_isolates), big.mark = ",")` observations and `r ncol(example_isolates)` variables:
#' @format A [data.frame] with `r format(nrow(example_isolates), big.mark = ",")` observations and `r ncol(example_isolates)` variables:
#' - `date`\cr date of receipt at the laboratory
#' - `hospital_id`\cr ID of the hospital, from A to D
#' - `ward_icu`\cr logical to determine if ward is an intensive care unit
@ -182,7 +182,7 @@ catalogue_of_life <- list(
#' Data set with unclean data
#'
#' A data set containing `r format(nrow(example_isolates_unclean), big.mark = ",")` microbial isolates that are not cleaned up and consequently not ready for AMR analysis. This data set can be used for practice.
#' @format A [`data.frame`] with `r format(nrow(example_isolates_unclean), big.mark = ",")` observations and `r ncol(example_isolates_unclean)` variables:
#' @format A [data.frame] with `r format(nrow(example_isolates_unclean), big.mark = ",")` observations and `r ncol(example_isolates_unclean)` variables:
#' - `patient_id`\cr ID of the patient
#' - `date`\cr date of receipt at the laboratory
#' - `hospital`\cr ID of the hospital, from A to C
@ -195,7 +195,7 @@ catalogue_of_life <- list(
#' Data set with `r format(nrow(WHONET), big.mark = ",")` isolates - WHONET example
#'
#' This example data set has the exact same structure as an export file from WHONET. Such files can be used with this package, as this example data set shows. The antibiotic results are from our [example_isolates] data set. All patient names are created using online surname generators and are only in place for practice purposes.
#' @format A [`data.frame`] with `r format(nrow(WHONET), big.mark = ",")` observations and `r ncol(WHONET)` variables:
#' @format A [data.frame] with `r format(nrow(WHONET), big.mark = ",")` observations and `r ncol(WHONET)` variables:
#' - `Identification number`\cr ID of the sample
#' - `Specimen number`\cr ID of the specimen
#' - `Organism`\cr Name of the microorganism. Before analysis, you should transform this to a valid microbial class, using [as.mo()].
@ -229,7 +229,7 @@ catalogue_of_life <- list(
#' Data set for R/SI interpretation
#'
#' Data set to interpret MIC and disk diffusion to R/SI values. Included guidelines are CLSI (`r min(as.integer(gsub("[^0-9]", "", subset(rsi_translation, guideline %like% "CLSI")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(rsi_translation, guideline %like% "CLSI")$guideline)))`) and EUCAST (`r min(as.integer(gsub("[^0-9]", "", subset(rsi_translation, guideline %like% "EUCAST")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(rsi_translation, guideline %like% "EUCAST")$guideline)))`). Use [as.rsi()] to transform MICs or disks measurements to R/SI values.
#' @format A [`data.frame`] with `r format(nrow(rsi_translation), big.mark = ",")` observations and `r ncol(rsi_translation)` variables:
#' @format A [data.frame] with `r format(nrow(rsi_translation), big.mark = ",")` observations and `r ncol(rsi_translation)` variables:
#' - `guideline`\cr Name of the guideline
#' - `method`\cr Either "MIC" or "DISK"
#' - `site`\cr Body site, e.g. "Oral" or "Respiratory"
@ -249,7 +249,7 @@ catalogue_of_life <- list(
#' Data set with bacterial intrinsic resistance
#'
#' Data set containing defined intrinsic resistance by EUCAST of all bug-drug combinations.
#' @format A [`data.frame`] with `r format(nrow(intrinsic_resistant), big.mark = ",")` observations and `r ncol(intrinsic_resistant)` variables:
#' @format A [data.frame] with `r format(nrow(intrinsic_resistant), big.mark = ",")` observations and `r ncol(intrinsic_resistant)` variables:
#' - `microorganism`\cr Name of the microorganism
#' - `antibiotic`\cr Name of the antibiotic drug
#' @details The repository of this `AMR` package contains a file comprising this exact data set: <https://github.com/msberends/AMR/blob/master/data-raw/intrinsic_resistant.txt>. This file **allows for machine reading EUCAST guidelines about intrinsic resistance**, which is almost impossible with the Excel and PDF files distributed by EUCAST. The file is updated automatically.

View File

@ -27,7 +27,7 @@
#' @param x vector
#' @param na.rm a logical indicating whether missing values should be removed
#' @details Interpret disk values as RSI values with [as.rsi()]. It supports guidelines from EUCAST and CLSI.
#' @return An [`integer`] with additional new class [`disk`]
#' @return An [integer] with additional class [`disk`]
#' @aliases disk
#' @export
#' @seealso [as.rsi()]
@ -54,7 +54,7 @@
#' }
as.disk <- function(x, na.rm = FALSE) {
if (!is.disk(x)) {
x <- x %>% unlist()
x <- x %pm>% unlist()
if (na.rm == TRUE) {
x <- x[!is.na(x)]
}
@ -89,8 +89,8 @@ as.disk <- function(x, na.rm = FALSE) {
na_after <- length(x[is.na(x)])
if (na_before != na_after) {
list_missing <- x.bak[is.na(x) & !is.na(x.bak)] %>%
unique() %>%
list_missing <- x.bak[is.na(x) & !is.na(x.bak)] %pm>%
unique() %pm>%
sort()
list_missing <- paste0('"', list_missing, '"', collapse = ", ")
warning(na_after - na_before, " results truncated (",

View File

@ -136,7 +136,7 @@ EUCAST_VERSION_EXPERT_RULES <- "3.1, 2016"
#' @aliases EUCAST
#' @rdname eucast_rules
#' @export
#' @return The input of `x`, possibly with edited values of antibiotics. Or, if `verbose = TRUE`, a [`data.frame`] with all original and new values of the affected bug-drug combinations.
#' @return The input of `x`, possibly with edited values of antibiotics. Or, if `verbose = TRUE`, a [data.frame] with all original and new values of the affected bug-drug combinations.
#' @source
#' - EUCAST Expert Rules. Version 2.0, 2012. \cr
#' Leclercq et al. **EUCAST expert rules in antimicrobial susceptibility testing.** *Clin Microbiol Infect.* 2013;19(2):141-60. \cr
@ -442,8 +442,8 @@ eucast_rules <- function(x,
warning = function(w) {
if (w$message %like% "invalid factor level") {
xyz <- sapply(cols, function(col) {
x_original[, col] <<- factor(x = as.character(pull(x_original, col)), levels = c(to, levels(pull(x_original, col))))
x[, col] <<- factor(x = as.character(pull(x, col)), levels = c(to, levels(pull(x, col))))
x_original[, col] <<- factor(x = as.character(pm_pull(x_original, col)), levels = c(to, levels(pm_pull(x_original, col))))
x[, col] <<- factor(x = as.character(pm_pull(x, col)), levels = c(to, levels(pm_pull(x, col))))
invisible()
})
x_original[rows, cols] <<- to
@ -492,12 +492,12 @@ eucast_rules <- function(x,
rule_name = font_stripstyle(rule[3]),
stringsAsFactors = FALSE)
colnames(verbose_new) <- c("row", "col", "mo_fullname", "old", "new", "rule", "rule_group", "rule_name")
verbose_new <- verbose_new %>% filter(old != new | is.na(old))
verbose_new <- verbose_new %pm>% pm_filter(old != new | is.na(old))
# save changes to data set 'verbose_info'
verbose_info <<- rbind(verbose_info, verbose_new)
# count adds and changes
track_changes$added <- track_changes$added + verbose_new %>% filter(is.na(old)) %>% nrow()
track_changes$changed <- track_changes$changed + verbose_new %>% filter(!is.na(old)) %>% nrow()
track_changes$added <- track_changes$added + verbose_new %pm>% pm_filter(is.na(old)) %pm>% nrow()
track_changes$changed <- track_changes$changed + verbose_new %pm>% pm_filter(!is.na(old)) %pm>% nrow()
}
# after the applied changes: return list with counts of added and changed
return(track_changes)
@ -520,13 +520,13 @@ eucast_rules <- function(x,
# save original table, with the new .rowid column
x_original.bak <- x
# keep only unique rows for MO and ABx
x <- x %>% distinct(`.rowid`, .keep_all = TRUE)
x <- x %pm>% pm_distinct(`.rowid`, .keep_all = TRUE)
x_original <- x
# join to microorganisms data set
x <- as.data.frame(x, stringsAsFactors = FALSE)
x[, col_mo] <- as.mo(x[, col_mo, drop = TRUE])
x <- x %>%
x <- x %pm>%
left_join_microorganisms(by = col_mo, suffix = c("_oldcols", ""))
x$gramstain <- mo_gramstain(x[, col_mo, drop = TRUE], language = NULL)
x$genus_species <- paste(x$genus, x$species)
@ -568,12 +568,12 @@ eucast_rules <- function(x,
y[y != "" & y %in% colnames(df)]
}
get_antibiotic_names <- function(x) {
x <- x %>%
strsplit(",") %>%
unlist() %>%
trimws() %>%
sapply(function(x) if (x %in% antibiotics$ab) ab_name(x, language = NULL, tolower = TRUE) else x) %>%
sort() %>%
x <- x %pm>%
strsplit(",") %pm>%
unlist() %pm>%
trimws() %pm>%
sapply(function(x) if (x %in% antibiotics$ab) ab_name(x, language = NULL, tolower = TRUE) else x) %pm>%
sort() %pm>%
paste(collapse = ", ")
x <- gsub("_", " ", x, fixed = TRUE)
x <- gsub("except CAZ", paste("except", ab_name("CAZ", language = NULL, tolower = TRUE)), x, fixed = TRUE)
@ -856,17 +856,17 @@ eucast_rules <- function(x,
wouldve <- ""
}
verbose_info <- verbose_info %>%
arrange(row, rule_group, rule_name, col)
verbose_info <- verbose_info %pm>%
pm_arrange(row, rule_group, rule_name, col)
cat(paste0("\n", font_grey(strrep("-", options()$width - 1)), "\n"))
cat(font_bold(paste("The rules", paste0(wouldve, "affected"),
formatnr(n_distinct(verbose_info$row)),
formatnr(pm_n_distinct(verbose_info$row)),
"out of", formatnr(nrow(x_original)),
"rows, making a total of", formatnr(nrow(verbose_info)), "edits\n")))
n_added <- verbose_info %>% filter(is.na(old)) %>% nrow()
n_changed <- verbose_info %>% filter(!is.na(old)) %>% nrow()
n_added <- verbose_info %pm>% pm_filter(is.na(old)) %pm>% nrow()
n_changed <- verbose_info %pm>% pm_filter(!is.na(old)) %pm>% nrow()
# print added values ----
if (n_added == 0) {
@ -875,15 +875,15 @@ eucast_rules <- function(x,
colour <- font_green # is function
}
cat(colour(paste0("=> ", wouldve, "added ",
font_bold(formatnr(verbose_info %>%
filter(is.na(old)) %>%
font_bold(formatnr(verbose_info %pm>%
pm_filter(is.na(old)) %pm>%
nrow()), "test results"),
"\n")))
if (n_added > 0) {
added_summary <- verbose_info %>%
filter(is.na(old)) %>%
group_by(new) %>%
summarise(n = n())
added_summary <- verbose_info %pm>%
pm_filter(is.na(old)) %pm>%
pm_group_by(new) %pm>%
pm_summarise(n = pm_n())
cat(paste(" -",
paste0(formatnr(added_summary$n), " test result", ifelse(added_summary$n > 1, "s", ""),
" added as ", added_summary$new), collapse = "\n"))
@ -899,15 +899,15 @@ eucast_rules <- function(x,
cat("\n")
}
cat(colour(paste0("=> ", wouldve, "changed ",
font_bold(formatnr(verbose_info %>%
filter(!is.na(old)) %>%
font_bold(formatnr(verbose_info %pm>%
pm_filter(!is.na(old)) %pm>%
nrow()), "test results"),
"\n")))
if (n_changed > 0) {
changed_summary <- verbose_info %>%
filter(!is.na(old)) %>%
group_by(old, new) %>%
summarise(n = n())
changed_summary <- verbose_info %pm>%
pm_filter(!is.na(old)) %pm>%
pm_group_by(old, new) %pm>%
pm_summarise(n = pm_n())
cat(paste(" -",
paste0(formatnr(changed_summary$n), " test result", ifelse(changed_summary$n > 1, "s", ""), " changed from ",
changed_summary$old, " to ", changed_summary$new), collapse = "\n"))
@ -936,8 +936,8 @@ eucast_rules <- function(x,
# reset original attributes
x_original <- x_original[, c(col_mo, cols_ab, ".rowid"), drop = FALSE]
x_original.bak <- x_original.bak[, setdiff(colnames(x_original.bak), c(col_mo, cols_ab)), drop = FALSE]
x_original.bak <- x_original.bak %>%
left_join(x_original, by = ".rowid")
x_original.bak <- x_original.bak %pm>%
pm_left_join(x_original, by = ".rowid")
x_original.bak <- x_original.bak[, old_cols, drop = FALSE]
attributes(x_original.bak) <- old_attributes
x_original.bak

View File

@ -336,14 +336,14 @@ find_ab_group <- function(ab_class) {
"macrolide",
"tetracycline"),
paste0(ab_class, "s"),
antibiotics %>%
antibiotics %pm>%
subset(group %like% ab_class |
atc_group1 %like% ab_class |
atc_group2 %like% ab_class) %>%
pull(group) %>%
unique() %>%
tolower() %>%
sort() %>%
atc_group2 %like% ab_class) %pm>%
pm_pull(group) %pm>%
unique() %pm>%
tolower() %pm>%
sort() %pm>%
paste(collapse = "/")
)
}

View File

@ -23,7 +23,7 @@
#'
#' Determine first (weighted) isolates of all microorganisms of every patient per episode and (if needed) per specimen type.
#' @inheritSection lifecycle Stable lifecycle
#' @param x a [`data.frame`] containing isolates.
#' @param x a [data.frame] containing isolates.
#' @param col_date column name of the result date (or date that is was received on the lab), defaults to the first column of with a date class
#' @param col_patient_id column name of the unique IDs of the patients, defaults to the first column that starts with 'patient' or 'patid' (case insensitive)
#' @param col_mo column name of the IDs of the microorganisms (see [as.mo()]), defaults to the first column of class [`mo`]. Values will be coerced using [as.mo()].
@ -147,7 +147,7 @@ first_isolate <- function(x,
dots <- unlist(list(...))
if (length(dots) != 0) {
# backwards compatibility with old parameters
dots.names <- dots %>% names()
dots.names <- dots %pm>% names()
if ("filter_specimen" %in% dots.names) {
specimen_group <- dots[which(dots.names == "filter_specimen")]
}
@ -269,16 +269,16 @@ first_isolate <- function(x,
row.end <- nrow(x)
} else {
# filtering on specimen and only analyse these rows to save time
x <- x[order(pull(x, col_specimen),
x <- x[order(pm_pull(x, col_specimen),
x$newvar_patient_id,
x$newvar_genus_species,
x$newvar_date), ]
rownames(x) <- NULL
suppressWarnings(
row.start <- which(x %>% pull(col_specimen) == specimen_group) %>% min(na.rm = TRUE)
row.start <- which(x %pm>% pm_pull(col_specimen) == specimen_group) %pm>% min(na.rm = TRUE)
)
suppressWarnings(
row.end <- which(x %>% pull(col_specimen) == specimen_group) %>% max(na.rm = TRUE)
row.end <- which(x %pm>% pm_pull(col_specimen) == specimen_group) %pm>% max(na.rm = TRUE)
)
}
@ -319,8 +319,8 @@ first_isolate <- function(x,
}
# Analysis of first isolate ----
x$other_pat_or_mo <- ifelse(x$newvar_patient_id == lag(x$newvar_patient_id) &
x$newvar_genus_species == lag(x$newvar_genus_species),
x$other_pat_or_mo <- ifelse(x$newvar_patient_id == pm_lag(x$newvar_patient_id) &
x$newvar_genus_species == pm_lag(x$newvar_genus_species),
FALSE,
TRUE)
x$episode_group <- paste(x$newvar_patient_id, x$newvar_genus_species)
@ -349,13 +349,13 @@ first_isolate <- function(x,
type_param <- type
x$other_key_ab <- !key_antibiotics_equal(y = x$newvar_key_ab,
z = lag(x$newvar_key_ab),
z = pm_lag(x$newvar_key_ab),
type = type_param,
ignore_I = ignore_I,
points_threshold = points_threshold,
info = info)
# with key antibiotics
x$newvar_first_isolate <- if_else(x$newvar_row_index_sorted >= row.start &
x$newvar_first_isolate <- pm_if_else(x$newvar_row_index_sorted >= row.start &
x$newvar_row_index_sorted <= row.end &
x$newvar_genus_species != "" &
(x$other_pat_or_mo | x$more_than_episode_ago | x$other_key_ab),
@ -364,7 +364,7 @@ first_isolate <- function(x,
} else {
# no key antibiotics
x$newvar_first_isolate <- if_else(x$newvar_row_index_sorted >= row.start &
x$newvar_first_isolate <- pm_if_else(x$newvar_row_index_sorted >= row.start &
x$newvar_row_index_sorted <= row.end &
x$newvar_genus_species != "" &
(x$other_pat_or_mo | x$more_than_episode_ago),
@ -413,8 +413,14 @@ first_isolate <- function(x,
if (info == TRUE) {
n_found <- sum(x$newvar_first_isolate, na.rm = TRUE)
p_found_total <- percentage(n_found / nrow(x[which(!is.na(x$newvar_mo)), , drop = FALSE]))
p_found_scope <- percentage(n_found / scope.size)
p_found_total <- percentage(n_found / nrow(x[which(!is.na(x$newvar_mo)), , drop = FALSE]), digits = 1)
p_found_scope <- percentage(n_found / scope.size, digits = 1)
if (!p_found_total %like% "[.]") {
p_found_total <- gsub("%", ".0%", p_found_total, fixed = TRUE)
}
if (!p_found_scope %like% "[.]") {
p_found_scope <- gsub("%", ".0%", p_found_scope, fixed = TRUE)
}
# mark up number of found
n_found <- format(n_found, big.mark = big.mark, decimal.mark = decimal.mark)
if (p_found_total != p_found_scope) {

View File

@ -23,7 +23,7 @@
#'
#' Use these functions to create bar plots for antimicrobial resistance analysis. All functions rely on [ggplot2][ggplot2::ggplot()] functions.
#' @inheritSection lifecycle Maturing lifecycle
#' @param data a [`data.frame`] with column(s) of class [`rsi`] (see [as.rsi()])
#' @param data a [data.frame] with column(s) of class [`rsi`] (see [as.rsi()])
#' @param position position adjustment of bars, either `"fill"`, `"stack"` or `"dodge"`
#' @param x variable to show on x axis, either `"antibiotic"` (default) or `"interpretation"` or a grouping variable
#' @param fill variable to categorise using the plots legend, either `"antibiotic"` (default) or `"interpretation"` or a grouping variable
@ -147,6 +147,7 @@ ggplot_rsi <- function(data,
translate_ab = "name",
combine_SI = TRUE,
combine_IR = FALSE,
minimum = 30,
language = get_locale(),
nrow = NULL,
colours = c(S = "#61a8ff",
@ -194,6 +195,7 @@ ggplot_rsi <- function(data,
p <- ggplot2::ggplot(data = data) +
geom_rsi(position = position, x = x, fill = fill, translate_ab = translate_ab,
minimum = minimum, language = language,
combine_SI = combine_SI, combine_IR = combine_IR, ...) +
theme_rsi()
@ -215,6 +217,8 @@ ggplot_rsi <- function(data,
p <- p + labels_rsi_count(position = position,
x = x,
translate_ab = translate_ab,
minimum = minimum,
language = language,
combine_SI = combine_SI,
combine_IR = combine_IR,
datalabels.size = datalabels.size,
@ -240,13 +244,14 @@ geom_rsi <- function(position = NULL,
x = c("antibiotic", "interpretation"),
fill = "interpretation",
translate_ab = "name",
minimum = 30,
language = get_locale(),
combine_SI = TRUE,
combine_IR = FALSE,
...) {
stop_ifnot_installed("ggplot2")
stop_if(is.data.frame(position), "`position` is invalid. Did you accidentally use '%>%' instead of '+'?")
stop_if(is.data.frame(position), "`position` is invalid. Did you accidentally use '%pm>%' instead of '+'?")
y <- "value"
if (missing(position) | is.null(position)) {
@ -280,6 +285,7 @@ geom_rsi <- function(position = NULL,
rsi_df(data = x,
translate_ab = translate_ab,
language = language,
minimum = minimum,
combine_SI = combine_SI,
combine_IR = combine_IR)
})
@ -365,6 +371,8 @@ theme_rsi <- function() {
labels_rsi_count <- function(position = NULL,
x = "antibiotic",
translate_ab = "name",
minimum = 30,
language = get_locale(),
combine_SI = TRUE,
combine_IR = FALSE,
datalabels.size = 3,
@ -389,12 +397,14 @@ labels_rsi_count <- function(position = NULL,
transformed <- rsi_df(data = x,
translate_ab = translate_ab,
combine_SI = combine_SI,
combine_IR = combine_IR)
combine_IR = combine_IR,
minimum = minimum,
language = language)
transformed$gr <- transformed[, x_name, drop = TRUE]
transformed %>%
group_by(gr) %>%
mutate(lbl = paste0("n=", isolates)) %>%
ungroup() %>%
select(-gr)
transformed %pm>%
pm_group_by(gr) %pm>%
pm_mutate(lbl = paste0("n=", isolates)) %pm>%
pm_ungroup() %pm>%
pm_select(-gr)
})
}

View File

@ -23,7 +23,7 @@
#'
#' This tries to find a column name in a data set based on information from the [antibiotics] data set. Also supports WHONET abbreviations.
#' @inheritSection lifecycle Maturing lifecycle
#' @param x a [`data.frame`]
#' @param x a [data.frame]
#' @param search_string a text to search `x` for, will be checked with [as.ab()] if this value is not a column in `x`
#' @param verbose a logical to indicate whether additional info should be printed
#' @details You can look for an antibiotic (trade) name or abbreviation and it will search `x` and the [antibiotics] data set for any column containing a name or code of that antibiotic. **Longer columns names take precendence over shorter column names.**
@ -82,7 +82,7 @@ guess_ab_col <- function(x = NULL, search_string = NULL, verbose = FALSE) {
} else {
# sort colnames on length - longest first
cols <- colnames(x[, x %>% colnames() %>% nchar() %>% order() %>% rev()])
cols <- colnames(x[, x %pm>% colnames() %pm>% nchar() %pm>% order() %pm>% rev()])
df_trans <- data.frame(cols = cols,
abs = suppressWarnings(as.ab(cols)),
stringsAsFactors = FALSE)
@ -147,7 +147,7 @@ get_column_abx <- function(x,
names(x) <- df_trans$abcode
# add from self-defined dots (...):
# such as get_column_abx(example_isolates %>% rename(thisone = AMX), amox = "thisone")
# such as get_column_abx(example_isolates %pm>% rename(thisone = AMX), amox = "thisone")
dots <- list(...)
if (length(dots) > 0) {
newnames <- suppressWarnings(as.ab(names(dots), info = FALSE))

View File

@ -30,7 +30,7 @@
#' @param by a variable to join by - if left empty will search for a column with class [`mo`] (created with [as.mo()]) or will be `"mo"` if that column name exists in `x`, could otherwise be a column name of `x` with values that exist in `microorganisms$mo` (like `by = "bacteria_id"`), or another column in [microorganisms] (but then it should be named, like `by = c("my_genus_species" = "fullname")`)
#' @param suffix if there are non-joined duplicate variables in `x` and `y`, these suffixes will be added to the output to disambiguate them. Should be a character vector of length 2.
#' @param ... ignored
#' @details **Note:** As opposed to the `join()` functions of `dplyr`, [`character`] vectors are supported and at default existing columns will get a suffix `"2"` and the newly joined columns will not get a suffix.
#' @details **Note:** As opposed to the `join()` functions of `dplyr`, [character] vectors are supported and at default existing columns will get a suffix `"2"` and the newly joined columns will not get a suffix.
#'
#' These functions rely on [merge()], a base R function to do joins.
#' @inheritSection AMR Read more on our website!
@ -61,7 +61,7 @@ inner_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) {
x <- checked$x
by <- checked$by
join <- suppressWarnings(
inner_join(x = x, y = microorganisms, by = by, suffix = suffix, ...)
pm_inner_join(x = x, y = microorganisms, by = by, suffix = suffix, ...)
)
if (NROW(join) > NROW(x)) {
warning("The newly joined tbl contains ", nrow(join) - nrow(x), " rows more that its original.")
@ -80,7 +80,7 @@ left_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) {
x <- checked$x
by <- checked$by
join <- suppressWarnings(
left_join(x = x, y = microorganisms, by = by, suffix = suffix, ...)
pm_left_join(x = x, y = microorganisms, by = by, suffix = suffix, ...)
)
if (NROW(join) > NROW(x)) {
warning("The newly joined tbl contains ", nrow(join) - nrow(x), " rows more that its original.")
@ -99,7 +99,7 @@ right_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) {
x <- checked$x
by <- checked$by
join <- suppressWarnings(
right_join(x = x, y = microorganisms, by = by, suffix = suffix, ...)
pm_right_join(x = x, y = microorganisms, by = by, suffix = suffix, ...)
)
if (NROW(join) > NROW(x)) {
warning("The newly joined tbl contains ", nrow(join) - nrow(x), " rows more that its original.")
@ -118,7 +118,7 @@ full_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) {
x <- checked$x
by <- checked$by
join <- suppressWarnings(
full_join(x = x, y = microorganisms, by = by, suffix = suffix, ...)
pm_full_join(x = x, y = microorganisms, by = by, suffix = suffix, ...)
)
if (NROW(join) > NROW(x)) {
warning("The newly joined tbl contains ", nrow(join) - nrow(x), " rows more that its original.")
@ -137,7 +137,7 @@ semi_join_microorganisms <- function(x, by = NULL, ...) {
x <- checked$x
by <- checked$by
join <- suppressWarnings(
semi_join(x = x, y = microorganisms, by = by, ...)
pm_semi_join(x = x, y = microorganisms, by = by, ...)
)
class(join) <- x_class
join
@ -153,7 +153,7 @@ anti_join_microorganisms <- function(x, by = NULL, ...) {
x <- checked$x
by <- checked$by
join <- suppressWarnings(
anti_join(x = x, y = microorganisms, by = by, ...)
pm_anti_join(x = x, y = microorganisms, by = by, ...)
)
class(join) <- x_class
join

View File

@ -125,7 +125,7 @@ key_antibiotics <- function(x,
dots <- unlist(list(...))
if (length(dots) != 0) {
# backwards compatibility with old parameters
dots.names <- dots %>% names()
dots.names <- dots %pm>% names()
if ("info" %in% dots.names) {
warnings <- dots[which(dots.names == "info")]
}
@ -162,7 +162,7 @@ key_antibiotics <- function(x,
if (!all(col.list %in% colnames(x))) {
if (warnings == TRUE) {
warning("Some columns do not exist and will be ignored: ",
col.list.bak[!(col.list %in% colnames(x))] %>% toString(),
col.list.bak[!(col.list %in% colnames(x))] %pm>% toString(),
".\nTHIS MAY STRONGLY INFLUENCE THE OUTCOME.",
immediate. = TRUE,
call. = FALSE)
@ -218,7 +218,7 @@ key_antibiotics <- function(x,
x$key_ab <- NA_character_
# Gram +
x$key_ab <- if_else(x$gramstain == "Gram-positive",
x$key_ab <- pm_if_else(x$gramstain == "Gram-positive",
tryCatch(apply(X = x[, gram_positive],
MARGIN = 1,
FUN = function(x) paste(x, collapse = "")),
@ -226,7 +226,7 @@ key_antibiotics <- function(x,
x$key_ab)
# Gram -
x$key_ab <- if_else(x$gramstain == "Gram-negative",
x$key_ab <- pm_if_else(x$gramstain == "Gram-negative",
tryCatch(apply(X = x[, gram_negative],
MARGIN = 1,
FUN = function(x) paste(x, collapse = "")),
@ -236,7 +236,7 @@ key_antibiotics <- function(x,
# format
key_abs <- toupper(gsub("[^SIR]", ".", gsub("(NA|NULL)", ".", x$key_ab)))
if (n_distinct(key_abs) == 1) {
if (pm_n_distinct(key_abs) == 1) {
warning("No distinct key antibiotics determined.", call. = FALSE)
}
@ -266,7 +266,7 @@ key_antibiotics_equal <- function(y,
result <- logical(length(x))
if (info_needed == TRUE) {
p <- progress_estimated(length(x))
p <- progress_ticker(length(x))
on.exit(close(p))
}
@ -315,10 +315,10 @@ key_antibiotics_equal <- function(y,
# - S|R <-> R|S is 1 point
# use the levels of as.rsi (S = 1, I = 2, R = 3)
suppressWarnings(x_split <- x_split %>% as.rsi() %>% as.double())
suppressWarnings(y_split <- y_split %>% as.rsi() %>% as.double())
suppressWarnings(x_split <- x_split %pm>% as.rsi() %pm>% as.double())
suppressWarnings(y_split <- y_split %pm>% as.rsi() %pm>% as.double())
points <- (x_split - y_split) %>% abs() %>% sum(na.rm = TRUE) / 2
points <- (x_split - y_split) %pm>% abs() %pm>% sum(na.rm = TRUE) / 2
result[i] <- points >= points_threshold
} else {

View File

@ -23,7 +23,7 @@
#'
#' @description Kurtosis is a measure of the "tailedness" of the probability distribution of a real-valued random variable.
#' @inheritSection lifecycle Questioning lifecycle
#' @param x a vector of values, a [`matrix`] or a [`data.frame`]
#' @param x a vector of values, a [`matrix`] or a [data.frame]
#' @param na.rm a logical value indicating whether `NA` values should be stripped before the computation proceeds.
#' @seealso [skewness()]
#' @rdname kurtosis

View File

@ -24,7 +24,7 @@
#' Convenient wrapper around [grep()] to match a pattern: `x %like% pattern`. It always returns a [`logical`] vector and is always case-insensitive (use `x %like_case% pattern` for case-sensitive matching). Also, `pattern` can be as long as `x` to compare items of each index in both vectors, or they both can have the same length to iterate over all cases.
#' @inheritSection lifecycle Stable lifecycle
#' @param x a character vector where matches are sought, or an object which can be coerced by [as.character()] to a character vector.
#' @param pattern a character string containing a regular expression (or [`character`] string for `fixed = TRUE`) to be matched in the given character vector. Coerced by [as.character()] to a character string if possible. If a [`character`] vector of length 2 or more is supplied, the first element is used with a warning.
#' @param pattern a character string containing a regular expression (or [character] string for `fixed = TRUE`) to be matched in the given character vector. Coerced by [as.character()] to a character string if possible. If a [character] vector of length 2 or more is supplied, the first element is used with a warning.
#' @param ignore.case if `FALSE`, the pattern matching is *case sensitive* and if `TRUE`, case is ignored during matching.
#' @return A [`logical`] vector
#' @name like

View File

@ -52,13 +52,13 @@
#' @inheritSection as.rsi Interpretation of R and S/I
#' @return
#' - CMI 2012 paper - function [mdr_cmi2012()] or [mdro()]:\cr
#' Ordered [`factor`] with levels `Negative` < `Multi-drug-resistant (MDR)` < `Extensively drug-resistant (XDR)` < `Pandrug-resistant (PDR)`
#' Ordered [factor] with levels `Negative` < `Multi-drug-resistant (MDR)` < `Extensively drug-resistant (XDR)` < `Pandrug-resistant (PDR)`
#' - TB guideline - function [mdr_tb()] or [`mdro(..., guideline = "TB")`][mdro()]:\cr
#' Ordered [`factor`] with levels `Negative` < `Mono-resistant` < `Poly-resistant` < `Multi-drug-resistant` < `Extensively drug-resistant`
#' Ordered [factor] with levels `Negative` < `Mono-resistant` < `Poly-resistant` < `Multi-drug-resistant` < `Extensively drug-resistant`
#' - German guideline - function [mrgn()] or [`mdro(..., guideline = "MRGN")`][mdro()]:\cr
#' Ordered [`factor`] with levels `Negative` < `3MRGN` < `4MRGN`
#' Ordered [factor] with levels `Negative` < `3MRGN` < `4MRGN`
#' - Everything else:\cr
#' Ordered [`factor`] with levels `Negative` < `Positive, unconfirmed` < `Positive`. The value `"Positive, unconfirmed"` means that, according to the guideline, it is not entirely sure if the isolate is multi-drug resistant and this should be confirmed with additional (e.g. molecular) tests
#' Ordered [factor] with levels `Negative` < `Positive, unconfirmed` < `Positive`. The value `"Positive, unconfirmed"` means that, according to the guideline, it is not entirely sure if the isolate is multi-drug resistant and this should be confirmed with additional (e.g. molecular) tests
#' @rdname mdro
#' @aliases MDR XDR PDR BRMO 3MRGN 4MRGN
#' @export

38
R/mic.R
View File

@ -19,15 +19,15 @@
# Visit our website for more info: https://msberends.github.io/AMR. #
# ==================================================================== #
#' Transform input to minimum inhibitory concentrations
#' Transform input to minimum inhibitory concentrations (MIC)
#'
#' This transforms a vector to a new class [`mic`], which is an ordered [`factor`] with valid minimum inhibitory concentrations (MIC) as levels. Invalid MIC values will be translated as `NA` with a warning.
#' This transforms a vector to a new class [`mic`], which is an ordered [factor] with valid minimum inhibitory concentrations (MIC) as levels. Invalid MIC values will be translated as `NA` with a warning.
#' @inheritSection lifecycle Stable lifecycle
#' @rdname as.mic
#' @param x vector
#' @param na.rm a logical indicating whether missing values should be removed
#' @details To interpret MIC values as RSI values, use [as.rsi()] on MIC values. It supports guidelines from EUCAST and CLSI.
#' @return Ordered [`factor`] with new class [`mic`]
#' @return Ordered [factor] with additional class [`mic`]
#' @aliases mic
#' @export
#' @seealso [as.rsi()]
@ -55,7 +55,7 @@ as.mic <- function(x, na.rm = FALSE) {
if (is.mic(x)) {
x
} else {
x <- x %>% unlist()
x <- x %pm>% unlist()
if (na.rm == TRUE) {
x <- x[!is.na(x)]
}
@ -109,13 +109,13 @@ as.mic <- function(x, na.rm = FALSE) {
c(t(sapply(ops, function(x) paste0(x, c(10:98)[9:98 %% 2 == TRUE])))),
c(t(sapply(ops, function(x) paste0(x, sort(c(2 ^ c(7:10), 80 * c(2:12))))))))
na_before <- x[is.na(x) | x == ""] %>% length()
na_before <- x[is.na(x) | x == ""] %pm>% length()
x[!x %in% lvls] <- NA
na_after <- x[is.na(x) | x == ""] %>% length()
na_after <- x[is.na(x) | x == ""] %pm>% length()
if (na_before != na_after) {
list_missing <- x.bak[is.na(x) & !is.na(x.bak) & x.bak != ""] %>%
unique() %>%
list_missing <- x.bak[is.na(x) & !is.na(x.bak) & x.bak != ""] %pm>%
unique() %pm>%
sort()
list_missing <- paste0('"', list_missing, '"', collapse = ", ")
warning(na_after - na_before, " results truncated (",
@ -196,15 +196,15 @@ print.mic <- function(x, ...) {
#' @noRd
summary.mic <- function(object, ...) {
x <- object
n_total <- x %>% length()
n_total <- length(x)
x <- x[!is.na(x)]
n <- x %>% length()
c(
"Class" = "mic",
"<NA>" = n_total - n,
"Min." = sort(x)[1] %>% as.character(),
"Max." = sort(x)[n] %>% as.character()
)
n <- length(x)
value <- c("Class" = "mic",
"<NA>" = n_total - n,
"Min." = as.character(sort(x)[1]),
"Max." = as.character(sort(x)[n]))
class(value) <- c("summaryDefault", "table")
value
}
#' @method plot mic
@ -283,7 +283,7 @@ barplot.mic <- function(height,
#' @export
#' @noRd
c.mic <- function(x, ...) {
y <- NextMethod()
attributes(y) <- attributes(x)
y
y <- unlist(lapply(list(...), as.character))
x <- as.character(x)
as.mic(c(x, y))
}

122
R/mo.R
View File

@ -23,7 +23,7 @@
#'
#' Use this function to determine a valid microorganism ID ([`mo`]). Determination is done using intelligent rules and the complete taxonomic kingdoms Bacteria, Chromista, Protozoa, Archaea and most microbial species from the kingdom Fungi (see Source). The input can be almost anything: a full name (like `"Staphylococcus aureus"`), an abbreviated name (like `"S. aureus"`), an abbreviation known in the field (like `"MRSA"`), or just a genus. Please see *Examples*.
#' @inheritSection lifecycle Stable lifecycle
#' @param x a character vector or a [`data.frame`] with one or two columns
#' @param x a character vector or a [data.frame] with one or two columns
#' @param Becker a logical to indicate whether *Staphylococci* should be categorised into coagulase-negative *Staphylococci* ("CoNS") and coagulase-positive *Staphylococci* ("CoPS") instead of their own species, according to Karsten Becker *et al.* (1,2). Note that this does not include species that were newly named after these publications, like *S. caeli*.
#'
#' This excludes *Staphylococcus aureus* at default, use `Becker = "all"` to also categorise *S. aureus* as "CoPS".
@ -31,7 +31,7 @@
#'
#' This excludes *Enterococci* at default (who are in group D), use `Lancefield = "all"` to also categorise all *Enterococci* as group D.
#' @param allow_uncertain a number between `0` (or `"none"`) and `3` (or `"all"`), or `TRUE` (= `2`) or `FALSE` (= `0`) to indicate whether the input should be checked for less probable results, please see *Details*
#' @param reference_df a [`data.frame`] to be used for extra reference when translating `x` to a valid [`mo`]. See [set_mo_source()] and [get_mo_source()] to automate the usage of your own codes (e.g. used in your analysis or organisation).
#' @param reference_df a [data.frame] to be used for extra reference when translating `x` to a valid [`mo`]. See [set_mo_source()] and [get_mo_source()] to automate the usage of your own codes (e.g. used in your analysis or organisation).
#' @param ignore_pattern a regular expression (case-insensitive) of which all matches in `x` must return `NA`. This can be convenient to exclude known non-relevant input and can also be set with the option `AMR_ignore_pattern`, e.g. `options(AMR_ignore_pattern = "(not reported|contaminated flora)")`.
#' @param language language to translate text like "no growth", which defaults to the system language (see [get_locale()])
#' @param ... other parameters passed on to functions
@ -69,7 +69,7 @@
#' 2. Taxonomic kingdom: the function starts with determining Bacteria, then Fungi, then Protozoa, then others;
#' 3. Breakdown of input values to identify possible matches.
#'
#' This will lead to the effect that e.g. `"E. coli"` (a microorganism highly prevalent in humans) will return the microbial ID of *Escherichia coli* and not *Entamoeba coli* (a microorganism less prevalent in humans), although the latter would alphabetically come first.
#' This will lead to the effect that e.g. `"E. coli"` (a microorganism highly prevalent in humans) will return the microbial ID of *Escherichia coli* and not *Entamoeba coli* (a microorganism less prevalent in humans), although the latter would alphabetically come first.
#'
#' ## Coping with uncertain results
#'
@ -87,9 +87,9 @@
#' - `"Fluoroquinolone-resistant Neisseria gonorrhoeae"`. The first word will be stripped, after which the function will try to find a match. A warning will be thrown that the result *Neisseria gonorrhoeae* (``r as.mo("Neisseria gonorrhoeae")``) needs review.
#'
#' There are three helper functions that can be run after using the [as.mo()] function:
#' - Use [mo_uncertainties()] to get a [`data.frame`] that prints in a pretty format with all taxonomic names that were guessed. The output contains a score that is based on the human pathogenic prevalence and the [Levenshtein distance](https://en.wikipedia.org/wiki/Levenshtein_distance) between the user input and the full taxonomic name.
#' - Use [mo_failures()] to get a [`character`] [`vector`] with all values that could not be coerced to a valid value.
#' - Use [mo_renamed()] to get a [`data.frame`] with all values that could be coerced based on old, previously accepted taxonomic names.
#' - Use [mo_uncertainties()] to get a [data.frame] that prints in a pretty format with all taxonomic names that were guessed. The output contains the matching score for all matches (see *Background on matching score*).
#' - Use [mo_failures()] to get a [character] [vector] with all values that could not be coerced to a valid value.
#' - Use [mo_renamed()] to get a [data.frame] with all values that could be coerced based on old, previously accepted taxonomic names.
#'
#' ## Microbial prevalence of pathogens in humans
#'
@ -100,6 +100,21 @@
#' Group 2 consists of all microorganisms where the taxonomic phylum is Proteobacteria, Firmicutes, Actinobacteria or Sarcomastigophora, or where the taxonomic genus is *Aspergillus*, *Bacteroides*, *Candida*, *Capnocytophaga*, *Chryseobacterium*, *Cryptococcus*, *Elisabethkingia*, *Flavobacterium*, *Fusobacterium*, *Giardia*, *Leptotrichia*, *Mycoplasma*, *Prevotella*, *Rhodotorula*, *Treponema*, *Trichophyton* or *Ureaplasma*. This group consequently contains all less common and rare human pathogens.
#'
#' Group 3 (least prevalent microorganisms) consists of all other microorganisms. This group contains microorganisms most probably not found in humans.
#'
#' ## Background on matching scores
#' With ambiguous user input, the returned results are chosen based on their matching score using [mo_matching_score()]. This matching score is based on four parameters:
#'
#' 1. The prevalence \eqn{P} is categorised into group 1, 2 and 3 as stated above;
#' 2. A kingdom index \eqn{K} is set as follows: Bacteria = 1, Fungi = 2, Protozoa = 3, Archaea = 4, and all others = 5;
#' 3. The level of uncertainty \eqn{U} needed to get to the result, as stated above (1 to 3);
#' 4. The [Levenshtein distance](https://en.wikipedia.org/wiki/Levenshtein_distance) \eqn{L} is the distance between the user input and all taxonomic full names, with the text length of the user input being the maximum distance. A modified version of the Levenshtein distance \eqn{L'} based on the text length of the full name \eqn{F} is calculated as:
#'
#' \deqn{L' = F - \frac{0.5 \times L}{F}}{L' = F - (0.5 * L) / F}
#'
#' The final matching score \eqn{M} is calculated as:
#' \deqn{M = L' \times \frac{1}{P \times K} * \frac{1}{U}}{M = L' * (1 / (P * K)) * (1 / U)}
#'
#' All matches are sorted descending on their matching score and for all user input values, the top match will be returned.
#' @inheritSection catalogue_of_life Catalogue of Life
# (source as a section here, so it can be inherited by other man pages:)
#' @section Source:
@ -108,8 +123,8 @@
#' 3. Lancefield RC **A serological differentiation of human and other groups of hemolytic streptococci**. 1933. J Exp Med. 57(4): 57195. <https://dx.doi.org/10.1084/jem.57.4.571>
#' 4. Catalogue of Life: Annual Checklist (public online taxonomic database), <http://www.catalogueoflife.org> (check included annual version with [catalogue_of_life_version()]).
#' @export
#' @return A [`character`] [`vector`] with additional class [`mo`]
#' @seealso [microorganisms] for the [`data.frame`] that is being used to determine ID's.
#' @return A [character] [vector] with additional class [`mo`]
#' @seealso [microorganisms] for the [data.frame] that is being used to determine ID's.
#'
#' The [mo_property()] functions (like [mo_genus()], [mo_gramstain()]) to get properties based on the returned code.
#' @inheritSection AMR Reference data publicly available
@ -218,7 +233,7 @@ as.mo <- function(x,
# has valid own reference_df
# (data.table not faster here)
reference_df <- reference_df %>% filter(!is.na(mo))
reference_df <- reference_df %pm>% pm_filter(!is.na(mo))
# keep only first two columns, second must be mo
if (colnames(reference_df)[1] == "mo") {
reference_df <- reference_df[, c(2, 1)]
@ -231,9 +246,9 @@ as.mo <- function(x,
reference_df[] <- lapply(reference_df, as.character)
)
suppressWarnings(
y <- data.frame(x = x, stringsAsFactors = FALSE) %>%
left_join(reference_df, by = "x") %>%
pull("mo")
y <- data.frame(x = x, stringsAsFactors = FALSE) %pm>%
pm_left_join(reference_df, by = "x") %pm>%
pm_pull("mo")
)
} else if (all(x %in% MO_lookup$mo)
@ -315,7 +330,10 @@ exec_as.mo <- function(x,
res_df <- haystack[which(eval(substitute(needle), envir = haystack, enclos = parent.frame())), , drop = FALSE]
if (NROW(res_df) > 1 & uncertainty != -1) {
# sort the findings on matching score
res_df <- res_df[order(mo_matching_score(input, res_df[, "fullname", drop = TRUE]), decreasing = TRUE), , drop = FALSE]
scores <- mo_matching_score(x = input,
fullname = res_df[, "fullname", drop = TRUE],
uncertainty = uncertainty)
res_df <- res_df[order(scores, decreasing = TRUE), , drop = FALSE]
}
res <- as.character(res_df[, column, drop = TRUE])
if (length(res) == 0) {
@ -402,7 +420,7 @@ exec_as.mo <- function(x,
if (!is.null(reference_df)) {
mo_source_isvalid(reference_df)
reference_df <- reference_df %>% filter(!is.na(mo))
reference_df <- reference_df %pm>% pm_filter(!is.na(mo))
# keep only first two columns, second must be mo
if (colnames(reference_df)[1] == "mo") {
reference_df <- reference_df[, c(2, 1)]
@ -580,7 +598,7 @@ exec_as.mo <- function(x,
}
if (initial_search == TRUE) {
progress <- progress_estimated(n = length(x), n_min = 25) # start if n >= 25
progress <- progress_ticker(n = length(x), n_min = 25) # start if n >= 25
on.exit(close(progress))
}
@ -955,9 +973,9 @@ exec_as.mo <- function(x,
if (nchar(g.x_backup_without_spp) <= 6) {
x_length <- nchar(g.x_backup_without_spp)
x_split <- paste0("^",
g.x_backup_without_spp %>% substr(1, x_length / 2),
g.x_backup_without_spp %pm>% substr(1, x_length / 2),
".* ",
g.x_backup_without_spp %>% substr((x_length / 2) + 1, x_length))
g.x_backup_without_spp %pm>% substr((x_length / 2) + 1, x_length))
found <- lookup(fullname_lower %like_case% x_split,
haystack = data_to_check)
if (!is.na(found)) {
@ -1149,7 +1167,7 @@ exec_as.mo <- function(x,
if (isTRUE(debug)) {
cat(font_bold("\n[ UNCERTAINTY LEVEL", now_checks_for_uncertainty_level, "] (6) try to strip off half an element from end and check the remains\n"))
}
x_strip <- a.x_backup %>% strsplit("[ .]") %>% unlist()
x_strip <- a.x_backup %pm>% strsplit("[ .]") %pm>% unlist()
if (length(x_strip) > 1) {
for (i in seq_len(length(x_strip) - 1)) {
lastword <- x_strip[length(x_strip) - i + 1]
@ -1232,7 +1250,7 @@ exec_as.mo <- function(x,
if (isTRUE(debug)) {
cat(font_bold("\n[ UNCERTAINTY LEVEL", now_checks_for_uncertainty_level, "] (9) try to strip off one element from start and check the remains (only allow >= 2-part name outcome)\n"))
}
x_strip <- a.x_backup %>% strsplit("[ .]") %>% unlist()
x_strip <- a.x_backup %pm>% strsplit("[ .]") %pm>% unlist()
if (length(x_strip) > 1 & nchar(g.x_backup_without_spp) >= 6) {
for (i in 2:(length(x_strip))) {
x_strip_collapsed <- paste(x_strip[i:length(x_strip)], collapse = " ")
@ -1267,7 +1285,7 @@ exec_as.mo <- function(x,
if (isTRUE(debug)) {
cat(font_bold("\n[ UNCERTAINTY LEVEL", now_checks_for_uncertainty_level, "] (10) try to strip off one element from start and check the remains (any text size)\n"))
}
x_strip <- a.x_backup %>% strsplit("[ .]") %>% unlist()
x_strip <- a.x_backup %pm>% strsplit("[ .]") %pm>% unlist()
if (length(x_strip) > 1 & nchar(g.x_backup_without_spp) >= 6) {
for (i in 2:(length(x_strip))) {
x_strip_collapsed <- paste(x_strip[i:length(x_strip)], collapse = " ")
@ -1398,16 +1416,16 @@ exec_as.mo <- function(x,
if (length(failures) > 0 & initial_search == TRUE) {
options(mo_failures = sort(unique(failures)))
plural <- c("value", "it", "was")
if (n_distinct(failures) > 1) {
if (pm_n_distinct(failures) > 1) {
plural <- c("values", "them", "were")
}
x_input_clean <- trimws2(x_input)
total_failures <- length(x_input_clean[as.character(x_input_clean) %in% as.character(failures) & !x_input %in% c(NA, NULL, NaN)])
total_n <- length(x_input[!x_input %in% c(NA, NULL, NaN)])
msg <- paste0(nr2char(n_distinct(failures)), " unique ", plural[1],
msg <- paste0(nr2char(pm_n_distinct(failures)), " unique ", plural[1],
" (covering ", percentage(total_failures / total_n),
") could not be coerced and ", plural[3], " considered 'unknown'")
if (n_distinct(failures) <= 10) {
if (pm_n_distinct(failures) <= 10) {
msg <- paste0(msg, ": ", paste('"', unique(failures), '"', sep = "", collapse = ", "))
}
msg <- paste0(msg,
@ -1421,7 +1439,7 @@ exec_as.mo <- function(x,
}
# handling uncertainties ----
if (NROW(uncertainties) > 0 & initial_search == TRUE) {
uncertainties <- as.list(distinct(uncertainties, input, .keep_all = TRUE))
uncertainties <- as.list(pm_distinct(uncertainties, input, .keep_all = TRUE))
options(mo_uncertainties = uncertainties)
plural <- c("", "it", "was")
@ -1633,8 +1651,8 @@ freq.mo <- function(x, ...) {
decimal.mark = "."),
" (", percentage(sum(grams == "Gram-positive", na.rm = TRUE) / length(grams), digits = digits),
")"),
`No. of genera` = n_distinct(mo_genus(x_noNA, language = NULL)),
`No. of species` = n_distinct(paste(mo_genus(x_noNA, language = NULL),
`No. of genera` = pm_n_distinct(mo_genus(x_noNA, language = NULL)),
`No. of species` = pm_n_distinct(paste(mo_genus(x_noNA, language = NULL),
mo_species(x_noNA, language = NULL)))))
}
@ -1662,7 +1680,7 @@ summary.mo <- function(object, ...) {
top_3 <- top[order(-top$n), 1][1:3]
value <- c("Class" = "mo",
"<NA>" = length(x[is.na(x)]),
"Unique" = n_distinct(x[!is.na(x)]),
"Unique" = pm_n_distinct(x[!is.na(x)]),
"#1" = top_3[1],
"#2" = top_3[2],
"#3" = top_3[3])
@ -1752,14 +1770,16 @@ print.mo_uncertainties <- function(x, ...) {
if (NROW(x) == 0) {
return(NULL)
}
cat(font_blue(strwrap(c("Scores are based on human pathogenic prevalence and the resemblance between the input and the full taxonomic name. Furthermore, an indication is given about the likelihood of the match - the more transformations are needed for coercion, the more unlikely the result.")), collapse = "\n"))
cat(font_blue(strwrap(c("Matching scores are based on human pathogenic prevalence and the resemblance between the input and the full taxonomic name. Furthermore, an indication is given about the probability of the match - the more transformations are needed for coercion, the more improbable the result.")), collapse = "\n"))
cat("\n")
msg <- ""
for (i in seq_len(nrow(x))) {
if (x[i, ]$candidates != "") {
candidates <- unlist(strsplit(x[i, ]$candidates, ", ", fixed = TRUE))
scores <- mo_matching_score(x[i, ]$input, candidates) * (1 / x[i, ]$uncertainty)
scores <- mo_matching_score(x = x[i, ]$input,
fullname = candidates,
uncertainty = x[i, ]$uncertainty)
# sort on descending scores
candidates <- candidates[order(1 - scores)]
n_candidates <- length(candidates)
@ -1768,23 +1788,26 @@ print.mo_uncertainties <- function(x, ...) {
candidates <- paste(candidates, collapse = ", ")
# align with input after arrow
candidates <- paste0("\n", strrep(" ", nchar(x[i, ]$input) + 6),
"Less likely", ifelse(n_candidates == 25, " (max 25)", ""), ": ", candidates)
"Also matched", ifelse(n_candidates == 25, " (max 25)", ""), ": ", candidates)
} else {
candidates <- ""
}
if (x[i, ]$uncertainty == 1) {
uncertainty_interpretation <- font_green("* VERY LIKELY *")
uncertainty_interpretation <- font_green("* MOST PROBABLE *")
} else if (x[i, ]$uncertainty == 1) {
uncertainty_interpretation <- font_yellow("* LIKELY *")
uncertainty_interpretation <- font_yellow("* PROBABLE *")
} else {
uncertainty_interpretation <- font_red("* UNLIKELY *")
uncertainty_interpretation <- font_red("* IMPROBABLE *")
}
msg <- paste(msg,
paste0('"', x[i, ]$input, '" -> ',
paste0(font_bold(font_italic(x[i, ]$fullname)),
ifelse(!is.na(x[i, ]$renamed_to), paste(", renamed to", font_italic(x[i, ]$renamed_to)), ""),
" (", x[i, ]$mo,
", score: ", trimws(percentage(mo_matching_score(x[i, ]$input, x[i, ]$fullname) * (1 / x[i, ]$uncertainty), digits = 1)),
", matching score = ", trimws(percentage(mo_matching_score(x = x[i, ]$input,
fullname = x[i, ]$fullname,
uncertainty = x[i, ]$uncertainty),
digits = 1)),
") "),
uncertainty_interpretation,
candidates),
@ -1800,7 +1823,7 @@ mo_renamed <- function() {
if (is.null(items)) {
items <- data.frame()
} else {
items <- distinct(items, old_name, .keep_all = TRUE)
items <- pm_distinct(items, old_name, .keep_all = TRUE)
}
structure(.Data = items,
class = c("mo_renamed", "data.frame"))
@ -1872,27 +1895,6 @@ load_mo_failures_uncertainties_renamed <- function(metadata) {
options("mo_renamed" = metadata$renamed)
}
mo_matching_score <- function(input, fullname) {
# fullname is always a taxonomically valid full name
levenshtein <- double(length = length(input))
if (length(fullname) == 1) {
fullname <- rep(fullname, length(input))
}
if (length(input) == 1) {
input <- rep(input, length(fullname))
}
for (i in seq_len(length(input))) {
# determine Levenshtein distance, but maximise to nchar of fullname
levenshtein[i] <- min(as.double(utils::adist(input[i], fullname[i], ignore.case = FALSE)),
nchar(fullname[i]))
}
# self-made score between 0 and 1 (for % certainty, so 0 means huge distance, 1 means no distance)
dist <- (nchar(fullname) - 0.5 * levenshtein) / nchar(fullname)
index_in_MO_lookup <- tryCatch((nrow(MO_lookup) - match(fullname, MO_lookup$fullname)) / nrow(MO_lookup),
error = function(e) rep(1, length(fullname)))
(0.25 * dist) + (0.75 * index_in_MO_lookup)
}
trimws2 <- function(x) {
trimws(gsub("[\\s]+", " ", x, perl = TRUE))
}
@ -1903,13 +1905,13 @@ parse_and_convert <- function(x) {
if (NCOL(x) > 2) {
stop("a maximum of two columns is allowed", call. = FALSE)
} else if (NCOL(x) == 2) {
# support Tidyverse selection like: df %>% select(colA, colB)
# support Tidyverse selection like: df %pm>% select(colA, colB)
# paste these columns together
x <- as.data.frame(x, stringsAsFactors = FALSE)
colnames(x) <- c("A", "B")
x <- paste(x$A, x$B)
} else {
# support Tidyverse selection like: df %>% select(colA)
# support Tidyverse selection like: df %pm>% select(colA)
x <- as.data.frame(x, stringsAsFactors = FALSE)[[1]]
}
}
@ -1950,8 +1952,8 @@ replace_ignore_pattern <- function(x, ignore_pattern) {
}
left_join_MO_lookup <- function(x, ...) {
left_join(x = x, y = MO_lookup, ...)
pm_left_join(x = x, y = MO_lookup, ...)
}
left_join_MO.old_lookup <- function(x, ...) {
left_join(x = x, y = MO.old_lookup, ...)
pm_left_join(x = x, y = MO.old_lookup, ...)
}

63
R/mo_matching_score.R Executable file
View File

@ -0,0 +1,63 @@
# ==================================================================== #
# TITLE #
# Antimicrobial Resistance (AMR) Analysis #
# #
# SOURCE #
# https://github.com/msberends/AMR #
# #
# LICENCE #
# (c) 2018-2020 Berends MS, Luz CF et al. #
# #
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
# the Free Software Foundation. #
# #
# We created this package for both routine data analysis and academic #
# research and it was publicly released in the hope that it will be #
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
# Visit our website for more info: https://msberends.github.io/AMR. #
# ==================================================================== #
#' Calculate the matching score for microorganisms
#'
#' This helper function is used by [as.mo()] to determine the most probable match of taxonomic records, based on user input.
#' @param x Any user input value(s)
#' @param fullname A full taxonomic name, that exists in [`microorganisms$fullname`][microorganisms]
#' @param uncertainty The level of uncertainty set in [as.mo()], see `allow_uncertain` in that function (here, it defaults to 1, but is automatically determined in [as.mo()] based on the number of transformations needed to get to a result)
#' @details The matching score is based on four parameters:
#'
#' 1. A human pathogenic prevalence \eqn{P}, that is categorised into group 1, 2 and 3 (see [as.mo()]);
#' 2. A kingdom index \eqn{K} is set as follows: Bacteria = 1, Fungi = 2, Protozoa = 3, Archaea = 4, and all others = 5;
#' 3. The level of uncertainty \eqn{U} that is needed to get to a result (1 to 3, see [as.mo()]);
#' 4. The [Levenshtein distance](https://en.wikipedia.org/wiki/Levenshtein_distance) \eqn{L} is the distance between the user input and all taxonomic full names, with the text length of the user input being the maximum distance. A modified version of the Levenshtein distance \eqn{L'} based on the text length of the full name \eqn{F} is calculated as:
#'
#' \deqn{L' = F - \frac{0.5 \times L}{F}}{L' = F - (0.5 * L) / F}
#'
#' The final matching score \eqn{M} is calculated as:
#' \deqn{M = L' \times \frac{1}{P \times K} * \frac{1}{U}}{M = L' * (1 / (P * K)) * (1 / U)}
#'
#' @export
#' @examples
#' as.mo("E. coli")
#' mo_uncertainties()
mo_matching_score <- function(x, fullname, uncertainty = 1) {
# fullname is always a taxonomically valid full name
levenshtein <- double(length = length(x))
if (length(fullname) == 1) {
fullname <- rep(fullname, length(x))
}
if (length(x) == 1) {
x <- rep(x, length(fullname))
}
for (i in seq_len(length(x))) {
# determine Levenshtein distance, but maximise to nchar of fullname
levenshtein[i] <- min(as.double(utils::adist(x[i], fullname[i], ignore.case = FALSE)),
nchar(fullname[i]))
}
# self-made score between 0 and 1 (for % certainty, so 0 means huge distance, 1 means no distance)
dist <- (nchar(fullname) - 0.5 * levenshtein) / nchar(fullname)
prevalence_kingdom_index <- tryCatch(MO_lookup[match(fullname, MO_lookup$fullname), "prevalence_kingdom_index", drop = TRUE],
error = function(e) rep(1, length(fullname)))
dist * (1 / prevalence_kingdom_index) * (1 / uncertainty)
}

View File

@ -47,11 +47,11 @@
#' @rdname mo_property
#' @name mo_property
#' @return
#' - An [`integer`] in case of [mo_year()]
#' - A [`list`] in case of [mo_taxonomy()] and [mo_info()]
#' - A named [`character`] in case of [mo_url()]
#' - A [`double`] in case of [mo_snomed()]
#' - A [`character`] in all other cases
#' - An [integer] in case of [mo_year()]
#' - A [list] in case of [mo_taxonomy()] and [mo_info()]
#' - A named [character] in case of [mo_url()]
#' - A [double] in case of [mo_snomed()]
#' - A [character] in all other cases
#' @export
#' @seealso [microorganisms]
#' @inheritSection AMR Reference data publicly available
@ -378,8 +378,8 @@ mo_url <- function(x, open = FALSE, language = get_locale(), ...) {
mo_names <- mo_name(mo)
metadata <- get_mo_failures_uncertainties_renamed()
df <- data.frame(mo, stringsAsFactors = FALSE) %>%
left_join(select(microorganisms, mo, source, species_id), by = "mo")
df <- data.frame(mo, stringsAsFactors = FALSE) %pm>%
pm_left_join(pm_select(microorganisms, mo, source, species_id), by = "mo")
df$url <- ifelse(df$source == "CoL",
paste0(catalogue_of_life$url_CoL, "details/species/id/", df$species_id, "/"),
ifelse(df$source == "DSMZ",

View File

@ -29,11 +29,13 @@
#' @rdname mo_source
#' @name mo_source
#' @aliases set_mo_source get_mo_source
#' @details The reference file can be a text file seperated with commas (CSV) or tabs or pipes, an Excel file (either 'xls' or 'xlsx' format) or an R object file (extension '.rds'). To use an Excel file, you need to have the `readxl` package installed.
#' @details The reference file can be a text file separated with commas (CSV) or tabs or pipes, an Excel file (either 'xls' or 'xlsx' format) or an R object file (extension '.rds'). To use an Excel file, you will need to have the `readxl` package installed.
#'
#' [set_mo_source()] will check the file for validity: it must be a [`data.frame`], must have a column named `"mo"` which contains values from [`microorganisms$mo`][microorganisms] and must have a reference column with your own defined values. If all tests pass, [set_mo_source()] will read the file into R and export it to `"~/.mo_source.rds"`. This compressed data file will then be used at default for MO determination (function [as.mo()] and consequently all `mo_*` functions like [mo_genus()] and [mo_gramstain()]). The location of the original file will be saved as option with `options(mo_source = path)`. Its timestamp will be saved with `options(mo_source_datetime = ...)`.
#'
#' [get_mo_source()] will return the data set by reading `"~/.mo_source.rds"` with [readRDS()]. If the original file has changed (the file defined with `path`), it will call [set_mo_source()] to update the data file automatically.
#' [set_mo_source()] will check the file for validity: it must be a [data.frame], must have a column named `"mo"` which contains values from [`microorganisms$mo`][microorganisms] and must have a reference column with your own defined values. If all tests pass, [set_mo_source()] will read the file into R and export it to `"~/.mo_source.rds"` after the user **specifically confirms and allows** that this file will be created. For this reason, this function only works in interactive sessions.
#'
#' The created compressed data file `"~/.mo_source.rds"` will be used at default for MO determination (function [as.mo()] and consequently all `mo_*` functions like [mo_genus()] and [mo_gramstain()]). The location of the original file will be saved as an R option with `options(mo_source = path)`. Its timestamp will be saved with `options(mo_source_datetime = ...)`.
#'
#' The function [get_mo_source()] will return the data set by reading `"~/.mo_source.rds"` with [readRDS()]. If the original file has changed (by checking the aforementioned options `mo_source` and `mo_source_datetime`), it will call [set_mo_source()] to update the data file automatically.
#'
#' Reading an Excel file (`.xlsx`) with only one row has a size of 8-9 kB. The compressed file created with [set_mo_source()] will then have a size of 0.1 kB and can be read by [get_mo_source()] in only a couple of microseconds (millionths of a second).
#'

View File

@ -78,7 +78,7 @@ pca <- function(x,
error = function(e) stop(e$message, call. = FALSE))
if (length(new_list[[i]]) == 1) {
if (is.character(new_list[[i]]) & new_list[[i]] %in% colnames(x)) {
# this is to support quoted variables: df %>% pca("mycol1", "mycol2")
# this is to support quoted variables: df %pm>% pca("mycol1", "mycol2")
new_list[[i]] <- x[, new_list[[i]]]
} else {
# remove item - it's a parameter like `center`
@ -102,7 +102,7 @@ pca <- function(x,
x <- cbind(x.bak[, sapply(x.bak, function(y) !is.numeric(y) & !all(is.na(y))), drop = FALSE], x)
}
x <- ungroup(x) # would otherwise select the grouping vars
x <- pm_ungroup(x) # would otherwise select the grouping vars
x <- x[rowSums(is.na(x)) == 0, ] # remove columns containing NAs
pca_data <- x[, which(sapply(x, function(x) is.numeric(x)))]

View File

@ -29,7 +29,7 @@
#' @param minimum the minimum allowed number of available (tested) isolates. Any isolate count lower than `minimum` will return `NA` with a warning. The default number of `30` isolates is advised by the Clinical and Laboratory Standards Institute (CLSI) as best practice, see Source.
#' @param as_percent a logical to indicate whether the output must be returned as a hundred fold with % sign (a character). A value of `0.123456` will then be returned as `"12.3%"`.
#' @param only_all_tested (for combination therapies, i.e. using more than one variable for `...`): a logical to indicate that isolates must be tested for all antibiotics, see section *Combination therapy* below
#' @param data a [`data.frame`] containing columns with class [`rsi`] (see [as.rsi()])
#' @param data a [data.frame] containing columns with class [`rsi`] (see [as.rsi()])
#' @param translate_ab a column name of the [antibiotics] data set to translate the antibiotic abbreviations to, using [ab_property()]. Use a value
#' @inheritParams ab_property
#' @param combine_SI a logical to indicate whether all values of S and I must be merged into one, so the output only consists of S+I vs. R (susceptible vs. resistant). This used to be the parameter `combine_IR`, but this now follows the redefinition by EUCAST about the interpretion of I (increased exposure) in 2019, see section 'Interpretation of S, I and R' below. Default is `TRUE`.
@ -79,7 +79,7 @@
#' Using `only_all_tested` has no impact when only using one antibiotic as input.
#' @source **M39 Analysis and Presentation of Cumulative Antimicrobial Susceptibility Test Data, 4th Edition**, 2014, *Clinical and Laboratory Standards Institute (CLSI)*. <https://clsi.org/standards/products/microbiology/documents/m39/>.
#' @seealso [AMR::count()] to count resistant and susceptible isolates.
#' @return A [`double`] or, when `as_percent = TRUE`, a [`character`].
#' @return A [double] or, when `as_percent = TRUE`, a [character].
#' @rdname proportion
#' @aliases portion
#' @name proportion

View File

@ -43,7 +43,7 @@
#' - `"binomial"` or `"binom"` or `"logit"`: a generalised linear regression model with binomial distribution
#' - `"loglin"` or `"poisson"`: a generalised log-linear regression model with poisson distribution
#' - `"lin"` or `"linear"`: a linear regression model
#' @return A [`data.frame`] with extra class [`resistance_predict`] with columns:
#' @return A [data.frame] with extra class [`resistance_predict`] with columns:
#' - `year`
#' - `value`, the same as `estimated` when `preserve_measurements = FALSE`, and a combination of `observed` and `estimated` otherwise
#' - `se_min`, the lower bound of the standard error with a minimum of `0` (so the standard error will never go below 0%)
@ -134,7 +134,7 @@ resistance_predict <- function(x,
dots <- unlist(list(...))
if (length(dots) != 0) {
# backwards compatibility with old parameters
dots.names <- dots %>% names()
dots.names <- dots %pm>% names()
if ("tbl" %in% dots.names) {
x <- dots[which(dots.names == "tbl")]
}
@ -264,8 +264,8 @@ resistance_predict <- function(x,
observations = df$R + df$S,
observed = df$R / (df$R + df$S),
stringsAsFactors = FALSE)
df_prediction <- df_prediction %>%
left_join(df_observations, by = "year")
df_prediction <- df_prediction %pm>%
pm_left_join(df_observations, by = "year")
df_prediction$estimated <- df_prediction$value
if (preserve_measurements == TRUE) {

83
R/rsi.R
View File

@ -215,8 +215,9 @@ is.rsi.eligible <- function(x, threshold = 0.05) {
as.rsi.default <- function(x, ...) {
if (is.rsi(x)) {
x
} else if (identical(levels(x), c("S", "I", "R"))) {
structure(x, class = c("rsi", "ordered", "factor"))
} else if (all(is.na(x)) || identical(levels(x), c("S", "I", "R"))) {
structure(.Data = factor(x, levels = c("S", "I", "R"), ordered = TRUE),
class = c("rsi", "ordered", "factor"))
} else if (inherits(x, "integer") & all(x %in% c(1:3, NA))) {
x[x == 1] <- "S"
x[x == 2] <- "I"
@ -263,8 +264,8 @@ as.rsi.default <- function(x, ...) {
if (!isFALSE(list(...)$warn)) { # so as.rsi(..., warn = FALSE) will never throw a warning
if (na_before != na_after) {
list_missing <- x.bak[is.na(x) & !is.na(x.bak) & x.bak != ""] %>%
unique() %>%
list_missing <- x.bak[is.na(x) & !is.na(x.bak) & x.bak != ""] %pm>%
unique() %pm>%
sort()
list_missing <- paste0('"', list_missing, '"', collapse = ", ")
warning(na_after - na_before, " results truncated (",
@ -324,7 +325,7 @@ as.rsi.mic <- function(x,
mo_coerced <- suppressWarnings(as.mo(mo))
guideline_coerced <- get_guideline(guideline)
if (is.na(ab_coerced)) {
message(font_red(paste0("Unknown drug: `", font_bold(ab), "`. Rename this column to a drug name or code, and check the output with as.ab().")))
message(font_red(paste0("Returning NAs for unknown drug: `", font_bold(ab), "`. Rename this column to a drug name or code, and check the output with as.ab().")))
return(as.rsi(rep(NA, length(x))))
}
if (length(mo_coerced) == 1) {
@ -394,7 +395,7 @@ as.rsi.disk <- function(x,
mo_coerced <- suppressWarnings(as.mo(mo))
guideline_coerced <- get_guideline(guideline)
if (is.na(ab_coerced)) {
message(font_red(paste0("Unknown drug: `", font_bold(ab), "`. Rename this column to a drug name or code, and check the output with as.ab().")))
message(font_red(paste0("Returning NAs for unknown drug: `", font_bold(ab), "`. Rename this column to a drug name or code, and check the output with as.ab().")))
return(as.rsi(rep(NA, length(x))))
}
if (length(mo_coerced) == 1) {
@ -509,15 +510,15 @@ as.rsi.data.frame <- function(x,
for (i in seq_len(length(ab_cols))) {
if (types[i] == "mic") {
x[, ab_cols[i]] <- as.rsi.mic(x = x %>% pull(ab_cols[i]),
mo = x %>% pull(col_mo),
x[, ab_cols[i]] <- as.rsi.mic(x = x %pm>% pm_pull(ab_cols[i]),
mo = x %pm>% pm_pull(col_mo),
ab = ab_cols[i],
guideline = guideline,
uti = uti,
conserve_capped_values = conserve_capped_values)
} else if (types[i] == "disk") {
x[, ab_cols[i]] <- as.rsi.disk(x = x %>% pull(ab_cols[i]),
mo = x %>% pull(col_mo),
x[, ab_cols[i]] <- as.rsi.disk(x = x %pm>% pm_pull(ab_cols[i]),
mo = x %pm>% pm_pull(col_mo),
ab = ab_cols[i],
guideline = guideline,
uti = uti)
@ -554,6 +555,8 @@ exec_as.rsi <- function(method,
conserve_capped_values,
add_intrinsic_resistance) {
metadata_mo <- get_mo_failures_uncertainties_renamed()
x_bak <- data.frame(x_mo = paste0(x, mo))
df <- unique(data.frame(x, mo), stringsAsFactors = FALSE)
x <- df$x
@ -582,7 +585,7 @@ exec_as.rsi <- function(method,
new_rsi <- rep(NA_character_, length(x))
ab_param <- ab
trans <- rsi_translation %>%
trans <- rsi_translation %pm>%
subset(guideline == guideline_coerced & method == method_param & ab == ab_param)
trans$lookup <- paste(trans$mo, trans$ab)
@ -614,7 +617,7 @@ exec_as.rsi <- function(method,
}
}
get_record <- trans %>%
get_record <- trans %pm>%
# no subsetting to UTI for now
subset(lookup %in% c(lookup_mo[i],
lookup_genus[i],
@ -625,14 +628,14 @@ exec_as.rsi <- function(method,
lookup_other[i]))
if (isTRUE(uti[i])) {
get_record <- get_record %>%
get_record <- get_record %pm>%
# be as specific as possible (i.e. prefer species over genus):
# desc(uti) = TRUE on top and FALSE on bottom
arrange(desc(uti), desc(nchar(mo))) # 'uti' is a column in data set 'rsi_translation'
# pm_desc(uti) = TRUE on top and FALSE on bottom
pm_arrange(pm_desc(uti), pm_desc(nchar(mo))) # 'uti' is a column in data set 'rsi_translation'
} else {
get_record <- get_record %>%
filter(uti == FALSE) %>% # 'uti' is a column in rsi_translation
arrange(desc(nchar(mo)))
get_record <- get_record %pm>%
pm_filter(uti == FALSE) %pm>% # 'uti' is a column in rsi_translation
pm_arrange(pm_desc(nchar(mo)))
}
get_record <- get_record[1L, ]
@ -643,29 +646,43 @@ exec_as.rsi <- function(method,
mic_input <- x[i]
mic_S <- as.mic(get_record$breakpoint_S)
mic_R <- as.mic(get_record$breakpoint_R)
new_rsi[i] <- ifelse(isTRUE(conserve_capped_values) & mic_input %like% "^<[0-9]", "S",
ifelse(isTRUE(conserve_capped_values) & mic_input %like% "^>[0-9]", "R",
ifelse(isTRUE(which(levels(mic_input) == mic_input) <= which(levels(mic_S) == mic_S)), "S",
ifelse(isTRUE(which(levels(mic_input) == mic_input) >= which(levels(mic_R) == mic_R)), "R",
ifelse(!is.na(get_record$breakpoint_S) & !is.na(get_record$breakpoint_R), "I",
NA_character_)))))
new_rsi[i] <- quick_case_when(isTRUE(conserve_capped_values) & mic_input %like% "^<[0-9]" ~ "S",
isTRUE(conserve_capped_values) & mic_input %like% "^>[0-9]" ~ "R",
# start interpreting: EUCAST uses <= S and > R, CLSI uses <=S and >= R
isTRUE(which(levels(mic_input) == mic_input) <= which(levels(mic_S) == mic_S)) ~ "S",
guideline_coerced %like% "ECUAST" &
isTRUE(which(levels(mic_input) == mic_input) > which(levels(mic_R) == mic_R)) ~ "R",
guideline_coerced %like% "CLSI" &
isTRUE(which(levels(mic_input) == mic_input) >= which(levels(mic_R) == mic_R)) ~ "R",
# return "I" when not match the bottom or top
!is.na(get_record$breakpoint_S) & !is.na(get_record$breakpoint_R) ~ "I",
# and NA otherwise
TRUE ~ NA_character_)
} else if (method == "disk") {
new_rsi[i] <- ifelse(isTRUE(as.double(x[i]) >= as.double(get_record$breakpoint_S)), "S",
ifelse(isTRUE(as.double(x[i]) <= as.double(get_record$breakpoint_R)), "R",
ifelse(!is.na(get_record$breakpoint_S) & !is.na(get_record$breakpoint_R), "I",
NA_character_)))
new_rsi[i] <- quick_case_when(isTRUE(as.double(x[i]) >= as.double(get_record$breakpoint_S)) ~ "S",
# start interpreting: EUCAST uses >= S and < R, CLSI uses >=S and <= R
guideline_coerced %like% "ECUAST" &
isTRUE(as.double(x[i]) < as.double(get_record$breakpoint_R)) ~ "R",
guideline_coerced %like% "CLSI" &
isTRUE(as.double(x[i]) <= as.double(get_record$breakpoint_R)) ~ "R",
# return "I" when not match the bottom or top
!is.na(get_record$breakpoint_S) & !is.na(get_record$breakpoint_R) ~ "I",
# and NA otherwise
TRUE ~ NA_character_)
}
}
}
new_rsi <- x_bak %>%
left_join(data.frame(x_mo = paste0(df$x, df$mo), new_rsi), by = "x_mo") %>%
pull(new_rsi)
new_rsi <- x_bak %pm>%
pm_left_join(data.frame(x_mo = paste0(df$x, df$mo), new_rsi), by = "x_mo") %pm>%
pm_pull(new_rsi)
if (warned == FALSE) {
message(font_green("OK."))
}
load_mo_failures_uncertainties_renamed(metadata_mo)
structure(.Data = factor(new_rsi, levels = c("S", "I", "R"), ordered = TRUE),
class = c("rsi", "ordered", "factor"))
}
@ -781,7 +798,7 @@ plot.rsi <- function(x,
# don't use as.rsi() here, it will confuse plot()
data$x <- factor(data$x, levels = c("S", "I", "R"), ordered = TRUE)
ymax <- if_else(max(data$s) > 95, 105, 100)
ymax <- pm_if_else(max(data$s) > 95, 105, 100)
# get plot() generic; this was moved from the 'graphics' pkg to the 'base' pkg in R 4.0.0
if (as.integer(R.Version()$major) >= 4) {
@ -799,7 +816,7 @@ plot.rsi <- function(x,
axes = axes,
...)
# x axis
axis(side = 1, at = 1:n_distinct(data$x), labels = levels(data$x), lwd = 0)
axis(side = 1, at = 1:pm_n_distinct(data$x), labels = levels(data$x), lwd = 0)
# y axis, 0-100%
axis(side = 2, at = seq(0, 100, 5))

View File

@ -54,7 +54,7 @@ rsi_calc <- function(...,
ndots <- length(dots)
if (is.data.frame(dots_df)) {
# data.frame passed with other columns, like: example_isolates %>% proportion_S(AMC, GEN)
# data.frame passed with other columns, like: example_isolates %pm>% proportion_S(AMC, GEN)
dots <- as.character(dots)
# remove first element, it's the data.frame
@ -64,12 +64,12 @@ rsi_calc <- function(...,
dots <- dots[2:length(dots)]
}
if (length(dots) == 0 | all(dots == "df")) {
# for complete data.frames, like example_isolates %>% select(AMC, GEN) %>% proportion_S()
# for complete data.frames, like example_isolates %pm>% select(AMC, GEN) %pm>% proportion_S()
# and the old rsi function, which has "df" as name of the first parameter
x <- dots_df
} else {
# get dots that are in column names already, and the ones that will be once evaluated using dots_df or global env
# this is to support susceptibility(example_isolates, AMC, dplyr::all_of(some_vector_with_AB_names))
# this is to support susceptibility(example_isolates, AMC, any_of(some_vector_with_AB_names))
dots <- c(dots[dots %in% colnames(dots_df)],
eval(parse(text = dots[!dots %in% colnames(dots_df)]), envir = dots_df, enclos = globalenv()))
dots_not_exist <- dots[!dots %in% colnames(dots_df)]
@ -77,14 +77,14 @@ rsi_calc <- function(...,
x <- dots_df[, dots, drop = FALSE]
}
} else if (ndots == 1) {
# only 1 variable passed (can also be data.frame), like: proportion_S(example_isolates$AMC) and example_isolates$AMC %>% proportion_S()
# only 1 variable passed (can also be data.frame), like: proportion_S(example_isolates$AMC) and example_isolates$AMC %pm>% proportion_S()
x <- dots_df
} else {
# multiple variables passed without pipe, like: proportion_S(example_isolates$AMC, example_isolates$GEN)
x <- NULL
try(x <- as.data.frame(dots, stringsAsFactors = FALSE), silent = TRUE)
if (is.null(x)) {
# support for example_isolates %>% group_by(hospital_id) %>% summarise(amox = susceptibility(GEN, AMX))
# support for example_isolates %pm>% group_by(hospital_id) %pm>% summarise(amox = susceptibility(GEN, AMX))
x <- as.data.frame(list(...), stringsAsFactors = FALSE)
}
}
@ -138,7 +138,7 @@ rsi_calc <- function(...,
}
if (print_warning == TRUE) {
warning("Increase speed by transforming to class <rsi> on beforehand: your_data %>% mutate_if(is.rsi.eligible, as.rsi)",
warning("Increase speed by transforming to class <rsi> on beforehand: your_data %pm>% mutate_if(is.rsi.eligible, as.rsi)",
call. = FALSE)
}
@ -187,9 +187,9 @@ rsi_calc_df <- function(type, # "proportion", "count" or "both"
translate_ab <- get_translate_ab(translate_ab)
# select only groups and antibiotics
if (has_groups(data)) {
if (pm_has_groups(data)) {
data_has_groups <- TRUE
groups <- setdiff(names(get_groups(data)), ".rows") # get_groups is from poorman.R
groups <- setdiff(names(pm_get_group_details(data)), ".rows")
data <- data[, c(groups, colnames(data)[sapply(data, is.rsi)]), drop = FALSE]
} else {
data_has_groups <- FALSE
@ -261,13 +261,14 @@ rsi_calc_df <- function(type, # "proportion", "count" or "both"
out
}
# support dplyr groups
apply_group <- function(.data, fn, groups, ...) {
grouped <- split(x = .data, f = lapply(groups, function(x, .data) as.factor(.data[, x]), .data))
# based on pm_apply_grouped_function
apply_group <- function(.data, fn, groups, drop = FALSE, ...) {
#groups <- get_groups(.data)
grouped <- pm_split_into_groups(.data, groups, drop)
res <- do.call(rbind, unname(lapply(grouped, fn, ...)))
if (any(groups %in% colnames(res))) {
class(res) <- c("grouped_data", class(res))
attr(res, "groups") <- groups[groups %in% colnames(res)]
res <- set_groups(res, groups[groups %in% colnames(res)])
}
res
}
@ -291,7 +292,7 @@ rsi_calc_df <- function(type, # "proportion", "count" or "both"
if (data_has_groups) {
# ordering by the groups and two more: "antibiotic" and "interpretation"
out <- ungroup(out[do.call("order", out[, seq_len(length(groups) + 2)]), ])
out <- pm_ungroup(out[do.call("order", out[, seq_len(length(groups) + 2)]), ])
} else {
out <- out[order(out$antibiotic, out$interpretation), ]
}

View File

@ -25,7 +25,7 @@
#'
#' When negative: the left tail is longer; the mass of the distribution is concentrated on the right of the figure. When positive: the right tail is longer; the mass of the distribution is concentrated on the left of the figure.
#' @inheritSection lifecycle Questioning lifecycle
#' @param x a vector of values, a [`matrix`] or a [`data.frame`]
#' @param x a vector of values, a [`matrix`] or a [data.frame]
#' @param na.rm a logical value indicating whether `NA` values should be stripped before the computation proceeds.
#' @seealso [kurtosis()]
#' @rdname skewness

19
R/zzz.R
View File

@ -20,6 +20,10 @@
# ==================================================================== #
.onLoad <- function(libname, pkgname) {
assign(x = "AB_lookup",
value = create_AB_lookup(),
envir = asNamespace("AMR"))
assign(x = "MO_lookup",
value = create_MO_lookup(),
envir = asNamespace("AMR"))
@ -60,14 +64,27 @@
"\n[ prevent his notice with suppressPackageStartupMessages(library(AMR)) or use options(AMR_silentstart = TRUE) ]")
}
create_AB_lookup <- function() {
AB_lookup <- AMR::antibiotics
AB_lookup$generalised_name <- generalise_antibiotic_name(AB_lookup$name)
AB_lookup$generalised_synonyms <- lapply(AB_lookup$synonyms, generalise_antibiotic_name)
AB_lookup$generalised_abbreviations <- lapply(AB_lookup$abbreviations, generalise_antibiotic_name)
AB_lookup$generalised_loinc <- lapply(AB_lookup$loinc, generalise_antibiotic_name)
AB_lookup
}
create_MO_lookup <- function() {
MO_lookup <- AMR::microorganisms
MO_lookup$kingdom_index <- 99
MO_lookup$kingdom_index <- NA_real_
MO_lookup[which(MO_lookup$kingdom == "Bacteria" | MO_lookup$mo == "UNKNOWN"), "kingdom_index"] <- 1
MO_lookup[which(MO_lookup$kingdom == "Fungi"), "kingdom_index"] <- 2
MO_lookup[which(MO_lookup$kingdom == "Protozoa"), "kingdom_index"] <- 3
MO_lookup[which(MO_lookup$kingdom == "Archaea"), "kingdom_index"] <- 4
# all the rest
MO_lookup[which(is.na(MO_lookup$kingdom_index)), "kingdom_index"] <- 5
MO_lookup$prevalence_kingdom_index <- MO_lookup$prevalence * MO_lookup$kingdom_index
# use this paste instead of `fullname` to work with Viridans Group Streptococci, etc.
MO_lookup$fullname_lower <- tolower(trimws(paste(MO_lookup$genus,