AMR/R/bug_drug_combinations.R

363 lines
15 KiB
R
Raw Normal View History

2019-08-25 22:53:22 +02:00
# ==================================================================== #
# TITLE #
2022-10-05 09:12:22 +02:00
# AMR: An R Package for Working with Antimicrobial Resistance Data #
2019-08-25 22:53:22 +02:00
# #
# SOURCE #
2020-07-08 14:48:06 +02:00
# https://github.com/msberends/AMR #
2019-08-25 22:53:22 +02:00
# #
2022-10-05 09:12:22 +02:00
# CITE AS #
# Berends MS, Luz CF, Friedrich AW, Sinha BNM, Albers CJ, Glasner C #
# (2022). AMR: An R Package for Working with Antimicrobial Resistance #
# Data. Journal of Statistical Software, 104(3), 1-31. #
# doi:10.18637/jss.v104.i03 #
# #
2022-12-27 15:16:15 +01:00
# Developed at the University of Groningen and the University Medical #
# Center Groningen in The Netherlands, in collaboration with many #
# colleagues from around the world, see our website. #
2019-08-25 22:53:22 +02:00
# #
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
# the Free Software Foundation. #
# We created this package for both routine data analysis and academic #
# research and it was publicly released in the hope that it will be #
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
2020-10-08 11:16:03 +02:00
# #
# Visit our website for the full manual and a complete tutorial about #
# how to conduct AMR data analysis: https://msberends.github.io/AMR/ #
2019-08-25 22:53:22 +02:00
# ==================================================================== #
#' Determine Bug-Drug Combinations
2022-08-28 10:31:50 +02:00
#'
2021-07-04 15:26:50 +02:00
#' Determine antimicrobial resistance (AMR) of all bug-drug combinations in your data set where at least 30 (default) isolates are available per species. Use [format()] on the result to prettify it to a publishable/printable format, see *Examples*.
2019-08-25 22:53:22 +02:00
#' @inheritParams eucast_rules
#' @param combine_SI a [logical] to indicate whether values S and I should be summed, so resistance will be based on only R, defaults to `TRUE`
2021-05-12 18:15:03 +02:00
#' @param add_ab_group a [logical] to indicate whether the group of the antimicrobials must be included as a first column
#' @param remove_intrinsic_resistant [logical] to indicate that rows and columns with 100% resistance for all tested antimicrobials must be removed from the table
2022-08-28 10:31:50 +02:00
#' @param FUN the function to call on the `mo` column to transform the microorganism codes, defaults to [mo_shortname()]
2021-05-12 18:15:03 +02:00
#' @param translate_ab a [character] of length 1 containing column names of the [antibiotics] data set
#' @param ... arguments passed on to `FUN`
2023-01-21 23:47:20 +01:00
#' @inheritParams sir_df
2019-09-23 13:53:50 +02:00
#' @inheritParams base::formatC
#' @details The function [format()] calculates the resistance per bug-drug combination. Use `combine_SI = TRUE` (default) to test R vs. S+I and `combine_SI = FALSE` to test R+I vs. S.
2019-08-25 22:53:22 +02:00
#' @export
2019-08-27 22:41:09 +02:00
#' @rdname bug_drug_combinations
#' @return The function [bug_drug_combinations()] returns a [data.frame] with columns "mo", "ab", "S", "I", "R" and "total".
2022-08-28 10:31:50 +02:00
#' @examples
2019-08-25 22:53:22 +02:00
#' \donttest{
#' # example_isolates is a data set available in the AMR package.
#' # run ?example_isolates for more info.
#' example_isolates
#'
#' x <- bug_drug_combinations(example_isolates)
2022-08-21 16:37:20 +02:00
#' head(x)
2019-10-11 17:21:02 +02:00
#' format(x, translate_ab = "name (atc)")
2022-08-28 10:31:50 +02:00
#'
2019-09-23 13:53:50 +02:00
#' # Use FUN to change the transformation of microorganism codes
#' bug_drug_combinations(example_isolates,
2022-08-28 10:31:50 +02:00
#' FUN = mo_gramstain
#' )
#'
#' bug_drug_combinations(example_isolates,
#' FUN = function(x) {
#' ifelse(x == as.mo("Escherichia coli"),
#' "E. coli",
#' "Others"
#' )
#' }
#' )
2019-08-25 22:53:22 +02:00
#' }
2022-08-28 10:31:50 +02:00
bug_drug_combinations <- function(x,
                                  col_mo = NULL,
                                  FUN = mo_shortname,
                                  ...) {
  # Count the S, I and R results for every microorganism/antimicrobial
  # combination in `x`.
  #
  # x:      a data.frame containing at least one column of class 'sir'
  # col_mo: name of the microorganism column; when NULL it is looked up with
  #         search_type_in_df(type = "mo")
  # FUN:    function applied to the microorganism column before counting
  # ...:    passed on to FUN
  #
  # The result has the columns mo, ab, S, I, R and total (preceded by the
  # grouping columns when `x` is grouped). The class "bug_drug_combinations"
  # is prepended, followed by "grouped" when `x` was grouped.
  meet_criteria(x, allow_class = "data.frame", contains_column_class = "sir")
  meet_criteria(col_mo, allow_class = "character", is_in = colnames(x), has_length = 1, allow_NULL = TRUE)
  meet_criteria(FUN, allow_class = "function", has_length = 1)

  # try to find columns based on type
  # -- mo
  if (is.null(col_mo)) {
    col_mo <- search_type_in_df(x = x, type = "mo")
    stop_if(is.null(col_mo), "`col_mo` must be set")
  } else {
    stop_ifnot(col_mo %in% colnames(x), "column '", col_mo, "' (`col_mo`) not found")
  }

  # use dplyr and tidyr if they are available, they are much faster!
  if (pkg_is_available("dplyr", min_version = "1.0.0", also_load = FALSE) &&
    pkg_is_available("tidyr", min_version = "1.0.0", also_load = FALSE)) {
    across <- import_fn("across", "dplyr")
    pivot_longer <- import_fn("pivot_longer", "tidyr")
    out <- x %>%
      ungroup() %>%
      mutate(mo = FUN(ungroup(x)[, col_mo, drop = TRUE], ...)) %>%
      pivot_longer(where(is.sir), names_to = "ab") %>%
      group_by(across(c(group_vars(x), mo, ab))) %>%
      summarise(
        S = sum(value == "S", na.rm = TRUE),
        I = sum(value == "I", na.rm = TRUE),
        R = sum(value == "R", na.rm = TRUE),
        .groups = "drop"
      ) %>%
      mutate(total = S + I + R)
    out <- out %>% arrange(mo, ab)
    # `if` without `else` yields NULL, which c() drops; ifelse() with a
    # zero-length `no` would insert NA into the class vector instead
    return(structure(out,
      class = c(
        "bug_drug_combinations",
        if (is_null_or_grouped_tbl(x)) "grouped",
        class(out)
      )
    ))
  }

  # no dplyr or tidyr available, so use base R
  x.bak <- x
  x <- as.data.frame(x, stringsAsFactors = FALSE)
  x[, col_mo] <- FUN(x[, col_mo, drop = TRUE], ...)

  unique_mo <- sort(unique(x[, col_mo, drop = TRUE]))

  # select only groups and antibiotics
  data_has_groups <- is_null_or_grouped_tbl(x.bak)
  groups <- if (data_has_groups) {
    setdiff(names(attributes(x.bak)$groups), ".rows")
  } else {
    character(0)
  }
  sir_columns <- colnames(x)[vapply(FUN.VALUE = logical(1), x, is.sir)]
  x <- x[, c(groups, col_mo, sir_columns), drop = FALSE]

  # counts S/I/R per microorganism and antimicrobial for one (sub)set of rows
  run_it <- function(x) {
    out <- data.frame(
      mo = character(0),
      ab = character(0),
      S = integer(0),
      I = integer(0),
      R = integer(0),
      total = integer(0),
      stringsAsFactors = FALSE
    )
    if (data_has_groups) {
      group_values <- unique(x[, which(colnames(x) %in% groups), drop = FALSE])
      rownames(group_values) <- NULL
      x <- x[, which(!colnames(x) %in% groups), drop = FALSE]
    }

    # the SIR columns do not change between iterations, so determine them once
    sir_cols <- names(which(vapply(FUN.VALUE = logical(1), x, is.sir)))
    # collect the per-microorganism results and bind them once at the end,
    # instead of growing `out` with rbind() on every iteration
    pieces <- vector("list", length(unique_mo))

    for (i in seq_along(unique_mo)) {
      # filter on MO group and only select SIR columns
      x_mo_filter <- x[which(x[, col_mo, drop = TRUE] == unique_mo[i]), sir_cols, drop = FALSE]
      # turn and merge everything
      pivot <- lapply(x_mo_filter, function(x) {
        m <- as.matrix(table(x))
        data.frame(S = m["S", ], I = m["I", ], R = m["R", ], stringsAsFactors = FALSE)
      })
      merged <- do.call(rbind, pivot)
      out_group <- data.frame(
        mo = rep(unique_mo[i], NROW(merged)),
        ab = rownames(merged),
        S = merged$S,
        I = merged$I,
        R = merged$R,
        total = merged$S + merged$I + merged$R,
        stringsAsFactors = FALSE
      )
      if (data_has_groups) {
        if (nrow(group_values) < nrow(out_group)) {
          # repeat group_values for the number of rows in out_group
          repeated <- rep(seq_len(nrow(group_values)),
            each = nrow(out_group) / nrow(group_values)
          )
          group_values <- group_values[repeated, , drop = FALSE]
        }
        out_group <- cbind(group_values, out_group)
      }
      pieces[[i]] <- out_group
    }
    # rbind() drops the zero-row template when there is at least one piece,
    # and returns the template itself when there are no microorganisms
    do.call(rbind, c(list(out), pieces, list(stringsAsFactors = FALSE)))
  }

  # based on pm_apply_grouped_function
  apply_group <- function(.data, fn, groups, drop = FALSE, ...) {
    grouped <- pm_split_into_groups(.data, groups, drop)
    res <- do.call(rbind, unname(lapply(grouped, fn, ...)))
    if (any(groups %in% colnames(res))) {
      class(res) <- c("grouped_data", class(res))
      res <- pm_set_groups(res, groups[groups %in% colnames(res)])
    }
    res
  }

  if (data_has_groups) {
    out <- apply_group(x, "run_it", groups)
  } else {
    out <- run_it(x)
  }
  rownames(out) <- NULL
  out <- out %>% arrange(mo, ab)
  out <- as_original_data_class(out, class(x.bak)) # will remove tibble groups
  # see the note in the dplyr branch: avoid ifelse(), which would add NA
  structure(out,
    class = c(
      "bug_drug_combinations",
      if (data_has_groups) "grouped",
      class(out)
    )
  )
}
2020-05-28 16:48:55 +02:00
#' @method format bug_drug_combinations
2019-08-25 22:53:22 +02:00
#' @export
2019-08-27 22:41:09 +02:00
#' @rdname bug_drug_combinations
2019-09-25 15:43:22 +02:00
format.bug_drug_combinations <- function(x,
                                         translate_ab = "name (ab, atc)",
                                         language = get_AMR_locale(),
                                         minimum = 30,
                                         combine_SI = TRUE,
                                         add_ab_group = TRUE,
                                         remove_intrinsic_resistant = FALSE,
                                         decimal.mark = getOption("OutDec"),
                                         big.mark = ifelse(decimal.mark == ",", " ", ","),
                                         ...) {
  # Turn the long output of bug_drug_combinations() into a wide table with one
  # row per antimicrobial and one column per microorganism. Each cell holds
  # "<percentage> (<isolates>/<total>)".
  #
  # translate_ab: template for the drug label; the tokens ab, cid, group,
  #               atc_group1, atc_group2, atc and name are substituted
  # minimum:      combinations with fewer than this many results are dropped
  # combine_SI:   TRUE counts only R as resistant, FALSE counts R + I
  # add_ab_group: whether to keep the antimicrobial group as first column
  # remove_intrinsic_resistant: drop combinations (and columns) that are
  #               100% resistant
  # decimal.mark, big.mark: passed on to the number formatting
  meet_criteria(x, allow_class = "data.frame")
  meet_criteria(translate_ab, allow_class = c("character", "logical"), has_length = 1, allow_NA = TRUE)
  language <- validate_language(language)
  meet_criteria(minimum, allow_class = c("numeric", "integer"), has_length = 1, is_positive_or_zero = TRUE, is_finite = TRUE)
  meet_criteria(combine_SI, allow_class = "logical", has_length = 1)
  meet_criteria(add_ab_group, allow_class = "logical", has_length = 1)
  meet_criteria(remove_intrinsic_resistant, allow_class = "logical", has_length = 1)
  meet_criteria(decimal.mark, allow_class = "character", has_length = 1)
  meet_criteria(big.mark, allow_class = "character", has_length = 1)

  x.bak <- x
  if (inherits(x, "grouped")) {
    # bug_drug_combinations() has been run on groups, so de-group here
    warning_("in `format()`: formatting the output of `bug_drug_combinations()` does not support grouped variables, they were ignored")
    x <- as.data.frame(x, stringsAsFactors = FALSE)
    # sum the counts over all groups for each mo/ab pair
    idx <- split(seq_len(nrow(x)), paste0(x$mo, "%%", x$ab))
    x <- data.frame(
      mo = gsub("(.*)%%(.*)", "\\1", names(idx)),
      ab = gsub("(.*)%%(.*)", "\\2", names(idx)),
      S = vapply(FUN.VALUE = double(1), idx, function(i) sum(x$S[i], na.rm = TRUE)),
      I = vapply(FUN.VALUE = double(1), idx, function(i) sum(x$I[i], na.rm = TRUE)),
      R = vapply(FUN.VALUE = double(1), idx, function(i) sum(x$R[i], na.rm = TRUE)),
      total = vapply(FUN.VALUE = double(1), idx, function(i) {
        sum(x$S[i], na.rm = TRUE) +
          sum(x$I[i], na.rm = TRUE) +
          sum(x$R[i], na.rm = TRUE)
      }),
      stringsAsFactors = FALSE
    )
  }

  x <- as.data.frame(x, stringsAsFactors = FALSE)
  x <- subset(x, total >= minimum)

  if (remove_intrinsic_resistant == TRUE) {
    x <- subset(x, R != total)
  }
  if (combine_SI == TRUE) {
    x$isolates <- x$R
  } else {
    x$isolates <- x$R + x$I
  }

  # Build the drug label for every element of `ab` from the template `format`.
  # All tokens are located in one pass over the template and then replaced
  # together, so that:
  #  - longer tokens win over the shorter tokens they contain ("atc_group1"
  #    is not broken up by "group" or "atc"), and
  #  - text that was inserted for one token is never scanned for other tokens.
  give_ab_name <- function(ab, format, language) {
    format <- tolower(format)
    if (is.na(format)) {
      # an NA template gives NA labels
      return(rep(format, length(ab)))
    }
    # ordered from longest to shortest token
    lookups <- list(
      atc_group1 = function(a) ab_atc_group1(a, language = language),
      atc_group2 = function(a) ab_atc_group2(a, language = language),
      group = function(a) ab_group(a, language = language),
      name = function(a) ab_name(a, language = language),
      atc = function(a) ab_atc(a, only_first = TRUE),
      cid = function(a) ab_cid(a),
      ab = function(a) as.character(as.ab(a))
    )
    hits <- gregexpr(paste(names(lookups), collapse = "|"), format)
    tokens <- regmatches(format, hits)[[1]]
    if (length(tokens) == 0) {
      # nothing to substitute
      return(rep(format, length(ab)))
    }
    vapply(FUN.VALUE = character(1), seq_along(ab), function(i) {
      # only look up the properties that the template actually uses
      values <- vapply(FUN.VALUE = character(1), unique(tokens), function(token) {
        as.character(lookups[[token]](ab[i]))[1]
      })
      if (anyNA(values)) {
        # as with gsub(), an NA replacement makes the whole label NA
        return(NA_character_)
      }
      ab_txt <- format
      regmatches(ab_txt, hits) <- list(unname(values[tokens]))
      ab_txt
    })
  }

  # replace NA by an empty string in every column, keeping the column names
  remove_NAs <- function(.data) {
    cols <- colnames(.data)
    .data <- as.data.frame(lapply(.data, function(x) ifelse(is.na(x), "", x)),
      stringsAsFactors = FALSE
    )
    colnames(.data) <- cols
    .data
  }

  y <- x %>%
    mutate(
      ab = as.ab(x$ab),
      ab_txt = give_ab_name(ab = x$ab, format = translate_ab, language = language)
    ) %>%
    group_by(ab, ab_txt, mo) %>%
    summarise(
      isolates = sum(isolates, na.rm = TRUE),
      total = sum(total, na.rm = TRUE)
    ) %>%
    ungroup()

  y <- y %>%
    mutate(txt = paste0(
      percentage(y$isolates / y$total, decimal.mark = decimal.mark, big.mark = big.mark),
      " (", trimws(format(y$isolates, big.mark = big.mark)), "/",
      trimws(format(y$total, big.mark = big.mark)), ")"
    )) %>%
    select(ab, ab_txt, mo, txt) %>%
    arrange(mo)

  # replace tidyr::pivot_wider() from here
  for (i in unique(y$mo)) {
    mo_group <- y[which(y$mo == i), c("ab", "txt"), drop = FALSE]
    colnames(mo_group) <- c("ab", i)
    rownames(mo_group) <- NULL
    y <- y %>%
      left_join(mo_group, by = "ab")
  }
  y <- y %>%
    distinct(ab, .keep_all = TRUE) %>%
    select(-mo, -txt) %>%
    # replace tidyr::pivot_wider() until here
    remove_NAs()

  # put ab_group and ab_txt first and drop the ab code column
  select_ab_vars <- function(.data) {
    .data[, c("ab_group", "ab_txt", colnames(.data)[!colnames(.data) %in% c("ab_group", "ab_txt", "ab")]), drop = FALSE]
  }

  y <- y %>%
    mutate(ab_group = ab_group(y$ab, language = language)) %>%
    select_ab_vars() %>%
    arrange(ab_group, ab_txt)
  # show each group name only on the first row of that group
  y <- y %>%
    mutate(ab_group = ifelse(y$ab_group != lag(y$ab_group) | is.na(lag(y$ab_group)), y$ab_group, ""))

  if (add_ab_group == FALSE) {
    y <- y %>%
      select(-ab_group) %>%
      rename("Drug" = ab_txt)
    colnames(y)[1] <- translate_into_language(colnames(y)[1], language, only_unknown = FALSE)
  } else {
    y <- y %>%
      rename(
        "Group" = ab_group,
        "Drug" = ab_txt
      )
  }

  if (!is.null(language)) {
    colnames(y) <- translate_into_language(colnames(y), language, only_unknown = FALSE)
  }

  if (remove_intrinsic_resistant == TRUE) {
    # Each cell starts with the percentage, so anchor the match there. An
    # unanchored "100" would also match counts such as "50% (100/200)".
    y <- y[, !vapply(FUN.VALUE = logical(1), y, function(col) all(col %like% "^ *100", na.rm = TRUE) & !anyNA(col)), drop = FALSE]
  }

  rownames(y) <- NULL
  as_original_data_class(y, class(x.bak)) # will remove tibble groups
}
2020-05-28 16:48:55 +02:00
#' @method print bug_drug_combinations
2019-08-25 22:53:22 +02:00
#' @export
2019-08-27 22:41:09 +02:00
print.bug_drug_combinations <- function(x, ...) {
  # Print the underlying data and then point the user to format().
  #
  # x:   an object of class "bug_drug_combinations"
  # ...: passed on to the print method of the underlying data class
  #
  # Returns `x` invisibly, as print methods conventionally do.
  x_class <- class(x)
  # Strip the marker classes so that printing falls through to the method of
  # the underlying data class. NA entries are dropped as well, since they are
  # not valid class names.
  keep <- !is.na(x_class) & !x_class %in% c("bug_drug_combinations", "grouped")
  print(
    set_clean_class(x,
      new_class = x_class[keep]
    ),
    ...
  )
  message_("Use 'format()' on this result to get a publishable/printable format.",
    ifelse(inherits(x, "grouped"), " Note: The grouping variable(s) will be ignored.", ""),
    as_note = FALSE
  )
  invisible(x)
}