# ==================================================================== #
# TITLE                                                                #
# AMR: An R Package for Working with Antimicrobial Resistance Data     #
#                                                                      #
# SOURCE                                                               #
# https://github.com/msberends/AMR                                     #
#                                                                      #
# CITE AS                                                              #
# Berends MS, Luz CF, Friedrich AW, Sinha BNM, Albers CJ, Glasner C    #
# (2022). AMR: An R Package for Working with Antimicrobial Resistance  #
# Data. Journal of Statistical Software, 104(3), 1-31.                 #
# doi:10.18637/jss.v104.i03                                            #
#                                                                      #
# Developed at the University of Groningen and the University Medical  #
# Center Groningen in The Netherlands, in collaboration with many      #
# colleagues from around the world, see our website.                   #
#                                                                      #
# This R package is free software; you can freely use and distribute   #
# it for both personal and commercial purposes under the terms of the  #
# GNU General Public License version 2.0 (GNU GPL-2), as published by  #
# the Free Software Foundation.                                        #
# We created this package for both routine data analysis and academic  #
# research and it was publicly released in the hope that it will be    #
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY.              #
#                                                                      #
# Visit our website for the full manual and a complete tutorial about  #
# how to conduct AMR data analysis: https://msberends.github.io/AMR/   #
# ==================================================================== #

#' Determine Bug-Drug Combinations
#'
#' Determine antimicrobial resistance (AMR) of all bug-drug combinations in your data set where at least 30 (default) isolates are available per species. Use [format()] on the result to prettify it to a publishable/printable format, see *Examples*.
#' @inheritParams eucast_rules
#' @param combine_SI a [logical] to indicate whether values S and I should be summed, so resistance will be based on only R, defaults to `TRUE`
#' @param add_ab_group a [logical] to indicate whether the group of the antimicrobials must be included as a first column
#' @param remove_intrinsic_resistant a [logical] to indicate that rows and columns with 100% resistance for all tested antimicrobials must be removed from the table
#' @param FUN the function to call on the `mo` column to transform the microorganism codes, defaults to [mo_shortname()]
#' @param translate_ab a [character] of length 1 used as a template for the antimicrobial labels: the placeholders `ab`, `cid`, `group`, `atc_group1`, `atc_group2`, `atc` and `name` are replaced with the corresponding property of each antimicrobial, e.g. `"name (ab, atc)"`
#' @param ... arguments passed on to `FUN`
#' @inheritParams sir_df
#' @inheritParams base::formatC
#' @details The function [format()] calculates the resistance per bug-drug combination. Use `combine_SI = TRUE` (default) to test R vs. S+I and `combine_SI = FALSE` to test R+I vs. S.
#' @export
#' @rdname bug_drug_combinations
#' @return The function [bug_drug_combinations()] returns a [data.frame] with columns "mo", "ab", "S", "I", "R" and "total".
#' @examples
#' \donttest{
#' # example_isolates is a data set available in the AMR package.
#' # run ?example_isolates for more info.
#' example_isolates
#'
#' x <- bug_drug_combinations(example_isolates)
#' format(x, translate_ab = "name (atc)")
#'
#' # Use FUN to change the transformation of microorganism codes
#' bug_drug_combinations(example_isolates,
#'   FUN = mo_gramstain
#' )
#'
#' bug_drug_combinations(example_isolates,
#'   FUN = function(x) {
#'     ifelse(x == as.mo("Escherichia coli"),
#'       "E. coli",
#'       "Others"
#'     )
#'   }
#' )
#' }
bug_drug_combinations <- function(x,
                                  col_mo = NULL,
                                  FUN = mo_shortname,
                                  ...) {
  meet_criteria(x, allow_class = "data.frame", contains_column_class = "sir")
  meet_criteria(col_mo, allow_class = "character", is_in = colnames(x), has_length = 1, allow_NULL = TRUE)
  meet_criteria(FUN, allow_class = "function", has_length = 1)

  # try to find columns based on type
  # -- mo
  if (is.null(col_mo)) {
    col_mo <- search_type_in_df(x = x, type = "mo")
    stop_if(is.null(col_mo), "`col_mo` must be set")
  } else {
    stop_ifnot(col_mo %in% colnames(x), "column '", col_mo, "' (`col_mo`) not found")
  }

  # use dplyr and tidyr if they are available, they are much faster!
  if (identical(pivot_longer, import_fn("pivot_longer", "tidyr", error_on_fail = FALSE))) {
    out <- x %>%
      ungroup() %>%
      mutate(mo = FUN(ungroup(x)[, col_mo, drop = TRUE], ...)) %>%
      pivot_longer(where(is.sir), names_to = "ab") %>%
      group_by(across(c(group_vars(x), mo, ab))) %>%
      summarise(
        S = sum(value == "S", na.rm = TRUE),
        I = sum(value == "I", na.rm = TRUE),
        R = sum(value == "R", na.rm = TRUE),
        .groups = "drop"
      ) %>%
      mutate(total = S + I + R)
    out <- out %>% arrange(mo, ab)
    # a scalar if/else is required here: ifelse(FALSE, "grouped", character(0))
    # evaluates to NA and would put an NA entry into the class vector
    grouped_class <- if (is_null_or_grouped_tbl(x)) "grouped" else character(0)
    return(structure(out, class = c("bug_drug_combinations", grouped_class, class(out))))
  }

  # no dplyr or tidyr available, so use base R
  x.bak <- x
  x <- as.data.frame(x, stringsAsFactors = FALSE)
  x[, col_mo] <- FUN(x[, col_mo, drop = TRUE], ...)

  unique_mo <- sort(unique(x[, col_mo, drop = TRUE]))

  # select only groups and antibiotics
  if (is_null_or_grouped_tbl(x.bak)) {
    data_has_groups <- TRUE
    groups <- setdiff(names(attributes(x.bak)$groups), ".rows")
    x <- x[, c(groups, col_mo, colnames(x)[vapply(FUN.VALUE = logical(1), x, is.sir)]), drop = FALSE]
  } else {
    data_has_groups <- FALSE
    x <- x[, c(col_mo, names(which(vapply(FUN.VALUE = logical(1), x, is.sir)))), drop = FALSE]
  }

  # counts S/I/R per microorganism and antimicrobial for one (sub)set of the data
  run_it <- function(x) {
    # zero-row template, returned as-is when there is nothing to count
    empty <- data.frame(
      mo = character(0),
      ab = character(0),
      S = integer(0),
      I = integer(0),
      R = integer(0),
      total = integer(0),
      stringsAsFactors = FALSE
    )
    if (data_has_groups) {
      group_values <- unique(x[, which(colnames(x) %in% groups), drop = FALSE])
      rownames(group_values) <- NULL
      x <- x[, which(!colnames(x) %in% groups), drop = FALSE]
    }

    # the SIR columns do not change between microorganisms, so look them up once
    sir_cols <- names(which(vapply(FUN.VALUE = logical(1), x, is.sir)))

    # collect the per-microorganism results and bind them once at the end
    # instead of growing a data.frame with rbind() in every iteration
    pieces <- vector("list", length(unique_mo))
    for (i in seq_along(unique_mo)) {
      # filter on MO group and only select SIR columns
      x_mo_filter <- x[which(x[, col_mo, drop = TRUE] == unique_mo[i]), sir_cols, drop = FALSE]
      # turn and merge everything
      pivot <- lapply(x_mo_filter, function(x) {
        m <- as.matrix(table(x))
        data.frame(S = m["S", ], I = m["I", ], R = m["R", ], stringsAsFactors = FALSE)
      })
      merged <- do.call(rbind, pivot)
      out_group <- data.frame(
        mo = rep(unique_mo[i], NROW(merged)),
        ab = rownames(merged),
        S = merged$S,
        I = merged$I,
        R = merged$R,
        total = merged$S + merged$I + merged$R,
        stringsAsFactors = FALSE
      )
      if (data_has_groups) {
        if (nrow(group_values) < nrow(out_group)) {
          # repeat group_values for the number of rows in out_group
          repeated <- rep(seq_len(nrow(group_values)),
            each = nrow(out_group) / nrow(group_values)
          )
          group_values <- group_values[repeated, , drop = FALSE]
        }
        out_group <- cbind(group_values, out_group)
      }
      pieces[[i]] <- out_group
    }
    # rbind() on data.frames drops zero-row arguments, so the template only
    # survives when there are no results at all
    do.call(rbind, c(list(empty), pieces, list(stringsAsFactors = FALSE)))
  }

  # based on pm_apply_grouped_function
  apply_group <- function(.data, fn, groups, drop = FALSE, ...) {
    grouped <- pm_split_into_groups(.data, groups, drop)
    res <- do.call(rbind, unname(lapply(grouped, fn, ...)))
    if (any(groups %in% colnames(res))) {
      class(res) <- c("grouped_data", class(res))
      res <- pm_groups_set(res, groups[groups %in% colnames(res)])
    }
    res
  }

  if (data_has_groups) {
    out <- apply_group(x, run_it, groups)
  } else {
    out <- run_it(x)
  }
  rownames(out) <- NULL
  out <- out %>% arrange(mo, ab)
  out <- as_original_data_class(out, class(x.bak)) # will remove tibble groups
  # scalar if/else, see the note at the dplyr/tidyr return above
  grouped_class <- if (data_has_groups) "grouped" else character(0)
  structure(out, class = c("bug_drug_combinations", grouped_class, class(out)))
}
#' @method format bug_drug_combinations
#' @export
#' @rdname bug_drug_combinations
format.bug_drug_combinations <- function(x,
                                         translate_ab = "name (ab, atc)",
                                         language = get_AMR_locale(),
                                         minimum = 30,
                                         combine_SI = TRUE,
                                         add_ab_group = TRUE,
                                         remove_intrinsic_resistant = FALSE,
                                         decimal.mark = getOption("OutDec"),
                                         big.mark = ifelse(decimal.mark == ",", " ", ","),
                                         ...) {
  # Turns the long-format counts of bug_drug_combinations() into a wide table:
  # one row per antimicrobial, one column per microorganism, each cell holding
  # the resistance percentage followed by "(resistant/total)". Combinations
  # with fewer than `minimum` isolates are left out.
  meet_criteria(x, allow_class = "data.frame")
  meet_criteria(translate_ab, allow_class = c("character", "logical"), has_length = 1, allow_NA = TRUE)
  language <- validate_language(language)
  meet_criteria(minimum, allow_class = c("numeric", "integer"), has_length = 1, is_positive_or_zero = TRUE, is_finite = TRUE)
  meet_criteria(combine_SI, allow_class = "logical", has_length = 1)
  meet_criteria(add_ab_group, allow_class = "logical", has_length = 1)
  meet_criteria(remove_intrinsic_resistant, allow_class = "logical", has_length = 1)
  meet_criteria(decimal.mark, allow_class = "character", has_length = 1)
  meet_criteria(big.mark, allow_class = "character", has_length = 1)

  x.bak <- x
  if (inherits(x, "grouped")) {
    # bug_drug_combinations() has been run on groups, so de-group here
    warning_("in `format()`: formatting the output of `bug_drug_combinations()` does not support grouped variables, they were ignored")
    x <- as.data.frame(x, stringsAsFactors = FALSE)
    # row indices per unique mo/ab pair; "%%" separates the two parts of the key
    idx <- split(seq_len(nrow(x)), paste0(x$mo, "%%", x$ab))
    sum_per_pair <- function(values) {
      vapply(FUN.VALUE = double(1), idx, function(i) sum(values[i], na.rm = TRUE))
    }
    n_S <- sum_per_pair(x$S)
    n_I <- sum_per_pair(x$I)
    n_R <- sum_per_pair(x$R)
    x <- data.frame(
      mo = gsub("(.*)%%(.*)", "\\1", names(idx)),
      ab = gsub("(.*)%%(.*)", "\\2", names(idx)),
      S = n_S,
      I = n_I,
      R = n_R,
      total = n_S + n_I + n_R,
      stringsAsFactors = FALSE
    )
  }

  x <- as.data.frame(x, stringsAsFactors = FALSE)
  # which() drops rows where the comparison is NA, like subset() did
  x <- x[which(x$total >= minimum), , drop = FALSE]

  if (remove_intrinsic_resistant) {
    x <- x[which(x$R != x$total), , drop = FALSE]
  }
  if (combine_SI) {
    # R vs. S+I
    x$isolates <- x$R
  } else {
    # R+I vs. S
    x$isolates <- x$R + x$I
  }

  # builds the label of each antimicrobial by filling in the placeholders of
  # `format` (ab, cid, group, atc_group1, atc_group2, atc, name)
  give_ab_name <- function(ab, format, language) {
    if (!is.character(format) || is.na(format)) {
      # `translate_ab` may be a logical or NA according to its validation, but
      # using it as a template would label every drug "true"/"false"/NA.
      # NOTE(review): assumed semantics - TRUE falls back to the default
      # template, FALSE/NA leaves the antimicrobial codes untranslated; confirm
      if (isTRUE(format)) {
        format <- "name (ab, atc)"
      } else {
        return(as.character(ab))
      }
    }
    format <- tolower(format)
    # every antimicrobial occurs once per microorganism, so build each label once
    ab_unique <- unique(ab)
    labels <- rep(format, length(ab_unique))
    for (i in seq_along(ab_unique)) {
      labels[i] <- gsub("ab", as.character(as.ab(ab_unique[i])), labels[i], fixed = TRUE)
      labels[i] <- gsub("cid", ab_cid(ab_unique[i]), labels[i], fixed = TRUE)
      # the atc_group placeholders contain "group" and "atc", so they must be
      # filled in before those shorter placeholders are
      labels[i] <- gsub("atc_group1", ab_atc_group1(ab_unique[i], language = language), labels[i], fixed = TRUE)
      labels[i] <- gsub("atc_group2", ab_atc_group2(ab_unique[i], language = language), labels[i], fixed = TRUE)
      labels[i] <- gsub("group", ab_group(ab_unique[i], language = language), labels[i], fixed = TRUE)
      labels[i] <- gsub("atc", ab_atc(ab_unique[i], only_first = TRUE), labels[i], fixed = TRUE)
      labels[i] <- gsub("name", ab_name(ab_unique[i], language = language), labels[i], fixed = TRUE)
    }
    labels[match(ab, ab_unique)]
  }

  # replaces NA cells (combinations that were not available) in all columns
  remove_NAs <- function(.data) {
    cols <- colnames(.data)
    .data <- as.data.frame(lapply(.data, function(x) ifelse(is.na(x), " ", x)),
      stringsAsFactors = FALSE
    )
    colnames(.data) <- cols
    .data
  }

  y <- x %>%
    mutate(
      ab = as.ab(x$ab),
      ab_txt = give_ab_name(ab = x$ab, format = translate_ab, language = language)
    ) %>%
    group_by(ab, ab_txt, mo) %>%
    summarise(
      isolates = sum(isolates, na.rm = TRUE),
      total = sum(total, na.rm = TRUE)
    ) %>%
    ungroup()

  y <- y %>%
    mutate(txt = paste0(
      percentage(y$isolates / y$total, decimal.mark = decimal.mark, big.mark = big.mark),
      " (", trimws(format(y$isolates, big.mark = big.mark)), "/",
      trimws(format(y$total, big.mark = big.mark)), ")"
    )) %>%
    select(ab, ab_txt, mo, txt) %>%
    arrange(mo)

  # replace tidyr::pivot_wider() from here
  for (i in unique(y$mo)) {
    mo_group <- y[which(y$mo == i), c("ab", "txt"), drop = FALSE]
    colnames(mo_group) <- c("ab", i)
    rownames(mo_group) <- NULL
    y <- y %>%
      left_join(mo_group, by = "ab")
  }
  y <- y %>%
    distinct(ab, .keep_all = TRUE) %>%
    select(-mo, -txt) %>%
    # replace tidyr::pivot_wider() until here
    remove_NAs()

  # puts the group and drug label first and drops the raw `ab` code
  select_ab_vars <- function(.data) {
    .data[, c("ab_group", "ab_txt", colnames(.data)[!colnames(.data) %in% c("ab_group", "ab_txt", "ab")]), drop = FALSE]
  }

  y <- y %>%
    mutate(ab_group = ab_group(y$ab, language = language)) %>%
    select_ab_vars() %>%
    arrange(ab_group, ab_txt)
  # only show the group name on the first row of each group
  y <- y %>%
    mutate(ab_group = ifelse(y$ab_group != lag(y$ab_group) | is.na(lag(y$ab_group)), y$ab_group, " "))

  if (!add_ab_group) {
    y <- y %>%
      select(-ab_group) %>%
      rename("Drug" = ab_txt)
    colnames(y)[1] <- translate_into_language(colnames(y)[1], language, only_unknown = FALSE)
  } else {
    y <- y %>%
      rename(
        "Group" = ab_group,
        "Drug" = ab_txt
      )
  }

  if (!is.null(language)) {
    colnames(y) <- translate_into_language(colnames(y), language, only_unknown = FALSE)
  }

  if (remove_intrinsic_resistant) {
    # a cell starts with the percentage, so anchor the match to the start: an
    # unanchored "100" would also match counts such as "25% (100/400)".
    # Skip empty columns, since all(logical(0)) is TRUE and would otherwise
    # drop every column of a zero-row table.
    fully_resistant <- vapply(
      FUN.VALUE = logical(1), y,
      function(col) {
        length(col) > 0 && !anyNA(col) && all(col %like% "^\\s*100", na.rm = TRUE)
      }
    )
    y <- y[, !fully_resistant, drop = FALSE]
  }

  rownames(y) <- NULL
  as_original_data_class(y, class(x.bak)) # will remove tibble groups
}
#' @method print bug_drug_combinations
#' @export
print.bug_drug_combinations <- function(x, ...) {
  # Prints the object as its underlying data class, followed by a hint about
  # format(). Returns `x` invisibly, as print methods are expected to do.
  x_class <- class(x)
  # strip the classes added by bug_drug_combinations(); also drop NA entries
  # that may be present in the class vector
  keep <- !is.na(x_class) & !x_class %in% c("bug_drug_combinations", "grouped")
  print(
    set_clean_class(x,
      new_class = x_class[keep]
    ),
    ...
  )
  group_note <- if (inherits(x, "grouped")) {
    " Note: The grouping variable(s) will be ignored."
  } else {
    " "
  }
  message_("Use 'format()' on this result to get a publishable/printable format.",
    group_note,
    as_note = FALSE
  )
  invisible(x)
}