2018-08-23 00:40:36 +02:00
# ==================================================================== #
# TITLE #
# Antimicrobial Resistance (AMR) Analysis #
# #
2019-01-02 23:24:07 +01:00
# SOURCE #
# https://gitlab.com/msberends/AMR #
2018-08-23 00:40:36 +02:00
# #
# LICENCE #
2020-01-05 17:22:09 +01:00
# (c) 2018-2020 Berends MS, Luz CF et al. #
2018-08-23 00:40:36 +02:00
# #
2019-01-02 23:24:07 +01:00
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
# the Free Software Foundation. #
# #
2020-01-05 17:22:09 +01:00
# We created this package for both routine data analysis and academic #
# research and it was publicly released in the hope that it will be #
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
2019-04-05 18:47:39 +02:00
# Visit our website for more info: https://msberends.gitlab.io/AMR. #
2018-08-23 00:40:36 +02:00
# ==================================================================== #
2019-06-27 11:57:45 +02:00
#' @importFrom rlang enquos as_label
dots2vars <- function ( ... ) {
2019-08-20 11:40:54 +02:00
# this function is to give more informative output about
2019-11-10 12:16:56 +01:00
# variable names in count_* and proportion_* functions
2019-06-27 11:57:45 +02:00
paste (
unlist (
lapply ( enquos ( ... ) ,
function ( x ) {
l <- as_label ( x )
if ( l != " ." ) {
l
} else {
character ( 0 )
}
} )
) ,
collapse = " , " )
}
2018-10-19 13:53:31 +02:00
#' @importFrom dplyr %>% pull all_vars any_vars filter_all funs mutate_all
2019-11-03 22:24:42 +01:00
#' @importFrom cleaner percentage
2018-08-23 00:40:36 +02:00
rsi_calc <- function ( ... ,
2019-07-01 14:03:15 +02:00
ab_result ,
minimum = 0 ,
as_percent = FALSE ,
only_all_tested = FALSE ,
only_count = FALSE ) {
2019-11-10 12:16:56 +01:00
2019-06-27 11:57:45 +02:00
data_vars <- dots2vars ( ... )
2019-11-10 12:16:56 +01:00
2018-08-23 00:40:36 +02:00
if ( ! is.numeric ( minimum ) ) {
2019-10-11 17:21:02 +02:00
stop ( " `minimum` must be numeric" , call. = FALSE )
2018-08-23 00:40:36 +02:00
}
if ( ! is.logical ( as_percent ) ) {
2019-10-11 17:21:02 +02:00
stop ( " `as_percent` must be logical" , call. = FALSE )
2018-08-23 00:40:36 +02:00
}
2019-07-01 14:03:15 +02:00
if ( ! is.logical ( only_all_tested ) ) {
2019-10-11 17:21:02 +02:00
stop ( " `only_all_tested` must be logical" , call. = FALSE )
2018-10-19 13:53:31 +02:00
}
2019-11-10 12:16:56 +01:00
2018-08-24 11:08:20 +02:00
dots_df <- ...elt ( 1 ) # it needs this evaluation
dots <- base :: eval ( base :: substitute ( base :: alist ( ... ) ) )
2019-07-01 14:03:15 +02:00
if ( " also_single_tested" %in% names ( dots ) ) {
2019-11-10 12:16:56 +01:00
stop ( " `also_single_tested` was replaced by `only_all_tested`. Please read Details in the help page (`?proportion`) as this may have a considerable impact on your analysis." , call. = FALSE )
2019-07-01 14:03:15 +02:00
}
2018-08-24 11:08:20 +02:00
ndots <- length ( dots )
2019-11-10 12:16:56 +01:00
if ( " data.frame" %in% class ( dots_df ) ) {
# data.frame passed with other columns, like: example_isolates %>% proportion_S(amcl, gent)
dots <- as.character ( dots )
dots <- dots [dots != " ." ]
2018-08-24 11:08:20 +02:00
if ( length ( dots ) == 0 | all ( dots == " df" ) ) {
2019-11-10 12:16:56 +01:00
# for complete data.frames, like example_isolates %>% select(amcl, gent) %>% proportion_S()
2018-08-24 11:08:20 +02:00
# and the old rsi function, that has "df" as name of the first parameter
x <- dots_df
} else {
x <- dots_df [ , dots ]
2018-08-23 00:40:36 +02:00
}
2018-08-24 11:08:20 +02:00
} else if ( ndots == 1 ) {
2019-11-10 12:16:56 +01:00
# only 1 variable passed (can also be data.frame), like: proportion_S(example_isolates$amcl) and example_isolates$amcl %>% proportion_S()
2018-08-24 11:08:20 +02:00
x <- dots_df
2018-08-23 00:40:36 +02:00
} else {
2019-11-10 12:16:56 +01:00
# multiple variables passed without pipe, like: proportion_S(example_isolates$amcl, example_isolates$gent)
2018-08-24 11:08:20 +02:00
x <- NULL
try ( x <- as.data.frame ( dots ) , silent = TRUE )
if ( is.null ( x ) ) {
2019-11-10 12:16:56 +01:00
# support for: with(example_isolates, proportion_S(amcl, gent))
2018-08-24 11:08:20 +02:00
x <- as.data.frame ( rlang :: list2 ( ... ) )
}
2018-08-23 00:40:36 +02:00
}
2019-11-10 12:16:56 +01:00
2019-05-10 16:44:59 +02:00
if ( is.null ( x ) ) {
warning ( " argument is NULL (check if columns exist): returning NA" , call. = FALSE )
return ( NA )
}
2019-11-10 12:16:56 +01:00
2018-08-23 00:40:36 +02:00
print_warning <- FALSE
2019-11-10 12:16:56 +01:00
2019-07-01 14:03:15 +02:00
ab_result <- as.rsi ( ab_result )
2019-11-10 12:16:56 +01:00
2018-08-23 00:40:36 +02:00
if ( is.data.frame ( x ) ) {
2018-10-19 13:53:31 +02:00
rsi_integrity_check <- character ( 0 )
2019-10-11 17:21:02 +02:00
for ( i in seq_len ( ncol ( x ) ) ) {
2018-10-19 13:53:31 +02:00
# check integrity of columns: force rsi class
2018-08-23 00:40:36 +02:00
if ( ! is.rsi ( x %>% pull ( i ) ) ) {
2018-10-19 13:53:31 +02:00
rsi_integrity_check <- c ( rsi_integrity_check , x %>% pull ( i ) %>% as.character ( ) )
2019-07-01 14:03:15 +02:00
x [ , i ] <- suppressWarnings ( x %>% pull ( i ) %>% as.rsi ( ) ) # warning will be given later
2018-08-23 00:40:36 +02:00
print_warning <- TRUE
}
}
2018-10-19 13:53:31 +02:00
if ( length ( rsi_integrity_check ) > 0 ) {
# this will give a warning for invalid results, of all input columns (so only 1 warning)
rsi_integrity_check <- as.rsi ( rsi_integrity_check )
}
2019-11-10 12:16:56 +01:00
2019-07-01 14:03:15 +02:00
if ( only_all_tested == TRUE ) {
# THE NUMBER OF ISOLATES WHERE *ALL* ABx ARE S/I/R
2019-07-02 16:48:52 +02:00
x <- apply ( X = x %>% mutate_all ( as.integer ) ,
MARGIN = 1 ,
FUN = base :: min )
numerator <- sum ( as.integer ( x ) %in% as.integer ( ab_result ) , na.rm = TRUE )
denominator <- length ( x ) - sum ( is.na ( x ) )
2019-10-11 17:21:02 +02:00
2018-10-19 13:53:31 +02:00
} else {
2019-07-01 14:03:15 +02:00
# THE NUMBER OF ISOLATES WHERE *ANY* ABx IS S/I/R
other_values <- base :: setdiff ( c ( NA , levels ( ab_result ) ) , ab_result )
2019-10-11 17:21:02 +02:00
other_values_filter <- base :: apply ( x , 1 , function ( y ) {
base :: all ( y %in% other_values ) & base :: any ( is.na ( y ) )
} )
2019-07-01 14:03:15 +02:00
numerator <- x %>% filter_all ( any_vars ( . %in% ab_result ) ) %>% nrow ( )
denominator <- x %>% filter ( ! other_values_filter ) %>% nrow ( )
2018-10-19 13:53:31 +02:00
}
2018-08-23 00:40:36 +02:00
} else {
2019-07-01 14:03:15 +02:00
# x is not a data.frame
2018-08-23 00:40:36 +02:00
if ( ! is.rsi ( x ) ) {
x <- as.rsi ( x )
print_warning <- TRUE
}
2019-07-01 14:03:15 +02:00
numerator <- sum ( x %in% ab_result , na.rm = TRUE )
denominator <- sum ( x %in% levels ( ab_result ) , na.rm = TRUE )
2018-08-23 00:40:36 +02:00
}
2019-11-10 12:16:56 +01:00
2018-08-23 00:40:36 +02:00
if ( print_warning == TRUE ) {
warning ( " Increase speed by transforming to class `rsi` on beforehand: df %>% mutate_if(is.rsi.eligible, as.rsi)" ,
call. = FALSE )
}
2019-11-10 12:16:56 +01:00
2018-08-23 00:40:36 +02:00
if ( only_count == TRUE ) {
2019-07-01 14:03:15 +02:00
return ( numerator )
2018-08-23 00:40:36 +02:00
}
2019-11-10 12:16:56 +01:00
2019-07-01 14:03:15 +02:00
if ( denominator < minimum ) {
if ( data_vars != " " ) {
data_vars <- paste ( " for" , data_vars )
}
2019-08-20 11:40:54 +02:00
warning ( " Introducing NA: only " , denominator , " results available" , data_vars , " (`minimum` was set to " , minimum , " )." , call. = FALSE )
2019-07-01 14:03:15 +02:00
fraction <- NA
2018-10-12 16:35:18 +02:00
} else {
2019-07-01 14:03:15 +02:00
fraction <- numerator / denominator
2018-08-23 00:40:36 +02:00
}
2019-11-10 12:16:56 +01:00
2018-08-23 00:40:36 +02:00
if ( as_percent == TRUE ) {
2019-09-30 16:45:36 +02:00
percentage ( fraction , digits = 1 )
2018-08-23 00:40:36 +02:00
} else {
2019-07-01 14:03:15 +02:00
fraction
2018-08-23 00:40:36 +02:00
}
}
2019-05-13 10:10:16 +02:00
2019-11-11 10:46:39 +01:00
#' @importFrom dplyr %>% summarise_if mutate select everything bind_rows arrange
#' @importFrom tidyr pivot_longer
2019-11-10 12:16:56 +01:00
rsi_calc_df <- function ( type , # "proportion" or "count"
2019-05-13 10:10:16 +02:00
data ,
translate_ab = " name" ,
language = get_locale ( ) ,
minimum = 30 ,
as_percent = FALSE ,
combine_SI = TRUE ,
combine_IR = FALSE ,
combine_SI_missing = FALSE ) {
2019-11-10 12:16:56 +01:00
2020-02-14 19:54:13 +01:00
check_dataset_integrity ( )
2019-05-13 10:10:16 +02:00
if ( ! " data.frame" %in% class ( data ) ) {
stop ( paste0 ( " `" , type , " _df` must be called on a data.frame" ) , call. = FALSE )
}
2019-11-10 12:16:56 +01:00
2019-05-13 10:10:16 +02:00
if ( isTRUE ( combine_IR ) & isTRUE ( combine_SI_missing ) ) {
combine_SI <- FALSE
}
if ( isTRUE ( combine_SI ) & isTRUE ( combine_IR ) ) {
2019-08-25 22:53:22 +02:00
stop ( " either `combine_SI` or `combine_IR` can be TRUE, not both" , call. = FALSE )
2019-05-13 10:10:16 +02:00
}
2019-11-10 12:16:56 +01:00
2019-08-25 22:53:22 +02:00
if ( ! any ( sapply ( data , is.rsi ) , na.rm = TRUE ) ) {
2019-05-13 10:10:16 +02:00
stop ( " No columns with class 'rsi' found. See ?as.rsi." , call. = FALSE )
}
2019-11-10 12:16:56 +01:00
2019-05-13 10:10:16 +02:00
if ( as.character ( translate_ab ) %in% c ( " TRUE" , " official" ) ) {
translate_ab <- " name"
}
2019-11-10 12:16:56 +01:00
2019-08-25 22:53:22 +02:00
get_summaryfunction <- function ( int , type ) {
2019-11-10 12:16:56 +01:00
# look for proportion_S, count_S, etc:
2019-05-13 10:10:16 +02:00
int_fn <- get ( paste0 ( type , " _" , int ) , envir = asNamespace ( " AMR" ) )
2019-11-10 12:16:56 +01:00
suppressWarnings (
if ( type == " proportion" ) {
summ <- summarise_if ( .tbl = data ,
.predicate = is.rsi ,
.funs = int_fn ,
minimum = minimum ,
as_percent = as_percent )
} else if ( type == " count" ) {
summ <- summarise_if ( .tbl = data ,
.predicate = is.rsi ,
.funs = int_fn )
}
)
2019-05-13 10:10:16 +02:00
summ %>%
2019-06-13 14:28:46 +02:00
mutate ( interpretation = int ) %>%
select ( interpretation , everything ( ) )
2019-05-13 10:10:16 +02:00
}
2019-11-10 12:16:56 +01:00
2019-08-25 22:53:22 +02:00
resS <- get_summaryfunction ( " S" , type )
resI <- get_summaryfunction ( " I" , type )
resR <- get_summaryfunction ( " R" , type )
resSI <- get_summaryfunction ( " SI" , type )
resIR <- get_summaryfunction ( " IR" , type )
2019-05-13 10:10:16 +02:00
data.groups <- group_vars ( data )
2019-11-10 12:16:56 +01:00
2019-05-13 10:10:16 +02:00
if ( isFALSE ( combine_SI ) & isFALSE ( combine_IR ) ) {
res <- bind_rows ( resS , resI , resR ) %>%
2019-06-13 14:28:46 +02:00
mutate ( interpretation = factor ( interpretation ,
2019-05-13 10:10:16 +02:00
levels = c ( " S" , " I" , " R" ) ,
ordered = TRUE ) )
2019-11-10 12:16:56 +01:00
2019-05-13 10:10:16 +02:00
} else if ( isTRUE ( combine_IR ) ) {
res <- bind_rows ( resS , resIR ) %>%
2019-06-13 14:28:46 +02:00
mutate ( interpretation = factor ( interpretation ,
2019-05-13 10:10:16 +02:00
levels = c ( " S" , " IR" ) ,
ordered = TRUE ) )
2019-11-10 12:16:56 +01:00
2019-05-13 10:10:16 +02:00
} else if ( isTRUE ( combine_SI ) ) {
res <- bind_rows ( resSI , resR ) %>%
2019-06-13 14:28:46 +02:00
mutate ( interpretation = factor ( interpretation ,
2019-05-13 10:10:16 +02:00
levels = c ( " SI" , " R" ) ,
ordered = TRUE ) )
}
2019-11-10 12:16:56 +01:00
2019-05-13 10:10:16 +02:00
res <- res %>%
2019-11-11 10:46:39 +01:00
pivot_longer ( - c ( interpretation , data.groups ) , names_to = " antibiotic" ) %>%
select ( antibiotic , everything ( ) ) %>%
arrange ( antibiotic , interpretation )
2019-11-10 12:16:56 +01:00
2019-05-13 10:10:16 +02:00
if ( ! translate_ab == FALSE ) {
2020-02-14 19:54:13 +01:00
res <- res %>% mutate ( antibiotic = ab_property ( antibiotic , property = translate_ab , language = language ) )
2019-05-13 10:10:16 +02:00
}
2019-11-10 12:16:56 +01:00
2019-11-11 10:46:39 +01:00
as.data.frame ( res , stringsAsFactors = FALSE )
2019-05-13 10:10:16 +02:00
}