2018-08-23 00:40:36 +02:00
# ==================================================================== #
# TITLE #
# Antimicrobial Resistance (AMR) Analysis #
# #
2019-01-02 23:24:07 +01:00
# SOURCE #
# https://gitlab.com/msberends/AMR #
2018-08-23 00:40:36 +02:00
# #
# LICENCE #
2020-01-05 17:22:09 +01:00
# (c) 2018-2020 Berends MS, Luz CF et al. #
2018-08-23 00:40:36 +02:00
# #
2019-01-02 23:24:07 +01:00
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
# the Free Software Foundation. #
# #
2020-01-05 17:22:09 +01:00
# We created this package for both routine data analysis and academic #
# research and it was publicly released in the hope that it will be #
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
2019-04-05 18:47:39 +02:00
# Visit our website for more info: https://msberends.gitlab.io/AMR. #
2018-08-23 00:40:36 +02:00
# ==================================================================== #
2019-06-27 11:57:45 +02:00
dots2vars <- function ( ... ) {
2019-08-20 11:40:54 +02:00
# this function is to give more informative output about
2019-11-10 12:16:56 +01:00
# variable names in count_* and proportion_* functions
2020-05-16 13:05:47 +02:00
dots <- substitute ( list ( ... ) )
paste ( as.character ( dots ) [2 : length ( dots ) ] , collapse = " , " )
2019-06-27 11:57:45 +02:00
}
2018-08-23 00:40:36 +02:00
rsi_calc <- function ( ... ,
2019-07-01 14:03:15 +02:00
ab_result ,
minimum = 0 ,
as_percent = FALSE ,
only_all_tested = FALSE ,
only_count = FALSE ) {
2019-11-10 12:16:56 +01:00
2019-06-27 11:57:45 +02:00
data_vars <- dots2vars ( ... )
2019-11-10 12:16:56 +01:00
2018-08-23 00:40:36 +02:00
if ( ! is.numeric ( minimum ) ) {
2019-10-11 17:21:02 +02:00
stop ( " `minimum` must be numeric" , call. = FALSE )
2018-08-23 00:40:36 +02:00
}
if ( ! is.logical ( as_percent ) ) {
2019-10-11 17:21:02 +02:00
stop ( " `as_percent` must be logical" , call. = FALSE )
2018-08-23 00:40:36 +02:00
}
2019-07-01 14:03:15 +02:00
if ( ! is.logical ( only_all_tested ) ) {
2019-10-11 17:21:02 +02:00
stop ( " `only_all_tested` must be logical" , call. = FALSE )
2018-10-19 13:53:31 +02:00
}
2019-11-10 12:16:56 +01:00
2020-05-19 12:08:49 +02:00
dots_df <- switch ( 1 , ... )
2018-08-24 11:08:20 +02:00
dots <- base :: eval ( base :: substitute ( base :: alist ( ... ) ) )
2019-07-01 14:03:15 +02:00
if ( " also_single_tested" %in% names ( dots ) ) {
2019-11-10 12:16:56 +01:00
stop ( " `also_single_tested` was replaced by `only_all_tested`. Please read Details in the help page (`?proportion`) as this may have a considerable impact on your analysis." , call. = FALSE )
2019-07-01 14:03:15 +02:00
}
2018-08-24 11:08:20 +02:00
ndots <- length ( dots )
2019-11-10 12:16:56 +01:00
if ( " data.frame" %in% class ( dots_df ) ) {
2020-05-19 12:08:49 +02:00
# data.frame passed with other columns, like: example_isolates %>% proportion_S(AMC, GEN)
2019-11-10 12:16:56 +01:00
dots <- as.character ( dots )
dots <- dots [dots != " ." ]
2018-08-24 11:08:20 +02:00
if ( length ( dots ) == 0 | all ( dots == " df" ) ) {
2020-05-19 12:08:49 +02:00
# for complete data.frames, like example_isolates %>% select(AMC, GEN) %>% proportion_S()
2020-05-16 13:05:47 +02:00
# and the old rsi function, which has "df" as name of the first parameter
2018-08-24 11:08:20 +02:00
x <- dots_df
} else {
2020-05-16 13:05:47 +02:00
x <- dots_df [ , dots [dots %in% colnames ( dots_df ) ] ]
2018-08-23 00:40:36 +02:00
}
2018-08-24 11:08:20 +02:00
} else if ( ndots == 1 ) {
2020-05-19 12:08:49 +02:00
# only 1 variable passed (can also be data.frame), like: proportion_S(example_isolates$AMC) and example_isolates$AMC %>% proportion_S()
2018-08-24 11:08:20 +02:00
x <- dots_df
2018-08-23 00:40:36 +02:00
} else {
2020-05-19 12:08:49 +02:00
# multiple variables passed without pipe, like: proportion_S(example_isolates$AMC, example_isolates$GEN)
2018-08-24 11:08:20 +02:00
x <- NULL
try ( x <- as.data.frame ( dots ) , silent = TRUE )
if ( is.null ( x ) ) {
2020-05-16 13:05:47 +02:00
# support for example_isolates %>% group_by(hospital_id) %>% summarise(amox = susceptibility(GEN, AMX))
x <- as.data.frame ( list ( ... ) )
2018-08-24 11:08:20 +02:00
}
2018-08-23 00:40:36 +02:00
}
2019-11-10 12:16:56 +01:00
2019-05-10 16:44:59 +02:00
if ( is.null ( x ) ) {
warning ( " argument is NULL (check if columns exist): returning NA" , call. = FALSE )
return ( NA )
}
2019-11-10 12:16:56 +01:00
2018-08-23 00:40:36 +02:00
print_warning <- FALSE
2019-11-10 12:16:56 +01:00
2019-07-01 14:03:15 +02:00
ab_result <- as.rsi ( ab_result )
2019-11-10 12:16:56 +01:00
2018-08-23 00:40:36 +02:00
if ( is.data.frame ( x ) ) {
2018-10-19 13:53:31 +02:00
rsi_integrity_check <- character ( 0 )
2019-10-11 17:21:02 +02:00
for ( i in seq_len ( ncol ( x ) ) ) {
2018-10-19 13:53:31 +02:00
# check integrity of columns: force rsi class
2018-08-23 00:40:36 +02:00
if ( ! is.rsi ( x %>% pull ( i ) ) ) {
2018-10-19 13:53:31 +02:00
rsi_integrity_check <- c ( rsi_integrity_check , x %>% pull ( i ) %>% as.character ( ) )
2019-07-01 14:03:15 +02:00
x [ , i ] <- suppressWarnings ( x %>% pull ( i ) %>% as.rsi ( ) ) # warning will be given later
2018-08-23 00:40:36 +02:00
print_warning <- TRUE
}
}
2018-10-19 13:53:31 +02:00
if ( length ( rsi_integrity_check ) > 0 ) {
# this will give a warning for invalid results, of all input columns (so only 1 warning)
rsi_integrity_check <- as.rsi ( rsi_integrity_check )
}
2020-05-16 13:05:47 +02:00
2019-07-01 14:03:15 +02:00
if ( only_all_tested == TRUE ) {
# THE NUMBER OF ISOLATES WHERE *ALL* ABx ARE S/I/R
2020-05-16 20:08:21 +02:00
x <- apply ( X = as.data.frame ( lapply ( x , as.integer ) , stringsAsFactors = FALSE ) ,
2019-07-02 16:48:52 +02:00
MARGIN = 1 ,
FUN = base :: min )
numerator <- sum ( as.integer ( x ) %in% as.integer ( ab_result ) , na.rm = TRUE )
denominator <- length ( x ) - sum ( is.na ( x ) )
2019-10-11 17:21:02 +02:00
2018-10-19 13:53:31 +02:00
} else {
2019-07-01 14:03:15 +02:00
# THE NUMBER OF ISOLATES WHERE *ANY* ABx IS S/I/R
other_values <- base :: setdiff ( c ( NA , levels ( ab_result ) ) , ab_result )
2019-10-11 17:21:02 +02:00
other_values_filter <- base :: apply ( x , 1 , function ( y ) {
base :: all ( y %in% other_values ) & base :: any ( is.na ( y ) )
} )
2020-05-16 13:05:47 +02:00
numerator <- sum ( as.logical ( by ( x , seq_len ( nrow ( x ) ) , function ( row ) any ( unlist ( row ) %in% ab_result , na.rm = TRUE ) ) ) )
denominator <- nrow ( x [ ! other_values_filter , ] )
2018-10-19 13:53:31 +02:00
}
2018-08-23 00:40:36 +02:00
} else {
2019-07-01 14:03:15 +02:00
# x is not a data.frame
2018-08-23 00:40:36 +02:00
if ( ! is.rsi ( x ) ) {
x <- as.rsi ( x )
print_warning <- TRUE
}
2019-07-01 14:03:15 +02:00
numerator <- sum ( x %in% ab_result , na.rm = TRUE )
denominator <- sum ( x %in% levels ( ab_result ) , na.rm = TRUE )
2018-08-23 00:40:36 +02:00
}
2019-11-10 12:16:56 +01:00
2018-08-23 00:40:36 +02:00
if ( print_warning == TRUE ) {
2020-05-27 16:37:49 +02:00
warning ( " Increase speed by transforming to class <rsi> on beforehand: your_data %>% mutate_if(is.rsi.eligible, as.rsi)" ,
2018-08-23 00:40:36 +02:00
call. = FALSE )
}
2019-11-10 12:16:56 +01:00
2018-08-23 00:40:36 +02:00
if ( only_count == TRUE ) {
2019-07-01 14:03:15 +02:00
return ( numerator )
2018-08-23 00:40:36 +02:00
}
2019-11-10 12:16:56 +01:00
2019-07-01 14:03:15 +02:00
if ( denominator < minimum ) {
if ( data_vars != " " ) {
data_vars <- paste ( " for" , data_vars )
}
2019-08-20 11:40:54 +02:00
warning ( " Introducing NA: only " , denominator , " results available" , data_vars , " (`minimum` was set to " , minimum , " )." , call. = FALSE )
2019-07-01 14:03:15 +02:00
fraction <- NA
2018-10-12 16:35:18 +02:00
} else {
2019-07-01 14:03:15 +02:00
fraction <- numerator / denominator
2018-08-23 00:40:36 +02:00
}
2019-11-10 12:16:56 +01:00
2018-08-23 00:40:36 +02:00
if ( as_percent == TRUE ) {
2019-09-30 16:45:36 +02:00
percentage ( fraction , digits = 1 )
2018-08-23 00:40:36 +02:00
} else {
2019-07-01 14:03:15 +02:00
fraction
2018-08-23 00:40:36 +02:00
}
}
2019-05-13 10:10:16 +02:00
2020-05-16 13:05:47 +02:00
rsi_calc_df <- function ( type , # "proportion", "count" or "both"
2019-05-13 10:10:16 +02:00
data ,
translate_ab = " name" ,
language = get_locale ( ) ,
minimum = 30 ,
as_percent = FALSE ,
combine_SI = TRUE ,
combine_IR = FALSE ,
combine_SI_missing = FALSE ) {
2019-11-10 12:16:56 +01:00
2020-02-14 19:54:13 +01:00
check_dataset_integrity ( )
2019-05-13 10:10:16 +02:00
if ( ! " data.frame" %in% class ( data ) ) {
stop ( paste0 ( " `" , type , " _df` must be called on a data.frame" ) , call. = FALSE )
}
2019-11-10 12:16:56 +01:00
2019-05-13 10:10:16 +02:00
if ( isTRUE ( combine_IR ) & isTRUE ( combine_SI_missing ) ) {
combine_SI <- FALSE
}
if ( isTRUE ( combine_SI ) & isTRUE ( combine_IR ) ) {
2019-08-25 22:53:22 +02:00
stop ( " either `combine_SI` or `combine_IR` can be TRUE, not both" , call. = FALSE )
2019-05-13 10:10:16 +02:00
}
2019-11-10 12:16:56 +01:00
2019-08-25 22:53:22 +02:00
if ( ! any ( sapply ( data , is.rsi ) , na.rm = TRUE ) ) {
2020-05-27 16:37:49 +02:00
stop ( " No columns with class <rsi> found. See ?as.rsi." , call. = FALSE )
2019-05-13 10:10:16 +02:00
}
2019-11-10 12:16:56 +01:00
2019-05-13 10:10:16 +02:00
if ( as.character ( translate_ab ) %in% c ( " TRUE" , " official" ) ) {
translate_ab <- " name"
}
2020-05-16 13:05:47 +02:00
# select only groups and antibiotics
if ( has_groups ( data ) ) {
data_has_groups <- TRUE
groups <- setdiff ( names ( get_groups ( data ) ) , " .rows" ) # get_groups is from poorman.R
data <- data [ , c ( groups , colnames ( data ) [sapply ( data , is.rsi ) ] ) , drop = FALSE ]
} else {
data_has_groups <- FALSE
data <- data [ , colnames ( data ) [sapply ( data , is.rsi ) ] , drop = FALSE ]
}
2019-11-10 12:16:56 +01:00
2020-05-16 13:05:47 +02:00
data <- as.data.frame ( data , stringsAsFactors = FALSE )
if ( isTRUE ( combine_SI ) | isTRUE ( combine_IR ) ) {
for ( i in seq_len ( ncol ( data ) ) ) {
if ( is.rsi ( data [ , i , drop = TRUE ] ) ) {
data [ , i ] <- as.character ( data [ , i , drop = TRUE ] )
if ( isTRUE ( combine_SI ) ) {
data [ , i ] <- gsub ( " (I|S)" , " SI" , data [ , i , drop = TRUE ] )
} else if ( isTRUE ( combine_IR ) ) {
data [ , i ] <- gsub ( " (I|R)" , " IR" , data [ , i , drop = TRUE ] )
}
2019-11-10 12:16:56 +01:00
}
2020-05-16 13:05:47 +02:00
}
2019-05-13 10:10:16 +02:00
}
2019-11-10 12:16:56 +01:00
2020-05-16 13:05:47 +02:00
sum_it <- function ( .data ) {
out <- data.frame ( antibiotic = character ( 0 ) ,
interpretation = character ( 0 ) ,
value = double ( 0 ) ,
isolates <- integer ( 0 ) ,
stringsAsFactors = FALSE )
if ( data_has_groups ) {
group_values <- unique ( .data [ , which ( colnames ( .data ) %in% groups ) , drop = FALSE ] )
rownames ( group_values ) <- NULL
.data <- .data [ , which ( ! colnames ( .data ) %in% groups ) , drop = FALSE ]
}
for ( i in seq_len ( ncol ( .data ) ) ) {
2020-06-09 16:18:03 +02:00
values <- .data [ , i , drop = TRUE ]
if ( isTRUE ( combine_SI ) ) {
values <- factor ( values , levels = c ( " SI" , " R" ) , ordered = TRUE )
} else if ( isTRUE ( combine_IR ) ) {
values <- factor ( values , levels = c ( " S" , " IR" ) , ordered = TRUE )
} else {
values <- factor ( values , levels = c ( " S" , " I" , " R" ) , ordered = TRUE )
}
col_results <- as.data.frame ( as.matrix ( table ( values ) ) )
2020-05-16 13:05:47 +02:00
col_results $ interpretation <- rownames ( col_results )
col_results $ isolates <- col_results [ , 1 , drop = TRUE ]
2020-06-09 16:18:03 +02:00
if ( NROW ( col_results ) > 0 && sum ( col_results $ isolates , na.rm = TRUE ) > 0 ) {
2020-05-16 13:05:47 +02:00
if ( sum ( col_results $ isolates , na.rm = TRUE ) >= minimum ) {
col_results $ value <- col_results $ isolates / sum ( col_results $ isolates , na.rm = TRUE )
} else {
col_results $ value <- rep ( NA_real_ , NROW ( col_results ) )
}
2020-05-16 20:08:21 +02:00
out_new <- data.frame ( antibiotic = ifelse ( isFALSE ( translate_ab ) ,
colnames ( .data ) [i ] ,
ab_property ( colnames ( .data ) [i ] , property = translate_ab , language = language ) ) ,
2020-05-16 13:05:47 +02:00
interpretation = col_results $ interpretation ,
value = col_results $ value ,
isolates = col_results $ isolates ,
stringsAsFactors = FALSE )
if ( data_has_groups ) {
2020-06-09 16:18:03 +02:00
if ( nrow ( group_values ) < nrow ( out_new ) ) {
# repeat group_values for the number of rows in out_new
repeated <- rep ( seq_len ( nrow ( group_values ) ) ,
each = nrow ( out_new ) / nrow ( group_values ) )
group_values <- group_values [repeated , , drop = FALSE ]
}
2020-05-16 13:05:47 +02:00
out_new <- cbind ( group_values , out_new )
}
out <- rbind ( out , out_new )
}
}
out
}
2019-11-10 12:16:56 +01:00
2020-05-16 13:05:47 +02:00
# support dplyr groups
apply_group <- function ( .data , fn , groups , ... ) {
grouped <- split ( x = .data , f = lapply ( groups , function ( x , .data ) as.factor ( .data [ , x ] ) , .data ) )
res <- do.call ( rbind , unname ( lapply ( grouped , fn , ... ) ) )
if ( any ( groups %in% colnames ( res ) ) ) {
class ( res ) <- c ( " grouped_data" , class ( res ) )
attr ( res , " groups" ) <- groups [groups %in% colnames ( res ) ]
}
res
2019-05-13 10:10:16 +02:00
}
2019-11-10 12:16:56 +01:00
2020-05-16 13:05:47 +02:00
if ( data_has_groups ) {
out <- apply_group ( data , " sum_it" , groups )
} else {
out <- sum_it ( data )
}
2019-11-10 12:16:56 +01:00
2020-05-16 13:05:47 +02:00
# apply factors for right sorting in interpretation
if ( isTRUE ( combine_SI ) ) {
out $ interpretation <- factor ( out $ interpretation , levels = c ( " SI" , " R" ) , ordered = TRUE )
} else if ( isTRUE ( combine_IR ) ) {
out $ interpretation <- factor ( out $ interpretation , levels = c ( " S" , " IR" ) , ordered = TRUE )
} else {
2020-06-17 01:39:30 +02:00
# don't use as.rsi() here, as it would add the class <rsi> and we would like
# the same data structure as output, regardless of input
out $ interpretation <- factor ( out $ interpretation , levels = c ( " S" , " I" , " R" ) , ordered = TRUE )
2019-05-13 10:10:16 +02:00
}
2019-11-10 12:16:56 +01:00
2020-05-16 13:05:47 +02:00
if ( data_has_groups ) {
# ordering by the groups and two more: "antibiotic" and "interpretation"
2020-06-09 16:18:03 +02:00
out <- ungroup ( out [do.call ( " order" , out [ , seq_len ( length ( groups ) + 2 ) ] ) , ] )
2020-05-16 13:05:47 +02:00
} else {
out <- out [order ( out $ antibiotic , out $ interpretation ) , ]
}
if ( type == " proportion" ) {
out <- subset ( out , select = - c ( isolates ) )
} else if ( type == " count" ) {
out $ value <- out $ isolates
out <- subset ( out , select = - c ( isolates ) )
}
rownames ( out ) <- NULL
out
2019-05-13 10:10:16 +02:00
}