2021-04-26 23:57:37 +02:00
# ==================================================================== #
# TITLE #
# Antimicrobial Resistance (AMR) Data Analysis for R #
# #
# SOURCE #
# https://github.com/msberends/AMR #
# #
# LICENCE #
# (c) 2018-2021 Berends MS, Luz CF et al. #
# Developed at the University of Groningen, the Netherlands, in #
# collaboration with non-profit organisations Certe Medical #
# Diagnostics & Advice, and University Medical Center Groningen. #
# #
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
# the Free Software Foundation. #
# We created this package for both routine data analysis and academic #
# research and it was publicly released in the hope that it will be #
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
# #
# Visit our website for the full manual and a complete tutorial about #
# how to conduct AMR data analysis: https://msberends.github.io/AMR/ #
# ==================================================================== #
#' (Key) Antimicrobials for First Weighted Isolates
#'
#' These functions can be used to determine first weighted isolates by considering the phenotype for isolate selection (see [first_isolate()]). Using a phenotype-based method to determine first isolates is more reliable than methods that disregard phenotypes.
#' @inheritSection lifecycle Stable Lifecycle
#' @param x a [data.frame] with antibiotics columns, like `AMX` or `amox`. Can be left blank to determine automatically
2021-05-12 18:15:03 +02:00
#' @param y,z [character] vectors to compare
2021-04-26 23:57:37 +02:00
#' @inheritParams first_isolate
#' @param universal names of **broad-spectrum** antimicrobial agents, case-insensitive. Set to `NULL` to ignore. See *Details* for the default agents.
#' @param gram_negative names of antibiotic agents for **Gram-positives**, case-insensitive. Set to `NULL` to ignore. See *Details* for the default agents.
#' @param gram_positive names of antibiotic agents for **Gram-negatives**, case-insensitive. Set to `NULL` to ignore. See *Details* for the default agents.
#' @param antifungal names of antifungal agents for **fungi**, case-insensitive. Set to `NULL` to ignore. See *Details* for the default agents.
2021-05-12 18:15:03 +02:00
#' @param only_rsi_columns a [logical] to indicate whether only columns must be included that were transformed to class `<rsi>` (see [as.rsi()]) on beforehand (defaults to `FALSE`)
2021-04-29 17:16:30 +02:00
#' @param ... ignored, only in place to allow future extensions
2021-04-26 23:57:37 +02:00
#' @details
2021-05-17 19:43:01 +02:00
#' The [key_antimicrobials()] and [all_antimicrobials()] functions are context-aware. This means that the `x` argument can be left blank if used inside a [data.frame] call, see *Examples*.
2021-04-26 23:57:37 +02:00
#'
2021-05-12 18:15:03 +02:00
#' The function [key_antimicrobials()] returns a [character] vector with 12 antimicrobial results for every isolate. The function [all_antimicrobials()] returns a [character] vector with all antimicrobial results for every isolate. These vectors can then be compared using [antimicrobials_equal()], to check if two isolates have generally the same antibiogram. Missing and invalid values are replaced with a dot (`"."`) by [key_antimicrobials()] and ignored by [antimicrobials_equal()].
2021-04-26 23:57:37 +02:00
#'
#' Please see the [first_isolate()] function how these important functions enable the 'phenotype-based' method for determination of first isolates.
#'
#' The default antimicrobial agents used for **all rows** (set in `universal`) are:
#'
#' - Ampicillin
#' - Amoxicillin/clavulanic acid
#' - Cefuroxime
#' - Ciprofloxacin
#' - Piperacillin/tazobactam
#' - Trimethoprim/sulfamethoxazole
#'
#' The default antimicrobial agents used for **Gram-negative bacteria** (set in `gram_negative`) are:
#'
#' - Cefotaxime
#' - Ceftazidime
#' - Colistin
#' - Gentamicin
#' - Meropenem
#' - Tobramycin
#'
#' The default antimicrobial agents used for **Gram-positive bacteria** (set in `gram_positive`) are:
#'
#' - Erythromycin
#' - Oxacillin
#' - Rifampin
#' - Teicoplanin
#' - Tetracycline
#' - Vancomycin
#'
#'
#' The default antimicrobial agents used for **fungi** (set in `antifungal`) are:
#'
#' - Anidulafungin
#' - Caspofungin
#' - Fluconazole
#' - Miconazole
#' - Nystatin
#' - Voriconazole
#' @rdname key_antimicrobials
#' @export
#' @seealso [first_isolate()]
#' @inheritSection AMR Read more on Our Website!
#' @examples
#' # `example_isolates` is a data set available in the AMR package.
#' # See ?example_isolates.
#'
#' # output of the `key_antimicrobials()` function could be like this:
#' strainA <- "SSSRR.S.R..S"
#' strainB <- "SSSIRSSSRSSS"
#'
#' # those strings can be compared with:
#' antimicrobials_equal(strainA, strainB, type = "keyantimicrobials")
#' # TRUE, because I is ignored (as well as missing values)
#'
#' antimicrobials_equal(strainA, strainB, type = "keyantimicrobials", ignore_I = FALSE)
2021-05-12 18:15:03 +02:00
#' # FALSE, because I is not ignored and so the 4th [character] differs
2021-04-26 23:57:37 +02:00
#'
#' \donttest{
#' if (require("dplyr")) {
#' # set key antibiotics to a new variable
#' my_patients <- example_isolates %>%
#' mutate(keyab = key_antimicrobials(antifungal = NULL)) %>% # no need to define `x`
#' mutate(
#' # now calculate first isolates
#' first_regular = first_isolate(col_keyantimicrobials = FALSE),
#' # and first WEIGHTED isolates
#' first_weighted = first_isolate(col_keyantimicrobials = "keyab")
#' )
#'
#' # Check the difference, in this data set it results in more isolates:
#' sum(my_patients$first_regular, na.rm = TRUE)
#' sum(my_patients$first_weighted, na.rm = TRUE)
#' }
#' }
key_antimicrobials <- function ( x = NULL ,
col_mo = NULL ,
universal = c ( " ampicillin" , " amoxicillin/clavulanic acid" , " cefuroxime" ,
" piperacillin/tazobactam" , " ciprofloxacin" , " trimethoprim/sulfamethoxazole" ) ,
gram_negative = c ( " gentamicin" , " tobramycin" , " colistin" ,
" cefotaxime" , " ceftazidime" , " meropenem" ) ,
gram_positive = c ( " vancomycin" , " teicoplanin" , " tetracycline" ,
" erythromycin" , " oxacillin" , " rifampin" ) ,
antifungal = c ( " anidulafungin" , " caspofungin" , " fluconazole" ,
" miconazole" , " nystatin" , " voriconazole" ) ,
only_rsi_columns = FALSE ,
... ) {
if ( is_null_or_grouped_tbl ( x ) ) {
# when `x` is left blank, auto determine it (get_current_data() also contains dplyr::cur_data_all())
# is also fix for using a grouped df as input (a dot as first argument)
2021-06-22 12:16:42 +02:00
x <- tryCatch ( get_current_data ( arg_name = " x" , call = -2 ) , error = function ( e ) x )
2021-04-26 23:57:37 +02:00
}
meet_criteria ( x , allow_class = " data.frame" ) # also checks dimensions to be >0
meet_criteria ( col_mo , allow_class = " character" , has_length = 1 , allow_NULL = TRUE , allow_NA = TRUE , is_in = colnames ( x ) )
meet_criteria ( universal , allow_class = " character" , allow_NULL = TRUE )
meet_criteria ( gram_negative , allow_class = " character" , allow_NULL = TRUE )
meet_criteria ( gram_positive , allow_class = " character" , allow_NULL = TRUE )
meet_criteria ( antifungal , allow_class = " character" , allow_NULL = TRUE )
meet_criteria ( only_rsi_columns , allow_class = " logical" , has_length = 1 )
2021-05-30 22:14:38 +02:00
# force regular data.frame, not a tibble or data.table
2021-04-26 23:57:37 +02:00
x <- as.data.frame ( x , stringsAsFactors = FALSE )
2021-12-11 13:41:31 +01:00
cols <- get_column_abx ( x , info = FALSE , only_rsi_columns = only_rsi_columns , fn = " key_antimicrobials" )
2021-04-26 23:57:37 +02:00
# try to find columns based on type
# -- mo
if ( is.null ( col_mo ) ) {
col_mo <- search_type_in_df ( x = x , type = " mo" , info = FALSE )
}
if ( is.null ( col_mo ) ) {
warning_ ( " No column found for `col_mo`, ignoring antibiotics set in `gram_negative` and `gram_positive`, and antimycotics set in `antifungal`" , call = FALSE )
gramstain <- NA_character_
kingdom <- NA_character_
} else {
x.mo <- as.mo ( x [ , col_mo , drop = TRUE ] )
gramstain <- mo_gramstain ( x.mo , language = NULL )
kingdom <- mo_kingdom ( x.mo , language = NULL )
}
AMR_string <- function ( x , values , name , filter , cols = cols ) {
if ( is.null ( values ) ) {
return ( rep ( NA_character_ , length ( which ( filter ) ) ) )
}
values_old_length <- length ( values )
values <- as.ab ( values , flag_multiple_results = FALSE , info = FALSE )
values <- cols [names ( cols ) %in% values ]
values_new_length <- length ( values )
if ( values_new_length < values_old_length &
any ( filter , na.rm = TRUE ) &
2021-12-11 13:41:31 +01:00
message_not_thrown_before ( " key_antimicrobials" , name ) ) {
2021-04-26 23:57:37 +02:00
warning_ ( ifelse ( values_new_length == 0 ,
" No columns available " ,
paste0 ( " Only using " , values_new_length , " out of " , values_old_length , " defined columns " ) ) ,
" as key antimicrobials for " , name , " s. See ?key_antimicrobials." ,
call = FALSE )
}
generate_antimcrobials_string ( x [which ( filter ) , c ( universal , values ) , drop = FALSE ] )
}
if ( is.null ( universal ) ) {
universal <- character ( 0 )
} else {
universal <- as.ab ( universal , flag_multiple_results = FALSE , info = FALSE )
universal <- cols [names ( cols ) %in% universal ]
}
key_ab <- rep ( NA_character_ , nrow ( x ) )
key_ab [which ( gramstain == " Gram-negative" ) ] <- AMR_string ( x = x ,
values = gram_negative ,
name = " Gram-negative" ,
filter = gramstain == " Gram-negative" ,
cols = cols )
key_ab [which ( gramstain == " Gram-positive" ) ] <- AMR_string ( x = x ,
values = gram_positive ,
name = " Gram-positive" ,
filter = gramstain == " Gram-positive" ,
cols = cols )
key_ab [which ( kingdom == " Fungi" ) ] <- AMR_string ( x = x ,
values = antifungal ,
name = " antifungal" ,
filter = kingdom == " Fungi" ,
cols = cols )
# back-up - only use `universal`
key_ab [which ( is.na ( key_ab ) ) ] <- AMR_string ( x = x ,
values = character ( 0 ) ,
name = " " ,
filter = is.na ( key_ab ) ,
cols = cols )
if ( length ( unique ( key_ab ) ) == 1 ) {
warning_ ( " No distinct key antibiotics determined." , call = FALSE )
}
key_ab
}
#' @rdname key_antimicrobials
#' @export
all_antimicrobials <- function ( x = NULL ,
only_rsi_columns = FALSE ,
... ) {
if ( is_null_or_grouped_tbl ( x ) ) {
# when `x` is left blank, auto determine it (get_current_data() also contains dplyr::cur_data_all())
# is also fix for using a grouped df as input (a dot as first argument)
2021-06-22 12:16:42 +02:00
x <- tryCatch ( get_current_data ( arg_name = " x" , call = -2 ) , error = function ( e ) x )
2021-04-26 23:57:37 +02:00
}
meet_criteria ( x , allow_class = " data.frame" ) # also checks dimensions to be >0
meet_criteria ( only_rsi_columns , allow_class = " logical" , has_length = 1 )
2021-05-30 22:14:38 +02:00
# force regular data.frame, not a tibble or data.table
2021-04-26 23:57:37 +02:00
x <- as.data.frame ( x , stringsAsFactors = FALSE )
2021-12-11 13:41:31 +01:00
cols <- get_column_abx ( x , only_rsi_columns = only_rsi_columns , info = FALSE ,
sort = FALSE , fn = " all_antimicrobials" )
2021-04-26 23:57:37 +02:00
generate_antimcrobials_string ( x [ , cols , drop = FALSE ] )
}
generate_antimcrobials_string <- function ( df ) {
if ( NCOL ( df ) == 0 ) {
return ( rep ( " " , NROW ( df ) ) )
}
if ( NROW ( df ) == 0 ) {
return ( character ( 0 ) )
}
2021-11-28 23:01:26 +01:00
tryCatch ( {
2021-04-26 23:57:37 +02:00
do.call ( paste0 ,
lapply ( as.list ( df ) ,
function ( x ) {
x <- toupper ( as.character ( x ) )
x [ ! x %in% c ( " R" , " S" , " I" ) ] <- " ."
paste ( x )
2021-11-28 23:01:26 +01:00
} ) )
} ,
error = function ( e ) rep ( strrep ( " ." , NCOL ( df ) ) , NROW ( df ) ) )
2021-04-26 23:57:37 +02:00
}
#' @rdname key_antimicrobials
#' @export
antimicrobials_equal <- function ( y ,
z ,
type = c ( " points" , " keyantimicrobials" ) ,
ignore_I = TRUE ,
points_threshold = 2 ,
... ) {
meet_criteria ( y , allow_class = " character" )
meet_criteria ( z , allow_class = " character" )
stop_if ( missing ( type ) , " argument \"type\" is missing, with no default" )
meet_criteria ( type , allow_class = " character" , has_length = 1 , is_in = c ( " points" , " keyantimicrobials" ) )
meet_criteria ( ignore_I , allow_class = " logical" , has_length = 1 )
meet_criteria ( points_threshold , allow_class = c ( " numeric" , " integer" ) , has_length = 1 , is_positive = TRUE , is_finite = TRUE )
stop_ifnot ( length ( y ) == length ( z ) , " length of `y` and `z` must be equal" )
key2rsi <- function ( val ) {
2021-11-28 23:01:26 +01:00
val <- strsplit ( val , " " ) [ [1L ] ]
val.int <- rep ( NA_real_ , length ( val ) )
val.int [val == " S" ] <- 1
val.int [val == " I" ] <- 2
val.int [val == " R" ] <- 3
val.int
2021-04-26 23:57:37 +02:00
}
2021-11-28 23:01:26 +01:00
# only run on uniques
uniq <- unique ( c ( y , z ) )
uniq_list <- lapply ( uniq , key2rsi )
names ( uniq_list ) <- uniq
y <- uniq_list [match ( y , names ( uniq_list ) ) ]
z <- uniq_list [match ( z , names ( uniq_list ) ) ]
2021-04-26 23:57:37 +02:00
determine_equality <- function ( a , b , type , points_threshold , ignore_I ) {
if ( length ( a ) != length ( b ) ) {
# incomparable, so not equal
return ( FALSE )
}
# ignore NAs on both sides
NA_ind <- which ( is.na ( a ) | is.na ( b ) )
a [NA_ind ] <- NA_real_
b [NA_ind ] <- NA_real_
if ( type == " points" ) {
# count points for every single character:
# - no change is 0 points
# - I <-> S|R is 0.5 point
# - S|R <-> R|S is 1 point
# use the levels of as.rsi (S = 1, I = 2, R = 3)
# and divide by 2 (S = 0.5, I = 1, R = 1.5)
( sum ( abs ( a - b ) , na.rm = TRUE ) / 2 ) < points_threshold
} else {
if ( ignore_I == TRUE ) {
ind <- which ( a == 2 | b == 2 ) # since as.double(as.rsi("I")) == 2
a [ind ] <- NA_real_
b [ind ] <- NA_real_
}
all ( a == b , na.rm = TRUE )
}
}
out <- unlist ( mapply ( FUN = determine_equality ,
y ,
z ,
MoreArgs = list ( type = type ,
points_threshold = points_threshold ,
ignore_I = ignore_I ) ,
SIMPLIFY = FALSE ,
USE.NAMES = FALSE ) )
out [is.na ( y ) | is.na ( z ) ] <- NA
out
}