2022-08-30 21:48:02 +02:00
# ==================================================================== #
2023-07-08 17:30:05 +02:00
# TITLE: #
2022-10-05 09:12:22 +02:00
# AMR: An R Package for Working with Antimicrobial Resistance Data #
2022-08-30 21:48:02 +02:00
# #
2023-07-08 17:30:05 +02:00
# SOURCE CODE: #
2022-08-30 21:48:02 +02:00
# https://github.com/msberends/AMR #
# #
2023-07-08 17:30:05 +02:00
# PLEASE CITE THIS SOFTWARE AS: #
2024-07-16 14:51:57 +02:00
# Berends MS, Luz CF, Friedrich AW, et al. (2022). #
# AMR: An R Package for Working with Antimicrobial Resistance Data. #
# Journal of Statistical Software, 104(3), 1-31. #
2023-05-27 10:39:22 +02:00
# https://doi.org/10.18637/jss.v104.i03 #
2022-10-05 09:12:22 +02:00
# #
2022-12-27 15:16:15 +01:00
# Developed at the University of Groningen and the University Medical #
# Center Groningen in The Netherlands, in collaboration with many #
# colleagues from around the world, see our website. #
2022-08-30 21:48:02 +02:00
# #
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
# the Free Software Foundation. #
# We created this package for both routine data analysis and academic #
# research and it was publicly released in the hope that it will be #
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
# #
# Visit our website for the full manual and a complete tutorial about #
# how to conduct AMR data analysis: https://msberends.github.io/AMR/ #
# ==================================================================== #
2022-12-30 13:02:37 +01:00
#' Calculate the Mean AMR Distance
2022-08-30 21:48:02 +02:00
#'
2022-12-30 13:02:37 +01:00
#' Calculates a normalised mean for antimicrobial resistance between multiple observations, to help to identify similar isolates without comparing antibiograms by hand.
2023-01-21 23:47:20 +01:00
#' @param x a vector of class [sir][as.sir()], [mic][as.mic()] or [disk][as.disk()], or a [data.frame] containing columns of any of these classes
2022-12-30 13:02:37 +01:00
#' @param ... variables to select (supports [tidyselect language][tidyselect::language] such as `column1:column4` and `where(is.mic)`, and can thus also be [antibiotic selectors][ab_selector()])
2024-09-29 22:17:56 +02:00
#' @param combine_SI a [logical] to indicate whether all values of S, SDD, and I must be merged into one, so the input only consists of S+I vs. R (susceptible vs. resistant) - the default is `TRUE`
2023-01-05 14:43:18 +01:00
#' @details The mean AMR distance is effectively [the Z-score](https://en.wikipedia.org/wiki/Standard_score); a normalised numeric value to compare AMR test results which can help to identify similar isolates, without comparing antibiograms by hand.
2022-08-30 21:48:02 +02:00
#'
2023-01-05 14:43:18 +01:00
#' MIC values (see [as.mic()]) are transformed with [log2()] first; their distance is thus calculated as `(log2(x) - mean(log2(x))) / sd(log2(x))`.
2022-08-30 21:48:02 +02:00
#'
2023-01-21 23:47:20 +01:00
#' SIR values (see [as.sir()]) are transformed using `"S"` = 1, `"I"` = 2, and `"R"` = 3. If `combine_SI` is `TRUE` (default), `"I"` and `"SDD"` are merged into `"S"` and will thus be considered to be 1.
2022-08-30 21:48:02 +02:00
#'
2023-01-05 14:43:18 +01:00
#' For data sets, the mean AMR distance will be calculated per column, after which the mean per row will be returned, see *Examples*.
2022-08-30 21:48:02 +02:00
#'
2022-10-21 16:02:14 +02:00
#' Use [amr_distance_from_row()] to subtract distances from the distance of one row, see *Examples*.
2022-08-30 21:48:02 +02:00
#' @section Interpretation:
#' Isolates with distances less than 0.01 difference from each other should be considered similar. Differences lower than 0.025 should be considered suspicious.
#' @export
#' @examples
2023-01-21 23:47:20 +01:00
#' sir <- random_sir(10)
#' sir
#' mean_amr_distance(sir)
2023-01-23 15:01:21 +01:00
#'
2023-01-05 14:43:18 +01:00
#' mic <- random_mic(10)
#' mic
#' mean_amr_distance(mic)
#' # equal to the Z-score of their log2:
#' (log2(mic) - mean(log2(mic))) / sd(log2(mic))
2023-01-23 15:01:21 +01:00
#'
2023-01-05 14:43:18 +01:00
#' disk <- random_disk(10)
#' disk
#' mean_amr_distance(disk)
2022-08-30 21:48:02 +02:00
#'
#' y <- data.frame(
#' id = LETTERS[1:10],
2023-01-21 23:47:20 +01:00
#' amox = random_sir(10, ab = "amox", mo = "Escherichia coli"),
2023-01-05 14:43:18 +01:00
#' cipr = random_disk(10, ab = "cipr", mo = "Escherichia coli"),
2022-08-30 21:48:02 +02:00
#' gent = random_mic(10, ab = "gent", mo = "Escherichia coli"),
#' tobr = random_mic(10, ab = "tobr", mo = "Escherichia coli")
#' )
#' y
#' mean_amr_distance(y)
#' y$amr_distance <- mean_amr_distance(y, where(is.mic))
#' y[order(y$amr_distance), ]
#'
#' if (require("dplyr")) {
#' y %>%
#' mutate(
2023-01-05 14:43:18 +01:00
#' amr_distance = mean_amr_distance(y),
2022-10-21 16:02:14 +02:00
#' check_id_C = amr_distance_from_row(amr_distance, id == "C")
2022-08-30 21:48:02 +02:00
#' ) %>%
#' arrange(check_id_C)
#' }
#' if (require("dplyr")) {
#' # support for groups
#' example_isolates %>%
#' filter(mo_genus() == "Enterococcus" & mo_species() != "") %>%
#' select(mo, TCY, carbapenems()) %>%
#' group_by(mo) %>%
2023-01-05 14:43:18 +01:00
#' mutate(dist = mean_amr_distance(.)) %>%
#' arrange(mo, dist)
2022-08-30 21:48:02 +02:00
#' }
mean_amr_distance <- function(x, ...) {
  # S3 generic: the method is chosen by the class of `x`. The generic name
  # passed to UseMethod() must match the function name exactly, otherwise no
  # `mean_amr_distance.<class>` method can be found.
  UseMethod("mean_amr_distance")
}
2022-12-30 13:02:37 +01:00
#' @noRd
2022-08-30 21:48:02 +02:00
#' @export
mean_amr_distance.default <- function(x, ...) {
  # Standardise the input as a z-score: centre on the mean and scale by the
  # standard deviation, both computed while ignoring missing values.
  values <- as.double(x)
  centre <- mean(values, na.rm = TRUE)
  spread <- stats::sd(values, na.rm = TRUE)
  (values - centre) / spread
}
2022-12-30 13:02:37 +01:00
#' @noRd
2022-08-30 21:48:02 +02:00
#' @export
mean_amr_distance.mic <- function(x, ...) {
  # MIC values are put on the log2 scale first; the transformed values are
  # then handed back to the generic.
  log2_values <- log2(x)
  mean_amr_distance(log2_values)
}
2022-12-30 13:02:37 +01:00
#' @noRd
2022-08-30 21:48:02 +02:00
#' @export
mean_amr_distance.disk <- function(x, ...) {
  # Disk diffusion values are used as plain doubles, without transformation,
  # and handed back to the generic.
  zone_sizes <- as.double(x)
  mean_amr_distance(zone_sizes)
}
#' @rdname mean_amr_distance
#' @export
2023-01-21 23:47:20 +01:00
mean_amr_distance.sir <- function(x, ..., combine_SI = TRUE) {
  # `combine_SI` must be a single logical value
  meet_criteria(combine_SI, allow_class = "logical", has_length = 1, .call_depth = -1)
  if (isTRUE(combine_SI)) {
    # merge "I" and "SDD" into "S", so only susceptible vs. resistant remains
    x[x %in% c("I", "SDD")] <- "S"
  }
  # continue with the numeric representation of the SIR values
  mean_amr_distance(as.double(x))
}
#' @rdname mean_amr_distance
#' @export
mean_amr_distance.data.frame <- function(x, ..., combine_SI = TRUE) {
  # `combine_SI` must be a single logical value
  meet_criteria(combine_SI, allow_class = "logical", has_length = 1, .call_depth = -1)
  df <- x
  if (is_null_or_grouped_tbl(df)) {
    # NOTE(review): presumably retrieves the data of the calling (grouped)
    # dplyr verb - confirm against get_current_data()
    df <- get_current_data("x", -2)
  }
  df <- as.data.frame(df, stringsAsFactors = FALSE)

  # Apply the user's column selection, if any. Evaluating `...` can fail for
  # tidyselect expressions, which is treated as "a selection was given".
  if (tryCatch(length(list(...)) > 0, error = function(e) TRUE)) {
    # first try to use the selection as plain column names/indices ...
    out <- tryCatch(suppressWarnings(c(...)), error = function(e) NULL)
    if (!is.null(out)) {
      df <- df[, out, drop = FALSE]
    } else {
      # ... and otherwise let pm_select() evaluate the selection
      df <- pm_select(df, ...)
    }
  }

  # Keep only columns holding test results: those of class sir, mic or disk,
  # plus those that get_column_abx() returns for this data set.
  is_ast_column <- vapply(
    FUN.VALUE = logical(1),
    df,
    function(col) is.sir(col) || is.mic(col) || is.disk(col),
    USE.NAMES = FALSE
  )
  df_classes <- colnames(df)[is_ast_column]
  df_antibiotics <- unname(get_column_abx(df, info = FALSE))
  df <- df[, colnames(df)[colnames(df) %in% union(df_classes, df_antibiotics)], drop = FALSE]

  stop_if(ncol(df) < 2,
    "data set must contain at least two variables",
    call = -2
  )
  if (message_not_thrown_before("mean_amr_distance", "groups")) {
    message_("Calculating mean AMR distance based on columns ", vector_and(colnames(df), sort = FALSE))
  }

  # distance per column; yields a matrix with one column per variable
  res <- vapply(
    FUN.VALUE = double(nrow(df)),
    df,
    mean_amr_distance,
    combine_SI = combine_SI
  )
  if (is.null(dim(res))) {
    # vapply() returned a plain vector instead of a matrix (single-row input)
    if (all(is.na(res))) {
      return(NA_real_)
    } else {
      return(mean(res, na.rm = TRUE))
    }
  }
  # mean distance per row (isolate) over all columns
  res <- rowMeans(res, na.rm = TRUE)
  # rows without any usable distance become 0 instead of NaN/Inf
  res[is.infinite(res) | is.nan(res)] <- 0
  res
}
#' @rdname mean_amr_distance
2022-10-21 16:02:14 +02:00
#' @param amr_distance the outcome of [mean_amr_distance()]
2022-08-30 21:48:02 +02:00
#' @param row an index, such as a row number
#' @export
2022-10-21 16:02:14 +02:00
amr_distance_from_row <- function(amr_distance, row) {
  # `amr_distance` must be numeric and finite; `row` may be a logical mask or
  # a numeric/integer index
  meet_criteria(amr_distance, allow_class = "numeric", is_finite = TRUE)
  meet_criteria(row, allow_class = c("logical", "numeric", "integer"))
  if (is.logical(row)) {
    # turn a logical mask (such as `id == "C"`) into positional indices
    row <- which(row)
  }
  # absolute difference between each distance and that of the reference row
  abs(amr_distance[row] - amr_distance)
}