1
0
mirror of https://github.com/msberends/AMR.git synced 2025-07-09 17:02:03 +02:00

new functions R, RI, SI, S

This commit is contained in:
2018-07-29 22:14:51 +02:00
parent 8421e3f005
commit 826694323b
10 changed files with 256 additions and 183 deletions

View File

@ -101,6 +101,18 @@ as.bactid <- function(x) {
x <- paste0('^', x, '$')
for (i in 1:length(x)) {
if (x.fullbackup[i] %in% AMR::microorganisms$bactid) {
# is already a valid bactid
x[i] <- x.fullbackup[i]
next
}
if (x.backup[i] %in% AMR::microorganisms$bactid) {
# is already a valid bactid
x[i] <- x.backup[i]
next
}
if (tolower(x[i]) == '^e.*coli$') {
# avoid detection of Entamoeba coli in case of E. coli
x[i] <- 'Escherichia coli'
@ -255,3 +267,11 @@ as.data.frame.bactid <- function (x, ...) {
as.data.frame.vector(x, ...)
}
}
#' @exportMethod pull.bactid
#' @export
#' @importFrom dplyr pull
#' @noRd
pull.bactid <- function(.data, ...) {
pull(as.data.frame(.data), ...)
}

View File

@ -53,7 +53,7 @@
#' @export
#' @importFrom dplyr arrange_at lag between row_number filter mutate arrange
#' @return A vector to add to table, see Examples.
#' @source Methodology of this function is based on: "M39 Analysis and Presentation of Cumulative Antimicrobial Susceptibility Test Data, 4th Edition", 2014, Clinical and Laboratory Standards Institute. \url{https://clsi.org/standards/products/microbiology/documents/m39/}.
#' @source Methodology of this function is based on: \strong{M39 Analysis and Presentation of Cumulative Antimicrobial Susceptibility Test Data, 4th Edition}, 2014, \emph{Clinical and Laboratory Standards Institute (CLSI)}. \url{https://clsi.org/standards/products/microbiology/documents/m39/}.
#' @examples
#' # septic_patients is a dataset available in the AMR package
#' ?septic_patients

View File

@ -18,19 +18,22 @@
#' Calculate resistance of isolates
#'
#' These functions can be used to calculate the (co-)resistance of microbial isolates (i.e. percentage S, SI, I, IR or R). All functions can be used in \code{dplyr}s \code{\link[dplyr]{summarise}} and support grouped variables, see \emph{Examples}.
#' @param ab,ab1,ab2 vector of antibiotic interpretations, they will be transformed internally with \code{\link{as.rsi}}
#' These functions can be used to calculate the (co-)resistance of microbial isolates (i.e. percentage S, SI, IR or R). All functions can be used in \code{dplyr}s \code{\link[dplyr]{summarise}} and support grouped variables, see \emph{Examples}. \cr\cr
#' \code{R} and \code{IR} can be used to calculate resistance, \code{S} and \code{SI} can be used to calculate susceptibility.\cr
#' \code{n_rsi} counts all cases where antimicrobial interpretations are available.
#' @param ab1 vector of antibiotic interpretations, they will be transformed internally with \code{\link{as.rsi}}
#' @param ab2 like \code{ab}, a vector of antibiotic interpretations. Use this to calculate (the lack of) co-resistance: the probability where one of two drugs have a susceptible result. See Examples.
#' @param include_I logical to indicate whether antimicrobial interpretations of "I" should be included
#' @param minimum minimal amount of available isolates. Any number lower than \code{minimum} will return \code{NA}.
#' @param minimum minimal amount of available isolates. Any number lower than \code{minimum} will return \code{NA}. The default number of \code{30} isolates is advised by the CLSI as best practice, see Source.
#' @param as_percent logical to indicate whether the output must be returned as percent (text), will else be a double
#' @param interpretation antimicrobial interpretation
#' @param info \emph{DEPRECATED} calculate the amount of available isolates and print it, like \code{n = 423}
#' @param warning \emph{DEPRECATED} show a warning when the available amount of isolates is below \code{minimum}
#' @details \strong{Remember that you should filter your table to let it contain only first isolates!} Use \code{\link{first_isolate}} to determine them in your data set.
#'
#' The functions \code{resistance}, \code{susceptibility} and \code{n_rsi} calculate using hybrid evaluation (i.e. using C++), which makes these functions 25-30 times faster than the old \code{rsi} function. This function is still available for backwards compatibility but is deprecated.
#' The functions \code{resistance} and \code{susceptibility} are wrappers around \code{IR} and \code{S}, respectively. All functions except \code{rsi} use hybrid evaluation (i.e. using C++), which makes these functions 20-30 times faster than the old \code{rsi} function. This latter function is still available for backwards compatibility but is deprecated.
#' \if{html}{
#' \cr
#' \cr\cr
#' To calculate the probability (\emph{p}) of susceptibility of one antibiotic, we use this formula:
#' \out{<div style="text-align: center">}\figure{mono_therapy.png}\out{</div>}
#' To calculate the probability (\emph{p}) of susceptibility of more antibiotics (i.e. combination therapy), we need to check whether one of them has a susceptible result (as numerator) and count all cases where all antibiotics were tested (as denominator). \cr
@ -41,88 +44,143 @@
#' Theoretically for three antibiotics:
#' \out{<div style="text-align: center">}\figure{combi_therapy_3.png}\out{</div>}
#' }
#' @keywords resistance susceptibility rsi_df antibiotics isolate isolates
#' @source \strong{M39 Analysis and Presentation of Cumulative Antimicrobial Susceptibility Test Data, 4th Edition}, 2014, \emph{Clinical and Laboratory Standards Institute (CLSI)}. \url{https://clsi.org/standards/products/microbiology/documents/m39/}.
#' @keywords resistance susceptibility rsi_df rsi antibiotics isolate isolates
#' @return Double or, when \code{as_percent = TRUE}, a character.
#' @rdname resistance
#' @export
#' @examples
#' library(dplyr)
#'
#' septic_patients %>%
#' group_by(hospital_id) %>%
#' summarise(p = susceptibility(cipr),
#' n = n_rsi(cipr)) # n_rsi works like n_distinct in dplyr
#'
#' septic_patients %>%
#' group_by(hospital_id) %>%
#' summarise(cipro_p = susceptibility(cipr, as_percent = TRUE),
#' cipro_n = n_rsi(cipr),
#' genta_p = susceptibility(gent, as_percent = TRUE),
#' genta_n = n_rsi(gent),
#' combination_p = susceptibility(cipr, gent, as_percent = TRUE),
#' combination_n = n_rsi(cipr, gent))
#'
#'
#' # Calculate resistance
#' resistance(septic_patients$amox)
#' rsi(septic_patients$amox, interpretation = "IR") # deprecated
#' R(septic_patients$amox)
#' IR(septic_patients$amox)
#'
#' # Or susceptibility
#' susceptibility(septic_patients$amox)
#' rsi(septic_patients$amox, interpretation = "S") # deprecated
#' S(septic_patients$amox)
#' SI(septic_patients$amox)
#'
#' # Since n_rsi counts available isolates (and is used as denominator),
#' # you can calculate back to e.g. count resistant isolates:
#' IR(septic_patients$amox) * n_rsi(septic_patients$amox)
#'
#' library(dplyr)
#' septic_patients %>%
#' group_by(hospital_id) %>%
#' summarise(p = S(cipr),
#' n = n_rsi(cipr)) # n_rsi works like n_distinct in dplyr
#'
#' # Calculate co-resistance between amoxicillin/clav acid and gentamicin,
#' # so we can see that combination therapy does a lot more than mono therapy:
#' susceptibility(septic_patients$amcl) # p = 67.8%
#' n_rsi(septic_patients$amcl) # n = 1641
#' S(septic_patients$amcl) # p = 67.3%
#' n_rsi(septic_patients$amcl) # n = 1570
#'
#' susceptibility(septic_patients$gent) # p = 69.1%
#' n_rsi(septic_patients$gent) # n = 1863
#' S(septic_patients$gent) # p = 74.0%
#' n_rsi(septic_patients$gent) # n = 1842
#'
#' with(septic_patients,
#' susceptibility(amcl, gent)) # p = 90.6%
#' S(amcl, gent)) # p = 92.1%
#' with(septic_patients,
#' n_rsi(amcl, gent)) # n = 1580
#' n_rsi(amcl, gent)) # n = 1504
#'
#' septic_patients %>%
#' group_by(hospital_id) %>%
#' summarise(cipro_p = S(cipr, as_percent = TRUE),
#' cipro_n = n_rsi(cipr),
#' genta_p = S(gent, as_percent = TRUE),
#' genta_n = n_rsi(gent),
#' combination_p = S(cipr, gent, as_percent = TRUE),
#' combination_n = n_rsi(cipr, gent))
#'
#' \dontrun{
#'
#' # calculate current empiric combination therapy of Helicobacter gastritis:
#' my_table %>%
#' filter(first_isolate == TRUE,
#' genus == "Helicobacter") %>%
#' summarise(p = susceptibility(amox, metr), # amoxicillin with metronidazole
#' summarise(p = S(amox, metr), # amoxicillin with metronidazole
#' n = n_rsi(amox, metr))
#'
#'
#' # How fast is this hybrid evaluation in C++ compared to R?
#' # In other words: how is the speed improvement of the new `resistance` compared to old `rsi`?
#'
#' library(microbenchmark)
#' df <- septic_patients %>% group_by(hospital_id, bactid) # 317 groups with sizes 1 to 167
#'
#' microbenchmark(old_IR = df %>% summarise(p = rsi(amox, minimum = 0, interpretation = "IR")),
#' new_IR = df %>% summarise(p = resistance(amox, minimum = 0)),
#' old_S = df %>% summarise(p = rsi(amox, minimum = 0, interpretation = "S")),
#' new_S = df %>% summarise(p = susceptibility(amox, minimum = 0)),
#' times = 5,
#' unit = "s")
#'
#' # Unit: seconds
#' # expr min lq mean median uq max neval
#' # old_IR 1.95600230 1.96096857 1.97981537 1.96823318 2.00645711 2.00741568 5
#' # new_IR 0.06872808 0.06984932 0.07162866 0.06987306 0.07050094 0.07919192 5
#' # old_S 1.68893579 1.69024888 1.72461867 1.69785934 1.70428796 1.84176137 5
#' # new_S 0.06737037 0.06838167 0.07431906 0.07745364 0.07827224 0.08011738 5
#'
#' # The old function took roughly 2 seconds, the new ones take 0.07 seconds.
#' }
resistance <- function(ab,
#' @rdname resistance
#' @name resistance
#' @export
#' @rdname resistance
#' @export
S <- function(ab1,
ab2 = NULL,
minimum = 30,
as_percent = FALSE) {
susceptibility(ab1 = ab1,
ab2 = ab2,
include_I = FALSE,
minimum = minimum,
as_percent = as_percent)
}
#' @rdname resistance
#' @export
SI <- function(ab1,
ab2 = NULL,
minimum = 30,
as_percent = FALSE) {
susceptibility(ab1 = ab1,
ab2 = ab2,
include_I = TRUE,
minimum = minimum,
as_percent = as_percent)
}
#' @rdname resistance
#' @export
IR <- function(ab1,
minimum = 30,
as_percent = FALSE) {
resistance(ab1 = ab1,
include_I = TRUE,
minimum = minimum,
as_percent = as_percent)
}
#' @rdname resistance
#' @export
R <- function(ab1,
minimum = 30,
as_percent = FALSE) {
resistance(ab1 = ab1,
include_I = FALSE,
minimum = minimum,
as_percent = as_percent)
}
#' @rdname resistance
#' @export
n_rsi <- function(ab1, ab2 = NULL) {
if (NCOL(ab1) > 1) {
stop('`ab` must be a vector of antimicrobial interpretations', call. = FALSE)
}
if (!is.rsi(ab1)) {
ab1 <- as.rsi(ab1)
}
if (!is.null(ab2)) {
if (NCOL(ab2) > 1) {
stop('`ab2` must be a vector of antimicrobial interpretations', call. = FALSE)
}
if (!is.rsi(ab2)) {
ab2 <- as.rsi(ab2)
}
sum(!is.na(ab1) & !is.na(ab2))
} else {
sum(!is.na(ab1))
}
}
#' @rdname resistance
#' @export
resistance <- function(ab1,
include_I = TRUE,
minimum = 30,
as_percent = FALSE) {
if (NCOL(ab) > 1) {
stop('`ab` must be a vector of antimicrobial interpretations', call. = FALSE)
if (NCOL(ab1) > 1) {
stop('`ab1` must be a vector of antimicrobial interpretations', call. = FALSE)
}
if (!is.logical(include_I)) {
stop('`include_I` must be logical', call. = FALSE)
@ -134,14 +192,17 @@ resistance <- function(ab,
stop('`as_percent` must be logical', call. = FALSE)
}
if (!is.rsi(ab)) {
x <- as.rsi(ab)
# ab_name <- deparse(substitute(ab))
if (!is.rsi(ab1)) {
x <- as.rsi(ab1)
warning("Increase speed by transforming to class `rsi` on beforehand: df %>% mutate_at(vars(col10:col20), as.rsi)")
} else {
x <- ab
x <- ab1
}
total <- length(x) - sum(is.na(x)) # faster than C++
if (total < minimum) {
# warning("Too few isolates available for ", ab_name, ": ", total, " < ", minimum, "; returning NA.", call. = FALSE)
return(NA)
}
found <- .Call(`_AMR_rsi_calc_R`, x, include_I)
@ -180,6 +241,7 @@ susceptibility <- function(ab1,
print_warning <- TRUE
}
if (!is.null(ab2)) {
# ab_name <- paste(deparse(substitute(ab1)), "and", deparse(substitute(ab2)))
if (NCOL(ab2) > 1) {
stop('`ab2` must be a vector of antimicrobial interpretations', call. = FALSE)
}
@ -193,9 +255,11 @@ susceptibility <- function(ab1,
FUN = min)
} else {
x <- ab1
# ab_name <- deparse(substitute(ab1))
}
total <- length(x) - sum(is.na(x))
if (total < minimum) {
# warning("Too few isolates available for ", ab_name, ": ", total, " < ", minimum, "; returning NA.", call. = FALSE)
return(NA)
}
found <- .Call(`_AMR_rsi_calc_S`, x, include_I)
@ -211,28 +275,6 @@ susceptibility <- function(ab1,
}
}
#' @rdname resistance
#' @export
n_rsi <- function(ab1, ab2 = NULL) {
if (NCOL(ab1) > 1) {
stop('`ab1` must be a vector of antimicrobial interpretations', call. = FALSE)
}
if (!is.rsi(ab1)) {
ab1 <- as.rsi(ab1)
}
if (!is.null(ab2)) {
if (NCOL(ab2) > 1) {
stop('`ab2` must be a vector of antimicrobial interpretations', call. = FALSE)
}
if (!is.rsi(ab2)) {
ab2 <- as.rsi(ab2)
}
sum(!is.na(ab1) & !is.na(ab2))
} else {
sum(!is.na(ab1))
}
}
#' @rdname resistance
#' @export
rsi <- function(ab1,
@ -242,6 +284,9 @@ rsi <- function(ab1,
as_percent = FALSE,
info = FALSE,
warning = TRUE) {
.Deprecated()
ab1.name <- deparse(substitute(ab1))
if (ab1.name %like% '.[$].') {
ab1.name <- unlist(strsplit(ab1.name, "$", fixed = TRUE))