new functions R, RI, SI, S

2026-03-04 19:59:48 +01:00 · 2018-07-29 22:14:51 +02:00
parent 8421e3f005
commit 826694323b
10 changed files with 256 additions and 183 deletions
--- a/R/bactid.R
+++ b/R/bactid.R
@@ -101,6 +101,18 @@ as.bactid <- function(x) {
  x <- paste0('^', x, '$')

  for (i in 1:length(x)) {
+
+    if (x.fullbackup[i] %in% AMR::microorganisms$bactid) {
+      # is already a valid bactid
+      x[i] <- x.fullbackup[i]
+      next
+    }
+    if (x.backup[i] %in% AMR::microorganisms$bactid) {
+      # is already a valid bactid
+      x[i] <- x.backup[i]
+      next
+    }
+
    if (tolower(x[i]) == '^e.*coli$') {
      # avoid detection of Entamoeba coli in case of E. coli
      x[i] <- 'Escherichia coli'
@@ -255,3 +267,11 @@ as.data.frame.bactid <- function (x, ...) {
    as.data.frame.vector(x, ...)
  }
 }
+
+#' @exportMethod pull.bactid
+#' @export
+#' @importFrom dplyr pull
+#' @noRd
+pull.bactid <- function(.data, ...) {
+  pull(as.data.frame(.data), ...)
+}
--- a/R/first_isolate.R
+++ b/R/first_isolate.R
@@ -53,7 +53,7 @@
 #' @export
 #' @importFrom dplyr arrange_at lag between row_number filter mutate arrange
 #' @return A vector to add to table, see Examples.
-#' @source Methodology of this function is based on: "M39 Analysis and Presentation of Cumulative Antimicrobial Susceptibility Test Data, 4th Edition", 2014, Clinical and Laboratory Standards Institute. \url{https://clsi.org/standards/products/microbiology/documents/m39/}.
+#' @source Methodology of this function is based on: \strong{M39 Analysis and Presentation of Cumulative Antimicrobial Susceptibility Test Data, 4th Edition}, 2014, \emph{Clinical and Laboratory Standards Institute (CLSI)}. \url{https://clsi.org/standards/products/microbiology/documents/m39/}.
 #' @examples
 #' # septic_patients is a dataset available in the AMR package
 #' ?septic_patients
--- a/R/resistance.R
+++ b/R/resistance.R
@@ -18,19 +18,22 @@

 #' Calculate resistance of isolates
 #'
-#' These functions can be used to calculate the (co-)resistance of microbial isolates (i.e. percentage S, SI, I, IR or R). All functions can be used in \code{dplyr}s \code{\link[dplyr]{summarise}} and support grouped variables, see \emph{Examples}.
-#' @param ab,ab1,ab2 vector of antibiotic interpretations, they will be transformed internally with \code{\link{as.rsi}}
+#' These functions can be used to calculate the (co-)resistance of microbial isolates (i.e. percentage S, SI, IR or R). All functions can be used in \code{dplyr}s \code{\link[dplyr]{summarise}} and support grouped variables, see \emph{Examples}. \cr\cr
+#' \code{R} and \code{IR} can be used to calculate resistance, \code{S} and \code{SI} can be used to calculate susceptibility.\cr
+#' \code{n_rsi} counts all cases where antimicrobial interpretations are available.
+#' @param ab1 vector of antibiotic interpretations, they will be transformed internally with \code{\link{as.rsi}}
+#' @param ab2 like \code{ab}, a vector of antibiotic interpretations. Use this to calculate (the lack of) co-resistance: the probability where one of two drugs have a susceptible result. See Examples.
 #' @param include_I logical to indicate whether antimicrobial interpretations of "I" should be included
-#' @param minimum minimal amount of available isolates. Any number lower than \code{minimum} will return \code{NA}.
+#' @param minimum minimal amount of available isolates. Any number lower than \code{minimum} will return \code{NA}. The default number of \code{30} isolates is advised by the CLSI as best practice, see Source.
 #' @param as_percent logical to indicate whether the output must be returned as percent (text), will else be a double
 #' @param interpretation antimicrobial interpretation
 #' @param info \emph{DEPRECATED} calculate the amount of available isolates and print it, like \code{n = 423}
 #' @param warning \emph{DEPRECATED} show a warning when the available amount of isolates is below \code{minimum}
 #' @details \strong{Remember that you should filter your table to let it contain only first isolates!} Use \code{\link{first_isolate}} to determine them in your data set.
 #'
-#' The functions \code{resistance}, \code{susceptibility} and \code{n_rsi} calculate using hybrid evaluation (i.e. using C++), which makes these functions 25-30 times faster than the old \code{rsi} function. This function is still available for backwards compatibility but is deprecated.
+#' The functions \code{resistance} and \code{susceptibility} are wrappers around \code{IR} and \code{S}, respectively. All functions except \code{rsi} use hybrid evaluation (i.e. using C++), which makes these functions 20-30 times faster than the old \code{rsi} function. This latter function is still available for backwards compatibility but is deprecated.
 #' \if{html}{
-#'   \cr
+#'   \cr\cr
 #'   To calculate the probability (\emph{p}) of susceptibility of one antibiotic, we use this formula:
 #'   \out{<div style="text-align: center">}\figure{mono_therapy.png}\out{</div>}
 #'   To calculate the probability (\emph{p}) of susceptibility of more antibiotics (i.e. combination therapy), we need to check whether one of them has a susceptible result (as numerator) and count all cases where all antibiotics were tested (as denominator). \cr
@@ -41,88 +44,143 @@
 #'   Theoretically for three antibiotics:
 #'   \out{<div style="text-align: center">}\figure{combi_therapy_3.png}\out{</div>}
 #' }
-#' @keywords resistance susceptibility rsi_df antibiotics isolate isolates
+#' @source \strong{M39 Analysis and Presentation of Cumulative Antimicrobial Susceptibility Test Data, 4th Edition}, 2014, \emph{Clinical and Laboratory Standards Institute (CLSI)}. \url{https://clsi.org/standards/products/microbiology/documents/m39/}.
+#' @keywords resistance susceptibility rsi_df rsi antibiotics isolate isolates
 #' @return Double or, when \code{as_percent = TRUE}, a character.
 #' @rdname resistance
 #' @export
 #' @examples
-#' library(dplyr)
-#'
-#' septic_patients %>%
-#'   group_by(hospital_id) %>%
-#'   summarise(p = susceptibility(cipr),
-#'             n = n_rsi(cipr)) # n_rsi works like n_distinct in dplyr
-#'
-#' septic_patients %>%
-#'   group_by(hospital_id) %>%
-#'   summarise(cipro_p = susceptibility(cipr, as_percent = TRUE),
-#'             cipro_n = n_rsi(cipr),
-#'             genta_p = susceptibility(gent, as_percent = TRUE),
-#'             genta_n = n_rsi(gent),
-#'             combination_p = susceptibility(cipr, gent, as_percent = TRUE),
-#'             combination_n = n_rsi(cipr, gent))
-#'
-#'
 #' # Calculate resistance
-#' resistance(septic_patients$amox)
-#' rsi(septic_patients$amox, interpretation = "IR") # deprecated
+#' R(septic_patients$amox)
+#' IR(septic_patients$amox)
 #'
 #' # Or susceptibility
-#' susceptibility(septic_patients$amox)
-#' rsi(septic_patients$amox, interpretation = "S") # deprecated
+#' S(septic_patients$amox)
+#' SI(septic_patients$amox)
 #'
+#' # Since n_rsi counts available isolates (and is used as denominator),
+#' # you can calculate back to e.g. count resistant isolates:
+#' IR(septic_patients$amox) * n_rsi(septic_patients$amox)
+#'
+#' library(dplyr)
+#' septic_patients %>%
+#'   group_by(hospital_id) %>%
+#'   summarise(p = S(cipr),
+#'             n = n_rsi(cipr)) # n_rsi works like n_distinct in dplyr
 #'
 #' # Calculate co-resistance between amoxicillin/clav acid and gentamicin,
 #' # so we can see that combination therapy does a lot more than mono therapy:
-#' susceptibility(septic_patients$amcl) # p = 67.8%
-#' n_rsi(septic_patients$amcl)          # n = 1641
+#' S(septic_patients$amcl)     # p = 67.3%
+#' n_rsi(septic_patients$amcl) # n = 1570
 #'
-#' susceptibility(septic_patients$gent) # p = 69.1%
-#' n_rsi(septic_patients$gent)          # n = 1863
+#' S(septic_patients$gent)     # p = 74.0%
+#' n_rsi(septic_patients$gent) # n = 1842
 #'
 #' with(septic_patients,
-#'      susceptibility(amcl, gent))     # p = 90.6%
+#'      S(amcl, gent))         # p = 92.1%
 #' with(septic_patients,
-#'      n_rsi(amcl, gent))              # n = 1580
+#'      n_rsi(amcl, gent))     # n = 1504
+#'
+#' septic_patients %>%
+#'   group_by(hospital_id) %>%
+#'   summarise(cipro_p = S(cipr, as_percent = TRUE),
+#'             cipro_n = n_rsi(cipr),
+#'             genta_p = S(gent, as_percent = TRUE),
+#'             genta_n = n_rsi(gent),
+#'             combination_p = S(cipr, gent, as_percent = TRUE),
+#'             combination_n = n_rsi(cipr, gent))
 #'
 #' \dontrun{
+#'
 #' # calculate current empiric combination therapy of Helicobacter gastritis:
 #' my_table %>%
 #'   filter(first_isolate == TRUE,
 #'          genus == "Helicobacter") %>%
-#'   summarise(p = susceptibility(amox, metr), # amoxicillin with metronidazole
+#'   summarise(p = S(amox, metr),     # amoxicillin with metronidazole
 #'             n = n_rsi(amox, metr))
-#'
-#'
-#' # How fast is this hybrid evaluation in C++ compared to R?
-#' # In other words: how is the speed improvement of the new `resistance` compared to old `rsi`?
-#'
-#' library(microbenchmark)
-#' df <- septic_patients %>% group_by(hospital_id, bactid) # 317 groups with sizes 1 to 167
-#'
-#' microbenchmark(old_IR = df %>% summarise(p = rsi(amox, minimum = 0, interpretation = "IR")),
-#'                new_IR = df %>% summarise(p = resistance(amox, minimum = 0)),
-#'                old_S = df %>% summarise(p = rsi(amox, minimum = 0, interpretation = "S")),
-#'                new_S = df %>% summarise(p = susceptibility(amox, minimum = 0)),
-#'                times = 5,
-#'                unit = "s")
-#'
-#' # Unit: seconds
-#' #    expr          min         lq       mean     median         uq        max neval
-#' #  old_IR   1.95600230 1.96096857 1.97981537 1.96823318 2.00645711 2.00741568     5
-#' #  new_IR   0.06872808 0.06984932 0.07162866 0.06987306 0.07050094 0.07919192     5
-#' #   old_S   1.68893579 1.69024888 1.72461867 1.69785934 1.70428796 1.84176137     5
-#' #   new_S   0.06737037 0.06838167 0.07431906 0.07745364 0.07827224 0.08011738     5
-#'
-#' # The old function took roughly 2 seconds, the new ones take 0.07 seconds.
 #' }
-resistance <- function(ab,
+#' @rdname resistance
+#' @name resistance
+#' @export
+#' @rdname resistance
+#' @export
+S <- function(ab1,
+              ab2 = NULL,
+              minimum = 30,
+              as_percent = FALSE) {
+  susceptibility(ab1 = ab1,
+                 ab2 = ab2,
+                 include_I = FALSE,
+                 minimum = minimum,
+                 as_percent = as_percent)
+}
+
+#' @rdname resistance
+#' @export
+SI <- function(ab1,
+               ab2 = NULL,
+               minimum = 30,
+               as_percent = FALSE) {
+  susceptibility(ab1 = ab1,
+                 ab2 = ab2,
+                 include_I = TRUE,
+                 minimum = minimum,
+                 as_percent = as_percent)
+}
+
+#' @rdname resistance
+#' @export
+IR <- function(ab1,
+               minimum = 30,
+               as_percent = FALSE) {
+  resistance(ab1 = ab1,
+             include_I = TRUE,
+             minimum = minimum,
+             as_percent = as_percent)
+}
+
+#' @rdname resistance
+#' @export
+R <- function(ab1,
+              minimum = 30,
+              as_percent = FALSE) {
+  resistance(ab1 = ab1,
+             include_I = FALSE,
+             minimum = minimum,
+             as_percent = as_percent)
+}
+
+#' @rdname resistance
+#' @export
+n_rsi <- function(ab1, ab2 = NULL) {
+  if (NCOL(ab1) > 1) {
+    stop('`ab` must be a vector of antimicrobial interpretations', call. = FALSE)
+  }
+  if (!is.rsi(ab1)) {
+    ab1 <- as.rsi(ab1)
+  }
+  if (!is.null(ab2)) {
+    if (NCOL(ab2) > 1) {
+      stop('`ab2` must be a vector of antimicrobial interpretations', call. = FALSE)
+    }
+    if (!is.rsi(ab2)) {
+      ab2 <- as.rsi(ab2)
+    }
+    sum(!is.na(ab1) & !is.na(ab2))
+  } else {
+    sum(!is.na(ab1))
+  }
+}
+
+#' @rdname resistance
+#' @export
+resistance <- function(ab1,
                       include_I = TRUE,
                       minimum = 30,
                       as_percent = FALSE) {

-  if (NCOL(ab) > 1) {
-    stop('`ab` must be a vector of antimicrobial interpretations', call. = FALSE)
+  if (NCOL(ab1) > 1) {
+    stop('`ab1` must be a vector of antimicrobial interpretations', call. = FALSE)
  }
  if (!is.logical(include_I)) {
    stop('`include_I` must be logical', call. = FALSE)
@@ -134,14 +192,17 @@ resistance <- function(ab,
    stop('`as_percent` must be logical', call. = FALSE)
  }

-  if (!is.rsi(ab)) {
-    x <- as.rsi(ab)
+  # ab_name <- deparse(substitute(ab))
+
+  if (!is.rsi(ab1)) {
+    x <- as.rsi(ab1)
    warning("Increase speed by transforming to class `rsi` on beforehand: df %>% mutate_at(vars(col10:col20), as.rsi)")
  } else {
-    x <- ab
+    x <- ab1
  }
  total <- length(x) - sum(is.na(x)) # faster than C++
  if (total < minimum) {
+    # warning("Too few isolates available for ", ab_name, ": ", total, " < ", minimum, "; returning NA.", call. = FALSE)
    return(NA)
  }
  found <- .Call(`_AMR_rsi_calc_R`, x, include_I)
@@ -180,6 +241,7 @@ susceptibility <- function(ab1,
    print_warning <- TRUE
  }
  if (!is.null(ab2)) {
+    # ab_name <- paste(deparse(substitute(ab1)), "and", deparse(substitute(ab2)))
    if (NCOL(ab2) > 1) {
      stop('`ab2` must be a vector of antimicrobial interpretations', call. = FALSE)
    }
@@ -193,9 +255,11 @@ susceptibility <- function(ab1,
               FUN = min)
  } else {
    x <- ab1
+    # ab_name <- deparse(substitute(ab1))
  }
  total <- length(x) - sum(is.na(x))
  if (total < minimum) {
+    # warning("Too few isolates available for ", ab_name, ": ", total, " < ", minimum, "; returning NA.", call. = FALSE)
    return(NA)
  }
  found <- .Call(`_AMR_rsi_calc_S`, x, include_I)
@@ -211,28 +275,6 @@ susceptibility <- function(ab1,
  }
 }

-#' @rdname resistance
-#' @export
-n_rsi <- function(ab1, ab2 = NULL) {
-  if (NCOL(ab1) > 1) {
-    stop('`ab1` must be a vector of antimicrobial interpretations', call. = FALSE)
-  }
-  if (!is.rsi(ab1)) {
-    ab1 <- as.rsi(ab1)
-  }
-  if (!is.null(ab2)) {
-    if (NCOL(ab2) > 1) {
-      stop('`ab2` must be a vector of antimicrobial interpretations', call. = FALSE)
-    }
-    if (!is.rsi(ab2)) {
-      ab2 <- as.rsi(ab2)
-    }
-    sum(!is.na(ab1) & !is.na(ab2))
-  } else {
-    sum(!is.na(ab1))
-  }
-}
-
 #' @rdname resistance
 #' @export
 rsi <- function(ab1,
@@ -242,6 +284,9 @@ rsi <- function(ab1,
                as_percent = FALSE,
                info = FALSE,
                warning = TRUE) {
+
+  .Deprecated()
+
  ab1.name <- deparse(substitute(ab1))
  if (ab1.name %like% '.[$].') {
    ab1.name <- unlist(strsplit(ab1.name, "$", fixed = TRUE))