Welcome C++!

2025-07-06 18:38:43 +02:00 · 2018-07-13 17:23:46 +02:00 · 2018-07-13 17:23:46 +02:00 · b6655f6454
commit b6655f6454
parent a28289562a
19 changed files with 469 additions and 435 deletions
--- a/.gitignore
+++ b/.gitignore
@ -5,3 +5,10 @@
 AMR.Rproj
 tests/testthat/Rplots.pdf
 inst/doc
+/src/*.o
+/src/*.o-*
+/src/*.d
+/src/*.so
+*.dll
+vignettes/*.R
+.DS_Store
--- a/6
+++ b/6
@ -1,6 +1,6 @@
 Package: AMR
-Version: 0.2.0.9010
-Date: 2018-07-11
+Version: 0.2.0.9011
+Date: 2018-07-13
 Title: Antimicrobial Resistance Analysis
 Authors@R: c(
    person(
@ -35,6 +35,7 @@ Imports:
    reshape2 (>= 1.4.0),
    xml2 (>= 1.0.0),
    knitr (>= 1.0.0),
+    Rcpp (>= 0.12.17),
    readr,
    rvest (>= 0.3.2),
    tibble
@ -44,6 +45,7 @@ Suggests:
    rmarkdown,
    rstudioapi,
    tidyr
+LinkingTo: Rcpp
 VignetteBuilder: knitr
 URL: https://github.com/msberends/AMR
 BugReports: https://github.com/msberends/AMR/issues
--- a/10
+++ b/10
@ -57,12 +57,14 @@ export(mo_property)
 export(n_rsi)
 export(p.symbol)
 export(ratio)
+export(resistance)
+export(resistance_predict)
 export(right_join_microorganisms)
 export(rsi)
-export(rsi_df)
 export(rsi_predict)
 export(semi_join_microorganisms)
 export(skewness)
+export(susceptibility)
 export(top_freq)
 exportMethods(as.data.frame.frequency_tbl)
 exportMethods(as.double.mic)
@ -89,20 +91,18 @@ exportMethods(skewness.default)
 exportMethods(skewness.matrix)
 exportMethods(summary.mic)
 exportMethods(summary.rsi)
+importFrom(Rcpp,evalCpp)
 importFrom(broom,tidy)
 importFrom(clipr,read_clip_tbl)
 importFrom(clipr,write_clip)
 importFrom(curl,nslookup)
 importFrom(dplyr,"%>%")
-importFrom(dplyr,all_vars)
-importFrom(dplyr,any_vars)
 importFrom(dplyr,arrange)
 importFrom(dplyr,arrange_at)
 importFrom(dplyr,as_tibble)
 importFrom(dplyr,between)
 importFrom(dplyr,desc)
 importFrom(dplyr,filter)
-importFrom(dplyr,filter_at)
 importFrom(dplyr,group_by)
 importFrom(dplyr,group_by_at)
 importFrom(dplyr,if_else)
@ -118,7 +118,6 @@ importFrom(dplyr,slice)
 importFrom(dplyr,summarise)
 importFrom(dplyr,tibble)
 importFrom(dplyr,top_n)
-importFrom(dplyr,vars)
 importFrom(grDevices,boxplot.stats)
 importFrom(graphics,axis)
 importFrom(graphics,barplot)
@ -148,3 +147,4 @@ importFrom(utils,packageDescription)
 importFrom(utils,read.delim)
 importFrom(utils,write.table)
 importFrom(xml2,read_html)
+useDynLib(AMR, .registration = TRUE)
--- a/NEWS.md
+++ b/NEWS.md
@ -1,14 +1,15 @@
 # 0.2.0.90xx (development version)
 #### New
+* **BREAKING**: `rsi_df` was removed in favour of new functions `resistance` and `susceptibility`. Now, all functions used to calculate resistance (`resistance` and `susceptibility`) or count isolates (`n_rsi`) use **hybrid evaluation**. This means calculations are not done in R directly but rather in C++ using the `Rcpp` package, making them 60 to 65 times faster. The function `rsi` still works, but is deprecated.
 * Support for Addins menu in RStudio to quickly insert `%in%` or `%like%` (and give them keyboard shortcuts), or to view the datasets that come with this package
-* For convience, descriptive statistical functions `kurtosis` and `skewness` that are lacking in base R - they are generic functions and have support for vectors, data.frames and matrices
-* Function `g.test` to perform the Χ<sup>2</sup> distributed [*G*-test](https://en.wikipedia.org/wiki/G-test), which use is the same as `chisq.test`
-* Function `ratio` to transform a vector of values to a preset ratio. For example:
+* For convience, new descriptive statistical functions `kurtosis` and `skewness` that are lacking in base R - they are generic functions and have support for vectors, data.frames and matrices
+* Function `g.test` as added to perform the Χ<sup>2</sup> distributed [*G*-test](https://en.wikipedia.org/wiki/G-test), which use is the same as `chisq.test`
+* Function `ratio` was added to transform a vector of values to a preset ratio. For example:
 ```r
 ratio(c(772, 1611, 737), ratio = "1:2:1")
 # [1]  780 1560  780
 ```
-* Function `p.symbol` to transform p value to their related symbol: `0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1`
+* Function `p.symbol` was added to transform p values to their related symbols: `0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1`
 * New for frequency tables (function `freq`):
  * A vignette to explain its usage
  * Support for `table` to use as input: `freq(table(x, y))`
--- a/R/RcppExports.R
+++ b/R/RcppExports.R
@ -0,0 +1,15 @@
+# Generated by using Rcpp::compileAttributes() -> do not edit by hand
+# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
+
+rsi_calc_S <- function(x, include_I) {
+    .Call(`_AMR_rsi_calc_S`, x, include_I)
+}
+
+rsi_calc_R <- function(x, include_I) {
+    .Call(`_AMR_rsi_calc_R`, x, include_I)
+}
+
+rsi_calc_total <- function(x) {
+    .Call(`_AMR_rsi_calc_total`, x)
+}
+
--- a/R/classes.R
+++ b/R/classes.R
@ -22,6 +22,7 @@
 #' @rdname as.rsi
 #' @param x vector
 #' @return Ordered factor with new class \code{rsi} and new attributes \code{package} and \code{package.version}
+#' @keywords rsi
 #' @export
 #' @importFrom dplyr %>%
 #' @importFrom utils packageDescription
@ -200,6 +201,7 @@ barplot.rsi <- function(height, ...) {
 #' @param x vector
 #' @param na.rm a logical indicating whether missing values should be removed
 #' @return Ordered factor with new class \code{mic} and new attributes \code{package} and \code{package.version}
+#' @keywords mic
 #' @export
 #' @importFrom dplyr %>%
 #' @importFrom utils packageDescription
--- a/R/g.test.R
+++ b/R/g.test.R
@ -24,7 +24,7 @@
 #'
 #'   If \code{x} is a matrix with at least two rows and columns, it is taken as a two-dimensional contingency table: the entries of \code{x} must be non-negative integers.  Otherwise, \code{x} and \code{y} must be vectors or factors of the same length; cases with missing values are removed, the objects are coerced to factors, and the contingency table is computed from these.  Then Pearson's chi-squared test is performed of the null hypothesis that the joint distribution of the cell counts in a 2-dimensional contingency table is the product of the row and column marginals.
 #'
-#'   If \code{simulate.p.value} is \code{FALSE}, the p-value is computed from the asymptotic chi-squared distribution of the test statistic. Otherwise the p-value is computed for a Monte Carlo test (Hope, 1968) with \code{B} replicates.
+#'   The p-value is computed from the asymptotic chi-squared distribution of the test statistic.
 #'
 #'   In the contingency table case simulation is done by random sampling from the set of all contingency tables with given marginals, and works only if the marginals are strictly positive. Note that this is not the usual sampling situation assumed for a chi-squared test (like the \emph{G}-test) but rather that for Fisher's exact test.
 #'
@ -61,7 +61,12 @@
 #' @keywords chi
 #' @seealso \code{\link{chisq.test}}
 #' @references [1] McDonald, J.H. 2014. \strong{Handbook of Biological Statistics (3rd ed.)}. Sparky House Publishing, Baltimore, Maryland. \url{http://www.biostathandbook.com/gtestgof.html}.
-#' @source This code is almost perfectly equal to \code{\link{chisq.test}}; only the calculation of the statistic was changed to \code{2 * sum(x * log(x / E))} and Yates' continuity correction was deleted.
+#' @source This code is almost identical to \code{\link{chisq.test}}, except that:
+#' \itemize{
+#'   \item{The calculation of the statistic was changed to \code{2 * sum(x * log(x / E))}}
+#'   \item{Yates' continuity correction was removed as it does not apply to a \emph{G}-test}
+#'   \item{The possibility to simulate p values with \code{simulate.p.value} was removed}
+#' }
 #' @export
 #' @importFrom stats pchisq complete.cases
 #' @examples
@ -105,9 +110,7 @@ g.test <- function(x,
                    y = NULL,
                    # correct = TRUE,
                    p = rep(1/length(x), length(x)),
-                    rescale.p = FALSE,
-                    simulate.p.value = FALSE,
-                    B = 2000) {
+                    rescale.p = FALSE) {
  DNAME <- deparse(substitute(x))
  if (is.data.frame(x))
    x <- as.matrix(x)
@ -141,11 +144,11 @@ g.test <- function(x,
    stop("all entries of 'x' must be nonnegative and finite")
  if ((n <- sum(x)) == 0)
    stop("at least one entry of 'x' must be positive")
-  if (simulate.p.value) {
-    setMETH <- function() METHOD <<- paste(METHOD, "with simulated p-value\n\t (based on",
-                                           B, "replicates)")
-    almost.1 <- 1 - 64 * .Machine$double.eps
-  }
+  # if (simulate.p.value) {
+  #   setMETH <- function() METHOD <<- paste(METHOD, "with simulated p-value\n\t (based on",
+  #                                          B, "replicates)")
+  #   almost.1 <- 1 - 64 * .Machine$double.eps
+  # }
  if (is.matrix(x)) {
    METHOD <- "G-test of independence"
    nr <- as.integer(nrow(x))
@ -158,17 +161,17 @@ g.test <- function(x,
    v <- function(r, c, n) c * r * (n - r) * (n - c)/n^3
    V <- outer(sr, sc, v, n)
    dimnames(E) <- dimnames(x)
-    if (simulate.p.value && all(sr > 0) && all(sc > 0)) {
-      setMETH()
-      tmp <- .Call(C_chisq_sim, sr, sc, B, E)
-      STATISTIC <- 2 * sum(x * log(x / E)) # sum(sort((x - E)^2/E, decreasing = TRUE)) for chisq.test
-      PARAMETER <- NA
-      PVAL <- (1 + sum(tmp >= almost.1 * STATISTIC))/(B +
-                                                        1)
-    }
-    else {
-      if (simulate.p.value)
-        warning("cannot compute simulated p-value with zero marginals")
+    # if (simulate.p.value && all(sr > 0) && all(sc > 0)) {
+    #   setMETH()
+    #   tmp <- .Call(chisq_sim, sr, sc, B, E, PACKAGE = "stats")
+    #   STATISTIC <- 2 * sum(x * log(x / E)) # sum(sort((x - E)^2/E, decreasing = TRUE)) for chisq.test
+    #   PARAMETER <- NA
+    #   PVAL <- (1 + sum(tmp >= almost.1 * STATISTIC))/(B +
+    #                                                     1)
+    # }
+    # else {
+      # if (simulate.p.value)
+      #   warning("cannot compute simulated p-value with zero marginals")
      # if (correct && nrow(x) == 2L && ncol(x) == 2L) {
      #   YATES <- min(0.5, abs(x - E))
      #   if (YATES > 0)
@ -178,7 +181,7 @@ g.test <- function(x,
      STATISTIC <- 2 * sum(x * log(x / E)) # sum((abs(x - E) - YATES)^2/E) for chisq.test
      PARAMETER <- (nr - 1L) * (nc - 1L)
      PVAL <- pchisq(STATISTIC, PARAMETER, lower.tail = FALSE)
-    }
+   # }
  }
  else {
    if (length(dim(x)) > 2L)
@ -199,22 +202,22 @@ g.test <- function(x,
    V <- n * p * (1 - p)
    STATISTIC <- 2 * sum(x * log(x / E)) # sum((x - E)^2/E) for chisq.test
    names(E) <- names(x)
-    if (simulate.p.value) {
-      setMETH()
-      nx <- length(x)
-      sm <- matrix(sample.int(nx, B * n, TRUE, prob = p),
-                   nrow = n)
-      ss <- apply(sm, 2L, function(x, E, k) {
-        sum((table(factor(x, levels = 1L:k)) - E)^2/E)
-      }, E = E, k = nx)
-      PARAMETER <- NA
-      PVAL <- (1 + sum(ss >= almost.1 * STATISTIC))/(B +
-                                                       1)
-    }
-    else {
+    # if (simulate.p.value) {
+    #   setMETH()
+    #   nx <- length(x)
+    #   sm <- matrix(sample.int(nx, B * n, TRUE, prob = p),
+    #                nrow = n)
+    #   ss <- apply(sm, 2L, function(x, E, k) {
+    #     sum((table(factor(x, levels = 1L:k)) - E)^2/E)
+    #   }, E = E, k = nx)
+    #   PARAMETER <- NA
+    #   PVAL <- (1 + sum(ss >= almost.1 * STATISTIC))/(B +
+    #                                                    1)
+    # }
+    # else {
      PARAMETER <- length(x) - 1
      PVAL <- pchisq(STATISTIC, PARAMETER, lower.tail = FALSE)
-    }
+    # }
  }
  names(STATISTIC) <- "X-squared"
  names(PARAMETER) <- "df"
--- a/R/rsi_analysis.R
+++ b/R/rsi_analysis.R
@ -16,287 +16,198 @@
 # GNU General Public License for more details.                         #
 # ==================================================================== #

-#' Resistance of isolates
+#' Calculate resistance of isolates
 #'
-#' This functions can be used to calculate the (co-)resistance of isolates (i.e. percentage S, SI, I, IR or R [of a vector] of isolates). The functions \code{rsi} and \code{n_rsi} can be used in \code{dplyr}s \code{\link[dplyr]{summarise}} and support grouped variables, see \emph{Examples}.
-#' @param tbl \code{data.frame} containing columns with antibiotic interpretations.
-#' @param ab character vector with 1, 2 or 3 antibiotics that occur as column names in \code{tbl}, like \code{ab = c("amox", "amcl")}
-#' @param ab1,ab2 vector of antibiotic interpretations, they will be transformed internally with \code{\link{as.rsi}}
-#' @param interpretation antimicrobial interpretation of which the portion must be calculated. Valid values are \code{"S"}, \code{"SI"}, \code{"I"}, \code{"IR"} or \code{"R"}.
-#' @param minimum minimal amount of available isolates. Any number lower than \code{minimum} will return \code{NA} with a warning (when \code{warning = TRUE}).
-#' @param as_percent return output as percent (text), will else (at default) be a double
-#' @param info calculate the amount of available isolates and print it, like \code{n = 423}
-#' @param warning show a warning when the available amount of isolates is below \code{minimum}
-#' @details Remember that you should filter your table to let it contain \strong{only first isolates}!
+#' These functions can be used to calculate the (co-)resistance of microbial isolates (i.e. percentage S, SI, I, IR or R). All functions can be used in \code{dplyr}s \code{\link[dplyr]{summarise}} and support grouped variables, see \emph{Examples}.
+#' @param ab,ab1,ab2 vector of antibiotic interpretations, they will be transformed internally with \code{\link{as.rsi}}
+#' @param include_I logical to indicate whether antimicrobial interpretations of "I" should be included
+#' @param minimum minimal amount of available isolates. Any number lower than \code{minimum} will return \code{NA}.
+#' @param as_percent logical to indicate whether the output must be returned as percent (text), will else be a double
+#' @param interpretation antimicrobial interpretation
+#' @details \strong{Remember that you should filter your table to let it contain only first isolates!} Use \code{\link{first_isolate}} to determine them in your data set.
+#'
+#' All return values are calculated using hybrid evaluation (i.e. using C++), which makes these functions 60-65 times faster than in \code{AMR} v0.2.0 and below. The \code{rsi} function is available for backwards compatibility and deprecated. It now uses the \code{resistance} and \code{susceptibility} functions internally, based on the \code{interpretation} parameter.
 #' \if{html}{
-#'   \cr \cr
+#'   \cr
 #'   To calculate the probability (\emph{p}) of susceptibility of one antibiotic, we use this formula:
 #'   \out{<div style="text-align: center">}\figure{mono_therapy.png}\out{</div>}
 #'   To calculate the probability (\emph{p}) of susceptibility of more antibiotics (i.e. combination therapy), we need to check whether one of them has a susceptible result (as numerator) and count all cases where all antibiotics were tested (as denominator). \cr
+#'   \cr
 #'   For two antibiotics:
 #'   \out{<div style="text-align: center">}\figure{combi_therapy_2.png}\out{</div>}
 #'   \cr
-#'   For three antibiotics:
+#'   Theoretically for three antibiotics:
 #'   \out{<div style="text-align: center">}\figure{combi_therapy_3.png}\out{</div>}
 #' }
-#' @keywords rsi antibiotics isolate isolates
+#' @keywords resistance susceptibility rsi_df antibiotics isolate isolates
 #' @return Double or, when \code{as_percent = TRUE}, a character.
-#' @rdname rsi
+#' @rdname resistance
 #' @export
-#' @importFrom dplyr %>% n_distinct filter filter_at pull vars all_vars any_vars
 #' @examples
 #' library(dplyr)
 #'
 #' septic_patients %>%
 #'   group_by(hospital_id) %>%
-#'   summarise(cipro_susceptibility = rsi(cipr, interpretation = "S"),
+#'   summarise(p = susceptibility(cipr),
 #'             n = n_rsi(cipr)) # n_rsi works like n_distinct in dplyr
 #'
 #' septic_patients %>%
 #'   group_by(hospital_id) %>%
-#'   summarise(cipro_S = rsi(cipr, interpretation = "S",
-#'                           as_percent = TRUE, warning = FALSE),
+#'   summarise(cipro_p = susceptibility(cipr, as_percent = TRUE),
 #'             cipro_n = n_rsi(cipr),
-#'             genta_S = rsi(gent, interpretation = "S",
-#'                           as_percent = TRUE, warning = FALSE),
+#'             genta_p = susceptibility(gent, as_percent = TRUE),
 #'             genta_n = n_rsi(gent),
-#'             combination_S = rsi(cipr, gent, interpretation = "S",
-#'                                 as_percent = TRUE, warning = FALSE),
+#'             combination_p = susceptibility(cipr, gent, as_percent = TRUE),
 #'             combination_n = n_rsi(cipr, gent))
 #'
-#' # calculate resistance
-#' rsi(septic_patients$amox)
-#' # or susceptibility
-#' rsi(septic_patients$amox, interpretation = "S")
 #'
-#' # calculate co-resistance between amoxicillin/clav acid and gentamicin,
-#' # so we can review that combination therapy does a lot more than mono therapy:
-#' septic_patients %>% rsi_df(ab = "amcl", interpretation = "S") # = 67.8%
-#' septic_patients %>% rsi_df(ab = "gent", interpretation = "S") # = 69.1%
-#' septic_patients %>% rsi_df(ab = c("amcl", "gent"), interpretation = "S") # = 90.6%
+#' # Calculate resistance
+#' resistance(septic_patients$amox)
+#' rsi(septic_patients$amox, interpretation = "IR") # deprecated
+#'
+#' # Or susceptibility
+#' susceptibility(septic_patients$amox)
+#' rsi(septic_patients$amox, interpretation = "S") # deprecated
+#'
+#'
+#' # Calculate co-resistance between amoxicillin/clav acid and gentamicin,
+#' # so we can see that combination therapy does a lot more than mono therapy:
+#' susceptibility(septic_patients$amcl) # p = 67.8%
+#' n_rsi(septic_patients$amcl)          # n = 1641
+#'
+#' susceptibility(septic_patients$gent) # p = 69.1%
+#' n_rsi(septic_patients$gent)          # n = 1863
+#'
+#' with(septic_patients,
+#'      susceptibility(amcl, gent))     # p = 90.6%
+#' with(septic_patients,
+#'      n_rsi(amcl, gent))              # n = 1580
 #'
 #' \dontrun{
 #' # calculate current empiric combination therapy of Helicobacter gastritis:
 #' my_table %>%
 #'   filter(first_isolate == TRUE,
 #'          genus == "Helicobacter") %>%
-#'   rsi_df(ab = c("amox", "metr")) # amoxicillin with metronidazole
+#'   summarise(p = susceptibility(amox, metr), # amoxicillin with metronidazole
+#'             n = n_rsi(amox, metr))
 #' }
+resistance <- function(ab,
+                       include_I = TRUE,
+                       minimum = 30,
+                       as_percent = FALSE) {
+
+  if (NCOL(ab) > 1) {
+    stop('`ab` must be a vector of antimicrobial interpretations', call. = FALSE)
+  }
+  if (!is.logical(include_I)) {
+    stop('`include_I` must be logical', call. = FALSE)
+  }
+  if (!is.numeric(minimum)) {
+    stop('`minimum` must be numeric', call. = FALSE)
+  }
+  if (!is.logical(as_percent)) {
+    stop('`as_percent` must be logical', call. = FALSE)
+  }
+
+  x <- as.integer(as.rsi(ab))
+  total <-  .Call(`_AMR_rsi_calc_total`, x)
+  if (total < minimum) {
+    return(NA)
+  }
+  found <- .Call(`_AMR_rsi_calc_R`, x, include_I)
+
+  if (as_percent == TRUE) {
+    percent(found / total, force_zero = TRUE)
+  } else {
+    found / total
+  }
+}
+
+#' @rdname resistance
+#' @export
+susceptibility <- function(ab1,
+                           ab2 = NULL,
+                           include_I = FALSE,
+                           minimum = 30,
+                           as_percent = FALSE) {
+
+  if (NCOL(ab1) > 1) {
+    stop('`ab1` must be a vector of antimicrobial interpretations', call. = FALSE)
+  }
+  if (!is.logical(include_I)) {
+    stop('`include_I` must be logical', call. = FALSE)
+  }
+  if (!is.numeric(minimum)) {
+    stop('`minimum` must be numeric', call. = FALSE)
+  }
+  if (!is.logical(as_percent)) {
+    stop('`as_percent` must be logical', call. = FALSE)
+  }
+
+  if (!is.null(ab2)) {
+    if (NCOL(ab2) > 1) {
+      stop('`ab2` must be a vector of antimicrobial interpretations', call. = FALSE)
+    }
+    x <- apply(X = data.frame(ab1 = as.integer(as.rsi(ab1)),
+                              ab2 = as.integer(as.rsi(ab2))),
+               MARGIN = 1,
+               FUN = min)
+  } else {
+    x <- as.integer(as.rsi(ab1))
+  }
+  total <-  .Call(`_AMR_rsi_calc_total`, x)
+  if (total < minimum) {
+    return(NA)
+  }
+  found <- .Call(`_AMR_rsi_calc_S`, x, include_I)
+
+  if (as_percent == TRUE) {
+    percent(found / total, force_zero = TRUE)
+  } else {
+    found / total
+  }
+}
+
+#' @rdname resistance
+#' @export
+n_rsi <- function(ab1, ab2 = NULL) {
+  if (NCOL(ab1) > 1) {
+    stop('`ab1` must be a vector of antimicrobial interpretations', call. = FALSE)
+  }
+  if (!is.null(ab2)) {
+    if (NCOL(ab2) > 1) {
+      stop('`ab2` must be a vector of antimicrobial interpretations', call. = FALSE)
+    }
+    x <- apply(X = data.frame(ab1 = as.integer(as.rsi(ab1)),
+                              ab2 = as.integer(as.rsi(ab2))),
+               MARGIN = 1,
+               FUN = min)
+  } else {
+    x <- as.integer(as.rsi(ab1))
+  }
+  .Call(`_AMR_rsi_calc_total`, x)
+}
+
+
+#' @rdname resistance
+#' @export
 rsi <- function(ab1,
-                ab2 = NA,
-                interpretation = 'IR',
+                ab2 = NULL,
+                interpretation = "IR",
                minimum = 30,
-                as_percent = FALSE,
-                info = FALSE,
-                warning = TRUE) {
-  ab1.name <- deparse(substitute(ab1))
-  if (ab1.name %like% '.[$].') {
-    ab1.name <- unlist(strsplit(ab1.name, "$", fixed = TRUE))
-    ab1.name <- ab1.name[length(ab1.name)]
-  }
-  if (!ab1.name %like% '^[a-z]{3,4}$') {
-    ab1.name <- 'rsi1'
-  }
-  if (length(ab1) == 1 & is.character(ab1)) {
-    stop('`ab1` must be a vector of antibiotic interpretations.',
-         '\n  Try rsi(', ab1, ', ...) instead of rsi("', ab1, '", ...)', call. = FALSE)
-  }
-  ab2.name <- deparse(substitute(ab2))
-  if (ab2.name %like% '.[$].') {
-    ab2.name <- unlist(strsplit(ab2.name, "$", fixed = TRUE))
-    ab2.name <- ab2.name[length(ab2.name)]
-  }
-  if (!ab2.name %like% '^[a-z]{3,4}$') {
-    ab2.name <- 'rsi2'
-  }
-  if (length(ab2) == 1 & is.character(ab2)) {
-    stop('`ab2` must be a vector of antibiotic interpretations.',
-         '\n  Try rsi(', ab2, ', ...) instead of rsi("', ab2, '", ...)', call. = FALSE)
-  }
-
-  interpretation <- paste(interpretation, collapse = "")
-
-  ab1 <- as.rsi(ab1)
-  ab2 <- as.rsi(ab2)
-
-  tbl <- tibble(rsi1 = ab1, rsi2 = ab2)
-  colnames(tbl) <- c(ab1.name, ab2.name)
-
-  if (length(ab2) == 1) {
-    r <- rsi_df(tbl = tbl,
-                ab = ab1.name,
-                interpretation = interpretation,
-                minimum = minimum,
-                as_percent = FALSE,
-                info = info,
-                warning = warning)
+                as_percent = FALSE) {
+  warning("'rsi' is deprecated. Use 'resistance' or 'susceptibility' instead.", call. = FALSE)
+  if (interpretation %in% c('IR', 'RI')) {
+    resistance(ab = ab1, include_I = TRUE, minimum = minimum, as_percent = as_percent)
+  } else if (interpretation == 'R') {
+    resistance(ab = ab1, include_I = FALSE, minimum = minimum, as_percent = as_percent)
+  } else  if (interpretation %in% c('IS', 'SI')) {
+    susceptibility(ab1 = ab1, ab2 = ab2, include_I = TRUE, minimum = minimum, as_percent = as_percent)
+  } else if (interpretation == 'S') {
+    susceptibility(ab1 = ab1, ab2 = ab2, include_I = FALSE, minimum = minimum, as_percent = as_percent)
  } else {
-    if (length(ab1) != length(ab2)) {
-      stop('`ab1` (n = ', length(ab1), ') and `ab2` (n = ', length(ab2), ') must be of same length.', call. = FALSE)
-    }
-    if (!interpretation %in% c('S', 'IS', 'SI')) {
-      warning('`interpretation` not set to S or I/S, albeit analysing a combination therapy.', call. = FALSE)
-    }
-    r <- rsi_df(tbl = tbl,
-                ab = c(ab1.name, ab2.name),
-                interpretation = interpretation,
-                minimum = minimum,
-                as_percent = FALSE,
-                info = info,
-                warning = warning)
+    stop('invalid `interpretation`')
  }
-  if (as_percent == TRUE) {
-    percent(r, force_zero = TRUE)
-  } else {
-    r
-  }
-}
-
-#' @export
-#' @rdname rsi
-rsi_df <- function(tbl,
-                   ab,
-                   interpretation = 'IR',
-                   minimum = 30,
-                   as_percent = FALSE,
-                   info = TRUE,
-                   warning = TRUE) {
-
-  # in case tbl$interpretation already exists:
-  interpretations_to_check <- paste(interpretation, collapse = "")
-
-  # validate:
-  if (min(grepl('^[a-z]{3,4}$', ab)) == 0 &
-      min(grepl('^rsi[1-2]$', ab)) == 0) {
-    for (i in 1:length(ab)) {
-      ab[i] <- paste0('rsi', i)
-    }
-  }
-  if (!grepl('^(S|SI|IS|I|IR|RI|R){1}$', interpretations_to_check)) {
-    stop('Invalid `interpretation`; must be "S", "SI", "I", "IR", or "R".')
-  }
-  if ('is_ic' %in% colnames(tbl)) {
-    if (n_distinct(tbl$is_ic) > 1 & warning == TRUE) {
-      warning('Dataset contains isolates from the Intensive Care. Exclude them from proper epidemiological analysis.')
-    }
-  }
-
-  # transform when checking for different results
-  if (interpretations_to_check %in% c('SI', 'IS')) {
-    for (i in 1:length(ab)) {
-      tbl[which(tbl[, ab[i]] == 'I'), ab[i]] <- 'S'
-    }
-    interpretations_to_check <- 'S'
-  }
-  if (interpretations_to_check %in% c('RI', 'IR')) {
-    for (i in 1:length(ab)) {
-      tbl[which(tbl[, ab[i]] == 'I'), ab[i]] <- 'R'
-    }
-    interpretations_to_check <- 'R'
-  }
-
-  # get fraction
-  if (length(ab) == 1) {
-    numerator <- tbl %>%
-      filter(pull(., ab[1]) == interpretations_to_check) %>%
-      nrow()
-
-    denominator <- tbl %>%
-      filter(pull(., ab[1]) %in% c("S", "I", "R")) %>%
-      nrow()
-
-  } else if (length(ab) == 2) {
-    if (interpretations_to_check != 'S') {
-      warning('`interpretation` not set to S or I/S, albeit analysing a combination therapy.', call. = FALSE)
-    }
-    numerator <- tbl %>%
-      filter_at(vars(ab[1], ab[2]),
-                any_vars(. == interpretations_to_check)) %>%
-      filter_at(vars(ab[1], ab[2]),
-                all_vars(. %in% c("S", "R", "I"))) %>%
-      nrow()
-
-    denominator <- tbl %>%
-      filter_at(vars(ab[1], ab[2]),
-                all_vars(. %in% c("S", "R", "I"))) %>%
-      nrow()
-
-  } else if (length(ab) == 3) {
-    if (interpretations_to_check != 'S') {
-      warning('`interpretation` not set to S or I/S, albeit analysing a combination therapy.', call. = FALSE)
-    }
-    numerator <- tbl %>%
-      filter_at(vars(ab[1], ab[2], ab[3]),
-                any_vars(. == interpretations_to_check)) %>%
-      filter_at(vars(ab[1], ab[2], ab[3]),
-                all_vars(. %in% c("S", "R", "I"))) %>%
-      nrow()
-
-    denominator <- tbl %>%
-      filter_at(vars(ab[1], ab[2], ab[3]),
-                all_vars(. %in% c("S", "R", "I"))) %>%
-      nrow()
-
-  } else {
-    stop('Maximum of 3 drugs allowed.')
-  }
-
-  # build text part
-  if (info == TRUE) {
-    cat('n =', denominator)
-    info.txt1 <- percent(denominator / nrow(tbl))
-    if (denominator == 0) {
-      info.txt1 <- 'none'
-    }
-    info.txt2 <- gsub(',', ' and',
-                      ab %>%
-                        abname(tolower = TRUE) %>%
-                        toString(), fixed = TRUE)
-    info.txt2 <- gsub('rsi1 and rsi2', 'these two drugs', info.txt2, fixed = TRUE)
-    info.txt2 <- gsub('rsi1', 'this drug', info.txt2, fixed = TRUE)
-    cat(paste0(' (of ', nrow(tbl), ' in total; ', info.txt1, ' tested on ', info.txt2, ')\n'))
-  }
-
-  # calculate and format
-  y <- numerator / denominator
-  if (as_percent == TRUE) {
-   y <- percent(y, force_zero = TRUE)
-  }
-
-  if (denominator < minimum) {
-    if (warning == TRUE) {
-      warning(paste0('TOO FEW ISOLATES OF ', toString(ab), ' (n = ', denominator, ', n < ', minimum, '); NO RESULT.'))
-    }
-    y <- NA
-  }
-
-  # output
-  y
-}
-
-#' @export
-#' @rdname rsi
-n_rsi <- function(ab1, ab2 = NA) {
-
-  if (length(ab1) == 1 & is.character(ab1)) {
-    stop('`ab1` must be a vector of antibiotic interpretations.',
-         '\n  Try n_rsi(', ab1, ', ...) instead of n_rsi("', ab1, '", ...)', call. = FALSE)
-  }
-  ab1 <- as.rsi(ab1)
-
-  if (length(ab2) == 1 & all(is.na(ab2))) {
-    # only 1 antibiotic
-    length(ab1[!is.na(ab1)])
-  } else {
-    if (length(ab2) == 1 & is.character(ab2)) {
-      stop('`ab2` must be a vector of antibiotic interpretations.',
-           '\n  Try n_rsi(', ab2, ', ...) instead of n_rsi("', ab2, '", ...)', call. = FALSE)
-    }
-    ab2 <- as.rsi(ab2)
-    tbl <- tibble(ab1, ab2)
-    tbl %>% filter(!is.na(ab1) & !is.na(ab2)) %>% nrow()
-  }
-
 }

 #' Predict antimicrobial resistance
@ -312,7 +223,8 @@ n_rsi <- function(ab1, ab2 = NA) {
 #' @param preserve_measurements overwrite predictions of years that are actually available in the data, with the original data. The standard errors of those years will be \code{NA}.
 #' @param info print textual analysis with the name and \code{\link{summary}} of the model.
 #' @return \code{data.frame} with columns \code{year}, \code{probR}, \code{se_min} and \code{se_max}.
-#' @seealso \code{\link{lm}} \cr \code{\link{glm}}
+#' @seealso \code{\link{resistance}} \cr \code{\link{lm}} \code{\link{glm}}
+#' @rdname resistance_predict
 #' @export
 #' @importFrom dplyr %>% pull mutate group_by_at summarise filter
 #' @importFrom reshape2 dcast
@ -354,7 +266,7 @@ n_rsi <- function(ab1, ab2 = NA) {
 #'               year_max = 2025,
 #'               preserve_measurements = FALSE)
 #'
-rsi_predict <- function(tbl,
+resistance_predict <- function(tbl,
                        col_ab,
                        col_date,
                        year_max = as.integer(format(as.Date(Sys.Date()), '%Y')) + 15,
@ -491,3 +403,7 @@ rsi_predict <- function(tbl,
  total

 }
+
+#' @rdname resistance_predict
+#' @export
+rsi_predict <- resistance_predict
--- a/R/zzz.R
+++ b/R/zzz.R
@ -1,3 +1,7 @@
 .onLoad <- function(libname, pkgname) {
  backports::import(pkgname)
 }
+
+#' @importFrom Rcpp evalCpp
+#' @useDynLib AMR, .registration = TRUE
+NULL
--- a/README.md
+++ b/README.md
@ -24,8 +24,8 @@ This R package was created for academic research by PhD students of the Faculty
 ## Why this package?
 This R package contains functions to make **microbiological, epidemiological data analysis easier**. It allows the use of some new classes to work with MIC values and antimicrobial interpretations (i.e. values S, I and R).

-With `AMR` you can also:
-* Conduct AMR analysis with the `rsi` function, that can also be used with the `dplyr` package (e.g. in conjunction with `summarise`) to calculate the resistance percentages (and even co-resistance) of different antibiotic columns of a table
+With `AMR` you can:
+* Calculate the resistance (and even co-resistance) of microbial isolates with the `resistance` and `susceptibility` functions, that can also be used with the `dplyr` package (e.g. in conjunction with `summarise`). Our functions use expressions that are not evaluated by R, but by alternative C++ code that is dramatically faster and uses less memory. This is called *hybrid evaluation*.
 * Predict antimicrobial resistance for the nextcoming years with the `rsi_predict` function
 * Apply [EUCAST rules to isolates](http://www.eucast.org/expert_rules_and_intrinsic_resistance/) with the `EUCAST_rules` function
 * Identify first isolates of every patient [using guidelines from the CLSI](https://clsi.org/standards/products/microbiology/documents/m39/) (Clinical and Laboratory Standards Institute) with the `first_isolate` function
--- a/man/as.mic.Rd
+++ b/man/as.mic.Rd
@ -30,3 +30,4 @@ as.mic("<=0.002; R") # will return <=0.002
 plot(mic_data)
 barplot(mic_data)
 }
+\keyword{mic}
--- a/man/as.rsi.Rd
+++ b/man/as.rsi.Rd
@ -29,3 +29,4 @@ as.rsi("<= 0.002; R") # will return R
 plot(rsi_data)    # for percentages
 barplot(rsi_data) # for frequencies
 }
+\keyword{rsi}
--- a/man/g.test.Rd
+++ b/man/g.test.Rd
@ -4,11 +4,15 @@
 \alias{g.test}
 \title{\emph{G}-test for Count Data}
 \source{
-This code is almost perfectly equal to \code{\link{chisq.test}}; only the calculation of the statistic was changed to \code{2 * sum(x * log(x / E))} and Yates' continuity correction was deleted.
+This code is almost identical to \code{\link{chisq.test}}, except that:
+\itemize{
+  \item{The calculation of the statistic was changed to \code{2 * sum(x * log(x / E))}}
+  \item{Yates' continuity correction was removed as it does not apply to a \emph{G}-test}
+  \item{The possibility to simulate p values with \code{simulate.p.value} was removed}
+}
 }
 \usage{
-g.test(x, y = NULL, p = rep(1/length(x), length(x)), rescale.p = FALSE,
-  simulate.p.value = FALSE, B = 2000)
+g.test(x, y = NULL, p = rep(1/length(x), length(x)), rescale.p = FALSE)
 }
 \arguments{
 \item{x}{a numeric vector or matrix. \code{x} and \code{y} can also
@ -23,12 +27,6 @@ g.test(x, y = NULL, p = rep(1/length(x), length(x)), rescale.p = FALSE,
 \item{rescale.p}{a logical scalar; if TRUE then \code{p} is rescaled
    (if necessary) to sum to 1.  If \code{rescale.p} is FALSE, and
    \code{p} does not sum to 1, an error is given.}
-
-\item{simulate.p.value}{a logical indicating whether to compute
-    p-values by Monte Carlo simulation.}
-
-\item{B}{an integer specifying the number of replicates used in the
-    Monte Carlo test.}
 }
 \value{
 A list with class \code{"htest"} containing the following
@ -58,7 +56,7 @@ If \code{x} is a matrix with one row or column, or if \code{x} is a vector and \

  If \code{x} is a matrix with at least two rows and columns, it is taken as a two-dimensional contingency table: the entries of \code{x} must be non-negative integers.  Otherwise, \code{x} and \code{y} must be vectors or factors of the same length; cases with missing values are removed, the objects are coerced to factors, and the contingency table is computed from these.  Then Pearson's chi-squared test is performed of the null hypothesis that the joint distribution of the cell counts in a 2-dimensional contingency table is the product of the row and column marginals.

-  If \code{simulate.p.value} is \code{FALSE}, the p-value is computed from the asymptotic chi-squared distribution of the test statistic. Otherwise the p-value is computed for a Monte Carlo test (Hope, 1968) with \code{B} replicates.
+  The p-value is computed from the asymptotic chi-squared distribution of the test statistic.

  In the contingency table case simulation is done by random sampling from the set of all contingency tables with given marginals, and works only if the marginals are strictly positive. Note that this is not the usual sampling situation assumed for a chi-squared test (like the \emph{G}-test) but rather that for Fisher's exact test.

--- a/man/resistance.Rd
+++ b/man/resistance.Rd
@ -0,0 +1,108 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/resistance.R
+\name{resistance}
+\alias{resistance}
+\alias{susceptibility}
+\alias{n_rsi}
+\alias{rsi}
+\title{Calculate resistance of isolates}
+\usage{
+resistance(ab, include_I = TRUE, minimum = 30, as_percent = FALSE)
+
+susceptibility(ab1, ab2 = NULL, include_I = FALSE, minimum = 30,
+  as_percent = FALSE)
+
+n_rsi(ab1, ab2 = NULL)
+
+rsi(ab1, ab2 = NULL, interpretation = "IR", minimum = 30,
+  as_percent = FALSE)
+}
+\arguments{
+\item{ab, ab1, ab2}{vector of antibiotic interpretations, they will be transformed internally with \code{\link{as.rsi}}}
+
+\item{include_I}{logical to indicate whether antimicrobial interpretations of "I" should be included}
+
+\item{minimum}{minimal amount of available isolates. Any number lower than \code{minimum} will return \code{NA}.}
+
+\item{as_percent}{logical to indicate whether the output must be returned as percent (text), will else be a double}
+
+\item{interpretation}{antimicrobial interpretation}
+}
+\value{
+Double or, when \code{as_percent = TRUE}, a character.
+}
+\description{
+These functions can be used to calculate the (co-)resistance of microbial isolates (i.e. percentage S, SI, I, IR or R). All functions can be used in \code{dplyr}s \code{\link[dplyr]{summarise}} and support grouped variables, see \emph{Examples}.
+}
+\details{
+\strong{Remember that you should filter your table to let it contain only first isolates!} Use \code{\link{first_isolate}} to determine them in your data set.
+
+All return values are calculated using hybrid evaluation (i.e. using C++), which makes these functions 60-65 times faster than in \code{AMR} v0.2.0 and below. The \code{rsi} function is available for backwards compatibility and deprecated. It now uses the \code{resistance} and \code{susceptibility} functions internally, based on the \code{interpretation} parameter.
+\if{html}{
+  \cr
+  To calculate the probability (\emph{p}) of susceptibility of one antibiotic, we use this formula:
+  \out{<div style="text-align: center">}\figure{mono_therapy.png}\out{</div>}
+  To calculate the probability (\emph{p}) of susceptibility of more antibiotics (i.e. combination therapy), we need to check whether one of them has a susceptible result (as numerator) and count all cases where all antibiotics were tested (as denominator). \cr
+  \cr
+  For two antibiotics:
+  \out{<div style="text-align: center">}\figure{combi_therapy_2.png}\out{</div>}
+  \cr
+  Theoretically for three antibiotics:
+  \out{<div style="text-align: center">}\figure{combi_therapy_3.png}\out{</div>}
+}
+}
+\examples{
+library(dplyr)
+
+septic_patients \%>\%
+  group_by(hospital_id) \%>\%
+  summarise(p = susceptibility(cipr),
+            n = n_rsi(cipr)) # n_rsi works like n_distinct in dplyr
+
+septic_patients \%>\%
+  group_by(hospital_id) \%>\%
+  summarise(cipro_p = susceptibility(cipr, as_percent = TRUE),
+            cipro_n = n_rsi(cipr),
+            genta_p = susceptibility(gent, as_percent = TRUE),
+            genta_n = n_rsi(gent),
+            combination_p = susceptibility(cipr, gent, as_percent = TRUE),
+            combination_n = n_rsi(cipr, gent))
+
+
+# Calculate resistance
+resistance(septic_patients$amox)
+rsi(septic_patients$amox, interpretation = "IR") # deprecated
+
+# Or susceptibility
+susceptibility(septic_patients$amox)
+rsi(septic_patients$amox, interpretation = "S") # deprecated
+
+
+# Calculate co-resistance between amoxicillin/clav acid and gentamicin,
+# so we can see that combination therapy does a lot more than mono therapy:
+susceptibility(septic_patients$amcl) # p = 67.8\%
+n_rsi(septic_patients$amcl)          # n = 1641
+
+susceptibility(septic_patients$gent) # p = 69.1\%
+n_rsi(septic_patients$gent)          # n = 1863
+
+with(septic_patients,
+     susceptibility(amcl, gent))     # p = 90.6\%
+with(septic_patients,
+     n_rsi(amcl, gent))              # n = 1580
+
+\dontrun{
+# calculate current empiric combination therapy of Helicobacter gastritis:
+my_table \%>\%
+  filter(first_isolate == TRUE,
+         genus == "Helicobacter") \%>\%
+  summarise(p = susceptibility(amox, metr), # amoxicillin with metronidazole
+            n = n_rsi(amox, metr))
+}
+}
+\keyword{antibiotics}
+\keyword{isolate}
+\keyword{isolates}
+\keyword{resistance}
+\keyword{rsi_df}
+\keyword{susceptibility}
--- a/man/resistance_predict.Rd
+++ b/man/resistance_predict.Rd
@ -1,9 +1,15 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/rsi_analysis.R
-\name{rsi_predict}
+% Please edit documentation in R/resistance.R
+\name{resistance_predict}
+\alias{resistance_predict}
 \alias{rsi_predict}
 \title{Predict antimicrobial resistance}
 \usage{
+resistance_predict(tbl, col_ab, col_date,
+  year_max = as.integer(format(as.Date(Sys.Date()), "\%Y")) + 15,
+  year_every = 1, model = "binomial", I_as_R = TRUE,
+  preserve_measurements = TRUE, info = TRUE)
+
 rsi_predict(tbl, col_ab, col_date,
  year_max = as.integer(format(as.Date(Sys.Date()), "\%Y")) + 15,
  year_every = 1, model = "binomial", I_as_R = TRUE,
@ -74,5 +80,5 @@ septic_patients \%>\%

 }
 \seealso{
-\code{\link{lm}} \cr \code{\link{glm}}
+\code{\link{resistance}} \cr \code{\link{lm}} \code{\link{glm}}
 }
--- a/man/rsi.Rd
+++ b/man/rsi.Rd
@ -1,96 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/rsi_analysis.R
-\name{rsi}
-\alias{rsi}
-\alias{rsi_df}
-\alias{n_rsi}
-\title{Resistance of isolates}
-\usage{
-rsi(ab1, ab2 = NA, interpretation = "IR", minimum = 30,
-  as_percent = FALSE, info = FALSE, warning = TRUE)
-
-rsi_df(tbl, ab, interpretation = "IR", minimum = 30, as_percent = FALSE,
-  info = TRUE, warning = TRUE)
-
-n_rsi(ab1, ab2 = NA)
-}
-\arguments{
-\item{ab1, ab2}{vector of antibiotic interpretations, they will be transformed internally with \code{\link{as.rsi}}}
-
-\item{interpretation}{antimicrobial interpretation of which the portion must be calculated. Valid values are \code{"S"}, \code{"SI"}, \code{"I"}, \code{"IR"} or \code{"R"}.}
-
-\item{minimum}{minimal amount of available isolates. Any number lower than \code{minimum} will return \code{NA} with a warning (when \code{warning = TRUE}).}
-
-\item{as_percent}{return output as percent (text), will else (at default) be a double}
-
-\item{info}{calculate the amount of available isolates and print it, like \code{n = 423}}
-
-\item{warning}{show a warning when the available amount of isolates is below \code{minimum}}
-
-\item{tbl}{\code{data.frame} containing columns with antibiotic interpretations.}
-
-\item{ab}{character vector with 1, 2 or 3 antibiotics that occur as column names in \code{tbl}, like \code{ab = c("amox", "amcl")}}
-}
-\value{
-Double or, when \code{as_percent = TRUE}, a character.
-}
-\description{
-This functions can be used to calculate the (co-)resistance of isolates (i.e. percentage S, SI, I, IR or R [of a vector] of isolates). The functions \code{rsi} and \code{n_rsi} can be used in \code{dplyr}s \code{\link[dplyr]{summarise}} and support grouped variables, see \emph{Examples}.
-}
-\details{
-Remember that you should filter your table to let it contain \strong{only first isolates}!
-\if{html}{
-  \cr \cr
-  To calculate the probability (\emph{p}) of susceptibility of one antibiotic, we use this formula:
-  \out{<div style="text-align: center">}\figure{mono_therapy.png}\out{</div>}
-  To calculate the probability (\emph{p}) of susceptibility of more antibiotics (i.e. combination therapy), we need to check whether one of them has a susceptible result (as numerator) and count all cases where all antibiotics were tested (as denominator). \cr
-  For two antibiotics:
-  \out{<div style="text-align: center">}\figure{combi_therapy_2.png}\out{</div>}
-  \cr
-  For three antibiotics:
-  \out{<div style="text-align: center">}\figure{combi_therapy_3.png}\out{</div>}
-}
-}
-\examples{
-library(dplyr)
-
-septic_patients \%>\%
-  group_by(hospital_id) \%>\%
-  summarise(cipro_susceptibility = rsi(cipr, interpretation = "S"),
-            n = n_rsi(cipr)) # n_rsi works like n_distinct in dplyr
-
-septic_patients \%>\%
-  group_by(hospital_id) \%>\%
-  summarise(cipro_S = rsi(cipr, interpretation = "S",
-                          as_percent = TRUE, warning = FALSE),
-            cipro_n = n_rsi(cipr),
-            genta_S = rsi(gent, interpretation = "S",
-                          as_percent = TRUE, warning = FALSE),
-            genta_n = n_rsi(gent),
-            combination_S = rsi(cipr, gent, interpretation = "S",
-                                as_percent = TRUE, warning = FALSE),
-            combination_n = n_rsi(cipr, gent))
-
-# calculate resistance
-rsi(septic_patients$amox)
-# or susceptibility
-rsi(septic_patients$amox, interpretation = "S")
-
-# calculate co-resistance between amoxicillin/clav acid and gentamicin,
-# so we can review that combination therapy does a lot more than mono therapy:
-septic_patients \%>\% rsi_df(ab = "amcl", interpretation = "S") # = 67.8\%
-septic_patients \%>\% rsi_df(ab = "gent", interpretation = "S") # = 69.1\%
-septic_patients \%>\% rsi_df(ab = c("amcl", "gent"), interpretation = "S") # = 90.6\%
-
-\dontrun{
-# calculate current empiric combination therapy of Helicobacter gastritis:
-my_table \%>\%
-  filter(first_isolate == TRUE,
-         genus == "Helicobacter") \%>\%
-  rsi_df(ab = c("amox", "metr")) # amoxicillin with metronidazole
-}
-}
-\keyword{antibiotics}
-\keyword{isolate}
-\keyword{isolates}
-\keyword{rsi}
--- a/src/RcppExports.cpp
+++ b/src/RcppExports.cpp
@ -0,0 +1,54 @@
+// Generated by using Rcpp::compileAttributes() -> do not edit by hand
+// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
+
+#include <Rcpp.h>
+
+using namespace Rcpp;
+
+// rsi_calc_S
+int rsi_calc_S(std::vector<double> x, bool include_I);
+RcppExport SEXP _AMR_rsi_calc_S(SEXP xSEXP, SEXP include_ISEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< std::vector<double> >::type x(xSEXP);
+    Rcpp::traits::input_parameter< bool >::type include_I(include_ISEXP);
+    rcpp_result_gen = Rcpp::wrap(rsi_calc_S(x, include_I));
+    return rcpp_result_gen;
+END_RCPP
+}
+// rsi_calc_R
+int rsi_calc_R(std::vector<double> x, bool include_I);
+RcppExport SEXP _AMR_rsi_calc_R(SEXP xSEXP, SEXP include_ISEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< std::vector<double> >::type x(xSEXP);
+    Rcpp::traits::input_parameter< bool >::type include_I(include_ISEXP);
+    rcpp_result_gen = Rcpp::wrap(rsi_calc_R(x, include_I));
+    return rcpp_result_gen;
+END_RCPP
+}
+// rsi_calc_total
+int rsi_calc_total(std::vector<double> x);
+RcppExport SEXP _AMR_rsi_calc_total(SEXP xSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< std::vector<double> >::type x(xSEXP);
+    rcpp_result_gen = Rcpp::wrap(rsi_calc_total(x));
+    return rcpp_result_gen;
+END_RCPP
+}
+
+static const R_CallMethodDef CallEntries[] = {
+    {"_AMR_rsi_calc_S", (DL_FUNC) &_AMR_rsi_calc_S, 2},
+    {"_AMR_rsi_calc_R", (DL_FUNC) &_AMR_rsi_calc_R, 2},
+    {"_AMR_rsi_calc_total", (DL_FUNC) &_AMR_rsi_calc_total, 1},
+    {NULL, NULL, 0}
+};
+
+RcppExport void R_init_AMR(DllInfo *dll) {
+    R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
+    R_useDynamicSymbols(dll, FALSE);
+}
--- a/src/rsi_calc.cpp
+++ b/src/rsi_calc.cpp
@ -0,0 +1,29 @@
+#include <Rcpp.h>
+#include <vector>        // for std::vector
+#include <functional>    // for std::less, etc
+#include <algorithm>     // for count_if
+
+using namespace Rcpp ;
+
+// [[Rcpp::export]]
+int rsi_calc_S(std::vector<double> x, bool include_I) {
+  if (include_I == TRUE) {
+    return count_if(x.begin(), x.end(), bind2nd(std::less_equal<double>(), 2));
+  } else {
+    return count_if(x.begin(), x.end(), bind2nd(std::less<double>(), 2));
+  }
+}
+
+// [[Rcpp::export]]
+int rsi_calc_R(std::vector<double> x, bool include_I) {
+  if (include_I == TRUE) {
+    return count_if(x.begin(), x.end(), bind2nd(std::greater_equal<double>(), 2));
+  } else {
+    return count_if(x.begin(), x.end(), bind2nd(std::greater<double>(), 2));
+  }
+}
+
+// [[Rcpp::export]]
+int rsi_calc_total(std::vector<double> x) {
+ return count_if(x.begin(), x.end(), bind2nd(std::less_equal<double>(), 3));
+}
--- a/tests/testthat/test-rsi_analysis.R
+++ b/tests/testthat/test-rsi_analysis.R
@ -1,46 +1,29 @@
-context("rsi_analysis.R")
+context("resistance.R")
+
+test_that("resistance works", {
+  # amox resistance in `septic_patients` should be around 57.56%
+  expect_equal(resistance(septic_patients$amox), 0.5756, tolerance = 0.0001)
+  expect_equal(susceptibility(septic_patients$amox), 1 - 0.5756, tolerance = 0.0001)

-test_that("rsi works", {
-  # amox resistance in `septic_patients` should be around 53.86%
-  expect_equal(rsi(septic_patients$amox), 0.5756, tolerance = 0.0001)
-  expect_equal(rsi(septic_patients$amox), 0.5756, tolerance = 0.0001)
-  expect_equal(rsi_df(septic_patients,
-                      ab = "amox",
-                      info = FALSE),
-               0.5756,
-               tolerance = 0.0001)
  # pita+genta susceptibility around 98.09%
-  expect_equal(rsi(septic_patients$pita,
-                   septic_patients$gent,
-                   interpretation = "S",
-                   info = TRUE),
+  expect_equal(susceptibility(septic_patients$pita,
+                              septic_patients$gent),
               0.9809,
               tolerance = 0.0001)
-  expect_equal(rsi_df(septic_patients,
-                      ab = c("pita", "gent"),
-                      interpretation = "S",
-                      info = FALSE),
+  expect_equal(suppressWarnings(rsi(septic_patients$pita,
+                                    septic_patients$gent,
+                                    interpretation = "S")),
               0.9809,
               tolerance = 0.0001)
-  # mero+pita+genta susceptibility around 98.58%
-  expect_equal(rsi_df(septic_patients,
-                      ab = c("mero", "pita", "gent"),
-                      interpretation = "IS",
-                      info = FALSE),
-               0.9858,
-               tolerance = 0.0001)

  # count of cases
  expect_equal(septic_patients %>%
                 group_by(hospital_id) %>%
-                 summarise(cipro_S = rsi(cipr, interpretation = "S",
-                                         as_percent = TRUE, warning = FALSE),
+                 summarise(cipro_p = susceptibility(cipr, as_percent = TRUE),
                           cipro_n = n_rsi(cipr),
-                           genta_S = rsi(gent, interpretation = "S",
-                                         as_percent = TRUE, warning = FALSE),
+                           genta_p = susceptibility(gent, as_percent = TRUE),
                           genta_n = n_rsi(gent),
-                           combination_S = rsi(cipr, gent, interpretation = "S",
-                                               as_percent = TRUE, warning = FALSE),
+                           combination_p = susceptibility(cipr, gent, as_percent = TRUE),
                           combination_n = n_rsi(cipr, gent)) %>%
                 pull(combination_n),
               c(138, 474, 170, 464, 183))