new portion functions

2026-02-01 07:52:53 +01:00 · 2018-08-10 15:01:05 +02:00
parent ae2433a020
commit 53fa198e35
19 changed files with 892 additions and 1140 deletions
--- a/4
+++ b/4
@@ -1,6 +1,6 @@
 Package: AMR
-Version: 0.2.0.9021
+Version: 0.2.0.9022
-Date: 2018-08-03
+Date: 2018-08-10
 Title: Antimicrobial Resistance Analysis
 Authors@R: c(
    person(
--- a/18
+++ b/18
@@ -51,7 +51,6 @@ export(g.test)
 export(guess_atc)
 export(guess_bactid)
 export(inner_join_microorganisms)
 export(intermediate)
 export(interpretive_reading)
 export(is.bactid)
 export(is.mic)
@@ -64,21 +63,18 @@ export(like)
 export(mo_property)
 export(n_rsi)
 export(p.symbol)
 export(portion_I)
 export(portion_IR)
 export(portion_R)
 export(portion_S)
 export(portion_SI)
 export(ratio)
 export(resistance)
 export(resistance_predict)
 export(right_join_microorganisms)
 export(rsi)
 export(rsi_I)
 export(rsi_IR)
 export(rsi_R)
 export(rsi_S)
 export(rsi_SI)
 export(rsi_n)
 export(rsi_predict)
 export(semi_join_microorganisms)
 export(skewness)
 export(susceptibility)
 export(top_freq)
 exportMethods(as.data.frame.bactid)
 exportMethods(as.data.frame.frequency_tbl)
@@ -114,8 +110,6 @@ importFrom(clipr,read_clip_tbl)
 importFrom(clipr,write_clip)
 importFrom(curl,nslookup)
 importFrom(dplyr,"%>%")
 importFrom(dplyr,all_vars)
 importFrom(dplyr,any_vars)
 importFrom(dplyr,arrange)
 importFrom(dplyr,arrange_at)
 importFrom(dplyr,as_tibble)
@@ -123,7 +117,6 @@ importFrom(dplyr,between)
 importFrom(dplyr,case_when)
 importFrom(dplyr,desc)
 importFrom(dplyr,filter)
 importFrom(dplyr,filter_at)
 importFrom(dplyr,group_by)
 importFrom(dplyr,group_by_at)
 importFrom(dplyr,if_else)
@@ -139,7 +132,6 @@ importFrom(dplyr,slice)
 importFrom(dplyr,summarise)
 importFrom(dplyr,tibble)
 importFrom(dplyr,top_n)
 importFrom(dplyr,vars)
 importFrom(grDevices,boxplot.stats)
 importFrom(graphics,axis)
 importFrom(graphics,barplot)
--- a/NEWS.md
+++ b/NEWS.md
@@ -2,7 +2,7 @@
 **Published on CRAN: (unpublished)**
 #### New
-* **BREAKING**: `rsi_df` was removed in favour of new functions `rsi_R`, `rsi_IR`, `rsi_I`, `rsi_SI` and `rsi_S` to selectively calculate resistance or susceptibility. These functions use **hybrid evaluation**, which means that calculations are not done in R directly but rather in C++ using the `Rcpp` package, making them 20 to 30 times faster. The function `rsi` still works, but is deprecated. The function `n_rsi` is accompanied by `rsi_n`. 
+* **BREAKING**: `rsi_df` was removed in favour of new functions `portion_R`, `portion_IR`, `portion_I`, `portion_SI` and `portion_S` to selectively calculate resistance or susceptibility. These functions use **hybrid evaluation**, which means that calculations are not done in R directly but rather in C++ using the `Rcpp` package, making them 20 to 30 times faster. The function `rsi` still works, but is deprecated.
 * **BREAKING**: the methodology for determining first weighted isolates was changed. The antibiotics that are compared between isolates (call *key antibiotics*) to include more first isolates (afterwards called first *weighted* isolates) are now as follows:
  * Universal: amoxicillin, amoxicillin/clavlanic acid, cefuroxime, piperacillin/tazobactam, ciprofloxacin,  trimethoprim/sulfamethoxazole
  * Gram-positive: vancomycin, teicoplanin, tetracycline, erythromycin, oxacillin, rifampicin
--- a/R/data.R
+++ b/R/data.R
@@ -321,7 +321,7 @@
 #'          first_isolates == TRUE) %>%
 #'   group_by(hospital_id) %>%
 #'   summarise(n = n_rsi(amox),
-#'             p = resistance(amox))
+#'             p = portion_IR(amox))
 #'
 #'
 #' # 2. Get the amoxicillin/clavulanic acid resistance
@@ -332,5 +332,5 @@
 #'          first_isolates == TRUE) %>%
 #'   group_by(year = format(date, "%Y")) %>%
 #'   summarise(n = n_rsi(amcl),
-#'             p = resistance(amcl, minimum = 20))
+#'             p = portion_IR(amcl, minimum = 20))
 "septic_patients"
--- a/R/first_isolate.R
+++ b/R/first_isolate.R
@@ -55,31 +55,32 @@
 #' @return A vector to add to table, see Examples.
 #' @source Methodology of this function is based on: \strong{M39 Analysis and Presentation of Cumulative Antimicrobial Susceptibility Test Data, 4th Edition}, 2014, \emph{Clinical and Laboratory Standards Institute (CLSI)}. \url{https://clsi.org/standards/products/microbiology/documents/m39/}.
 #' @examples
-#' # septic_patients is a dataset available in the AMR package
+#' # septic_patients is a dataset available in the AMR package. It is true data.
 #' ?septic_patients
 #' my_patients <- septic_patients
 #'
 #' library(dplyr)
-#' my_patients$first_isolate <- my_patients %>%
+#' my_patients <- septic_patients %>%
-#'   first_isolate(col_date = "date",
+#'   mutate(first_isolate = first_isolate(.,
-#'                 col_patient_id = "patient_id",
+#'                                        col_date = "date",
-#'                 col_bactid = "bactid")
+#'                                        col_patient_id = "patient_id",
 #'                                        col_bactid = "bactid"))
 #'
 #' # Now let's see if first isolates matter:
 #' A <- my_patients %>%
 #'   group_by(hospital_id) %>%
-#'   summarise(count = n_rsi(gent), # gentamicin
+#'   summarise(count = n_rsi(gent),            # gentamicin availability
-#'             resistance = resistance(gent))
+#'             resistance = portion_IR(gent))  # gentamicin resistance
 #'
 #' B <- my_patients %>%
-#'   filter(first_isolate == TRUE) %>% # the 1st isolate filter
+#'   filter(first_isolate == TRUE) %>%         # the 1st isolate filter
 #'   group_by(hospital_id) %>%
-#'   summarise(count = n_rsi(gent),
+#'   summarise(count = n_rsi(gent),            # gentamicin availability
-#'             resistance = resistance(gent))
+#'             resistance = portion_IR(gent))  # gentamicin resistance
 #'
-#' # Have a look at A and B. B is more reliable because every isolate is
+#' # Have a look at A and B.
-#' # counted once. Gentamicin resitance in hospital D appears to be 5%
+#' # B is more reliable because every isolate is only counted once.
-#' # higher than originally thought.
+#' # Gentamicin resitance in hospital D appears to be 5.4% higher than
 #' # when you (erroneously) would have used all isolates!
 #'
 #' ## OTHER EXAMPLES:
 #'
--- a/R/n_rsi.R
+++ b/R/n_rsi.R
@@ -0,0 +1,54 @@
 # ==================================================================== #
 # TITLE                                                                #
 # Antimicrobial Resistance (AMR) Analysis                              #
 #                                                                      #
 # AUTHORS                                                              #
 # Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl)           #
 #                                                                      #
 # LICENCE                                                              #
 # This program is free software; you can redistribute it and/or modify #
 # it under the terms of the GNU General Public License version 2.0,    #
 # as published by the Free Software Foundation.                        #
 #                                                                      #
 # This program is distributed in the hope that it will be useful,      #
 # but WITHOUT ANY WARRANTY; without even the implied warranty of       #
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the        #
 # GNU General Public License for more details.                         #
 # ==================================================================== #
 #' Count cases with antimicrobial results
 #'
 #' This counts all cases where antimicrobial interpretations are available. Its use is equal to \code{\link{n_distinct}}.
 #' @param ab1,ab2 vector of antibiotic interpretations, they will be transformed internally with \code{\link{as.rsi}} if needed
 #' @export
 #' @seealso The \code{\link{portion}} functions to calculate resistance and susceptibility.
 #' @examples
 #' library(dplyr)
 #'
 #' septic_patients %>%
 #'   group_by(hospital_id) %>%
 #'   summarise(cipro_p = portion_S(cipr, as_percent = TRUE),
 #'             cipro_n = n_rsi(cipr),
 #'             genta_p = portion_S(gent, as_percent = TRUE),
 #'             genta_n = n_rsi(gent),
 #'             combination_p = portion_S(cipr, gent, as_percent = TRUE),
 #'             combination_n = n_rsi(cipr, gent))
 n_rsi <- function(ab1, ab2 = NULL) {
  if (NCOL(ab1) > 1) {
    stop('`ab` must be a vector of antimicrobial interpretations', call. = FALSE)
  }
  if (!is.rsi(ab1)) {
    ab1 <- as.rsi(ab1)
  }
  if (!is.null(ab2)) {
    if (NCOL(ab2) > 1) {
      stop('`ab2` must be a vector of antimicrobial interpretations', call. = FALSE)
    }
    if (!is.rsi(ab2)) {
      ab2 <- as.rsi(ab2)
    }
    sum(!is.na(ab1) & !is.na(ab2))
  } else {
    sum(!is.na(ab1))
  }
 }
--- a/R/portion.R
+++ b/R/portion.R
@@ -0,0 +1,242 @@
 # ==================================================================== #
 # TITLE                                                                #
 # Antimicrobial Resistance (AMR) Analysis                              #
 #                                                                      #
 # AUTHORS                                                              #
 # Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl)           #
 #                                                                      #
 # LICENCE                                                              #
 # This program is free software; you can redistribute it and/or modify #
 # it under the terms of the GNU General Public License version 2.0,    #
 # as published by the Free Software Foundation.                        #
 #                                                                      #
 # This program is distributed in the hope that it will be useful,      #
 # but WITHOUT ANY WARRANTY; without even the implied warranty of       #
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the        #
 # GNU General Public License for more details.                         #
 # ==================================================================== #
 #' Calculate resistance of isolates
 #'
 #' @description These functions can be used to calculate the (co-)resistance of microbial isolates (i.e. percentage S, SI, I, IR or R). All functions can be used in \code{dplyr}s \code{\link[dplyr]{summarise}} and support grouped variables, see \emph{Examples}.
 #'
 #' \code{portion_R} and \code{portion_IR} can be used to calculate resistance, \code{portion_S} and \code{portion_SI} can be used to calculate susceptibility.\cr
 #' @param ab1 vector of antibiotic interpretations, they will be transformed internally with \code{\link{as.rsi}} if needed
 #' @param ab2 like \code{ab}, a vector of antibiotic interpretations. Use this to calculate (the lack of) co-resistance: the probability where one of two drugs have a resistant or susceptible result. See Examples.
 #' @param minimum minimal amount of available isolates. Any number lower than \code{minimum} will return \code{NA}. The default number of \code{30} isolates is advised by the CLSI as best practice, see Source.
 #' @param as_percent logical to indicate whether the output must be returned as percent (text), will else be a double
 #' @details \strong{Remember that you should filter your table to let it contain only first isolates!} Use \code{\link{first_isolate}} to determine them in your data set.
 #'
 #' The old \code{\link{rsi}} function is still available for backwards compatibility but is deprecated.
 #' \if{html}{
 #'   \cr\cr
 #'   To calculate the probability (\emph{p}) of susceptibility of one antibiotic, we use this formula:
 #'   \out{<div style="text-align: center">}\figure{mono_therapy.png}\out{</div>}
 #'   To calculate the probability (\emph{p}) of susceptibility of more antibiotics (i.e. combination therapy), we need to check whether one of them has a susceptible result (as numerator) and count all cases where all antibiotics were tested (as denominator). \cr
 #'   \cr
 #'   For two antibiotics:
 #'   \out{<div style="text-align: center">}\figure{combi_therapy_2.png}\out{</div>}
 #'   \cr
 #'   Theoretically for three antibiotics:
 #'   \out{<div style="text-align: center">}\figure{combi_therapy_3.png}\out{</div>}
 #' }
 #' @source \strong{M39 Analysis and Presentation of Cumulative Antimicrobial Susceptibility Test Data, 4th Edition}, 2014, \emph{Clinical and Laboratory Standards Institute (CLSI)}. \url{https://clsi.org/standards/products/microbiology/documents/m39/}.
 #' @seealso \code{\link{n_rsi}} to count cases with antimicrobial results.
 #' @keywords resistance susceptibility rsi_df rsi antibiotics isolate isolates
 #' @return Double or, when \code{as_percent = TRUE}, a character.
 #' @rdname portion
 #' @name portion
 #' @export
 #' @examples
 #' # Calculate resistance
 #' portion_R(septic_patients$amox)
 #' portion_IR(septic_patients$amox)
 #'
 #' # Or susceptibility
 #' portion_S(septic_patients$amox)
 #' portion_SI(septic_patients$amox)
 #'
 #' # Since n_rsi counts available isolates (and is used as denominator),
 #' # you can calculate back to count e.g. non-susceptible isolates:
 #' portion_IR(septic_patients$amox) * n_rsi(septic_patients$amox)
 #'
 #' library(dplyr)
 #' septic_patients %>%
 #'   group_by(hospital_id) %>%
 #'   summarise(p = portion_S(cipr),
 #'             n = n_rsi(cipr)) # n_rsi works like n_distinct in dplyr
 #'
 #' septic_patients %>%
 #'   group_by(hospital_id) %>%
 #'   summarise(R = portion_R(cipr, as_percent = TRUE),
 #'             I = portion_I(cipr, as_percent = TRUE),
 #'             S = portion_S(cipr, as_percent = TRUE),
 #'             n = n_rsi(cipr), # works like n_distinct in dplyr
 #'             total = n())     # NOT the amount of tested isolates!
 #'
 #' # Calculate co-resistance between amoxicillin/clav acid and gentamicin,
 #' # so we can see that combination therapy does a lot more than mono therapy:
 #' portion_S(septic_patients$amcl) # S = 67.3%
 #' n_rsi(septic_patients$amcl)     # n = 1570
 #'
 #' portion_S(septic_patients$gent) # S = 74.0%
 #' n_rsi(septic_patients$gent)     # n = 1842
 #'
 #' with(septic_patients,
 #'      portion_S(amcl, gent))     # S = 92.1%
 #' with(septic_patients,           # n = 1504
 #'      n_rsi(amcl, gent))
 #'
 #' septic_patients %>%
 #'   group_by(hospital_id) %>%
 #'   summarise(cipro_p = portion_S(cipr, as_percent = TRUE),
 #'             cipro_n = n_rsi(cipr),
 #'             genta_p = portion_S(gent, as_percent = TRUE),
 #'             genta_n = n_rsi(gent),
 #'             combination_p = portion_S(cipr, gent, as_percent = TRUE),
 #'             combination_n = n_rsi(cipr, gent))
 #'
 #' \dontrun{
 #'
 #' # calculate current empiric combination therapy of Helicobacter gastritis:
 #' my_table %>%
 #'   filter(first_isolate == TRUE,
 #'          genus == "Helicobacter") %>%
 #'   summarise(p = portion_S(amox, metr),  # amoxicillin with metronidazole
 #'             n = n_rsi(amox, metr))
 #' }
 portion_R <- function(ab1,
                      ab2 = NULL,
                      minimum = 30,
                      as_percent = FALSE) {
  rsi_calc(type = "R",
           ab1 = ab1,
           ab2 = ab2,
           include_I = FALSE,
           minimum = minimum,
           as_percent = as_percent)
 }
 #' @rdname portion
 #' @export
 portion_IR <- function(ab1,
                       ab2 = NULL,
                       minimum = 30,
                       as_percent = FALSE) {
  rsi_calc(type = "R",
           ab1 = ab1,
           ab2 = ab2,
           include_I = TRUE,
           minimum = minimum,
           as_percent = as_percent)
 }
 #' @rdname portion
 #' @export
 portion_I <- function(ab1,
                      minimum = 30,
                      as_percent = FALSE) {
  rsi_calc(type = "I",
           ab1 = ab1,
           ab2 = NULL,
           include_I = FALSE,
           minimum = minimum,
           as_percent = as_percent)
 }
 #' @rdname portion
 #' @export
 portion_SI <- function(ab1,
                       ab2 = NULL,
                       minimum = 30,
                       as_percent = FALSE) {
  rsi_calc(type = "S",
           ab1 = ab1,
           ab2 = ab2,
           include_I = TRUE,
           minimum = minimum,
           as_percent = as_percent)
 }
 #' @rdname portion
 #' @export
 portion_S <- function(ab1,
                      ab2 = NULL,
                      minimum = 30,
                      as_percent = FALSE) {
  rsi_calc(type = "S",
           ab1 = ab1,
           ab2 = ab2,
           include_I = FALSE,
           minimum = minimum,
           as_percent = as_percent)
 }
 rsi_calc <- function(type,
                     ab1,
                     ab2,
                     include_I,
                     minimum,
                     as_percent) {
  if (NCOL(ab1) > 1) {
    stop('`ab1` must be a vector of antimicrobial interpretations', call. = FALSE)
  }
  if (!is.logical(include_I)) {
    stop('`include_I` must be logical', call. = FALSE)
  }
  if (!is.numeric(minimum)) {
    stop('`minimum` must be numeric', call. = FALSE)
  }
  if (!is.logical(as_percent)) {
    stop('`as_percent` must be logical', call. = FALSE)
  }
  print_warning <- FALSE
  if (!is.rsi(ab1)) {
    ab1 <- as.rsi(ab1)
    print_warning <- TRUE
  }
  if (!is.null(ab2)) {
    # ab_name <- paste(deparse(substitute(ab1)), "and", deparse(substitute(ab2)))
    if (NCOL(ab2) > 1) {
      stop('`ab2` must be a vector of antimicrobial interpretations', call. = FALSE)
    }
    if (!is.rsi(ab2)) {
      ab2 <- as.rsi(ab2)
      print_warning <- TRUE
    }
    x <- apply(X = data.frame(ab1 = as.integer(ab1),
                              ab2 = as.integer(ab2)),
               MARGIN = 1,
               FUN = min)
  } else {
    x <- ab1
    # ab_name <- deparse(substitute(ab1))
  }
  if (print_warning == TRUE) {
    warning("Increase speed by transforming to class `rsi` on beforehand: df %>% mutate_at(vars(col10:col20), as.rsi)")
  }
  total <- length(x) - sum(is.na(x))
  if (total < minimum) {
    return(NA)
  }
  if (type == "S") {
    found <- .Call(`_AMR_rsi_calc_S`, x, include_I)
  } else if (type == "I") {
    found <- .Call(`_AMR_rsi_calc_I`, x)
  } else if (type == "R") {
    found <- .Call(`_AMR_rsi_calc_R`, x, include_I)
  } else {
    stop("invalid type")
  }
  if (as_percent == TRUE) {
    percent(found / total, force_zero = TRUE)
  } else {
    found / total
  }
 }
--- a/R/resistance_predict.R
+++ b/R/resistance_predict.R
@@ -0,0 +1,293 @@
 # ==================================================================== #
 # TITLE                                                                #
 # Antimicrobial Resistance (AMR) Analysis                              #
 #                                                                      #
 # AUTHORS                                                              #
 # Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl)           #
 #                                                                      #
 # LICENCE                                                              #
 # This program is free software; you can redistribute it and/or modify #
 # it under the terms of the GNU General Public License version 2.0,    #
 # as published by the Free Software Foundation.                        #
 #                                                                      #
 # This program is distributed in the hope that it will be useful,      #
 # but WITHOUT ANY WARRANTY; without even the implied warranty of       #
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the        #
 # GNU General Public License for more details.                         #
 # ==================================================================== #
 #' Predict antimicrobial resistance
 #'
 #' Create a prediction model to predict antimicrobial resistance for the next years on statistical solid ground. Standard errors (SE) will be returned as columns \code{se_min} and \code{se_max}. See Examples for a real live example.
 #' @inheritParams first_isolate
 #' @param col_ab column name of \code{tbl} with antimicrobial interpretations (\code{R}, \code{I} and \code{S})
 #' @param col_date column name of the date, will be used to calculate years if this column doesn't consist of years already
 #' @param year_min lowest year to use in the prediction model, dafaults the lowest year in \code{col_date}
 #' @param year_max highest year to use in the prediction model, defaults to 15 years after today
 #' @param year_every unit of sequence between lowest year found in the data and \code{year_max}
 #' @param minimum minimal amount of available isolates per year to include. Years containing less observations will be estimated by the model.
 #' @param model the statistical model of choice. Valid values are \code{"binomial"} (or \code{"binom"} or \code{"logit"}) or \code{"loglin"} or \code{"linear"} (or \code{"lin"}).
 #' @param I_as_R treat \code{I} as \code{R}
 #' @param preserve_measurements logical to indicate whether predictions of years that are actually available in the data should be overwritten with the original data. The standard errors of those years will be \code{NA}.
 #' @param info print textual analysis with the name and \code{\link{summary}} of the model.
 #' @return \code{data.frame} with columns:
 #' \itemize{
 #'   \item{\code{year}}
 #'   \item{\code{value}, the same as \code{estimated} when \code{preserve_measurements = FALSE}, and a combination of \code{observed} and \code{estimated} otherwise}
 #'   \item{\code{se_min}, the lower bound of the standard error with a minimum of \code{0}}
 #'   \item{\code{se_max} the upper bound of the standard error with a maximum of \code{1}}
 #'   \item{\code{observations}, the total number of observations, i.e. S + I + R}
 #'   \item{\code{observed}, the original observed values}
 #'   \item{\code{estimated}, the estimated values, calculated by the model}
 #' }
 #' @seealso The \code{\link{portion}} function to calculate resistance, \cr \code{\link{lm}} \code{\link{glm}}
 #' @rdname resistance_predict
 #' @export
 #' @importFrom stats predict glm lm
 #' @importFrom dplyr %>% pull mutate group_by_at summarise filter n_distinct arrange case_when
 # @importFrom tidyr spread
 #' @examples
 #' \dontrun{
 #' # use it with base R:
 #' resistance_predict(tbl = tbl[which(first_isolate == TRUE & genus == "Haemophilus"),],
 #'                    col_ab = "amcl", col_date = "date")
 #'
 #' # or use dplyr so you can actually read it:
 #' library(dplyr)
 #' tbl %>%
 #'   filter(first_isolate == TRUE,
 #'          genus == "Haemophilus") %>%
 #'   resistance_predict(amcl, date)
 #' }
 #'
 #'
 #' # real live example:
 #' library(dplyr)
 #' septic_patients %>%
 #'   # get bacteria properties like genus and species
 #'   left_join_microorganisms("bactid") %>%
 #'   # calculate first isolates
 #'   mutate(first_isolate =
 #'            first_isolate(.,
 #'                          "date",
 #'                          "patient_id",
 #'                          "bactid",
 #'                          col_specimen = NA,
 #'                          col_icu = NA)) %>%
 #'   # filter on first E. coli isolates
 #'   filter(genus == "Escherichia",
 #'          species == "coli",
 #'          first_isolate == TRUE) %>%
 #'   # predict resistance of cefotaxime for next years
 #'   resistance_predict(col_ab = "cfot",
 #'                      col_date = "date",
 #'                      year_max = 2025,
 #'                      preserve_measurements = TRUE,
 #'                      minimum = 0)
 #'
 #' # create nice plots with ggplot
 #' if (!require(ggplot2)) {
 #'
 #'   data <- septic_patients %>%
 #'     filter(bactid == "ESCCOL") %>%
 #'     resistance_predict(col_ab = "amox",
 #'                       col_date = "date",
 #'                       info = FALSE,
 #'                       minimum = 15)
 #'
 #'   ggplot(data,
 #'          aes(x = year)) +
 #'     geom_col(aes(y = value),
 #'              fill = "grey75") +
 #'     geom_errorbar(aes(ymin = se_min,
 #'                       ymax = se_max),
 #'                   colour = "grey50") +
 #'     scale_y_continuous(limits = c(0, 1),
 #'                        breaks = seq(0, 1, 0.1),
 #'                        labels = paste0(seq(0, 100, 10), "%")) +
 #'     labs(title = expression(paste("Forecast of amoxicillin resistance in ",
 #'                                   italic("E. coli"))),
 #'          y = "%IR",
 #'          x = "Year") +
 #'     theme_minimal(base_size = 13)
 #' }
 resistance_predict <- function(tbl,
                               col_ab,
                               col_date,
                               year_min = NULL,
                               year_max = NULL,
                               year_every = 1,
                               minimum = 30,
                               model = 'binomial',
                               I_as_R = TRUE,
                               preserve_measurements = TRUE,
                               info = TRUE) {
  if (nrow(tbl) == 0) {
    stop('This table does not contain any observations.')
  }
  if (!col_ab %in% colnames(tbl)) {
    stop('Column ', col_ab, ' not found.')
  }
  if (!col_date %in% colnames(tbl)) {
    stop('Column ', col_date, ' not found.')
  }
  if ('grouped_df' %in% class(tbl)) {
    # no grouped tibbles please, mutate will throw errors
    tbl <- base::as.data.frame(tbl, stringsAsFactors = FALSE)
  }
  if (I_as_R == TRUE) {
    tbl[, col_ab] <- gsub('I', 'R', tbl %>% pull(col_ab))
  }
  if (!tbl %>% pull(col_ab) %>% is.rsi()) {
    tbl[, col_ab] <- tbl %>% pull(col_ab) %>% as.rsi()
  }
  year <- function(x) {
    if (all(grepl('^[0-9]{4}$', x))) {
      x
    } else {
      as.integer(format(as.Date(x), '%Y'))
    }
  }
  df <- tbl %>%
    mutate(year = tbl %>% pull(col_date) %>% year()) %>%
    group_by_at(c('year', col_ab)) %>%
    summarise(n())
  if (df %>% pull(col_ab) %>% n_distinct(na.rm = TRUE) < 2) {
    stop("No variety in antimicrobial interpretations - all isolates are '",
         df %>% pull(col_ab) %>% unique() %>% .[!is.na(.)], "'.",
         call. = FALSE)
  }
  colnames(df) <- c('year', 'antibiotic', 'observations')
  df <- df %>%
    filter(!is.na(antibiotic)) %>%
    tidyr::spread(antibiotic, observations, fill = 0) %>%
    mutate(total = R + S) %>%
    filter(total >= minimum)
  if (NROW(df) == 0) {
    stop('There are no observations.')
  }
  year_lowest <- min(df$year)
  if (is.null(year_min)) {
    year_min <- year_lowest
  } else {
    year_min <- max(year_min, year_lowest, na.rm = TRUE)
  }
  if (is.null(year_max)) {
    year_max <- year(Sys.Date()) + 15
  }
  years_predict <- seq(from = year_min, to = year_max, by = year_every)
  if (model %in% c('binomial', 'binom', 'logit')) {
    logitmodel <- with(df, glm(cbind(R, S) ~ year, family = binomial))
    if (info == TRUE) {
      cat('\nLogistic regression model (logit) with binomial distribution')
      cat('\n------------------------------------------------------------\n')
      print(summary(logitmodel))
    }
    predictmodel <- predict(logitmodel, newdata = with(df, list(year = years_predict)), type = "response", se.fit = TRUE)
    prediction <- predictmodel$fit
    se <- predictmodel$se.fit
  } else if (model == 'loglin') {
    loglinmodel <- with(df, glm(R ~ year, family = poisson))
    if (info == TRUE) {
      cat('\nLog-linear regression model (loglin) with poisson distribution')
      cat('\n--------------------------------------------------------------\n')
      print(summary(loglinmodel))
    }
    predictmodel <- predict(loglinmodel, newdata = with(df, list(year = years_predict)), type = "response", se.fit = TRUE)
    prediction <- predictmodel$fit
    se <- predictmodel$se.fit
  } else if (model %in% c('lin', 'linear')) {
    linmodel <- with(df, lm((R / (R + S)) ~ year))
    if (info == TRUE) {
      cat('\nLinear regression model')
      cat('\n-----------------------\n')
      print(summary(linmodel))
    }
    predictmodel <- predict(linmodel, newdata = with(df, list(year = years_predict)), se.fit = TRUE)
    prediction <- predictmodel$fit
    se <- predictmodel$se.fit
  } else {
    stop('No valid model selected.')
  }
  # prepare the output dataframe
  prediction <- data.frame(year = years_predict, value = prediction, stringsAsFactors = FALSE)
  prediction$se_min <- prediction$value - se
  prediction$se_max <- prediction$value + se
  if (model == 'loglin') {
    prediction$value <- prediction$value %>%
      format(scientific = FALSE) %>%
      as.integer()
    prediction$se_min <- prediction$se_min %>% as.integer()
    prediction$se_max <- prediction$se_max %>% as.integer()
    colnames(prediction) <- c('year', 'amountR', 'se_max', 'se_min')
  } else {
    prediction$se_max[which(prediction$se_max > 1)] <- 1
  }
  prediction$se_min[which(prediction$se_min < 0)] <- 0
  prediction$observations = NA
  total <- prediction
  if (preserve_measurements == TRUE) {
    # replace estimated data by observed data
    if (I_as_R == TRUE) {
      if (!'I' %in% colnames(df)) {
        df$I <- 0
      }
      df$value <- df$R / rowSums(df[, c('R', 'S', 'I')])
    } else {
      df$value <- df$R / rowSums(df[, c('R', 'S')])
    }
    measurements <- data.frame(year = df$year,
                               value = df$value,
                               se_min = NA,
                               se_max = NA,
                               observations = df$total,
                               stringsAsFactors = FALSE)
    colnames(measurements) <- colnames(prediction)
    total <- rbind(measurements,
                   prediction %>% filter(!year %in% df$year))
    if (model %in% c('binomial', 'binom', 'logit')) {
      total <- total %>% mutate(observed = ifelse(is.na(observations), NA, value),
                                estimated = prediction$value)
    }
  }
  if ("value" %in% colnames(total)) {
    total <- total %>%
      mutate(value = case_when(value > 1 ~ 1,
                               value < 0 ~ 0,
                               TRUE ~ value))
  }
  total %>% arrange(year)
 }
 #' @rdname resistance_predict
 #' @export
 rsi_predict <- resistance_predict
--- a/R/rsi.R
+++ b/R/rsi.R
@@ -0,0 +1,53 @@
 # ==================================================================== #
 # TITLE                                                                #
 # Antimicrobial Resistance (AMR) Analysis                              #
 #                                                                      #
 # AUTHORS                                                              #
 # Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl)           #
 #                                                                      #
 # LICENCE                                                              #
 # This program is free software; you can redistribute it and/or modify #
 # it under the terms of the GNU General Public License version 2.0,    #
 # as published by the Free Software Foundation.                        #
 #                                                                      #
 # This program is distributed in the hope that it will be useful,      #
 # but WITHOUT ANY WARRANTY; without even the implied warranty of       #
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the        #
 # GNU General Public License for more details.                         #
 # ==================================================================== #
 #' Calculate resistance of isolates
 #'
 #' This function is deprecated. Use the \code{\link{portion}} functions instead.
 #' @inheritParams portion
 #' @param interpretation antimicrobial interpretation to check for
 #' @param ... deprecated parameters to support usage on older versions
 #' @importFrom dplyr case_when
 #' @export
 rsi <- function(ab1,
                ab2 = NULL,
                interpretation = "IR",
                minimum = 30,
                as_percent = FALSE,
                ...) {
  result <- case_when(
    interpretation == "S"             ~ portion_S(ab1 = ab1, ab2 = ab2, minimum = minimum, as_percent = FALSE),
    interpretation %in% c("SI", "IS") ~ portion_SI(ab1 = ab1, ab2 = ab2, minimum = minimum, as_percent = FALSE),
    interpretation == "I"             ~ portion_I(ab1 = ab1, minimum = minimum, as_percent = FALSE),
    interpretation %in% c("RI", "IR") ~ portion_IR(ab1 = ab1, ab2 = ab2, minimum = minimum, as_percent = FALSE),
    interpretation == "R"             ~ portion_R(ab1 = ab1, ab2 = ab2, minimum = minimum, as_percent = FALSE),
    TRUE ~ -1
  )
  if (result == -1) {
    stop("invalid interpretation")
  }
  .Deprecated(new = paste0("portion_", interpretation))
  if (as_percent == TRUE) {
    percent(result, force_zero = TRUE)
  } else {
    result
  }
 }
--- a/R/rsi_IR.R
+++ b/R/rsi_IR.R
@@ -1,791 +0,0 @@
 # ==================================================================== #
 # TITLE                                                                #
 # Antimicrobial Resistance (AMR) Analysis                              #
 #                                                                      #
 # AUTHORS                                                              #
 # Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl)           #
 #                                                                      #
 # LICENCE                                                              #
 # This program is free software; you can redistribute it and/or modify #
 # it under the terms of the GNU General Public License version 2.0,    #
 # as published by the Free Software Foundation.                        #
 #                                                                      #
 # This program is distributed in the hope that it will be useful,      #
 # but WITHOUT ANY WARRANTY; without even the implied warranty of       #
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the        #
 # GNU General Public License for more details.                         #
 # ==================================================================== #
 #' Calculate resistance of isolates
 #'
 #' These functions can be used to calculate the (co-)resistance of microbial isolates (i.e. percentage S, SI, I, IR or R). All functions can be used in \code{dplyr}s \code{\link[dplyr]{summarise}} and support grouped variables, see \emph{Examples}. \cr\cr
 #' \code{rsi_R} and \code{rsi_IR} can be used to calculate resistance, \code{rsi_S} and \code{rsi_SI} can be used to calculate susceptibility.\cr
 #' \code{rsi_n} counts all cases where antimicrobial interpretations are available.
 #' @param ab1 vector of antibiotic interpretations, they will be transformed internally with \code{\link{as.rsi}}
 #' @param ab2 like \code{ab}, a vector of antibiotic interpretations. Use this to calculate (the lack of) co-resistance: the probability where one of two drugs have a susceptible result. See Examples.
 #' @param include_I logical to indicate whether antimicrobial interpretations of "I" should be included
 #' @param minimum minimal amount of available isolates. Any number lower than \code{minimum} will return \code{NA}. The default number of \code{30} isolates is advised by the CLSI as best practice, see Source.
 #' @param as_percent logical to indicate whether the output must be returned as percent (text), will else be a double
 #' @details \strong{Remember that you should filter your table to let it contain only first isolates!} Use \code{\link{first_isolate}} to determine them in your data set.
 #'
 #' The functions \code{resistance} and \code{susceptibility} are wrappers around \code{rsi_IR} and \code{rsi_S}, respectively. All functions use hybrid evaluation (i.e. using C++), which makes these functions 20-30 times faster than the old \code{\link{rsi}} function. This latter function is still available for backwards compatibility but is deprecated.
 #' \if{html}{
 #'   \cr\cr
 #'   To calculate the probability (\emph{p}) of susceptibility of one antibiotic, we use this formula:
 #'   \out{<div style="text-align: center">}\figure{mono_therapy.png}\out{</div>}
 #'   To calculate the probability (\emph{p}) of susceptibility of more antibiotics (i.e. combination therapy), we need to check whether one of them has a susceptible result (as numerator) and count all cases where all antibiotics were tested (as denominator). \cr
 #'   \cr
 #'   For two antibiotics:
 #'   \out{<div style="text-align: center">}\figure{combi_therapy_2.png}\out{</div>}
 #'   \cr
 #'   Theoretically for three antibiotics:
 #'   \out{<div style="text-align: center">}\figure{combi_therapy_3.png}\out{</div>}
 #' }
 #' @source \strong{M39 Analysis and Presentation of Cumulative Antimicrobial Susceptibility Test Data, 4th Edition}, 2014, \emph{Clinical and Laboratory Standards Institute (CLSI)}. \url{https://clsi.org/standards/products/microbiology/documents/m39/}.
 #' @keywords resistance susceptibility rsi_df rsi antibiotics isolate isolates
 #' @return Double or, when \code{as_percent = TRUE}, a character.
 #' @rdname rsi_IR
 #' @name rsi_IR
 #' @export
 #' @examples
 #' # Calculate resistance
 #' rsi_R(septic_patients$amox)
 #' rsi_IR(septic_patients$amox)
 #'
 #' # Or susceptibility
 #' rsi_S(septic_patients$amox)
 #' rsi_SI(septic_patients$amox)
 #'
 #' # Since n_rsi counts available isolates (and is used as denominator),
 #' # you can calculate back to e.g. count resistant isolates:
 #' rsi_IR(septic_patients$amox) * n_rsi(septic_patients$amox)
 #'
 #' library(dplyr)
 #' septic_patients %>%
 #'   group_by(hospital_id) %>%
 #'   summarise(p = rsi_S(cipr),
 #'             n = rsi_n(cipr)) # n_rsi works like n_distinct in dplyr
 #'
 #' septic_patients %>%
 #'   group_by(hospital_id) %>%
 #'   summarise(R = rsi_R(cipr, as_percent = TRUE),
 #'             I = rsi_I(cipr, as_percent = TRUE),
 #'             S = rsi_S(cipr, as_percent = TRUE),
 #'             n = rsi_n(cipr), # also: n_rsi, works like n_distinct in dplyr
 #'             total = n()) # this is the length, NOT the amount of tested isolates
 #'
 #' # Calculate co-resistance between amoxicillin/clav acid and gentamicin,
 #' # so we can see that combination therapy does a lot more than mono therapy:
 #' rsi_S(septic_patients$amcl) # S = 67.3%
 #' rsi_n(septic_patients$amcl) # n = 1570
 #'
 #' rsi_S(septic_patients$gent) # S = 74.0%
 #' rsi_n(septic_patients$gent) # n = 1842
 #'
 #' with(septic_patients,
 #'      rsi_S(amcl, gent))     # S = 92.1%
 #' with(septic_patients,       # n = 1504
 #'      rsi_n(amcl, gent))
 #'
 #' septic_patients %>%
 #'   group_by(hospital_id) %>%
 #'   summarise(cipro_p = rsi_S(cipr, as_percent = TRUE),
 #'             cipro_n = rsi_n(cipr),
 #'             genta_p = rsi_S(gent, as_percent = TRUE),
 #'             genta_n = rsi_n(gent),
 #'             combination_p = rsi_S(cipr, gent, as_percent = TRUE),
 #'             combination_n = rsi_n(cipr, gent))
 #'
 #' \dontrun{
 #'
 #' # calculate current empiric combination therapy of Helicobacter gastritis:
 #' my_table %>%
 #'   filter(first_isolate == TRUE,
 #'          genus == "Helicobacter") %>%
 #'   summarise(p = rsi_S(amox, metr),  # amoxicillin with metronidazole
 #'             n = rsi_n(amox, metr))
 #' }
 rsi_R <- function(ab1,
                  minimum = 30,
                  as_percent = FALSE) {
  resistance(ab1 = ab1,
             include_I = FALSE,
             minimum = minimum,
             as_percent = as_percent)
 }
 #' @rdname rsi_IR
 #' @export
 rsi_IR <- function(ab1,
                   minimum = 30,
                   as_percent = FALSE) {
  resistance(ab1 = ab1,
             include_I = TRUE,
             minimum = minimum,
             as_percent = as_percent)
 }
 #' @rdname rsi_IR
 #' @export
 rsi_I <- function(ab1,
                  minimum = 30,
                  as_percent = FALSE) {
  intermediate(ab1 = ab1,
               minimum = minimum,
               as_percent = as_percent)
 }
 #' @rdname rsi_IR
 #' @export
 rsi_SI <- function(ab1,
                   ab2 = NULL,
                   minimum = 30,
                   as_percent = FALSE) {
  susceptibility(ab1 = ab1,
                 ab2 = ab2,
                 include_I = TRUE,
                 minimum = minimum,
                 as_percent = as_percent)
 }
 #' @rdname rsi_IR
 #' @export
 rsi_S <- function(ab1,
                  ab2 = NULL,
                  minimum = 30,
                  as_percent = FALSE) {
  susceptibility(ab1 = ab1,
                 ab2 = ab2,
                 include_I = FALSE,
                 minimum = minimum,
                 as_percent = as_percent)
 }
 #' @rdname rsi_IR
 #' @export
 resistance <- function(ab1,
                       include_I = TRUE,
                       minimum = 30,
                       as_percent = FALSE) {
  if (NCOL(ab1) > 1) {
    stop('`ab1` must be a vector of antimicrobial interpretations', call. = FALSE)
  }
  if (!is.logical(include_I)) {
    stop('`include_I` must be logical', call. = FALSE)
  }
  if (!is.numeric(minimum)) {
    stop('`minimum` must be numeric', call. = FALSE)
  }
  if (!is.logical(as_percent)) {
    stop('`as_percent` must be logical', call. = FALSE)
  }
  # ab_name <- deparse(substitute(ab))
  if (!is.rsi(ab1)) {
    x <- as.rsi(ab1)
    warning("Increase speed by transforming to class `rsi` on beforehand: df %>% mutate_at(vars(col10:col20), as.rsi)")
  } else {
    x <- ab1
  }
  total <- length(x) - sum(is.na(x)) # faster than C++
  if (total < minimum) {
    # warning("Too few isolates available for ", ab_name, ": ", total, " < ", minimum, "; returning NA.", call. = FALSE)
    return(NA)
  }
  found <- .Call(`_AMR_rsi_calc_R`, x, include_I)
  if (as_percent == TRUE) {
    percent(found / total, force_zero = TRUE)
  } else {
    found / total
  }
 }
 #' @rdname rsi_IR
 #' @export
 intermediate <- function(ab1,
                         minimum = 30,
                         as_percent = FALSE) {
  if (NCOL(ab1) > 1) {
    stop('`ab1` must be a vector of antimicrobial interpretations', call. = FALSE)
  }
  if (!is.numeric(minimum)) {
    stop('`minimum` must be numeric', call. = FALSE)
  }
  if (!is.logical(as_percent)) {
    stop('`as_percent` must be logical', call. = FALSE)
  }
  # ab_name <- deparse(substitute(ab))
  if (!is.rsi(ab1)) {
    x <- as.rsi(ab1)
    warning("Increase speed by transforming to class `rsi` on beforehand: df %>% mutate_at(vars(col10:col20), as.rsi)")
  } else {
    x <- ab1
  }
  total <- length(x) - sum(is.na(x)) # faster than C++
  if (total < minimum) {
    # warning("Too few isolates available for ", ab_name, ": ", total, " < ", minimum, "; returning NA.", call. = FALSE)
    return(NA)
  }
  found <- .Call(`_AMR_rsi_calc_I`, x)
  if (as_percent == TRUE) {
    percent(found / total, force_zero = TRUE)
  } else {
    found / total
  }
 }
 #' @rdname rsi_IR
 #' @export
 susceptibility <- function(ab1,
                           ab2 = NULL,
                           include_I = FALSE,
                           minimum = 30,
                           as_percent = FALSE) {
  if (NCOL(ab1) > 1) {
    stop('`ab1` must be a vector of antimicrobial interpretations', call. = FALSE)
  }
  if (!is.logical(include_I)) {
    stop('`include_I` must be logical', call. = FALSE)
  }
  if (!is.numeric(minimum)) {
    stop('`minimum` must be numeric', call. = FALSE)
  }
  if (!is.logical(as_percent)) {
    stop('`as_percent` must be logical', call. = FALSE)
  }
  print_warning <- FALSE
  if (!is.rsi(ab1)) {
    ab1 <- as.rsi(ab1)
    print_warning <- TRUE
  }
  if (!is.null(ab2)) {
    # ab_name <- paste(deparse(substitute(ab1)), "and", deparse(substitute(ab2)))
    if (NCOL(ab2) > 1) {
      stop('`ab2` must be a vector of antimicrobial interpretations', call. = FALSE)
    }
    if (!is.rsi(ab2)) {
      ab2 <- as.rsi(ab2)
      print_warning <- TRUE
    }
    x <- apply(X = data.frame(ab1 = as.integer(ab1),
                              ab2 = as.integer(ab2)),
               MARGIN = 1,
               FUN = min)
  } else {
    x <- ab1
    # ab_name <- deparse(substitute(ab1))
  }
  total <- length(x) - sum(is.na(x))
  if (total < minimum) {
    # warning("Too few isolates available for ", ab_name, ": ", total, " < ", minimum, "; returning NA.", call. = FALSE)
    return(NA)
  }
  found <- .Call(`_AMR_rsi_calc_S`, x, include_I)
  if (print_warning == TRUE) {
    warning("Increase speed by transforming to class `rsi` on beforehand: df %>% mutate_at(vars(col10:col20), as.rsi)")
  }
  if (as_percent == TRUE) {
    percent(found / total, force_zero = TRUE)
  } else {
    found / total
  }
 }
 #' @rdname rsi_IR
 #' @export
 rsi_n <- function(ab1, ab2 = NULL) {
  if (NCOL(ab1) > 1) {
    stop('`ab` must be a vector of antimicrobial interpretations', call. = FALSE)
  }
  if (!is.rsi(ab1)) {
    ab1 <- as.rsi(ab1)
  }
  if (!is.null(ab2)) {
    if (NCOL(ab2) > 1) {
      stop('`ab2` must be a vector of antimicrobial interpretations', call. = FALSE)
    }
    if (!is.rsi(ab2)) {
      ab2 <- as.rsi(ab2)
    }
    sum(!is.na(ab1) & !is.na(ab2))
  } else {
    sum(!is.na(ab1))
  }
 }
 #' @rdname rsi_IR
 #' @export
 n_rsi <- rsi_n
 #' @inherit resistance
 #' @description This function is deprecated. Use \code{\link{rsi_IR}} instead.
 #' @param info calculate the amount of available isolates and print it, like \code{n = 423}
 #' @param warning show a warning when the available amount of isolates is below \code{minimum}
 #' @param interpretation antimicrobial interpretation
 #' @export
 rsi <- function(ab1,
                ab2 = NA,
                interpretation = 'IR',
                minimum = 30,
                as_percent = FALSE,
                info = FALSE,
                warning = TRUE) {
  .Deprecated()
  ab1.name <- deparse(substitute(ab1))
  if (ab1.name %like% '.[$].') {
    ab1.name <- unlist(strsplit(ab1.name, "$", fixed = TRUE))
    ab1.name <- ab1.name[length(ab1.name)]
  }
  if (!ab1.name %like% '^[a-z]{3,4}$') {
    ab1.name <- 'rsi1'
  }
  if (length(ab1) == 1 & is.character(ab1)) {
    stop('`ab1` must be a vector of antibiotic interpretations.',
         '\n  Try rsi(', ab1, ', ...) instead of rsi("', ab1, '", ...)', call. = FALSE)
  }
  ab2.name <- deparse(substitute(ab2))
  if (ab2.name %like% '.[$].') {
    ab2.name <- unlist(strsplit(ab2.name, "$", fixed = TRUE))
    ab2.name <- ab2.name[length(ab2.name)]
  }
  if (!ab2.name %like% '^[a-z]{3,4}$') {
    ab2.name <- 'rsi2'
  }
  if (length(ab2) == 1 & is.character(ab2)) {
    stop('`ab2` must be a vector of antibiotic interpretations.',
         '\n  Try rsi(', ab2, ', ...) instead of rsi("', ab2, '", ...)', call. = FALSE)
  }
  interpretation <- paste(interpretation, collapse = "")
  ab1 <- as.rsi(ab1)
  ab2 <- as.rsi(ab2)
  tbl <- tibble(rsi1 = ab1, rsi2 = ab2)
  colnames(tbl) <- c(ab1.name, ab2.name)
  if (length(ab2) == 1) {
    r <- rsi_df(tbl = tbl,
                ab = ab1.name,
                interpretation = interpretation,
                minimum = minimum,
                as_percent = FALSE,
                info = info,
                warning = warning)
  } else {
    if (length(ab1) != length(ab2)) {
      stop('`ab1` (n = ', length(ab1), ') and `ab2` (n = ', length(ab2), ') must be of same length.', call. = FALSE)
    }
    if (!interpretation %in% c('S', 'IS', 'SI')) {
      warning('`interpretation` not set to S or I/S, albeit analysing a combination therapy.', call. = FALSE)
    }
    r <- rsi_df(tbl = tbl,
                ab = c(ab1.name, ab2.name),
                interpretation = interpretation,
                minimum = minimum,
                as_percent = FALSE,
                info = info,
                warning = warning)
  }
  if (as_percent == TRUE) {
    percent(r, force_zero = TRUE)
  } else {
    r
  }
 }
 #' @importFrom dplyr %>% filter_at vars any_vars all_vars
 #' @noRd
 rsi_df <- function(tbl,
                   ab,
                   interpretation = 'IR',
                   minimum = 30,
                   as_percent = FALSE,
                   info = TRUE,
                   warning = TRUE) {
  # in case tbl$interpretation already exists:
  interpretations_to_check <- paste(interpretation, collapse = "")
  # validate:
  if (min(grepl('^[a-z]{3,4}$', ab)) == 0 &
      min(grepl('^rsi[1-2]$', ab)) == 0) {
    for (i in 1:length(ab)) {
      ab[i] <- paste0('rsi', i)
    }
  }
  if (!grepl('^(S|SI|IS|I|IR|RI|R){1}$', interpretations_to_check)) {
    stop('Invalid `interpretation`; must be "S", "SI", "I", "IR", or "R".')
  }
  if ('is_ic' %in% colnames(tbl)) {
    if (n_distinct(tbl$is_ic) > 1 & warning == TRUE) {
      warning('Dataset contains isolates from the Intensive Care. Exclude them from proper epidemiological analysis.')
    }
  }
  # transform when checking for different results
  if (interpretations_to_check %in% c('SI', 'IS')) {
    for (i in 1:length(ab)) {
      tbl[which(tbl[, ab[i]] == 'I'), ab[i]] <- 'S'
    }
    interpretations_to_check <- 'S'
  }
  if (interpretations_to_check %in% c('RI', 'IR')) {
    for (i in 1:length(ab)) {
      tbl[which(tbl[, ab[i]] == 'I'), ab[i]] <- 'R'
    }
    interpretations_to_check <- 'R'
  }
  # get fraction
  if (length(ab) == 1) {
    numerator <- tbl %>%
      filter(pull(., ab[1]) == interpretations_to_check) %>%
      nrow()
    denominator <- tbl %>%
      filter(pull(., ab[1]) %in% c("S", "I", "R")) %>%
      nrow()
  } else if (length(ab) == 2) {
    if (interpretations_to_check != 'S') {
      warning('`interpretation` not set to S or I/S, albeit analysing a combination therapy.', call. = FALSE)
    }
    numerator <- tbl %>%
      filter_at(vars(ab[1], ab[2]),
                any_vars(. == interpretations_to_check)) %>%
      filter_at(vars(ab[1], ab[2]),
                all_vars(. %in% c("S", "R", "I"))) %>%
      nrow()
    denominator <- tbl %>%
      filter_at(vars(ab[1], ab[2]),
                all_vars(. %in% c("S", "R", "I"))) %>%
      nrow()
  } else {
    stop('Maximum of 2 drugs allowed.')
  }
  # build text part
  if (info == TRUE) {
    cat('n =', denominator)
    info.txt1 <- percent(denominator / nrow(tbl))
    if (denominator == 0) {
      info.txt1 <- 'none'
    }
    info.txt2 <- gsub(',', ' and',
                      ab %>%
                        abname(tolower = TRUE) %>%
                        toString(), fixed = TRUE)
    info.txt2 <- gsub('rsi1 and rsi2', 'these two drugs', info.txt2, fixed = TRUE)
    info.txt2 <- gsub('rsi1', 'this drug', info.txt2, fixed = TRUE)
    cat(paste0(' (of ', nrow(tbl), ' in total; ', info.txt1, ' tested on ', info.txt2, ')\n'))
  }
  # calculate and format
  y <- numerator / denominator
  if (as_percent == TRUE) {
    y <- percent(y, force_zero = TRUE)
  }
  if (denominator < minimum) {
    if (warning == TRUE) {
      warning(paste0('TOO FEW ISOLATES OF ', toString(ab), ' (n = ', denominator, ', n < ', minimum, '); NO RESULT.'))
    }
    y <- NA
  }
  # output
  y
 }
 #' Predict antimicrobial resistance
 #'
 #' Create a prediction model to predict antimicrobial resistance for the next years on statistical solid ground. Standard errors (SE) will be returned as columns \code{se_min} and \code{se_max}. See Examples for a real live example.
 #' @inheritParams first_isolate
 #' @param col_ab column name of \code{tbl} with antimicrobial interpretations (\code{R}, \code{I} and \code{S})
 #' @param col_date column name of the date, will be used to calculate years if this column doesn't consist of years already
 #' @param year_min lowest year to use in the prediction model, dafaults the lowest year in \code{col_date}
 #' @param year_max highest year to use in the prediction model, defaults to 15 years after today
 #' @param year_every unit of sequence between lowest year found in the data and \code{year_max}
 #' @param minimum minimal amount of available isolates per year to include. Years containing less observations will be estimated by the model.
 #' @param model the statistical model of choice. Valid values are \code{"binomial"} (or \code{"binom"} or \code{"logit"}) or \code{"loglin"} or \code{"linear"} (or \code{"lin"}).
 #' @param I_as_R treat \code{I} as \code{R}
 #' @param preserve_measurements logical to indicate whether predictions of years that are actually available in the data should be overwritten with the original data. The standard errors of those years will be \code{NA}.
 #' @param info print textual analysis with the name and \code{\link{summary}} of the model.
 #' @return \code{data.frame} with columns:
 #' \itemize{
 #'   \item{\code{year}}
 #'   \item{\code{value}, the same as \code{estimated} when \code{preserve_measurements = FALSE}, and a combination of \code{observed} and \code{estimated} otherwise}
 #'   \item{\code{se_min}, the lower bound of the standard error with a minimum of \code{0}}
 #'   \item{\code{se_max} the upper bound of the standard error with a maximum of \code{1}}
 #'   \item{\code{observations}, the total number of observations, i.e. S + I + R}
 #'   \item{\code{observed}, the original observed values}
 #'   \item{\code{estimated}, the estimated values, calculated by the model}
 #' }
 #' @seealso \code{\link{resistance}} \cr \code{\link{lm}} \code{\link{glm}}
 #' @rdname resistance_predict
 #' @export
 #' @importFrom stats predict glm lm
 #' @importFrom dplyr %>% pull mutate group_by_at summarise filter n_distinct arrange case_when
 # @importFrom tidyr spread
 #' @examples
 #' \dontrun{
 #' # use it with base R:
 #' resistance_predict(tbl = tbl[which(first_isolate == TRUE & genus == "Haemophilus"),],
 #'                    col_ab = "amcl", col_date = "date")
 #'
 #' # or use dplyr so you can actually read it:
 #' library(dplyr)
 #' tbl %>%
 #'   filter(first_isolate == TRUE,
 #'          genus == "Haemophilus") %>%
 #'   resistance_predict(amcl, date)
 #' }
 #'
 #'
 #' # real live example:
 #' library(dplyr)
 #' septic_patients %>%
 #'   # get bacteria properties like genus and species
 #'   left_join_microorganisms("bactid") %>%
 #'   # calculate first isolates
 #'   mutate(first_isolate =
 #'            first_isolate(.,
 #'                          "date",
 #'                          "patient_id",
 #'                          "bactid",
 #'                          col_specimen = NA,
 #'                          col_icu = NA)) %>%
 #'   # filter on first E. coli isolates
 #'   filter(genus == "Escherichia",
 #'          species == "coli",
 #'          first_isolate == TRUE) %>%
 #'   # predict resistance of cefotaxime for next years
 #'   resistance_predict(col_ab = "cfot",
 #'                      col_date = "date",
 #'                      year_max = 2025,
 #'                      preserve_measurements = TRUE,
 #'                      minimum = 0)
 #'
 #' # create nice plots with ggplot
 #' if (!require(ggplot2)) {
 #'
 #'   data <- septic_patients %>%
 #'     filter(bactid == "ESCCOL") %>%
 #'     resistance_predict(col_ab = "amox",
 #'                       col_date = "date",
 #'                       info = FALSE,
 #'                       minimum = 15)
 #'
 #'   ggplot(data,
 #'          aes(x = year)) +
 #'     geom_col(aes(y = value),
 #'              fill = "grey75") +
 #'     geom_errorbar(aes(ymin = se_min,
 #'                       ymax = se_max),
 #'                   colour = "grey50") +
 #'     scale_y_continuous(limits = c(0, 1),
 #'                        breaks = seq(0, 1, 0.1),
 #'                        labels = paste0(seq(0, 100, 10), "%")) +
 #'     labs(title = expression(paste("Forecast of amoxicillin resistance in ",
 #'                                   italic("E. coli"))),
 #'          y = "%IR",
 #'          x = "Year") +
 #'     theme_minimal(base_size = 13)
 #' }
 resistance_predict <- function(tbl,
                               col_ab,
                               col_date,
                               year_min = NULL,
                               year_max = NULL,
                               year_every = 1,
                               minimum = 30,
                               model = 'binomial',
                               I_as_R = TRUE,
                               preserve_measurements = TRUE,
                               info = TRUE) {
  if (nrow(tbl) == 0) {
    stop('This table does not contain any observations.')
  }
  if (!col_ab %in% colnames(tbl)) {
    stop('Column ', col_ab, ' not found.')
  }
  if (!col_date %in% colnames(tbl)) {
    stop('Column ', col_date, ' not found.')
  }
  if ('grouped_df' %in% class(tbl)) {
    # no grouped tibbles please, mutate will throw errors
    tbl <- base::as.data.frame(tbl, stringsAsFactors = FALSE)
  }
  if (I_as_R == TRUE) {
    tbl[, col_ab] <- gsub('I', 'R', tbl %>% pull(col_ab))
  }
  if (!tbl %>% pull(col_ab) %>% is.rsi()) {
    tbl[, col_ab] <- tbl %>% pull(col_ab) %>% as.rsi()
  }
  year <- function(x) {
    if (all(grepl('^[0-9]{4}$', x))) {
      x
    } else {
      as.integer(format(as.Date(x), '%Y'))
    }
  }
  df <- tbl %>%
    mutate(year = tbl %>% pull(col_date) %>% year()) %>%
    group_by_at(c('year', col_ab)) %>%
    summarise(n())
  if (df %>% pull(col_ab) %>% n_distinct(na.rm = TRUE) < 2) {
    stop("No variety in antimicrobial interpretations - all isolates are '",
         df %>% pull(col_ab) %>% unique() %>% .[!is.na(.)], "'.",
         call. = FALSE)
  }
  colnames(df) <- c('year', 'antibiotic', 'observations')
  df <- df %>%
    filter(!is.na(antibiotic)) %>%
    tidyr::spread(antibiotic, observations, fill = 0) %>%
    mutate(total = R + S) %>%
    filter(total >= minimum)
  if (NROW(df) == 0) {
    stop('There are no observations.')
  }
  year_lowest <- min(df$year)
  if (is.null(year_min)) {
    year_min <- year_lowest
  } else {
    year_min <- max(year_min, year_lowest, na.rm = TRUE)
  }
  if (is.null(year_max)) {
    year_max <- year(Sys.Date()) + 15
  }
  years_predict <- seq(from = year_min, to = year_max, by = year_every)
  if (model %in% c('binomial', 'binom', 'logit')) {
    logitmodel <- with(df, glm(cbind(R, S) ~ year, family = binomial))
    if (info == TRUE) {
      cat('\nLogistic regression model (logit) with binomial distribution')
      cat('\n------------------------------------------------------------\n')
      print(summary(logitmodel))
    }
    predictmodel <- predict(logitmodel, newdata = with(df, list(year = years_predict)), type = "response", se.fit = TRUE)
    prediction <- predictmodel$fit
    se <- predictmodel$se.fit
  } else if (model == 'loglin') {
    loglinmodel <- with(df, glm(R ~ year, family = poisson))
    if (info == TRUE) {
      cat('\nLog-linear regression model (loglin) with poisson distribution')
      cat('\n--------------------------------------------------------------\n')
      print(summary(loglinmodel))
    }
    predictmodel <- predict(loglinmodel, newdata = with(df, list(year = years_predict)), type = "response", se.fit = TRUE)
    prediction <- predictmodel$fit
    se <- predictmodel$se.fit
  } else if (model %in% c('lin', 'linear')) {
    linmodel <- with(df, lm((R / (R + S)) ~ year))
    if (info == TRUE) {
      cat('\nLinear regression model')
      cat('\n-----------------------\n')
      print(summary(linmodel))
    }
    predictmodel <- predict(linmodel, newdata = with(df, list(year = years_predict)), se.fit = TRUE)
    prediction <- predictmodel$fit
    se <- predictmodel$se.fit
  } else {
    stop('No valid model selected.')
  }
  # prepare the output dataframe
  prediction <- data.frame(year = years_predict, value = prediction, stringsAsFactors = FALSE)
  prediction$se_min <- prediction$value - se
  prediction$se_max <- prediction$value + se
  if (model == 'loglin') {
    prediction$value <- prediction$value %>%
      format(scientific = FALSE) %>%
      as.integer()
    prediction$se_min <- prediction$se_min %>% as.integer()
    prediction$se_max <- prediction$se_max %>% as.integer()
    colnames(prediction) <- c('year', 'amountR', 'se_max', 'se_min')
  } else {
    prediction$se_max[which(prediction$se_max > 1)] <- 1
  }
  prediction$se_min[which(prediction$se_min < 0)] <- 0
  prediction$observations = NA
  total <- prediction
  if (preserve_measurements == TRUE) {
    # replace estimated data by observed data
    if (I_as_R == TRUE) {
      if (!'I' %in% colnames(df)) {
        df$I <- 0
      }
      df$value <- df$R / rowSums(df[, c('R', 'S', 'I')])
    } else {
      df$value <- df$R / rowSums(df[, c('R', 'S')])
    }
    measurements <- data.frame(year = df$year,
                               value = df$value,
                               se_min = NA,
                               se_max = NA,
                               observations = df$total,
                               stringsAsFactors = FALSE)
    colnames(measurements) <- colnames(prediction)
    total <- rbind(measurements,
                   prediction %>% filter(!year %in% df$year))
    if (model %in% c('binomial', 'binom', 'logit')) {
      total <- total %>% mutate(observed = ifelse(is.na(observations), NA, value),
                                estimated = prediction$value)
    }
  }
  if ("value" %in% colnames(total)) {
    total <- total %>%
      mutate(value = case_when(value > 1 ~ 1,
                               value < 0 ~ 0,
                               TRUE ~ value))
  }
  total %>% arrange(year)
 }
 #' @rdname resistance_predict
 #' @export
 rsi_predict <- resistance_predict
--- a/README.md
+++ b/README.md
@@ -13,8 +13,8 @@ Erwin E.A. Hassing<sup>2</sup>,
 <a href="https://orcid.org/0000-0003-4881-038X"><img src="https://cran.r-project.org/web/orcid.svg" height="16px"></a> Alex W. Friedrich<sup>1,b</sup>,
 <a href="https://orcid.org/0000-0003-1634-0010"><img src="https://cran.r-project.org/web/orcid.svg" height="16px"></a> Bhanu Sinha<sup>1,b</sup>
-<sup>1</sup> Department of Medical Microbiology, University of Groningen, University Medical Center Groningen, Groningen, the Netherlands<br>
+<sup>1</sup> Department of Medical Microbiology, University of Groningen, University Medical Center Groningen, Groningen, the Netherlands - [rug.nl](http://www.rug.nl) [umcg.nl](http://www.umcg.nl)<br>
-<sup>2</sup> Certe Medical Diagnostics & Advice, Groningen, the Netherlands<br>
+<sup>2</sup> Certe Medical Diagnostics & Advice, Groningen, the Netherlands - [certe.nl](http://www.certe.nl)<br>
 <sup>a</sup> R package author and dissertant<br>
 <sup>b</sup> Thesis advisor
@@ -25,10 +25,10 @@ Erwin E.A. Hassing<sup>2</sup>,
 <a href="http://www.eurhealth-1health.eu"><img src="man/figures/logo_interreg.png" height="60px"></a>
 ## Why this package?
-This R package contains functions to make **microbiological, epidemiological data analysis easier**. It allows the use of some new classes to work with MIC values and antimicrobial interpretations (i.e. values S, I and R).
+This R package was intended to make microbial epidemiology easier. Most functions contain extensive help pages to get started.
 With `AMR` you can:
-* Calculate the resistance (and even co-resistance) of microbial isolates with the `rsi_R`, `rsi_IR`, `rsi_I`, `rsi_SI` and `rsi_S` functions, that can also be used with the `dplyr` package (e.g. in conjunction with `summarise`)
+* Calculate the resistance (and even co-resistance) of microbial isolates with the `portion_R`, `portion_IR`, `portion_I`, `portion_SI` and `portion_S` functions, that can also be used with the `dplyr` package (e.g. in conjunction with `summarise`)
 * Predict antimicrobial resistance for the nextcoming years with the `resistance_predict` function
 * Apply [EUCAST rules to isolates](http://www.eucast.org/expert_rules_and_intrinsic_resistance/) with the `EUCAST_rules` function
 * Identify first isolates of every patient [using guidelines from the CLSI](https://clsi.org/standards/products/microbiology/documents/m39/) (Clinical and Laboratory Standards Institute) with the `first_isolate` function
@@ -50,7 +50,7 @@ And it contains:
 With the `MDRO` function (abbreviation of Multi Drug Resistant Organisms), you can check your isolates for exceptional resistance with country-specific guidelines or EUCAST rules. Currently guidelines for Germany and the Netherlands are supported. Please suggest addition of your own country here: [https://github.com/msberends/AMR/issues/new](https://github.com/msberends/AMR/issues/new?title=New%20guideline%20for%20MDRO&body=%3C--%20Please%20add%20your%20country%20code,%20guideline%20name,%20version%20and%20source%20below%20and%20remove%20this%20line--%3E).
-The functions to calculate microbial resistance use expressions that are not evaluated by R itself, but by alternative C++ code that is 25 to 30 times faster and uses less memory. This is called *hybrid evaluation*.
+The functions to calculate microbial resistance use expressions that are not evaluated by R itself, but by alternative C++ code that is 25 to 30 times faster than R and uses less memory. This is called *hybrid evaluation*.
 #### Read all changes and new functions in [NEWS.md](NEWS.md).
--- a/man/first_isolate.Rd
+++ b/man/first_isolate.Rd
@@ -75,31 +75,32 @@ Determine first (weighted) isolates of all microorganisms of every patient per e
 }
 \examples{
-# septic_patients is a dataset available in the AMR package
+# septic_patients is a dataset available in the AMR package. It is true data.
 ?septic_patients
 my_patients <- septic_patients
 library(dplyr)
-my_patients$first_isolate <- my_patients \%>\%
+my_patients <- septic_patients \%>\%
-  first_isolate(col_date = "date",
+  mutate(first_isolate = first_isolate(.,
-                col_patient_id = "patient_id",
+                                       col_date = "date",
-                col_bactid = "bactid")
+                                       col_patient_id = "patient_id",
                                       col_bactid = "bactid"))
 # Now let's see if first isolates matter:
 A <- my_patients \%>\%
  group_by(hospital_id) \%>\%
-  summarise(count = n_rsi(gent), # gentamicin
+  summarise(count = n_rsi(gent),            # gentamicin availability
-            resistance = resistance(gent))
+            resistance = portion_IR(gent))  # gentamicin resistance
 B <- my_patients \%>\%
-  filter(first_isolate == TRUE) \%>\% # the 1st isolate filter
+  filter(first_isolate == TRUE) \%>\%         # the 1st isolate filter
  group_by(hospital_id) \%>\%
-  summarise(count = n_rsi(gent),
+  summarise(count = n_rsi(gent),            # gentamicin availability
-            resistance = resistance(gent))
+            resistance = portion_IR(gent))  # gentamicin resistance
-# Have a look at A and B. B is more reliable because every isolate is
+# Have a look at A and B.
-# counted once. Gentamicin resitance in hospital D appears to be 5\%
+# B is more reliable because every isolate is only counted once.
-# higher than originally thought.
+# Gentamicin resitance in hospital D appears to be 5.4\% higher than
 # when you (erroneously) would have used all isolates!
 ## OTHER EXAMPLES:
--- a/man/n_rsi.Rd
+++ b/man/n_rsi.Rd
@@ -0,0 +1,29 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/n_rsi.R
 \name{n_rsi}
 \alias{n_rsi}
 \title{Count cases with antimicrobial results}
 \usage{
 n_rsi(ab1, ab2 = NULL)
 }
 \arguments{
 \item{ab1, ab2}{vector of antibiotic interpretations, they will be transformed internally with \code{\link{as.rsi}} if needed}
 }
 \description{
 This counts all cases where antimicrobial interpretations are available. Its use is equal to \code{\link{n_distinct}}.
 }
 \examples{
 library(dplyr)
 septic_patients \%>\%
  group_by(hospital_id) \%>\%
  summarise(cipro_p = portion_S(cipr, as_percent = TRUE),
            cipro_n = n_rsi(cipr),
            genta_p = portion_S(gent, as_percent = TRUE),
            genta_n = n_rsi(gent),
            combination_p = portion_S(cipr, gent, as_percent = TRUE),
            combination_n = n_rsi(cipr, gent))
 }
 \seealso{
 The \code{\link{portion}} functions to calculate resistance and susceptibility.
 }
--- a/man/portion.Rd
+++ b/man/portion.Rd
@@ -0,0 +1,127 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/portion.R
 \name{portion}
 \alias{portion}
 \alias{portion_R}
 \alias{portion_IR}
 \alias{portion_I}
 \alias{portion_SI}
 \alias{portion_S}
 \title{Calculate resistance of isolates}
 \source{
 \strong{M39 Analysis and Presentation of Cumulative Antimicrobial Susceptibility Test Data, 4th Edition}, 2014, \emph{Clinical and Laboratory Standards Institute (CLSI)}. \url{https://clsi.org/standards/products/microbiology/documents/m39/}.
 }
 \usage{
 portion_R(ab1, ab2 = NULL, minimum = 30, as_percent = FALSE)
 portion_IR(ab1, ab2 = NULL, minimum = 30, as_percent = FALSE)
 portion_I(ab1, minimum = 30, as_percent = FALSE)
 portion_SI(ab1, ab2 = NULL, minimum = 30, as_percent = FALSE)
 portion_S(ab1, ab2 = NULL, minimum = 30, as_percent = FALSE)
 }
 \arguments{
 \item{ab1}{vector of antibiotic interpretations, they will be transformed internally with \code{\link{as.rsi}} if needed}
 \item{ab2}{like \code{ab}, a vector of antibiotic interpretations. Use this to calculate (the lack of) co-resistance: the probability where one of two drugs have a resistant or susceptible result. See Examples.}
 \item{minimum}{minimal amount of available isolates. Any number lower than \code{minimum} will return \code{NA}. The default number of \code{30} isolates is advised by the CLSI as best practice, see Source.}
 \item{as_percent}{logical to indicate whether the output must be returned as percent (text), will else be a double}
 }
 \value{
 Double or, when \code{as_percent = TRUE}, a character.
 }
 \description{
 These functions can be used to calculate the (co-)resistance of microbial isolates (i.e. percentage S, SI, I, IR or R). All functions can be used in \code{dplyr}s \code{\link[dplyr]{summarise}} and support grouped variables, see \emph{Examples}.
 \code{portion_R} and \code{portion_IR} can be used to calculate resistance, \code{portion_S} and \code{portion_SI} can be used to calculate susceptibility.\cr
 }
 \details{
 \strong{Remember that you should filter your table to let it contain only first isolates!} Use \code{\link{first_isolate}} to determine them in your data set.
 The old \code{\link{rsi}} function is still available for backwards compatibility but is deprecated.
 \if{html}{
  \cr\cr
  To calculate the probability (\emph{p}) of susceptibility of one antibiotic, we use this formula:
  \out{<div style="text-align: center">}\figure{mono_therapy.png}\out{</div>}
  To calculate the probability (\emph{p}) of susceptibility of more antibiotics (i.e. combination therapy), we need to check whether one of them has a susceptible result (as numerator) and count all cases where all antibiotics were tested (as denominator). \cr
  \cr
  For two antibiotics:
  \out{<div style="text-align: center">}\figure{combi_therapy_2.png}\out{</div>}
  \cr
  Theoretically for three antibiotics:
  \out{<div style="text-align: center">}\figure{combi_therapy_3.png}\out{</div>}
 }
 }
 \examples{
 # Calculate resistance
 portion_R(septic_patients$amox)
 portion_IR(septic_patients$amox)
 # Or susceptibility
 portion_S(septic_patients$amox)
 portion_SI(septic_patients$amox)
 # Since n_rsi counts available isolates (and is used as denominator),
 # you can calculate back to count e.g. non-susceptible isolates:
 portion_IR(septic_patients$amox) * n_rsi(septic_patients$amox)
 library(dplyr)
 septic_patients \%>\%
  group_by(hospital_id) \%>\%
  summarise(p = portion_S(cipr),
            n = n_rsi(cipr)) # n_rsi works like n_distinct in dplyr
 septic_patients \%>\%
  group_by(hospital_id) \%>\%
  summarise(R = portion_R(cipr, as_percent = TRUE),
            I = portion_I(cipr, as_percent = TRUE),
            S = portion_S(cipr, as_percent = TRUE),
            n = n_rsi(cipr), # works like n_distinct in dplyr
            total = n())     # NOT the amount of tested isolates!
 # Calculate co-resistance between amoxicillin/clav acid and gentamicin,
 # so we can see that combination therapy does a lot more than mono therapy:
 portion_S(septic_patients$amcl) # S = 67.3\%
 n_rsi(septic_patients$amcl)     # n = 1570
 portion_S(septic_patients$gent) # S = 74.0\%
 n_rsi(septic_patients$gent)     # n = 1842
 with(septic_patients,
     portion_S(amcl, gent))     # S = 92.1\%
 with(septic_patients,           # n = 1504
     n_rsi(amcl, gent))
 septic_patients \%>\%
  group_by(hospital_id) \%>\%
  summarise(cipro_p = portion_S(cipr, as_percent = TRUE),
            cipro_n = n_rsi(cipr),
            genta_p = portion_S(gent, as_percent = TRUE),
            genta_n = n_rsi(gent),
            combination_p = portion_S(cipr, gent, as_percent = TRUE),
            combination_n = n_rsi(cipr, gent))
 \dontrun{
 # calculate current empiric combination therapy of Helicobacter gastritis:
 my_table \%>\%
  filter(first_isolate == TRUE,
         genus == "Helicobacter") \%>\%
  summarise(p = portion_S(amox, metr),  # amoxicillin with metronidazole
            n = n_rsi(amox, metr))
 }
 }
 \seealso{
 \code{\link{n_rsi}} to count cases with antimicrobial results.
 }
 \keyword{antibiotics}
 \keyword{isolate}
 \keyword{isolates}
 \keyword{resistance}
 \keyword{rsi}
 \keyword{rsi_df}
 \keyword{susceptibility}
--- a/man/resistance_predict.Rd
+++ b/man/resistance_predict.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/rsi_IR.R
+% Please edit documentation in R/resistance_predict.R
 \name{resistance_predict}
 \alias{resistance_predict}
 \alias{rsi_predict}
@@ -118,5 +118,5 @@ if (!require(ggplot2)) {
 }
 }
 \seealso{
-\code{\link{resistance}} \cr \code{\link{lm}} \code{\link{glm}}
+The \code{\link{portion}} function to calculate resistance, \cr \code{\link{lm}} \code{\link{glm}}
 }
--- a/man/rsi.Rd
+++ b/man/rsi.Rd
@@ -1,106 +1,25 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/rsi_IR.R
+% Please edit documentation in R/rsi.R
 \name{rsi}
 \alias{rsi}
 \title{Calculate resistance of isolates}
 \usage{
-rsi(ab1, ab2 = NA, interpretation = "IR", minimum = 30,
+rsi(ab1, ab2 = NULL, interpretation = "IR", minimum = 30,
-  as_percent = FALSE, info = FALSE, warning = TRUE)
+  as_percent = FALSE, ...)
 }
 \arguments{
-\item{ab1}{vector of antibiotic interpretations, they will be transformed internally with \code{\link{as.rsi}}}
+\item{ab1}{vector of antibiotic interpretations, they will be transformed internally with \code{\link{as.rsi}} if needed}
-\item{ab2}{like \code{ab}, a vector of antibiotic interpretations. Use this to calculate (the lack of) co-resistance: the probability where one of two drugs have a susceptible result. See Examples.}
+\item{ab2}{like \code{ab}, a vector of antibiotic interpretations. Use this to calculate (the lack of) co-resistance: the probability where one of two drugs have a resistant or susceptible result. See Examples.}
-\item{interpretation}{antimicrobial interpretation}
+\item{interpretation}{antimicrobial interpretation to check for}
 \item{minimum}{minimal amount of available isolates. Any number lower than \code{minimum} will return \code{NA}. The default number of \code{30} isolates is advised by the CLSI as best practice, see Source.}
 \item{as_percent}{logical to indicate whether the output must be returned as percent (text), will else be a double}
-\item{info}{calculate the amount of available isolates and print it, like \code{n = 423}}
+\item{...}{deprecated parameters to support usage on older versions}
 \item{warning}{show a warning when the available amount of isolates is below \code{minimum}}
 }
 \value{
 Double or, when \code{as_percent = TRUE}, a character.
 }
 \description{
-This function is deprecated. Use \code{\link{rsi_IR}} instead.
+This function is deprecated. Use the \code{\link{portion}} functions instead.
 }
 \details{
 \strong{Remember that you should filter your table to let it contain only first isolates!} Use \code{\link{first_isolate}} to determine them in your data set.
 The functions \code{resistance} and \code{susceptibility} are wrappers around \code{rsi_IR} and \code{rsi_S}, respectively. All functions use hybrid evaluation (i.e. using C++), which makes these functions 20-30 times faster than the old \code{\link{rsi}} function. This latter function is still available for backwards compatibility but is deprecated.
 \if{html}{
  \cr\cr
  To calculate the probability (\emph{p}) of susceptibility of one antibiotic, we use this formula:
  \out{<div style="text-align: center">}\figure{mono_therapy.png}\out{</div>}
  To calculate the probability (\emph{p}) of susceptibility of more antibiotics (i.e. combination therapy), we need to check whether one of them has a susceptible result (as numerator) and count all cases where all antibiotics were tested (as denominator). \cr
  \cr
  For two antibiotics:
  \out{<div style="text-align: center">}\figure{combi_therapy_2.png}\out{</div>}
  \cr
  Theoretically for three antibiotics:
  \out{<div style="text-align: center">}\figure{combi_therapy_3.png}\out{</div>}
 }
 }
 \examples{
 # Calculate resistance
 rsi_R(septic_patients$amox)
 rsi_IR(septic_patients$amox)
 # Or susceptibility
 rsi_S(septic_patients$amox)
 rsi_SI(septic_patients$amox)
 # Since n_rsi counts available isolates (and is used as denominator),
 # you can calculate back to e.g. count resistant isolates:
 rsi_IR(septic_patients$amox) * n_rsi(septic_patients$amox)
 library(dplyr)
 septic_patients \%>\%
  group_by(hospital_id) \%>\%
  summarise(p = rsi_S(cipr),
            n = rsi_n(cipr)) # n_rsi works like n_distinct in dplyr
 septic_patients \%>\%
  group_by(hospital_id) \%>\%
  summarise(R = rsi_R(cipr, as_percent = TRUE),
            I = rsi_I(cipr, as_percent = TRUE),
            S = rsi_S(cipr, as_percent = TRUE),
            n = rsi_n(cipr), # also: n_rsi, works like n_distinct in dplyr
            total = n()) # this is the length, NOT the amount of tested isolates
 # Calculate co-resistance between amoxicillin/clav acid and gentamicin,
 # so we can see that combination therapy does a lot more than mono therapy:
 rsi_S(septic_patients$amcl) # S = 67.3\%
 rsi_n(septic_patients$amcl) # n = 1570
 rsi_S(septic_patients$gent) # S = 74.0\%
 rsi_n(septic_patients$gent) # n = 1842
 with(septic_patients,
     rsi_S(amcl, gent))     # S = 92.1\%
 with(septic_patients,       # n = 1504
     rsi_n(amcl, gent))
 septic_patients \%>\%
  group_by(hospital_id) \%>\%
  summarise(cipro_p = rsi_S(cipr, as_percent = TRUE),
            cipro_n = rsi_n(cipr),
            genta_p = rsi_S(gent, as_percent = TRUE),
            genta_n = rsi_n(gent),
            combination_p = rsi_S(cipr, gent, as_percent = TRUE),
            combination_n = rsi_n(cipr, gent))
 \dontrun{
 # calculate current empiric combination therapy of Helicobacter gastritis:
 my_table \%>\%
  filter(first_isolate == TRUE,
         genus == "Helicobacter") \%>\%
  summarise(p = rsi_S(amox, metr),  # amoxicillin with metronidazole
            n = rsi_n(amox, metr))
 }
 }
--- a/man/rsi_IR.Rd
+++ b/man/rsi_IR.Rd
@@ -1,141 +0,0 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/rsi_IR.R
 \name{rsi_IR}
 \alias{rsi_IR}
 \alias{rsi_R}
 \alias{rsi_I}
 \alias{rsi_SI}
 \alias{rsi_S}
 \alias{resistance}
 \alias{intermediate}
 \alias{susceptibility}
 \alias{rsi_n}
 \alias{n_rsi}
 \title{Calculate resistance of isolates}
 \source{
 \strong{M39 Analysis and Presentation of Cumulative Antimicrobial Susceptibility Test Data, 4th Edition}, 2014, \emph{Clinical and Laboratory Standards Institute (CLSI)}. \url{https://clsi.org/standards/products/microbiology/documents/m39/}.
 }
 \usage{
 rsi_R(ab1, minimum = 30, as_percent = FALSE)
 rsi_IR(ab1, minimum = 30, as_percent = FALSE)
 rsi_I(ab1, minimum = 30, as_percent = FALSE)
 rsi_SI(ab1, ab2 = NULL, minimum = 30, as_percent = FALSE)
 rsi_S(ab1, ab2 = NULL, minimum = 30, as_percent = FALSE)
 resistance(ab1, include_I = TRUE, minimum = 30, as_percent = FALSE)
 intermediate(ab1, minimum = 30, as_percent = FALSE)
 susceptibility(ab1, ab2 = NULL, include_I = FALSE, minimum = 30,
  as_percent = FALSE)
 rsi_n(ab1, ab2 = NULL)
 n_rsi(ab1, ab2 = NULL)
 }
 \arguments{
 \item{ab1}{vector of antibiotic interpretations, they will be transformed internally with \code{\link{as.rsi}}}
 \item{minimum}{minimal amount of available isolates. Any number lower than \code{minimum} will return \code{NA}. The default number of \code{30} isolates is advised by the CLSI as best practice, see Source.}
 \item{as_percent}{logical to indicate whether the output must be returned as percent (text), will else be a double}
 \item{ab2}{like \code{ab}, a vector of antibiotic interpretations. Use this to calculate (the lack of) co-resistance: the probability where one of two drugs have a susceptible result. See Examples.}
 \item{include_I}{logical to indicate whether antimicrobial interpretations of "I" should be included}
 }
 \value{
 Double or, when \code{as_percent = TRUE}, a character.
 }
 \description{
 These functions can be used to calculate the (co-)resistance of microbial isolates (i.e. percentage S, SI, I, IR or R). All functions can be used in \code{dplyr}s \code{\link[dplyr]{summarise}} and support grouped variables, see \emph{Examples}. \cr\cr
 \code{rsi_R} and \code{rsi_IR} can be used to calculate resistance, \code{rsi_S} and \code{rsi_SI} can be used to calculate susceptibility.\cr
 \code{rsi_n} counts all cases where antimicrobial interpretations are available.
 }
 \details{
 \strong{Remember that you should filter your table to let it contain only first isolates!} Use \code{\link{first_isolate}} to determine them in your data set.
 The functions \code{resistance} and \code{susceptibility} are wrappers around \code{rsi_IR} and \code{rsi_S}, respectively. All functions use hybrid evaluation (i.e. using C++), which makes these functions 20-30 times faster than the old \code{\link{rsi}} function. This latter function is still available for backwards compatibility but is deprecated.
 \if{html}{
  \cr\cr
  To calculate the probability (\emph{p}) of susceptibility of one antibiotic, we use this formula:
  \out{<div style="text-align: center">}\figure{mono_therapy.png}\out{</div>}
  To calculate the probability (\emph{p}) of susceptibility of more antibiotics (i.e. combination therapy), we need to check whether one of them has a susceptible result (as numerator) and count all cases where all antibiotics were tested (as denominator). \cr
  \cr
  For two antibiotics:
  \out{<div style="text-align: center">}\figure{combi_therapy_2.png}\out{</div>}
  \cr
  Theoretically for three antibiotics:
  \out{<div style="text-align: center">}\figure{combi_therapy_3.png}\out{</div>}
 }
 }
 \examples{
 # Calculate resistance
 rsi_R(septic_patients$amox)
 rsi_IR(septic_patients$amox)
 # Or susceptibility
 rsi_S(septic_patients$amox)
 rsi_SI(septic_patients$amox)
 # Since n_rsi counts available isolates (and is used as denominator),
 # you can calculate back to e.g. count resistant isolates:
 rsi_IR(septic_patients$amox) * n_rsi(septic_patients$amox)
 library(dplyr)
 septic_patients \%>\%
  group_by(hospital_id) \%>\%
  summarise(p = rsi_S(cipr),
            n = rsi_n(cipr)) # n_rsi works like n_distinct in dplyr
 septic_patients \%>\%
  group_by(hospital_id) \%>\%
  summarise(R = rsi_R(cipr, as_percent = TRUE),
            I = rsi_I(cipr, as_percent = TRUE),
            S = rsi_S(cipr, as_percent = TRUE),
            n = rsi_n(cipr), # also: n_rsi, works like n_distinct in dplyr
            total = n()) # this is the length, NOT the amount of tested isolates
 # Calculate co-resistance between amoxicillin/clav acid and gentamicin,
 # so we can see that combination therapy does a lot more than mono therapy:
 rsi_S(septic_patients$amcl) # S = 67.3\%
 rsi_n(septic_patients$amcl) # n = 1570
 rsi_S(septic_patients$gent) # S = 74.0\%
 rsi_n(septic_patients$gent) # n = 1842
 with(septic_patients,
     rsi_S(amcl, gent))     # S = 92.1\%
 with(septic_patients,       # n = 1504
     rsi_n(amcl, gent))
 septic_patients \%>\%
  group_by(hospital_id) \%>\%
  summarise(cipro_p = rsi_S(cipr, as_percent = TRUE),
            cipro_n = rsi_n(cipr),
            genta_p = rsi_S(gent, as_percent = TRUE),
            genta_n = rsi_n(gent),
            combination_p = rsi_S(cipr, gent, as_percent = TRUE),
            combination_n = rsi_n(cipr, gent))
 \dontrun{
 # calculate current empiric combination therapy of Helicobacter gastritis:
 my_table \%>\%
  filter(first_isolate == TRUE,
         genus == "Helicobacter") \%>\%
  summarise(p = rsi_S(amox, metr),  # amoxicillin with metronidazole
            n = rsi_n(amox, metr))
 }
 }
 \keyword{antibiotics}
 \keyword{isolate}
 \keyword{isolates}
 \keyword{resistance}
 \keyword{rsi}
 \keyword{rsi_df}
 \keyword{susceptibility}
--- a/man/septic_patients.Rd
+++ b/man/septic_patients.Rd
@@ -50,7 +50,7 @@ my_data \%>\%
         first_isolates == TRUE) \%>\%
  group_by(hospital_id) \%>\%
  summarise(n = n_rsi(amox),
-            p = resistance(amox))
+            p = portion_IR(amox))
 # 2. Get the amoxicillin/clavulanic acid resistance
@@ -61,6 +61,6 @@ my_data \%>\%
         first_isolates == TRUE) \%>\%
  group_by(year = format(date, "\%Y")) \%>\%
  summarise(n = n_rsi(amcl),
-            p = resistance(amcl, minimum = 20))
+            p = portion_IR(amcl, minimum = 20))
 }
 \keyword{datasets}
--- a/tests/testthat/test-portion.R
+++ b/tests/testthat/test-portion.R
@@ -1,29 +1,17 @@
-context("rsi_IR.R")
+context("portion.R")
 test_that("resistance works", {
  # check shortcuts
  expect_equal(resistance(septic_patients$amox, include_I = TRUE),
               rsi_IR(septic_patients$amox))
  expect_equal(resistance(septic_patients$amox, include_I = FALSE),
               rsi_R(septic_patients$amox))
  expect_equal(intermediate(septic_patients$amox),
               rsi_I(septic_patients$amox))
  expect_equal(susceptibility(septic_patients$amox, include_I = TRUE),
               rsi_SI(septic_patients$amox))
  expect_equal(susceptibility(septic_patients$amox, include_I = FALSE),
               rsi_S(septic_patients$amox))
  # amox resistance in `septic_patients`
-  expect_equal(rsi_R(septic_patients$amox), 0.6603, tolerance = 0.0001)
+  expect_equal(portion_R(septic_patients$amox), 0.6603, tolerance = 0.0001)
-  expect_equal(rsi_I(septic_patients$amox), 0.0030, tolerance = 0.0001)
+  expect_equal(portion_I(septic_patients$amox), 0.0030, tolerance = 0.0001)
-  expect_equal(1 - rsi_R(septic_patients$amox) - rsi_I(septic_patients$amox),
+  expect_equal(1 - portion_R(septic_patients$amox) - portion_I(septic_patients$amox),
-               rsi_S(septic_patients$amox))
+               portion_S(septic_patients$amox))
  expect_equal(portion_R(septic_patients$amox) + portion_I(septic_patients$amox),
               portion_IR(septic_patients$amox))
  expect_equal(portion_S(septic_patients$amox) + portion_I(septic_patients$amox),
               portion_SI(septic_patients$amox))
  # pita+genta susceptibility around 98.09%
  expect_equal(susceptibility(septic_patients$pita,
                              septic_patients$gent),
               0.9535,
               tolerance = 0.0001)
  expect_equal(suppressWarnings(rsi(septic_patients$pita,
                                    septic_patients$gent,
                                    interpretation = "S")),
@@ -33,10 +21,10 @@ test_that("resistance works", {
  # percentages
  expect_equal(septic_patients %>%
                 group_by(hospital_id) %>%
-                 summarise(R = rsi_R(cipr, as_percent = TRUE),
+                 summarise(R = portion_R(cipr, as_percent = TRUE),
-                           I = rsi_I(cipr, as_percent = TRUE),
+                           I = portion_I(cipr, as_percent = TRUE),
-                           S = rsi_S(cipr, as_percent = TRUE),
+                           S = portion_S(cipr, as_percent = TRUE),
-                           n = rsi_n(cipr),
+                           n = n_rsi(cipr),
                           total = n()) %>%
                 pull(n) %>%
                 sum(),
@@ -45,45 +33,45 @@ test_that("resistance works", {
  # count of cases
  expect_equal(septic_patients %>%
                 group_by(hospital_id) %>%
-                 summarise(cipro_p = susceptibility(cipr, as_percent = TRUE),
+                 summarise(cipro_p = portion_S(cipr, as_percent = TRUE),
                           cipro_n = n_rsi(cipr),
-                           genta_p = susceptibility(gent, as_percent = TRUE),
+                           genta_p = portion_S(gent, as_percent = TRUE),
                           genta_n = n_rsi(gent),
-                           combination_p = susceptibility(cipr, gent, as_percent = TRUE),
+                           combination_p = portion_S(cipr, gent, as_percent = TRUE),
-                           combination_n = rsi_n(cipr, gent)) %>%
+                           combination_n = n_rsi(cipr, gent)) %>%
                 pull(combination_n),
               c(202, 482, 201, 499))
-  expect_warning(resistance(as.character(septic_patients$amcl)))
+  expect_warning(portion_R(as.character(septic_patients$amcl)))
-  expect_warning(susceptibility(as.character(septic_patients$amcl)))
+  expect_warning(portion_S(as.character(septic_patients$amcl)))
-  expect_warning(susceptibility(as.character(septic_patients$amcl,
+  expect_warning(portion_S(as.character(septic_patients$amcl,
                                             septic_patients$gent)))
  # check for errors
-  expect_error(rsi_IR(septic_patients %>% select(amox, amcl)))
+  expect_error(portion_IR(septic_patients %>% select(amox, amcl)))
-  expect_error(rsi_IR("test", minimum = "test"))
+  expect_error(portion_IR("test", minimum = "test"))
-  expect_error(rsi_IR("test", as_percent = "test"))
+  expect_error(portion_IR("test", as_percent = "test"))
-  expect_error(rsi_I(septic_patients %>% select(amox, amcl)))
+  expect_error(portion_I(septic_patients %>% select(amox, amcl)))
-  expect_error(rsi_I("test", minimum = "test"))
+  expect_error(portion_I("test", minimum = "test"))
-  expect_error(rsi_I("test", as_percent = "test"))
+  expect_error(portion_I("test", as_percent = "test"))
-  expect_error(rsi_S("test", minimum = "test"))
+  expect_error(portion_S("test", minimum = "test"))
-  expect_error(rsi_S("test", as_percent = "test"))
+  expect_error(portion_S("test", as_percent = "test"))
-  expect_error(rsi_S(septic_patients %>% select(amox, amcl)))
+  expect_error(portion_S(septic_patients %>% select(amox, amcl)))
-  expect_error(rsi_S("R", septic_patients %>% select(amox, amcl)))
+  expect_error(portion_S("R", septic_patients %>% select(amox, amcl)))
  # check too low amount of isolates
-  expect_identical(rsi_R(septic_patients$amox, minimum = nrow(septic_patients) + 1),
+  expect_identical(portion_R(septic_patients$amox, minimum = nrow(septic_patients) + 1),
                   NA)
-  expect_identical(rsi_I(septic_patients$amox, minimum = nrow(septic_patients) + 1),
+  expect_identical(portion_I(septic_patients$amox, minimum = nrow(septic_patients) + 1),
                   NA)
-  expect_identical(rsi_S(septic_patients$amox, minimum = nrow(septic_patients) + 1),
+  expect_identical(portion_S(septic_patients$amox, minimum = nrow(septic_patients) + 1),
                   NA)
  # warning for speed loss
-  expect_warning(rsi_R(as.character(septic_patients$gent)))
+  expect_warning(portion_R(as.character(septic_patients$gent)))
-  expect_warning(rsi_I(as.character(septic_patients$gent)))
+  expect_warning(portion_I(as.character(septic_patients$gent)))
-  expect_warning(rsi_S(septic_patients$amcl, as.character(septic_patients$gent)))
+  expect_warning(portion_S(septic_patients$amcl, as.character(septic_patients$gent)))
 })
@@ -91,11 +79,7 @@ test_that("old rsi works", {
  # amox resistance in `septic_patients` should be around 66.33%
  expect_equal(suppressWarnings(rsi(septic_patients$amox)), 0.6633, tolerance = 0.0001)
  expect_equal(suppressWarnings(rsi(septic_patients$amox, interpretation = "S")), 1 - 0.6633, tolerance = 0.0001)
-  expect_equal(rsi_df(septic_patients,
+
                      ab = "amox",
                      info = TRUE),
               0.6633,
               tolerance = 0.0001)
  # pita+genta susceptibility around 98.09%
  expect_equal(suppressWarnings(rsi(septic_patients$pita,
                                    septic_patients$gent,
@@ -103,17 +87,6 @@ test_that("old rsi works", {
                                    info = TRUE)),
               0.9535,
               tolerance = 0.0001)
  expect_equal(rsi_df(septic_patients,
                      ab = c("pita", "gent"),
                      interpretation = "S",
                      info = TRUE),
               0.9535,
               tolerance = 0.0001)
  # more than 2 not allowed
  expect_error(rsi_df(septic_patients,
                      ab = c("mero", "pita", "gent"),
                      interpretation = "IS",
                      info = TRUE))
  # count of cases
  expect_equal(septic_patients %>%