update to septic_patients, speed improvements

2025-07-04 06:38:35 +02:00 · 2018-07-25 14:17:04 +02:00 · 2018-07-25 14:17:04 +02:00 · d9e204031d
commit d9e204031d
parent 03a3cb397b
26 changed files with 273 additions and 233 deletions
--- a/4
+++ b/4
@ -1,6 +1,6 @@
 Package: AMR
-Version: 0.2.0.9015
-Date: 2018-07-23
+Version: 0.2.0.9016
+Date: 2018-07-25
 Title: Antimicrobial Resistance Analysis
 Authors@R: c(
    person(
--- a/1
+++ b/1
@ -154,7 +154,6 @@ importFrom(utils,View)
 importFrom(utils,browseVignettes)
 importFrom(utils,installed.packages)
 importFrom(utils,object.size)
-importFrom(utils,packageDescription)
 importFrom(utils,read.delim)
 importFrom(utils,write.table)
 importFrom(xml2,read_html)
--- a/NEWS.md
+++ b/NEWS.md
@ -2,8 +2,9 @@
 #### New
 * **BREAKING**: `rsi_df` was removed in favour of new functions `resistance` and `susceptibility`. Now, all functions used to calculate resistance (`resistance` and `susceptibility`) use **hybrid evaluation**. This means calculations are not done in R directly but rather in C++ using the `Rcpp` package, making them 25 to 30 times faster. The function `rsi` still works, but is deprecated.
 * **BREAKING**: the methodology for determining first weighted isolates was changed. The antibiotics that are compared between isolates (call *key antibiotics*) to include more first isolates (afterwards called first *weighted* isolates) are now as follows:
-  * Gram-positive: amoxicillin, amoxicillin/clavlanic acid, cefuroxime, piperacillin/tazobactam, ciprofloxacin, trimethoprim/sulfamethoxazole, vancomycin, teicoplanin, tetracycline, erythromycin, oxacillin, rifampicin
-  * Gram-negative: amoxicillin, amoxicillin/clavlanic acid, cefuroxime, piperacillin/tazobactam, ciprofloxacin, trimethoprim/sulfamethoxazole, gentamicin, tobramycin, colistin, cefotaxime, ceftazidime, meropenem
+  * Universal: amoxicillin, amoxicillin/clavlanic acid, cefuroxime, piperacillin/tazobactam, ciprofloxacin,  trimethoprim/sulfamethoxazole
+  * Gram-positive: vancomycin, teicoplanin, tetracycline, erythromycin, oxacillin, rifampicin
+  * Gram-negative: gentamicin, tobramycin, colistin, cefotaxime, ceftazidime, meropenem
 * Functions `as.bactid` and `is.bactid` to transform/look up microbial ID's; this replaces the function `guess_bactid` but it will remain available for backwards compatibility
 * For convience, new descriptive statistical functions `kurtosis` and `skewness` that are lacking in base R - they are generic functions and have support for vectors, data.frames and matrices
 * Function `g.test` to perform the Χ<sup>2</sup> distributed [*G*-test](https://en.wikipedia.org/wiki/G-test), which use is the same as `chisq.test`
@ -23,6 +24,7 @@
  * Possibility to globally set the default for the amount of items to print, with `options(max.print.freq = n)` where *n* is your preset value

 #### Changed
+* Updates version of the `setic_patients` dataset to better reflect the reality
 * Pretty printing for tibbles removed as it is not really the scope of this package
 * Improved speed of key antibiotics comparison for determining first isolates
 * Column names for the `key_antibiotics` function are now generic: 6 for broadspectrum ABs, 6 for Gram-positive specific and 6 for Gram-negative specific ABs
@ -36,6 +38,7 @@
  * `as.rsi("<=0.002; S")` will return `S`
  * `as.mic("<=0.002; S")` will return `<=0.002`
 * Now possible to coerce MIC values with a space between operator and value, i.e. `as.mic("<= 0.002")` now works
+* Classes `rsi` and `mic` do not add the attribute `package.version` anymore
 * Added `"groups"` option for `atc_property(..., property)`. It will return a vector of the ATC hierarchy as defined by the [WHO](https://www.whocc.no/atc/structure_and_principles/). The new function `atc_groups` is a convenient wrapper around this.
 * Build-in host check for `atc_property` as it requires the host set by `url` to be responsive
 * Improved `first_isolate` algorithm to exclude isolates where bacteria ID or genus is unavailable
--- a/R/bactid.R
+++ b/R/bactid.R
@ -221,7 +221,6 @@ as.bactid <- function(x) {
  }
  class(x) <- "bactid"
  attr(x, 'package') <- 'AMR'
-  attr(x, 'package.version') <- packageDescription('AMR')$Version
  x
 }

--- a/R/classes.R
+++ b/R/classes.R
@ -21,18 +21,18 @@
 #' This transforms a vector to a new class \code{rsi}, which is an ordered factor with levels \code{S < I < R}. Invalid antimicrobial interpretations will be translated as \code{NA} with a warning.
 #' @rdname as.rsi
 #' @param x vector
-#' @return Ordered factor with new class \code{rsi} and new attributes \code{package} and \code{package.version}
+#' @return Ordered factor with new class \code{rsi} and new attribute \code{package}
 #' @keywords rsi
 #' @export
 #' @importFrom dplyr %>%
-#' @importFrom utils packageDescription
+#' @seealso \code{\link{as.mic}}
 #' @examples
 #' rsi_data <- as.rsi(c(rep("S", 474), rep("I", 36), rep("R", 370)))
 #' rsi_data <- as.rsi(c(rep("S", 474), rep("I", 36), rep("R", 370), "A", "B", "C"))
 #' is.rsi(rsi_data)
 #'
 #' # this can also coerce combined MIC/RSI values:
-#' as.rsi("<= 0.002; R") # will return R
+#' as.rsi("<= 0.002; S") # will return S
 #'
 #' plot(rsi_data)    # for percentages
 #' barplot(rsi_data) # for frequencies
@ -76,7 +76,6 @@ as.rsi <- function(x) {
    x <- x %>% factor(levels = c("S", "I", "R"), ordered = TRUE)
    class(x) <- c('rsi', 'ordered', 'factor')
    attr(x, 'package') <- 'AMR'
-    attr(x, 'package.version') <- packageDescription('AMR')$Version
    x
  }
 }
@ -196,21 +195,21 @@ barplot.rsi <- function(height, ...) {

 #' Class 'mic'
 #'
-#' This transforms a vector to a new class\code{mic}, which is an ordered factor with valid MIC values as levels. Invalid MIC values will be translated as \code{NA} with a warning.
+#' This transforms a vector to a new class \code{mic}, which is an ordered factor with valid MIC values as levels. Invalid MIC values will be translated as \code{NA} with a warning.
 #' @rdname as.mic
 #' @param x vector
 #' @param na.rm a logical indicating whether missing values should be removed
-#' @return Ordered factor with new class \code{mic} and new attributes \code{package} and \code{package.version}
+#' @return Ordered factor with new class \code{mic} and new attribute \code{package}
 #' @keywords mic
 #' @export
 #' @importFrom dplyr %>%
-#' @importFrom utils packageDescription
+#' @seealso \code{\link{as.rsi}}
 #' @examples
 #' mic_data <- as.mic(c(">=32", "1.0", "1", "1.00", 8, "<=0.128", "8", "16", "16"))
 #' is.mic(mic_data)
 #'
 #' # this can also coerce combined MIC/RSI values:
-#' as.mic("<=0.002; R") # will return <=0.002
+#' as.mic("<=0.002; S") # will return <=0.002
 #'
 #' plot(mic_data)
 #' barplot(mic_data)
@ -319,7 +318,6 @@ as.mic <- function(x, na.rm = FALSE) {
                ordered = TRUE)
    class(x) <- c('mic', 'ordered', 'factor')
    attr(x, 'package') <- 'AMR'
-    attr(x, 'package.version') <- packageDescription('AMR')$Version
    x
  }
 }
--- a/R/data.R
+++ b/R/data.R
@ -252,7 +252,7 @@
 #'   \item{\code{type_nl}}{Type of microorganism in Dutch, like \code{"Bacterie"} and \code{"Schimmel/gist"}}
 #'   \item{\code{gramstain_nl}}{Gram of microorganism in Dutch, like \code{"Negatieve staven"}}
 #' }
-#' @source MOLIS (LIS of Certe) - \url{https://www.certe.nl}
+#  source MOLIS (LIS of Certe) - \url{https://www.certe.nl}
 #' @seealso \code{\link{guess_bactid}} \code{\link{antibiotics}} \code{\link{microorganisms.umcg}}
 "microorganisms"

@ -264,14 +264,14 @@
 #'   \item{\code{mocode}}{Code of microorganism according to UMCG MMB}
 #'   \item{\code{bactid}}{Code of microorganism in \code{\link{microorganisms}}}
 #' }
-#' @source MOLIS (LIS of Certe) - \url{https://www.certe.nl} \cr \cr GLIMS (LIS of UMCG) - \url{https://www.umcg.nl}
+# source MOLIS (LIS of Certe) - \url{https://www.certe.nl} \cr \cr GLIMS (LIS of UMCG) - \url{https://www.umcg.nl}
 #' @seealso \code{\link{guess_bactid}} \code{\link{microorganisms}}
 "microorganisms.umcg"

 #' Dataset with 2000 blood culture isolates of septic patients
 #'
 #' An anonymised dataset containing 2000 microbial blood culture isolates with their antibiogram of septic patients found in 5 different hospitals in the Netherlands, between 2001 and 2017. This data.frame can be used to practice AMR analysis. For examples, press F1.
-#' @format A data.frame with 2000 observations and 47 variables:
+#' @format A data.frame with 2000 observations and 49 variables:
 #' \describe{
 #'   \item{\code{date}}{date of receipt at the laboratory}
 #'   \item{\code{hospital_id}}{ID of the hospital}
@ -282,9 +282,9 @@
 #'   \item{\code{sex}}{sex of the patient}
 #'   \item{\code{patient_id}}{ID of the patient, first 10 characters of an SHA hash containing irretrievable information}
 #'   \item{\code{bactid}}{ID of microorganism, see \code{\link{microorganisms}}}
-#'   \item{\code{peni:mupi}}{38 different antibiotics with class \code{rsi} (see \code{\link{as.rsi}}); these column names occur in \code{\link{antibiotics}} and can be translated with \code{\link{abname}}}
+#'   \item{\code{peni:rifa}}{40 different antibiotics with class \code{rsi} (see \code{\link{as.rsi}}); these column names occur in \code{\link{antibiotics}} data set and can be translated with \code{\link{abname}}}
 #' }
-#' @source MOLIS (LIS of Certe) - \url{https://www.certe.nl}
+# source MOLIS (LIS of Certe) - \url{https://www.certe.nl}
 #' @examples
 #' # ----------- #
 #' # PREPARATION #
@ -304,15 +304,15 @@
 #' # ANALYSIS #
 #' # -------- #
 #'
-#' # 1. Get the amoxicillin resistance percentages
-#' #    of E. coli, divided by hospital:
+#' # 1. Get the amoxicillin resistance percentages (p)
+#' #     and numbers (n) of E. coli, divided by hospital:
 #'
 #' my_data %>%
-#'   filter(bactid == "ESCCOL",
+#'   filter(bactid == guess_bactid("E. coli"),
 #'          first_isolates == TRUE) %>%
 #'   group_by(hospital_id) %>%
-#'   summarise(n = n(),
-#'             amoxicillin_resistance = rsi(amox))
+#'   summarise(n = n_rsi(amox),
+#'             p = resistance(amox))
 #'
 #'
 #' # 2. Get the amoxicillin/clavulanic acid resistance
@ -322,6 +322,6 @@
 #'   filter(bactid == guess_bactid("E. coli"),
 #'          first_isolates == TRUE) %>%
 #'   group_by(year = format(date, "%Y")) %>%
-#'   summarise(n = n(),
-#'             amoxclav_resistance = rsi(amcl, minimum = 20))
+#'   summarise(n = n_rsi(amcl),
+#'             p = resistance(amcl, minimum = 20))
 "septic_patients"
--- a/R/first_isolate.R
+++ b/R/first_isolate.R
@ -41,7 +41,7 @@
 #' @details \strong{WHY THIS IS SO IMPORTANT} \cr
 #'     To conduct an analysis of antimicrobial resistance, you should only include the first isolate of every patient per episode \href{https://www.ncbi.nlm.nih.gov/pubmed/17304462}{[1]}. If you would not do this, you could easily get an overestimate or underestimate of the resistance of an antibiotic. Imagine that a patient was admitted with an MRSA and that it was found in 5 different blood cultures the following week. The resistance percentage of oxacillin of all \emph{S. aureus} isolates would be overestimated, because you included this MRSA more than once. It would be \href{https://en.wikipedia.org/wiki/Selection_bias}{selection bias}.
 #' @section Key antibiotics:
-#'     There are two ways to determine whether isolates can be included as first \emph{weighted} isolates: \cr
+#'     There are two ways to determine whether isolates can be included as first \emph{weighted} isolates which will give generally the same results: \cr
 #'
 #'     \strong{1. Using} \code{type = "keyantibiotics"} \strong{and parameter} \code{ignore_I} \cr
 #'     Any difference from S to R (or vice versa) will (re)select an isolate as a first weighted isolate. With \code{ignore_I = FALSE}, also differences from I to S|R (or vice versa) will lead to this. This is a reliable method and 30-35 times faster than method 2. \cr
@ -65,6 +65,24 @@
 #'                 col_patient_id = "patient_id",
 #'                 col_bactid = "bactid")
 #'
+#' # Now let's see if first isolates matter:
+#' A <- my_patients %>%
+#'   group_by(hospital_id) %>%
+#'   summarise(count = n_rsi(gent), # gentamicin
+#'             resistance = resistance(gent))
+#'
+#' B <- my_patients %>%
+#'   filter(first_isolate == TRUE) %>%
+#'   group_by(hospital_id) %>%
+#'   summarise(count = n_rsi(gent), # gentamicin
+#'             resistance = resistance(gent))
+#'
+#' # Have a look at A and B. B is more reliable because every isolate is
+#' # counted once. Gentamicin resitance in hospital D seems to be 5%
+#' # higher than originally thought.
+#'
+#' ## OTHER EXAMPLES:
+#'
 #' \dontrun{
 #'
 #' # set key antibiotics to a new variable
@ -153,7 +171,7 @@ first_isolate <- function(tbl,

  if (!is.na(col_bactid)) {
    if (!tbl %>% pull(col_bactid) %>% is.bactid()) {
-      tbl[, col_bactid] <- tbl %>% pull(col_bactid) %>% as.bactid()
+      warning("Improve integrity of the `", col_bactid, "` column by transforming it with 'as.bactid'.")
    }
    tbl <- tbl %>% left_join_microorganisms(by = col_bactid)
    col_genus <- "genus"
@ -179,7 +197,6 @@ first_isolate <- function(tbl,
    filter_specimen <- ''
  }

-  weighted.notice <- ''
  # filter on specimen group and keyantibiotics when they are filled in
  if (!is.na(filter_specimen) & filter_specimen != '') {
    check_columns_existance(col_specimen, tbl)
@ -317,7 +334,9 @@ first_isolate <- function(tbl,
                               (date_lab - lag(date_lab)) + lag(days_diff),
                               0))

+  weighted.notice <- ''
  if (col_keyantibiotics != '') {
+    weighted.notice <- 'weighted '
    if (info == TRUE) {
      if (type == 'keyantibiotics') {
        cat('[Criteria] Inclusion based on key antibiotics, ')
--- a/R/freq.R
+++ b/R/freq.R
@ -415,7 +415,6 @@ frequency_tbl <- function(x,

  class(df) <- c('frequency_tbl', class(df))
  attr(df, 'package') <- 'AMR'
-  attr(df, 'package.version') <- packageDescription('AMR')$Version

  if (markdown == TRUE) {
    tbl_format <- 'markdown'
@ -567,7 +566,6 @@ print.frequency_tbl <- function(x, nmax = getOption("max.print.freq", default =
 #' @export
 as.data.frame.frequency_tbl <- function(x, ...) {
  attr(x, 'package') <- NULL
-  attr(x, 'package.version') <- NULL
  attr(x, 'opt') <- NULL
  as.data.frame.data.frame(x, ...)
 }
@ -578,7 +576,6 @@ as.data.frame.frequency_tbl <- function(x, ...) {
 #' @importFrom dplyr as_tibble
 as_tibble.frequency_tbl <- function(x, validate = TRUE, ..., rownames = NA) {
  attr(x, 'package') <- NULL
-  attr(x, 'package.version') <- NULL
  attr(x, 'opt') <- NULL
  as_tibble(x = as.data.frame(x), validate = validate, ..., rownames = rownames)
 }
--- a/R/join_microorganisms.R
+++ b/R/join_microorganisms.R
@ -26,8 +26,8 @@
 #' df2 <- left_join_microorganisms(df, "bacteria_id")
 #' colnames(df2)
 inner_join_microorganisms <- function(x, by = 'bactid', suffix = c("2", ""), ...) {
-  if (!any(class(x) %in% c("bactid", "data.frame", "matrix"))) {
-    x <- data.frame(bactid = as.bactid(x), stringsAsFactors = FALSE)
+  if (!any(class(x) %in% c("data.frame", "matrix"))) {
+    x <- data.frame(bactid = as.character(x), stringsAsFactors = FALSE)
  }
  # no name set to `by` parameter
  if (is.null(names(by))) {
@ -48,8 +48,8 @@ inner_join_microorganisms <- function(x, by = 'bactid', suffix = c("2", ""), ...
 #' @rdname join
 #' @export
 left_join_microorganisms <- function(x, by = 'bactid', suffix = c("2", ""), ...) {
-  if (!any(class(x) %in% c("bactid", "data.frame", "matrix"))) {
-    x <- data.frame(bactid = as.bactid(x), stringsAsFactors = FALSE)
+  if (!any(class(x) %in% c("data.frame", "matrix"))) {
+    x <- data.frame(bactid = as.character(x), stringsAsFactors = FALSE)
  }
  # no name set to `by` parameter
  if (is.null(names(by))) {
@ -70,8 +70,8 @@ left_join_microorganisms <- function(x, by = 'bactid', suffix = c("2", ""), ...)
 #' @rdname join
 #' @export
 right_join_microorganisms <- function(x, by = 'bactid', suffix = c("2", ""), ...) {
-  if (!any(class(x) %in% c("bactid", "data.frame", "matrix"))) {
-    x <- data.frame(bactid = as.bactid(x), stringsAsFactors = FALSE)
+  if (!any(class(x) %in% c("data.frame", "matrix"))) {
+    x <- data.frame(bactid = as.character(x), stringsAsFactors = FALSE)
  }
  # no name set to `by` parameter
  if (is.null(names(by))) {
@ -92,8 +92,8 @@ right_join_microorganisms <- function(x, by = 'bactid', suffix = c("2", ""), ...
 #' @rdname join
 #' @export
 full_join_microorganisms <- function(x, by = 'bactid', suffix = c("2", ""), ...) {
-  if (!any(class(x) %in% c("bactid", "data.frame", "matrix"))) {
-    x <- data.frame(bactid = as.bactid(x), stringsAsFactors = FALSE)
+  if (!any(class(x) %in% c("data.frame", "matrix"))) {
+    x <- data.frame(bactid = as.character(x), stringsAsFactors = FALSE)
  }
  # no name set to `by` parameter
  if (is.null(names(by))) {
@ -114,8 +114,8 @@ full_join_microorganisms <- function(x, by = 'bactid', suffix = c("2", ""), ...)
 #' @rdname join
 #' @export
 semi_join_microorganisms <- function(x, by = 'bactid', ...) {
-  if (!any(class(x) %in% c("bactid", "data.frame", "matrix"))) {
-    x <- data.frame(bactid = as.bactid(x), stringsAsFactors = FALSE)
+  if (!any(class(x) %in% c("data.frame", "matrix"))) {
+    x <- data.frame(bactid = as.character(x), stringsAsFactors = FALSE)
  }
  # no name set to `by` parameter
  if (is.null(names(by))) {
@ -132,8 +132,8 @@ semi_join_microorganisms <- function(x, by = 'bactid', ...) {
 #' @rdname join
 #' @export
 anti_join_microorganisms <- function(x, by = 'bactid', ...) {
-  if (!any(class(x) %in% c("bactid", "data.frame", "matrix"))) {
-    x <- data.frame(bactid = as.bactid(x), stringsAsFactors = FALSE)
+  if (!any(class(x) %in% c("data.frame", "matrix"))) {
+    x <- data.frame(bactid = as.character(x), stringsAsFactors = FALSE)
  }
  # no name set to `by` parameter
  if (is.null(names(by))) {
--- a/R/key_antibiotics.R
+++ b/R/key_antibiotics.R
@ -42,19 +42,26 @@
 #' @importFrom dplyr %>% mutate if_else
 #' @seealso \code{\link{first_isolate}}
 #' @examples
-#' \dontrun{
+#' # septic_patients is a dataset available in the AMR package
+#' ?septic_patients
+#' my_patients <- septic_patients
+#'
+#' library(dplyr)
 #' # set key antibiotics to a new variable
-#' tbl$keyab <- key_antibiotics(tbl)
+#' my_patients <- my_patients %>%
+#'   mutate(keyab = key_antibiotics(.)) %>%
+#'   mutate(
+#'     # now calculate first isolates
+#'     first_regular = first_isolate(., "date", "patient_id", "bactid"),
+#'     # and first WEIGHTED isolates
+#'     first_weighted = first_isolate(., "date", "patient_id", "bactid",
+#'                                    col_keyantibiotics = "keyab")
+#'   )
 #'
-#' # add regular first isolates
-#' tbl$first_isolate <-
-#'   first_isolate(tbl)
+#' # Check the difference, in this data set it results in 7% more isolates:
+#' sum(my_patients$first_regular, na.rm = TRUE)
+#' sum(my_patients$first_weighted, na.rm = TRUE)
 #'
-#' # add first WEIGHTED isolates using key antibiotics
-#' tbl$first_isolate_weighed <-
-#'   first_isolate(tbl,
-#'                 col_keyantibiotics = 'keyab')
-#' }
 #'
 #' # output of the `key_antibiotics` function could be like this:
 #' strainA <- "SSSRR.S.R..S"
@ -169,87 +176,80 @@ key_antibiotics_equal <- function(x,
                                  points_threshold = 2,
                                  info = FALSE) {
  # x is active row, y is lag
+
  type <- type[1]

  if (length(x) != length(y)) {
    stop('Length of `x` and `y` must be equal.')
  }

+  # only show progress bar on points or when at least 5000 isolates
+  info_needed <- info == TRUE & (type == "points" | length(x) > 5000)
+
  result <- logical(length(x))

-  if (type == "keyantibiotics") {
-    if (ignore_I == TRUE) {
-      # evaluation using regular expression will treat '.' as any character
-      # so I is actually ignored then
-      x <- gsub('I', '.', x, ignore.case = TRUE)
-      y <- gsub('I', '.', y, ignore.case = TRUE)
+  if (info_needed == TRUE) {
+    p <- dplyr::progress_estimated(length(x))
+  }
+
+  for (i in 1:length(x)) {
+
+    if (info_needed == TRUE) {
+      p$tick()$print()
    }

-    for (i in 1:length(x)) {
+    if (is.na(x[i])) {
+      x[i] <- ''
+    }
+    if (is.na(y[i])) {
+      y[i] <- ''
+    }
+
+    if (x[i] == y[i]) {
+
+      result[i] <- TRUE
+
+    } else if (nchar(x[i]) != nchar(y[i])) {
+
+      result[i] <- FALSE
+
+    } else {
+
      x_split <- strsplit(x[i], "")[[1]]
      y_split <- strsplit(y[i], "")[[1]]
-      y_split[x_split == "."] <- "."
-      x_split[y_split == "."] <- "."
-      x_checkfor <- paste(x_split, collapse = "")
-      y_checkfor <- paste(y_split, collapse = "")
-      result[i] <- nchar(x[i]) == nchar(y[i]) &
-        (x_checkfor %like% y_checkfor |
-           y_checkfor %like% x_checkfor)
-    }
-    return(result)

-  } else {
+      if (type == 'keyantibiotics') {

-    if (type != 'points') {
-      stop('`', type, '` is not a valid value for type, must be "points" or "keyantibiotics". See ?first_isolate.')
-    }
+        if (ignore_I == TRUE) {
+          x_split[x_split == "I"] <- "."
+          y_split[y_split == "I"] <- "."
+        }

-    if (info == TRUE) {
-      p <- dplyr::progress_estimated(length(x))
-    }
+        y_split[x_split == "."] <- "."
+        x_split[y_split == "."] <- "."

-    for (i in 1:length(x)) {
-
-      if (info == TRUE) {
-        p$tick()$print()
-      }
-
-      if (is.na(x[i])) {
-        x[i] <- ''
-      }
-      if (is.na(y[i])) {
-        y[i] <- ''
-      }
-
-      if (nchar(x[i]) != nchar(y[i])) {
-
-        result[i] <- FALSE
-
-      } else if (x[i] == '' & y[i] == '') {
-
-        result[i] <- TRUE
-
-      } else {
-
-        x2 <- strsplit(x[i], "")[[1]]
-        y2 <- strsplit(y[i], "")[[1]]
+        result[i] <- all(x_split == y_split)

+      } else if (type == 'points') {
        # count points for every single character:
        # - no change is 0 points
        # - I <-> S|R is 0.5 point
        # - S|R <-> R|S is 1 point
        # use the levels of as.rsi (S = 1, I = 2, R = 3)

-        suppressWarnings(x2 <- x2 %>% as.rsi() %>% as.double())
-        suppressWarnings(y2 <- y2 %>% as.rsi() %>% as.double())
+        suppressWarnings(x_split <- x_split %>% as.rsi() %>% as.double())
+        suppressWarnings(y_split <- y_split %>% as.rsi() %>% as.double())

-        points <- (x2 - y2) %>% abs() %>% sum(na.rm = TRUE)
-        result[i] <- ((points / 2) >= points_threshold)
+        points <- (x_split - y_split) %>% abs() %>% sum(na.rm = TRUE) / 2
+        result[i] <- points >= points_threshold
+
+      } else {
+        stop('`', type, '` is not a valid value for type, must be "points" or "keyantibiotics". See ?first_isolate.')
      }
    }
-    if (info == TRUE) {
-      cat('\n')
-    }
-    result
  }
+  if (info_needed == TRUE) {
+    cat('\n')
+  }
+  result
 }
--- a/README.md
+++ b/README.md
@ -26,9 +26,13 @@ This R package contains functions to make **microbiological, epidemiological dat

 With `AMR` you can:
 * Calculate the resistance (and even co-resistance) of microbial isolates with the `resistance` and `susceptibility` functions, that can also be used with the `dplyr` package (e.g. in conjunction with `summarise`)
-* Predict antimicrobial resistance for the nextcoming years with the `rsi_predict` function
+* Predict antimicrobial resistance for the nextcoming years with the `resistance_predict` function
 * Apply [EUCAST rules to isolates](http://www.eucast.org/expert_rules_and_intrinsic_resistance/) with the `EUCAST_rules` function
 * Identify first isolates of every patient [using guidelines from the CLSI](https://clsi.org/standards/products/microbiology/documents/m39/) (Clinical and Laboratory Standards Institute) with the `first_isolate` function
+  * You can also identify first *weighted* isolates of every patient, an adjusted version of the CLSI guideline. This takes into account key antibiotics of every strain and compares them. The following 12 antibiotics will be used as key antibiotics at default:
+    * Universal: amoxicillin, amoxicillin/clavlanic acid, cefuroxime, piperacillin/tazobactam, ciprofloxacin,  trimethoprim/sulfamethoxazole
+    * Specific for Gram-positives: vancomycin, teicoplanin, tetracycline, erythromycin, oxacillin, rifampicin
+    * Specific for Gram-negatives: gentamicin, tobramycin, colistin, cefotaxime, ceftazidime, meropenem
 * Get antimicrobial ATC properties from the WHO Collaborating Centre for Drug Statistics Methodology ([WHOCC](https://www.whocc.no/atc_ddd_methodology/who_collaborating_centre/)), to be able to:
  * Translate antibiotic codes (like *AMOX*), official names (like *amoxicillin*) and even trade names (like *Amoxil* or *Trimox*) to an [ATC code](https://www.whocc.no/atc_ddd_index/?code=J01CA04&showdescription=no) (like *J01CA04*) and vice versa with the `abname` function
  * Get the latest antibiotic properties like hierarchic groups and [defined daily dose](https://en.wikipedia.org/wiki/Defined_daily_dose) (DDD) with units and administration form from the WHOCC website with the `atc_property` function
@ -219,35 +223,33 @@ mydata %>% freq(myvariable)
 Factors sort on item by default:
 ```r
 septic_patients %>% freq(hospital_id)
-# Frequency table of `hospital_id`
+# Frequency table of `hospital_id` 
 # Class:     factor
 # Length:    2000 (of which NA: 0 = 0.0%)
-# Unique:    5
+# Unique:    4
 # 
 #      Item    Count   Percent   Cum. Count   Cum. Percent   (Factor Level)
 # ---  -----  ------  --------  -----------  -------------  ---------------
-# 1    A         233     11.7%          233          11.7%                1
-# 2    B         583     29.1%          816          40.8%                2
-# 3    C         221     11.1%         1037          51.8%                3
-# 4    D         650     32.5%         1687          84.4%                4
-# 5    E         313     15.7%         2000         100.0%                5
+# 1    A         319     16.0%          319          16.0%                1
+# 2    B         661     33.1%          980          49.0%                2
+# 3    C         256     12.8%         1236          61.8%                3
+# 4    D         764     38.2%         2000         100.0%                4
 ```

 This can be changed with the `sort.count` parameter:
 ```r
 septic_patients %>% freq(hospital_id, sort.count = TRUE)
-# Frequency table of `hospital_id`
+# Frequency table of `hospital_id` 
 # Class:     factor
 # Length:    2000 (of which NA: 0 = 0.0%)
-# Unique:    5
+# Unique:    4
 # 
 #      Item    Count   Percent   Cum. Count   Cum. Percent   (Factor Level)
 # ---  -----  ------  --------  -----------  -------------  ---------------
-# 1    D         650     32.5%          650          32.5%                4
-# 2    B         583     29.1%         1233          61.7%                2
-# 3    E         313     15.7%         1546          77.3%                5
-# 4    A         233     11.7%         1779          88.9%                1
-# 5    C         221     11.1%         2000         100.0%                3
+# 1    D         764     38.2%          764          38.2%                4
+# 2    B         661     33.1%         1425          71.2%                2
+# 3    A         319     16.0%         1744          87.2%                1
+# 4    C         256     12.8%         2000         100.0%                3
 ```

 All other types, like numbers, characters and dates, sort on count by default:
@ -256,56 +258,56 @@ septic_patients %>% freq(date)
 # Frequency table of `date` 
 # Class:     Date
 # Length:    2000 (of which NA: 0 = 0.0%)
-# Unique:    1662
+# Unique:    1151
 # 
-# Oldest:    2 January 2001
-# Newest:    18 October 2017 (+6133)
-# Median:    6 December 2009 (~53%)
+# Oldest:    2 January 2002
+# Newest:    28 December 2017 (+5839)
+# Median:    7 Augustus 2009 (~48%)
 # 
 #      Item          Count   Percent   Cum. Count   Cum. Percent
 # ---  -----------  ------  --------  -----------  -------------
-# 1    2008-12-24        5      0.2%            5           0.2%
-# 2    2010-12-10        4      0.2%            9           0.4%
-# 3    2011-03-03        4      0.2%           13           0.6%
-# 4    2013-06-24        4      0.2%           17           0.8%
-# 5    2017-09-01        4      0.2%           21           1.1%
-# 6    2002-09-02        3      0.2%           24           1.2%
-# 7    2003-10-14        3      0.2%           27           1.4%
-# 8    2004-06-25        3      0.2%           30           1.5%
-# 9    2004-06-27        3      0.2%           33           1.7%
-# 10   2004-10-29        3      0.2%           36           1.8%
-# 11   2005-09-27        3      0.2%           39           2.0%
-# 12   2006-08-01        3      0.2%           42           2.1%
-# 13   2006-10-10        3      0.2%           45           2.2%
-# 14   2007-11-16        3      0.2%           48           2.4%
-# 15   2008-03-09        3      0.2%           51           2.5%
-# [ reached getOption("max.print.freq") -- omitted 1647 entries, n = 1949 (97.5%) ]
+# 1    2016-05-21       10      0.5%           10           0.5%
+# 2    2004-11-15        8      0.4%           18           0.9%
+# 3    2013-07-29        8      0.4%           26           1.3%
+# 4    2017-06-12        8      0.4%           34           1.7%
+# 5    2015-11-19        7      0.4%           41           2.1%
+# 6    2005-12-22        6      0.3%           47           2.4%
+# 7    2015-10-12        6      0.3%           53           2.6%
+# 8    2002-05-16        5      0.2%           58           2.9%
+# 9    2004-02-02        5      0.2%           63           3.1%
+# 10   2004-02-18        5      0.2%           68           3.4%
+# 11   2005-08-16        5      0.2%           73           3.6%
+# 12   2005-09-01        5      0.2%           78           3.9%
+# 13   2006-06-29        5      0.2%           83           4.2%
+# 14   2007-08-10        5      0.2%           88           4.4%
+# 15   2008-08-29        5      0.2%           93           4.7%
+# [ reached getOption("max.print.freq") -- omitted 1136 entries, n = 1907 (95.3%) ]
 ```
 For numeric values, some extra descriptive statistics will be calculated:
 ```r
 freq(runif(n = 10, min = 1, max = 5))
-# Frequency table 
+# Frequency table  
 # Class:     numeric
 # Length:    10 (of which NA: 0 = 0.0%)
 # Unique:    10
-#   
-# Mean:      2.9
-# Std. dev.: 1.3 (CV: 0.43, MAD: 1.5)
-# Five-Num:  1.5 | 1.7 | 2.6 | 4.0 | 4.7 (IQR: 2.3, CQV: 0.4)
+# 
+# Mean:      3.4
+# Std. dev.: 1.3 (CV: 0.38, MAD: 1.3)
+# Five-Num:  1.6 | 2.0 | 3.9 | 4.7 | 4.8 (IQR: 2.7, CQV: 0.4)
 # Outliers:  0
 # 
-#      Item   Count   Percent   Cum. Count   Cum. Percent
-# ---------  ------  --------  -----------  -------------
-#  1.132033       1     10.0%            1          10.0%
-#  2.226903       1     10.0%            2          20.0%
-#  2.280779       1     10.0%            3          30.0%
-#  2.640898       1     10.0%            4          40.0%
-#  2.913462       1     10.0%            5          50.0%
-#  3.364201       1     10.0%            6          60.0%
-#  3.771975       1     10.0%            7          70.0%
-#  3.802861       1     10.0%            8          80.0%
-#  3.803547       1     10.0%            9          90.0%
-#  3.985691       1     10.0%           10         100.0%
+#           Item   Count   Percent   Cum. Count   Cum. Percent
+# ---  ---------  ------  --------  -----------  -------------
+# 1     1.568997       1     10.0%            1          10.0%
+# 2     1.993575       1     10.0%            2          20.0%
+# 3     2.022348       1     10.0%            3          30.0%
+# 4     2.236038       1     10.0%            4          40.0%
+# 5     3.579828       1     10.0%            5          50.0%
+# 6     4.178081       1     10.0%            6          60.0%
+# 7     4.394818       1     10.0%            7          70.0%
+# 8     4.689871       1     10.0%            8          80.0%
+# 9     4.698626       1     10.0%            9          90.0%
+# 10    4.751488       1     10.0%           10         100.0%
 # 
 # Warning message:
 # All observations are unique. 
@ -320,7 +322,7 @@ Datasets to work with antibiotics and bacteria properties.
 ```r
 # Dataset with 2000 random blood culture isolates from anonymised
 # septic patients between 2001 and 2017 in 5 Dutch hospitals
-septic_patients   # A tibble: 2,000 x 47
+septic_patients   # A tibble: 2,000 x 49

 # Dataset with ATC antibiotics codes, official names, trade names 
 # and DDD's (oral and parenteral)
--- a/data/septic_patients.rda
+++ b/data/septic_patients.rda
--- a/man/as.mic.Rd
+++ b/man/as.mic.Rd
@ -15,19 +15,22 @@ is.mic(x)
 \item{na.rm}{a logical indicating whether missing values should be removed}
 }
 \value{
-Ordered factor with new class \code{mic} and new attributes \code{package} and \code{package.version}
+Ordered factor with new class \code{mic} and new attribute \code{package}
 }
 \description{
-This transforms a vector to a new class\code{mic}, which is an ordered factor with valid MIC values as levels. Invalid MIC values will be translated as \code{NA} with a warning.
+This transforms a vector to a new class \code{mic}, which is an ordered factor with valid MIC values as levels. Invalid MIC values will be translated as \code{NA} with a warning.
 }
 \examples{
 mic_data <- as.mic(c(">=32", "1.0", "1", "1.00", 8, "<=0.128", "8", "16", "16"))
 is.mic(mic_data)

 # this can also coerce combined MIC/RSI values:
-as.mic("<=0.002; R") # will return <=0.002
+as.mic("<=0.002; S") # will return <=0.002

 plot(mic_data)
 barplot(mic_data)
 }
+\seealso{
+\code{\link{as.rsi}}
+}
 \keyword{mic}
--- a/man/as.rsi.Rd
+++ b/man/as.rsi.Rd
@ -13,7 +13,7 @@ is.rsi(x)
 \item{x}{vector}
 }
 \value{
-Ordered factor with new class \code{rsi} and new attributes \code{package} and \code{package.version}
+Ordered factor with new class \code{rsi} and new attribute \code{package}
 }
 \description{
 This transforms a vector to a new class \code{rsi}, which is an ordered factor with levels \code{S < I < R}. Invalid antimicrobial interpretations will be translated as \code{NA} with a warning.
@ -24,9 +24,12 @@ rsi_data <- as.rsi(c(rep("S", 474), rep("I", 36), rep("R", 370), "A", "B", "C"))
 is.rsi(rsi_data)

 # this can also coerce combined MIC/RSI values:
-as.rsi("<= 0.002; R") # will return R
+as.rsi("<= 0.002; S") # will return S

 plot(rsi_data)    # for percentages
 barplot(rsi_data) # for frequencies
 }
+\seealso{
+\code{\link{as.mic}}
+}
 \keyword{rsi}
--- a/man/first_isolate.Rd
+++ b/man/first_isolate.Rd
@ -65,7 +65,7 @@ Determine first (weighted) isolates of all microorganisms of every patient per e
 }
 \section{Key antibiotics}{

-    There are two ways to determine whether isolates can be included as first \emph{weighted} isolates: \cr
+    There are two ways to determine whether isolates can be included as first \emph{weighted} isolates which will give generally the same results: \cr

    \strong{1. Using} \code{type = "keyantibiotics"} \strong{and parameter} \code{ignore_I} \cr
    Any difference from S to R (or vice versa) will (re)select an isolate as a first weighted isolate. With \code{ignore_I = FALSE}, also differences from I to S|R (or vice versa) will lead to this. This is a reliable method and 30-35 times faster than method 2. \cr
@ -85,6 +85,24 @@ my_patients$first_isolate <- my_patients \%>\%
                col_patient_id = "patient_id",
                col_bactid = "bactid")

+# Now let's see if first isolates matter:
+A <- my_patients \%>\%
+  group_by(hospital_id) \%>\%
+  summarise(count = n_rsi(gent), # gentamicin
+            resistance = resistance(gent))
+
+B <- my_patients \%>\%
+  filter(first_isolate == TRUE) \%>\%
+  group_by(hospital_id) \%>\%
+  summarise(count = n_rsi(gent), # gentamicin
+            resistance = resistance(gent))
+
+# Have a look at A and B. B is more reliable because every isolate is
+# counted once. Gentamicin resitance in hospital D seems to be 5\%
+# higher than originally thought.
+
+## OTHER EXAMPLES:
+
 \dontrun{

 # set key antibiotics to a new variable
--- a/man/key_antibiotics.Rd
+++ b/man/key_antibiotics.Rd
@ -56,7 +56,7 @@ The function \code{key_antibiotics} returns a character vector with 12 antibioti
 }
 \section{Key antibiotics}{

-    There are two ways to determine whether isolates can be included as first \emph{weighted} isolates: \cr
+    There are two ways to determine whether isolates can be included as first \emph{weighted} isolates which will give generally the same results: \cr

    \strong{1. Using} \code{type = "keyantibiotics"} \strong{and parameter} \code{ignore_I} \cr
    Any difference from S to R (or vice versa) will (re)select an isolate as a first weighted isolate. With \code{ignore_I = FALSE}, also differences from I to S|R (or vice versa) will lead to this. This is a reliable method and 30-35 times faster than method 2. \cr
@ -66,19 +66,26 @@ The function \code{key_antibiotics} returns a character vector with 12 antibioti
 }

 \examples{
-\dontrun{
+# septic_patients is a dataset available in the AMR package
+?septic_patients
+my_patients <- septic_patients
+
+library(dplyr)
 # set key antibiotics to a new variable
-tbl$keyab <- key_antibiotics(tbl)
+my_patients <- my_patients \%>\%
+  mutate(keyab = key_antibiotics(.)) \%>\%
+  mutate(
+    # now calculate first isolates
+    first_regular = first_isolate(., "date", "patient_id", "bactid"),
+    # and first WEIGHTED isolates
+    first_weighted = first_isolate(., "date", "patient_id", "bactid",
+                                   col_keyantibiotics = "keyab")
+  )

-# add regular first isolates
-tbl$first_isolate <-
-  first_isolate(tbl)
+# Check the difference, in this data set it results in 7\% more isolates:
+sum(my_patients$first_regular, na.rm = TRUE)
+sum(my_patients$first_weighted, na.rm = TRUE)

-# add first WEIGHTED isolates using key antibiotics
-tbl$first_isolate_weighed <-
-  first_isolate(tbl,
-                col_keyantibiotics = 'keyab')
-}

 # output of the `key_antibiotics` function could be like this:
 strainA <- "SSSRR.S.R..S"
--- a/man/microorganisms.Rd
+++ b/man/microorganisms.Rd
@ -19,9 +19,6 @@
  \item{\code{type_nl}}{Type of microorganism in Dutch, like \code{"Bacterie"} and \code{"Schimmel/gist"}}
  \item{\code{gramstain_nl}}{Gram of microorganism in Dutch, like \code{"Negatieve staven"}}
 }}
-\source{
-MOLIS (LIS of Certe) - \url{https://www.certe.nl}
-}
 \usage{
 microorganisms
 }
--- a/man/microorganisms.umcg.Rd
+++ b/man/microorganisms.umcg.Rd
@ -9,9 +9,6 @@
  \item{\code{mocode}}{Code of microorganism according to UMCG MMB}
  \item{\code{bactid}}{Code of microorganism in \code{\link{microorganisms}}}
 }}
-\source{
-MOLIS (LIS of Certe) - \url{https://www.certe.nl} \cr \cr GLIMS (LIS of UMCG) - \url{https://www.umcg.nl}
-}
 \usage{
 microorganisms.umcg
 }
--- a/man/septic_patients.Rd
+++ b/man/septic_patients.Rd
@ -4,7 +4,7 @@
 \name{septic_patients}
 \alias{septic_patients}
 \title{Dataset with 2000 blood culture isolates of septic patients}
-\format{A data.frame with 2000 observations and 47 variables:
+\format{A data.frame with 2000 observations and 49 variables:
 \describe{
  \item{\code{date}}{date of receipt at the laboratory}
  \item{\code{hospital_id}}{ID of the hospital}
@ -15,11 +15,8 @@
  \item{\code{sex}}{sex of the patient}
  \item{\code{patient_id}}{ID of the patient, first 10 characters of an SHA hash containing irretrievable information}
  \item{\code{bactid}}{ID of microorganism, see \code{\link{microorganisms}}}
-  \item{\code{peni:mupi}}{38 different antibiotics with class \code{rsi} (see \code{\link{as.rsi}}); these column names occur in \code{\link{antibiotics}} and can be translated with \code{\link{abname}}}
+  \item{\code{peni:rifa}}{40 different antibiotics with class \code{rsi} (see \code{\link{as.rsi}}); these column names occur in \code{\link{antibiotics}} data set and can be translated with \code{\link{abname}}}
 }}
-\source{
-MOLIS (LIS of Certe) - \url{https://www.certe.nl}
-}
 \usage{
 septic_patients
 }
@ -45,15 +42,15 @@ my_data <- my_data \%>\%
 # ANALYSIS #
 # -------- #

-# 1. Get the amoxicillin resistance percentages
-#    of E. coli, divided by hospital:
+# 1. Get the amoxicillin resistance percentages (p)
+#     and numbers (n) of E. coli, divided by hospital:

 my_data \%>\%
-  filter(bactid == "ESCCOL",
+  filter(bactid == guess_bactid("E. coli"),
         first_isolates == TRUE) \%>\%
  group_by(hospital_id) \%>\%
-  summarise(n = n(),
-            amoxicillin_resistance = rsi(amox))
+  summarise(n = n_rsi(amox),
+            p = resistance(amox))


 # 2. Get the amoxicillin/clavulanic acid resistance
@ -63,7 +60,7 @@ my_data \%>\%
  filter(bactid == guess_bactid("E. coli"),
         first_isolates == TRUE) \%>\%
  group_by(year = format(date, "\%Y")) \%>\%
-  summarise(n = n(),
-            amoxclav_resistance = rsi(amcl, minimum = 20))
+  summarise(n = n_rsi(amcl),
+            p = resistance(amcl, minimum = 20))
 }
 \keyword{datasets}
--- a/tests/testthat/test-bactid.R
+++ b/tests/testthat/test-bactid.R
@ -37,8 +37,8 @@ test_that("as.bactid works", {
      select(genus) %>%
      as.bactid() %>%
      as.character(),
-    c("STC", "STC", "NEI", "STA", "STA",
-      "NEI", "ENT", "ENT", "ESC", "KLE"))
+    c("ESC", "ESC", "STA", "STA", "STA",
+      "STA", "STA", "STA", "STA", "STA"))

  # select with two columns
  expect_identical(
--- a/tests/testthat/test-first_isolate.R
+++ b/tests/testthat/test-first_isolate.R
@ -8,9 +8,9 @@ test_that("first isolates work", {
                    col_date = "date",
                    col_patient_id = "patient_id",
                    col_bactid = "bactid",
-                    info = FALSE),
+                    info = TRUE),
      na.rm = TRUE),
-    1959)
+    1326)

  # septic_patients contains 1962 out of 2000 first *weighted* isolates
  expect_equal(
@ -24,8 +24,8 @@ test_that("first isolates work", {
                      type = "keyantibiotics",
                      info = TRUE),
        na.rm = TRUE)),
-    1962)
-  # and 1997 when using points
+    1421)
+  # and 1961 when using points
  expect_equal(
    suppressWarnings(
      sum(
@ -37,7 +37,7 @@ test_that("first isolates work", {
                      type = "points",
                      info = TRUE),
        na.rm = TRUE)),
-    1997)
+    1425)

  # septic_patients contains 1732 out of 2000 first non-ICU isolates
  expect_equal(
@ -50,7 +50,7 @@ test_that("first isolates work", {
                    info = TRUE,
                    icu_exclude = TRUE),
      na.rm = TRUE),
-    1732)
+    1171)

  # set 1500 random observations to be of specimen type 'Urine'
  random_rows <- sample(x = 1:2000, size = 1500, replace = FALSE)
@ -59,7 +59,7 @@ test_that("first isolates work", {
      first_isolate(tbl = mutate(septic_patients,
                                 specimen = if_else(row_number() %in% random_rows,
                                                    "Urine",
-                                                    "Unknown")),
+                                                    "Other")),
                    col_date = "date",
                    col_patient_id = "patient_id",
                    col_bactid = "bactid",
@ -74,7 +74,7 @@ test_that("first isolates work", {
      first_isolate(tbl = mutate(septic_patients,
                                 specimen = if_else(row_number() %in% random_rows,
                                                    "Urine",
-                                                    "Unknown")),
+                                                    "Other")),
                    col_date = "date",
                    col_patient_id = "patient_id",
                    col_bactid = "bactid",
--- a/tests/testthat/test-freq.R
+++ b/tests/testthat/test-freq.R
@ -4,8 +4,8 @@ test_that("frequency table works", {
  expect_equal(nrow(freq(c(1, 1, 2, 2, 3, 3, 4, 4, 5, 5))), 5)
  expect_equal(nrow(frequency_tbl(c(1, 1, 2, 2, 3, 3, 4, 4, 5, 5))), 5)

-  # date column of septic_patients should contain 1662 unique dates
-  expect_equal(nrow(freq(septic_patients$date)), 1662)
+  # date column of septic_patients should contain 1151 unique dates
+  expect_equal(nrow(freq(septic_patients$date)), 1151)
  expect_equal(nrow(freq(septic_patients$date)),
               length(unique(septic_patients$date)))

--- a/tests/testthat/test-kurtosis.R
+++ b/tests/testthat/test-kurtosis.R
@ -2,12 +2,12 @@ context("kurtosis.R")

 test_that("kurtosis works", {
  expect_equal(kurtosis(septic_patients$age),
-               6.423118,
+               3.57781,
               tolerance = 0.00001)
  expect_equal(unname(kurtosis(data.frame(septic_patients$age))),
-               6.423118,
+               3.57781,
               tolerance = 0.00001)
  expect_equal(kurtosis(matrix(septic_patients$age)),
-               6.423118,
+               3.57781,
               tolerance = 0.00001)
 })
--- a/tests/testthat/test-mdro.R
+++ b/tests/testthat/test-mdro.R
@ -13,7 +13,8 @@ test_that("MDRO works", {
  expect_equal(outcome %>% class(), c('ordered', 'factor'))

  # septic_patients should have these finding using Dutch guidelines
-  expect_equal(outcome %>% freq() %>% pull(count), c(3, 21))
+  expect_equal(outcome %>% freq() %>% pull(count),
+               c(2, 14)) # 2 unconfirmed, 14 positive

  expect_equal(BRMO(septic_patients, info = FALSE), MDRO(septic_patients, "nl", info = FALSE))

--- a/tests/testthat/test-resistance.R
+++ b/tests/testthat/test-resistance.R
@ -1,19 +1,19 @@
 context("resistance.R")

 test_that("resistance works", {
-  # amox resistance in `septic_patients` should be around 57.56%
-  expect_equal(resistance(septic_patients$amox, include_I = TRUE), 0.5756, tolerance = 0.0001)
-  expect_equal(susceptibility(septic_patients$amox, include_I = FALSE), 1 - 0.5756, tolerance = 0.0001)
+  # amox resistance in `septic_patients` should be around 66.33%
+  expect_equal(resistance(septic_patients$amox, include_I = TRUE), 0.6633, tolerance = 0.0001)
+  expect_equal(susceptibility(septic_patients$amox, include_I = FALSE), 1 - 0.6633, tolerance = 0.0001)

  # pita+genta susceptibility around 98.09%
  expect_equal(susceptibility(septic_patients$pita,
                              septic_patients$gent),
-               0.9809,
+               0.9535,
               tolerance = 0.0001)
  expect_equal(suppressWarnings(rsi(septic_patients$pita,
                                    septic_patients$gent,
                                    interpretation = "S")),
-               0.9809,
+               0.9535,
               tolerance = 0.0001)

  # count of cases
@ -26,7 +26,7 @@ test_that("resistance works", {
                           combination_p = susceptibility(cipr, gent, as_percent = TRUE),
                           combination_n = n_rsi(cipr, gent)) %>%
                 pull(combination_n),
-               c(138, 474, 170, 464, 183))
+               c(202, 482, 201, 499))

  expect_warning(resistance(as.character(septic_patients$amcl)))
  expect_warning(susceptibility(as.character(septic_patients$amcl)))
@ -36,26 +36,26 @@ test_that("resistance works", {
 })

 test_that("old rsi works", {
-  # amox resistance in `septic_patients` should be around 53.86%
-  expect_equal(rsi(septic_patients$amox), 0.5756, tolerance = 0.0001)
-  expect_equal(rsi(septic_patients$amox), 0.5756, tolerance = 0.0001)
+  # amox resistance in `septic_patients` should be around 66.33%
+  expect_equal(rsi(septic_patients$amox), 0.6633, tolerance = 0.0001)
+  expect_equal(rsi(septic_patients$amox, interpretation = "S"), 1 - 0.6633, tolerance = 0.0001)
  expect_equal(rsi_df(septic_patients,
                      ab = "amox",
                      info = TRUE),
-               0.5756,
+               0.6633,
               tolerance = 0.0001)
  # pita+genta susceptibility around 98.09%
  expect_equal(rsi(septic_patients$pita,
                   septic_patients$gent,
                   interpretation = "S",
                   info = TRUE),
-               0.9809,
+               0.9535,
               tolerance = 0.0001)
  expect_equal(rsi_df(septic_patients,
                      ab = c("pita", "gent"),
                      interpretation = "S",
                      info = TRUE),
-               0.9809,
+               0.9535,
               tolerance = 0.0001)
  # more than 2 not allowed
  expect_error(rsi_df(septic_patients,
@ -76,7 +76,7 @@ test_that("old rsi works", {
                                               as_percent = TRUE, warning = FALSE),
                           combination_n = n_rsi(cipr, gent)) %>%
                 pull(combination_n),
-               c(138, 474, 170, 464, 183))
+               c(202, 482, 201, 499))
 })

 test_that("prediction of rsi works", {
@ -86,8 +86,8 @@ test_that("prediction of rsi works", {
                col_date = "date",
                info = TRUE) %>%
    pull("probR")
-  # amox resistance will decrease using dataset `septic_patients`
-  expect_true(amox_R[2] > amox_R[20])
+  # amox resistance will increase according to data set `septic_patients`
+  expect_true(amox_R[3] < amox_R[20])

  expect_output(rsi_predict(tbl = filter(septic_patients, bactid == "ESCCOL"),
                            model = "binomial",
--- a/tests/testthat/test-skewness.R
+++ b/tests/testthat/test-skewness.R
@ -2,12 +2,12 @@ context("skewness.R")

 test_that("skewness works", {
  expect_equal(skewness(septic_patients$age),
-               -1.637164,
+               -0.90624,
               tolerance = 0.00001)
  expect_equal(unname(skewness(data.frame(septic_patients$age))),
-               -1.637164,
+               -0.90624,
               tolerance = 0.00001)
  expect_equal(skewness(matrix(septic_patients$age)),
-               -1.637164,
+               -0.90624,
               tolerance = 0.00001)
 })