added septic_patients

2025-07-19 01:03:17 +02:00 · 2018-02-27 20:01:02 +01:00
parent 98b2b99913
commit 34e70a65ed
10 changed files with 202 additions and 94 deletions
--- a/R/data.R
+++ b/R/data.R
@ -75,3 +75,22 @@
 #' @source MOLIS (LIS of Certe) - \url{https://www.certe.nl} \cr \cr GLIMS (LIS of UMCG) - \url{https://www.umcg.nl}
 #' @seealso \code{\link{bactlist}}
 "bactlist.umcg"
+
+#' Dataset with 2000 blood culture isolates of septic patients
+#'
+#' An anonymised dataset containing 2000 microbial blood culture isolates with their antibiogram of septic patients found in 5 different hospitals in the Netherlands, between 2001 and 2017. This data.frame can be used to practice AMR analysis e.g. with \code{\link{rsi}} or \code{\link{rsi_predict}}, or it can be used to practice other statistics.
+#' @format A data.frame with 2000 observations and 47 variables:
+#' \describe{
+#'   \item{\code{date}}{date of receipt at the laboratory}
+#'   \item{\code{hospital_id}}{ID of the hospital}
+#'   \item{\code{ward_icu}}{logical to determine if ward is an intensive care unit}
+#'   \item{\code{ward_clinical}}{logical to determine if ward is a regular clinical ward}
+#'   \item{\code{ward_outpatient}}{logical to determine if ward is an outpatient clinic}
+#'   \item{\code{age}}{age of the patient}
+#'   \item{\code{sex}}{sex of the patient}
+#'   \item{\code{patient_id}}{ID of the patient, first 10 characters of an SHA hash containing irretrievable information}
+#'   \item{\code{bactid}}{ID of microorganism, see \code{\link{bactlist}}}
+#'   \item{\code{peni:mupi}}{38 different antibiotics with class \code{rsi} (see \code{\link{as.rsi}}), these column names occur in \code{\link{ablist}} and can be translated with \code{\link{abname}}}
+#' }
+#' @source MOLIS (LIS of Certe) - \url{https://www.certe.nl}
+"septic_patients"
--- a/R/first_isolates.R
+++ b/R/first_isolates.R
@ -24,7 +24,7 @@
 #' @param col_patient_id column name of the unique IDs of the patients
 #' @param col_genus column name of the genus of the microorganisms
 #' @param col_species column name of the species of the microorganisms
-#' @param col_testcode column name of the test codes, see Details
+#' @param col_testcode column name of the test codes. Use \code{col_testcode = NA} to \strong{not} exclude certain test codes (like test codes for screening). In that case \code{testcodes_exclude} will be ignored.
 #' @param col_specimen column name of the specimen type or group
 #' @param col_icu column name of the logicals (\code{TRUE}/\code{FALSE}) whether a ward or department is an Intensive Care Unit (ICU)
 #' @param col_keyantibiotics column name of the key antibiotics to determine first \emph{weighted} isolates, see \code{\link{key_antibiotics}}.
@ -33,11 +33,13 @@
 #' @param icu_exclude logical whether ICU isolates should be excluded
 #' @param filter_specimen specimen group or type that should be excluded
 #' @param output_logical return output as \code{logical} (otherwise the values \code{0} or \code{1} are returned)
-#' @param ignore_I ignore \code{"I"} as antimicrobial interpretation of key antibiotics (with \code{FALSE}, changes in antibiograms from S to I and I to R will be interpreted as difference)
+#' @param points_threshold points until the comparison of key antibiotics will lead to inclusion of an isolate, see Details
 #' @param info print progress
-#' @details To conduct an analysis of antimicrobial resistance, you should only include the first isolate of every patient per episode. If you would not do this, you could easily get an overestimate or underestimate of the resistance of an antibiotic. Imagine that a patient was admitted with an MRSA and that is was found in 5 different blood cultures the following week. The resistance percentage of oxacillin of all \emph{S. aureus} isolates would be overestimated, because you included this MRSA more than once. It would be selection bias.
-#'
-#'     Use \code{col_testcode = NA} to \strong{not} exclude certain test codes (like test codes for screening). In that case \code{testcodes_exclude} will be ignored.
+#' @details \strong{Why this is so important} \cr
+#'     To conduct an analysis of antimicrobial resistance, you should only include the first isolate of every patient per episode \href{https://www.ncbi.nlm.nih.gov/pubmed/17304462}{[1]}. If you would not do this, you could easily get an overestimate or underestimate of the resistance of an antibiotic. Imagine that a patient was admitted with an MRSA and that it was found in 5 different blood cultures the following week. The resistance percentage of oxacillin of all \emph{S. aureus} isolates would be overestimated, because you included this MRSA more than once. It would be \href{https://en.wikipedia.org/wiki/Selection_bias}{selection bias}.
+#'
+#'     \strong{\code{points_threshold}} \cr
+#'     To compare key antibiotics, the difference between antimicrobial interpretations will be measured. A difference from I to S|R (or vice versa) means 0.5 points. A difference from S to R (or vice versa) means 1 point. When the sum of points exceeds \code{points_threshold}, an isolate will be (re)selected as a first weighted isolate.
 #' @keywords isolate isolates first
 #' @export
 #' @importFrom dplyr arrange_at lag between row_number filter mutate arrange
@ -96,7 +98,7 @@ first_isolate <- function(tbl,
                          icu_exclude = FALSE,
                          filter_specimen = NA,
                          output_logical = TRUE,
-                          ignore_I = TRUE,
+                          points_threshold = 2,
                          info = TRUE) {
  
  # check whether the columns actually exist
@ -274,20 +276,16 @@ first_isolate <- function(tbl,
                               0))
  
  if (col_keyantibiotics != '') {
-    # dit duurt 2 min bij 120.000 isolaten
    if (info == TRUE) {
-      cat('Comparing key antibiotics for first weighted isolates')
-      if (ignore_I == TRUE) {
-        cat(' (ignoring I)')
-      }
-      cat('...\n')
+      cat(paste0('Comparing key antibiotics for first weighted isolates (using points threshold of '
+                 , points_threshold, ')...\n'))
    }
    all_first <- all_first %>%
      mutate(key_ab_lag = lag(key_ab)) %>%
-      mutate(key_ab_other = !key_antibiotics_equal(key_ab,
-                                                  key_ab_lag,
-                                                  ignore_I = ignore_I,
-                                                  info = info)) %>%
+      mutate(key_ab_other = !key_antibiotics_equal(x = key_ab,
+                                                   y = key_ab_lag,
+                                                   points_threshold = points_threshold,
+                                                   info = info)) %>%
      mutate(
        real_first_isolate =
          if_else(
@ -448,18 +446,11 @@ key_antibiotics <- function(tbl,
  
 }

-# Compare key antibiotics
-#
-# Check whether two text values with key antibiotics match. Supports vectors.
-# @param x,y tekst (or multiple text vectors) with antimicrobial interpretations
-# @param ignore_I ignore \code{"I"} as antimicrobial interpretation of key antibiotics (with \code{FALSE}, changes in antibiograms from S to I and I to R will be interpreted as difference)
-# @param info print progress
-# @return logical
-# @export
-# @seealso \code{\link{key_antibiotics}}
+#' @importFrom dplyr progress_estimated %>%
+#' @noRd
+key_antibiotics_equal <- function(x, y, points_threshold = 2, info = FALSE) {
+  # x is active row, y is lag

-# only internal use
-key_antibiotics_equal <- function(x, y, ignore_I = TRUE, info = FALSE) {
  if (length(x) != length(y)) {
    stop('Length of `x` and `y` must be equal.')
  }
@ -467,13 +458,13 @@ key_antibiotics_equal <- function(x, y, ignore_I = TRUE, info = FALSE) {
  result <- logical(length(x))
  
  if (info == TRUE) {
-    voortgang <- dplyr::progress_estimated(length(x))
+    p <- dplyr::progress_estimated(length(x))
  }
  
  for (i in 1:length(x)) {
    
    if (info == TRUE) {
-      voortgang$tick()$print()
+      p$tick()$print()
    }
    
    if (is.na(x[i])) {
@ -493,22 +484,17 @@ key_antibiotics_equal <- function(x, y, ignore_I = TRUE, info = FALSE) {
      
    } else {
      
-      x2 <- strsplit(x[i], "")[[1]]
-      y2 <- strsplit(y[i], "")[[1]]
+      # count points for every single character:
+      # - no change is 0 points
+      # - I <-> S|R is 0.5 point
+      # - S|R <-> R|S is 1 point
+      # use the levels of as.rsi (S = 1, I = 2, R = 3)
+
+      x2 <- strsplit(x[i], "")[[1]] %>% as.rsi() %>% as.double()
+      y2 <- strsplit(y[i], "")[[1]] %>% as.rsi() %>% as.double()
      
-      if (ignore_I == TRUE) {
-        valid_chars <- c('S', 's', 'R', 'r')
-      } else {
-        valid_chars <- c('S', 's', 'I', 'i', 'R', 'r')
-      }
-      
-      # Ongeldige waarden (zoals "-", NA) op beide locaties verwijderen
-      x2[which(!x2 %in% valid_chars)] <- '?'
-      x2[which(!y2 %in% valid_chars)] <- '?'
-      y2[which(!x2 %in% valid_chars)] <- '?'
-      y2[which(!y2 %in% valid_chars)] <- '?'
-      
-      result[i] <- all(x2 == y2)
+      points <- (x2 - y2) %>% abs() %>% sum(na.rm = TRUE)
+      result[i] <- ((points / 2) >= points_threshold)
    }
  }
  if (info == TRUE) {
--- a/R/join.R
+++ b/R/join.R
@ -6,7 +6,7 @@
 #' @aliases join inner_join
 #' @param x existing table to join
 #' @param by a variable to join by - could be a column name of \code{x} with values that exist in \code{bactlist$bactid} (like \code{by = "bacteria_id"}), or another column in \code{\link{bactlist}} (but then it should be named, like \code{by = c("my_genus_species" = "fullname")})
-#' @param ... other parameters to pass trhough to \code{dplyr::\link[dplyr]{join}}.
+#' @param ... other parameters to pass on to \code{dplyr::\link[dplyr]{join}}.
 #' @details As opposed to the \code{\link[dplyr]{join}} functions of \code{dplyr}, at default existing columns will get a suffix \code{"2"} and the newly joined columns will not get a suffix. See \code{\link[dplyr]{join}} for more information.
 #' @export
 #' @examples 
@ -37,7 +37,7 @@ inner_join_bactlist <- function(x, by = 'bactid', ...) {

 #' @rdname join
 #' @export
-left_join_bactlist <- function(x, by = 'bacteriecode', ...) {
+left_join_bactlist <- function(x, by = 'bactid', ...) {
  # no name set to `by` parameter
  if (is.null(names(by))) {
    joinby <- colnames(AMR::bactlist)[1]
@ -54,7 +54,7 @@ left_join_bactlist <- function(x, by = 'bacteriecode', ...) {

 #' @rdname join
 #' @export
-right_join_bactlist <- function(x, by = 'bacteriecode', ...) {
+right_join_bactlist <- function(x, by = 'bactid', ...) {
  # no name set to `by` parameter
  if (is.null(names(by))) {
    joinby <- colnames(AMR::bactlist)[1]
@ -71,7 +71,7 @@ right_join_bactlist <- function(x, by = 'bacteriecode', ...) {

 #' @rdname join
 #' @export
-full_join_bactlist <- function(x, by = 'bacteriecode', ...) {
+full_join_bactlist <- function(x, by = 'bactid', ...) {
  # no name set to `by` parameter
  if (is.null(names(by))) {
    joinby <- colnames(AMR::bactlist)[1]
@ -84,7 +84,7 @@ full_join_bactlist <- function(x, by = 'bacteriecode', ...) {

 #' @rdname join
 #' @export
-semi_join_bactlist <- function(x, by = 'bacteriecode', ...) {
+semi_join_bactlist <- function(x, by = 'bactid', ...) {
  # no name set to `by` parameter
  if (is.null(names(by))) {
    joinby <- colnames(AMR::bactlist)[1]
@ -97,7 +97,7 @@ semi_join_bactlist <- function(x, by = 'bacteriecode', ...) {

 #' @rdname join
 #' @export
-anti_join_bactlist <- function(x, by = 'bacteriecode', ...) {
+anti_join_bactlist <- function(x, by = 'bactid', ...) {
  # no name set to `by` parameter
  if (is.null(names(by))) {
    joinby <- colnames(AMR::bactlist)[1]
--- a/R/misc.R
+++ b/R/misc.R
@ -29,3 +29,21 @@
 percent <- function(x, round = 1, ...) {
  # Turn a fraction into a percentage string: scale by 100, round to
  # `round` digits and append a percent sign. `...` is accepted but unused.
  scaled <- x * 100
  rounded <- base::round(scaled, digits = round)
  base::paste0(rounded, "%")
 }
+
+quasiquotate <- function(deparsed, parsed) {  # returns either the argument's source text (deparsed) or its evaluated value (parsed)
+  # when the text is a quoted literal: remove its first and last character (the quotes)
+  if (any(deparsed %like% '^".+"$' | deparsed %like% "^'.+'$")) {
+    deparsed <- deparsed %>% substr(2, nchar(.) - 1)
+  }
+  # use the text itself unless it holds one of [ $ : ( ) or is a listed literal / global object name (assumes %like% is a regex match - TODO confirm)
+  if (any(!deparsed %like% '[[$:()]'
+      & !deparsed %in% c('""', "''", "", # empty text
+                         ".", ".data", # dplyr references
+                         "TRUE", "FALSE", # logicals
+                         "NA", "NaN", "NULL", # empty values
+                         ls(.GlobalEnv)))) {  # names of all objects currently in the global environment
+    deparsed
+  } else {
+    parsed  # only evaluated when this branch is reached, because R evaluates function arguments lazily
+  }
+}
--- a/R/rsi_analysis.R
+++ b/R/rsi_analysis.R
@ -192,15 +192,15 @@ rsi_df <- function(tbl,
 #' rsi(as.rsi(isolates$amcl), interpretation = "S")
 #' }
 rsi <- function(ab1, ab2 = NA, interpretation = 'IR', minimum = 30, percent = FALSE, info = FALSE, warning = FALSE) {
-  functietekst <- as.character(match.call())
+  function_text <- as.character(match.call())
  # param 1 = function name
  # param 2 = ab1
  # param 3 = ab2
-  ab1.naam <- functietekst[2]
+  ab1.naam <- function_text[2]
  if (!grepl('^[a-z]{3,4}$', ab1.naam)) {
    ab1.naam <- 'rsi1'
  }
-  ab2.naam <- functietekst[3]
+  ab2.naam <- function_text[3]
  if (!grepl('^[a-z]{3,4}$', ab2.naam)) {
    ab2.naam <- 'rsi2'
  }
@ -236,10 +236,10 @@ rsi <- function(ab1, ab2 = NA, interpretation = 'IR', minimum = 30, percent = FA

 #' Predict antimicrobial resistance
 #'
-#' Create a prediction model to predict antimicrobial resistance for the next years on statistical solid ground. Standard errors (SE) will be returned as columns \code{se_min} and \code{se_max}.
+#' Create a prediction model to predict antimicrobial resistance for the next years on statistically solid ground. Standard errors (SE) will be returned as columns \code{se_min} and \code{se_max}. See Examples for a real-life example.
 #' @param tbl table that contains columns \code{col_ab} and \code{col_date}
-#' @param col_ab column name of \code{tbl} with antimicrobial interpretations (\code{R}, \code{I} and \code{S})
-#' @param col_date column name of the date, will be used to calculate years
+#' @param col_ab column name of \code{tbl} with antimicrobial interpretations (\code{R}, \code{I} and \code{S}), supports tidyverse-like quotation
+#' @param col_date column name of the date, will be used to calculate years if this column doesn't consist of years already, supports tidyverse-like quotation
 #' @param year_max highest year to use in the prediction model, defaults to 15 years after today
 #' @param year_every unit of sequence between lowest year found in the data and \code{year_max}
 #' @param model the statistical model of choice. Valid values are \code{"binomial"} (or \code{"binom"} or \code{"logit"}) or \code{"loglin"} or \code{"linear"} (or \code{"lin"}).
@ -255,24 +255,41 @@ rsi <- function(ab1, ab2 = NA, interpretation = 'IR', minimum = 30, percent = FA
 #' \dontrun{
 #' # use it directly:
 #' rsi_predict(tbl = tbl[which(first_isolate == TRUE & genus == "Haemophilus"),],
-#'             col_ab = "amcl", coldate = "date")
+#'             col_ab = "amcl", col_date = "date")
 #'   
 #' # or with dplyr so you can actually read it:
 #' library(dplyr)
 #' tbl %>%
 #'   filter(first_isolate == TRUE,
 #'          genus == "Haemophilus") %>%
-#'   rsi_predict(col_ab = "amcl", coldate = "date")
-#'
-#' tbl %>%
-#'   filter(first_isolate_weighted == TRUE,
-#'          genus == "Haemophilus") %>%
-#'   rsi_predict(col_ab = "amcl",
-#'               coldate = "date",
-#'               year_max = 2050,
-#'               year_every = 5)
-#'
+#'   rsi_predict(amcl, date)
 #' }
+#'
+#'
+#' # real-life example:
+#' library(dplyr)
+#' septic_patients %>%
+#'   # get bacteria properties like genus and species
+#'   left_join_bactlist("bactid") %>% 
+#'   # calculate first isolates
+#'   mutate(first_isolate = 
+#'            first_isolate(.,
+#'                          "date",
+#'                          "patient_id",
+#'                          "genus",
+#'                          "species",
+#'                          col_specimen = NA,
+#'                          col_icu = NA)) %>% 
+#'   # filter on first E. coli isolates
+#'   filter(genus == "Escherichia", 
+#'          species == "coli", 
+#'          first_isolate == TRUE) %>%
+#'   # predict resistance of cefotaxime for next years
+#'   rsi_predict(col_ab = cfot,
+#'               col_date = date,
+#'               year_max = 2025,
+#'               preserve_measurements = FALSE)
+#'
 rsi_predict <- function(tbl,
                        col_ab,
                        col_date,
@ -283,12 +300,33 @@ rsi_predict <- function(tbl,
                        preserve_measurements = TRUE,
                        info = TRUE) {
  
+  col_ab <- quasiquotate(deparse(substitute(col_ab)), col_ab)
+  if (!col_ab %in% colnames(tbl)) {
+    stop('Column ', col_ab, ' not found.')
+  }
+  col_date <- quasiquotate(deparse(substitute(col_date)), col_date)
+  if (!col_date %in% colnames(tbl)) {
+    stop('Column ', col_date, ' not found.')
+  }
+  if ('grouped_df' %in% class(tbl)) {
+    # no grouped tibbles please, mutate will throw errors
+    tbl <- base::as.data.frame(tbl, stringsAsFactors = FALSE)
+  }
+
  if (I_as_R == TRUE) {
    tbl[, col_ab] <- gsub('I', 'R', tbl %>% pull(col_ab))
  }
+
+  if (!all(tbl %>% pull(col_ab) %>% as.rsi() %in% c(NA, 'S', 'I', 'R'))) {
+    stop('Column ', col_ab, ' must contain antimicrobial interpretations (S, I, R).')
+  }
  
  year <- function(x) {
-    as.integer(format(as.Date(x), '%Y'))
+    if (all(grepl('^[0-9]{4}$', x))) {
+      x
+    } else {
+      as.integer(format(as.Date(x), '%Y'))
+    }
  }
  
  years_predict <- seq(from = min(year(tbl %>% pull(col_date))), to = year_max, by = year_every)
--- a/data/septic_patients.rda
+++ b/data/septic_patients.rda
--- a/man/first_isolate.Rd
+++ b/man/first_isolate.Rd
@ -7,7 +7,7 @@
 first_isolate(tbl, col_date, col_patient_id, col_genus, col_species,
  col_testcode = NA, col_specimen, col_icu, col_keyantibiotics = NA,
  episode_days = 365, testcodes_exclude = "", icu_exclude = FALSE,
-  filter_specimen = NA, output_logical = TRUE, ignore_I = TRUE,
+  filter_specimen = NA, output_logical = TRUE, points_threshold = 2,
  info = TRUE)
 }
 \arguments{
@ -21,7 +21,7 @@ first_isolate(tbl, col_date, col_patient_id, col_genus, col_species,

 \item{col_species}{column name of the species of the microorganisms}

-\item{col_testcode}{column name of the test codes, see Details}
+\item{col_testcode}{column name of the test codes. Use \code{col_testcode = NA} to \strong{not} exclude certain test codes (like test codes for screening). In that case \code{testcodes_exclude} will be ignored.}

 \item{col_specimen}{column name of the specimen type or group}

@ -39,7 +39,7 @@ first_isolate(tbl, col_date, col_patient_id, col_genus, col_species,

 \item{output_logical}{return output as \code{logical} (otherwise the values \code{0} or \code{1} are returned)}

-\item{ignore_I}{ignore \code{"I"} as antimicrobial interpretation of key antibiotics (with \code{FALSE}, changes in antibiograms from S to I and I to R will be interpreted as difference)}
+\item{points_threshold}{points until the comparison of key antibiotics will lead to inclusion of an isolate, see Details}

 \item{info}{print progress}
 }
@ -50,9 +50,10 @@ A vector to add to table, see Examples.
 Determine first (weighted) isolates of all microorganisms of every patient per episode and (if needed) per specimen type.
 }
 \details{
-To conduct an analysis of antimicrobial resistance, you should only include the first isolate of every patient per episode. If you would not do this, you could easily get an overestimate or underestimate of the resistance of an antibiotic. Imagine that a patient was admitted with an MRSA and that is was found in 5 different blood cultures the following week. The resistance percentage of oxacillin of all \emph{S. aureus} isolates would be overestimated, because you included this MRSA more than once. It would be selection bias.
-
-    Use \code{col_testcode = NA} to \strong{not} exclude certain test codes (like test codes for screening). In that case \code{testcodes_exclude} will be ignored.
+\strong{Why this is so important} \cr
+    To conduct an analysis of antimicrobial resistance, you should only include the first isolate of every patient per episode \href{https://www.ncbi.nlm.nih.gov/pubmed/17304462}{[1]}. If you would not do this, you could easily get an overestimate or underestimate of the resistance of an antibiotic. Imagine that a patient was admitted with an MRSA and that it was found in 5 different blood cultures the following week. The resistance percentage of oxacillin of all \emph{S. aureus} isolates would be overestimated, because you included this MRSA more than once. It would be \href{https://en.wikipedia.org/wiki/Selection_bias}{selection bias}.
+    \strong{\code{points_threshold}} \cr
+    To compare key antibiotics, the difference between antimicrobial interpretations will be measured. A difference from I to S|R (or vice versa) means 0.5 points. A difference from S to R (or vice versa) means 1 point. When the sum of points exceeds \code{points_threshold}, an isolate will be (re)selected as a first weighted isolate.
 }
 \examples{
 \dontrun{
--- a/man/join.Rd
+++ b/man/join.Rd
@ -13,22 +13,22 @@
 \usage{
 inner_join_bactlist(x, by = "bactid", ...)

-left_join_bactlist(x, by = "bacteriecode", ...)
+left_join_bactlist(x, by = "bactid", ...)

-right_join_bactlist(x, by = "bacteriecode", ...)
+right_join_bactlist(x, by = "bactid", ...)

-full_join_bactlist(x, by = "bacteriecode", ...)
+full_join_bactlist(x, by = "bactid", ...)

-semi_join_bactlist(x, by = "bacteriecode", ...)
+semi_join_bactlist(x, by = "bactid", ...)

-anti_join_bactlist(x, by = "bacteriecode", ...)
+anti_join_bactlist(x, by = "bactid", ...)
 }
 \arguments{
 \item{x}{existing table to join}

 \item{by}{a variable to join by - could be a column name of \code{x} with values that exist in \code{bactlist$bactid} (like \code{by = "bacteria_id"}), or another column in \code{\link{bactlist}} (but then it should be named, like \code{by = c("my_genus_species" = "fullname")})}

-\item{...}{other parameters to pass trhough to \code{dplyr::\link[dplyr]{join}}.}
+\item{...}{other parameters to pass on to \code{dplyr::\link[dplyr]{join}}.}
 }
 \description{
 Join the list of microorganisms \code{\link{bactlist}} easily to an existing table.
--- a/man/rsi_predict.Rd
+++ b/man/rsi_predict.Rd
@ -12,9 +12,9 @@ rsi_predict(tbl, col_ab, col_date,
 \arguments{
 \item{tbl}{table that contains columns \code{col_ab} and \code{col_date}}

-\item{col_ab}{column name of \code{tbl} with antimicrobial interpretations (\code{R}, \code{I} and \code{S})}
+\item{col_ab}{column name of \code{tbl} with antimicrobial interpretations (\code{R}, \code{I} and \code{S}), supports tidyverse-like quotation}

-\item{col_date}{column name of the date, will be used to calculate years}
+\item{col_date}{column name of the date, will be used to calculate years if this column doesn't consist of years already, supports tidyverse-like quotation}

 \item{year_max}{highest year to use in the prediction model, defaults to 15 years after today}

@ -32,30 +32,47 @@ rsi_predict(tbl, col_ab, col_date,
 \code{data.frame} with columns \code{year}, \code{probR}, \code{se_min} and \code{se_max}.
 }
 \description{
-Create a prediction model to predict antimicrobial resistance for the next years on statistical solid ground. Standard errors (SE) will be returned as columns \code{se_min} and \code{se_max}.
+Create a prediction model to predict antimicrobial resistance for the next years on statistically solid ground. Standard errors (SE) will be returned as columns \code{se_min} and \code{se_max}. See Examples for a real-life example.
 }
 \examples{
 \dontrun{
 # use it directly:
 rsi_predict(tbl = tbl[which(first_isolate == TRUE & genus == "Haemophilus"),],
-            col_ab = "amcl", coldate = "date")
+            col_ab = "amcl", col_date = "date")
  
 # or with dplyr so you can actually read it:
 library(dplyr)
 tbl \%>\%
  filter(first_isolate == TRUE,
         genus == "Haemophilus") \%>\%
-  rsi_predict(col_ab = "amcl", coldate = "date")
-
-tbl \%>\%
-  filter(first_isolate_weighted == TRUE,
-         genus == "Haemophilus") \%>\%
-  rsi_predict(col_ab = "amcl",
-              coldate = "date",
-              year_max = 2050,
-              year_every = 5)
-
+  rsi_predict(amcl, date)
 }
+
+
+# real-life example:
+library(dplyr)
+septic_patients \%>\%
+  # get bacteria properties like genus and species
+  left_join_bactlist("bactid") \%>\% 
+  # calculate first isolates
+  mutate(first_isolate = 
+           first_isolate(.,
+                         "date",
+                         "patient_id",
+                         "genus",
+                         "species",
+                         col_specimen = NA,
+                         col_icu = NA)) \%>\% 
+  # filter on first E. coli isolates
+  filter(genus == "Escherichia", 
+         species == "coli", 
+         first_isolate == TRUE) \%>\%
+  # predict resistance of cefotaxime for next years
+  rsi_predict(col_ab = cfot,
+              col_date = date,
+              year_max = 2025,
+              preserve_measurements = FALSE)
+
 }
 \seealso{
 \code{\link{lm}} \cr \code{\link{glm}}
--- a/man/septic_patients.Rd
+++ b/man/septic_patients.Rd
@ -0,0 +1,29 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/data.R
+\docType{data}
+\name{septic_patients}
+\alias{septic_patients}
+\title{Dataset with 2000 blood culture isolates of septic patients}
+\format{A data.frame with 2000 observations and 47 variables:
+\describe{
+  \item{\code{date}}{date of receipt at the laboratory}
+  \item{\code{hospital_id}}{ID of the hospital}
+  \item{\code{ward_icu}}{logical to determine if ward is an intensive care unit}
+  \item{\code{ward_clinical}}{logical to determine if ward is a regular clinical ward}
+  \item{\code{ward_outpatient}}{logical to determine if ward is an outpatient clinic}
+  \item{\code{age}}{age of the patient}
+  \item{\code{sex}}{sex of the patient}
+  \item{\code{patient_id}}{ID of the patient, first 10 characters of an SHA hash containing irretrievable information}
+  \item{\code{bactid}}{ID of microorganism, see \code{\link{bactlist}}}
+  \item{\code{peni:mupi}}{38 different antibiotics with class \code{rsi} (see \code{\link{as.rsi}}), these column names occur in \code{\link{ablist}} and can be translated with \code{\link{abname}}}
+}}
+\source{
+MOLIS (LIS of Certe) - \url{https://www.certe.nl}
+}
+\usage{
+septic_patients
+}
+\description{
+An anonymised dataset containing 2000 microbial blood culture isolates with their antibiogram of septic patients found in 5 different hospitals in the Netherlands, between 2001 and 2017. This data.frame can be used to practice AMR analysis e.g. with \code{\link{rsi}} or \code{\link{rsi_predict}}, or it can be used to practice other statistics.
+}
+\keyword{datasets}