From 363218da7e87e548ef13e7fde2acb4b701e1385b Mon Sep 17 00:00:00 2001
From: "Matthijs S. Berends" <berendsms@gmail.com>
Date: Tue, 17 Nov 2020 16:57:41 +0100
Subject: [PATCH] (v1.4.0.9024) is_new_episode()

---
 .github/workflows/check.yaml        |   8 +-
 DESCRIPTION                         |   2 +-
 NAMESPACE                           |   1 +
 NEWS.md                             |  10 +-
 R/aa_helper_functions.R             |   4 +
 R/first_isolate.R                   | 157 ++++++++++++++++++++++++----
 docs/404.html                       |   2 +-
 docs/LICENSE-text.html              |   2 +-
 docs/articles/index.html            |   2 +-
 docs/authors.html                   |   2 +-
 docs/index.html                     |   2 +-
 docs/news/index.html                |   9 +-
 docs/pkgdown.yml                    |   2 +-
 docs/reference/first_isolate.html   |  76 +++++++++++---
 docs/reference/index.html           |   4 +-
 docs/survey.html                    |   2 +-
 man/first_isolate.Rd                |  71 +++++++++++--
 tests/testthat/test-_misc.R         |  37 -------
 tests/testthat/test-first_isolate.R |  11 ++
 tests/testthat/test-zzz.R           |  69 ++++++++++++
 20 files changed, 379 insertions(+), 94 deletions(-)
 create mode 100644 tests/testthat/test-zzz.R
diff --git a/.github/workflows/check.yaml b/.github/workflows/check.yaml
index 1088df41e..50f6771e6 100644
--- a/.github/workflows/check.yaml
+++ b/.github/workflows/check.yaml
@@ -64,6 +64,7 @@ jobs:
           - {os: ubuntu-16.04,   r: '3.5',     allowfail: false, rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"}
           - {os: ubuntu-16.04,   r: '3.4',     allowfail: true,  rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"}
           - {os: ubuntu-16.04,   r: '3.3',     allowfail: true,  rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"}
+          - {os: ubuntu-16.04,   r: '3.2',     allowfail: true,  rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"}
           # older R versions cannot be tested, since tidyverse only supports last 4 R x.x versions
     env:
       R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
@@ -120,14 +121,15 @@ jobs:
         shell: Rscript {0}
 
       - name: Check on older R versions
-        if: matrix.config.r == '3.3'
+        # no vignettes here, since they rely on R 3.3 and higher
+        if: matrix.config.r == '3.2'
         env:
           _R_CHECK_CRAN_INCOMING_: false
-        run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran", "--no-build-vignettes" , "--ignore-vignettes"), error_on = "warning", check_dir = "check")
+        run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran", "--ignore-vignettes"), build_args = "--no-build-vignettes" , error_on = "warning", check_dir = "check")
         shell: Rscript {0}
         
       - name: Check on newer R versions
-        if: matrix.config.r != '3.3'
+        if: matrix.config.r != '3.2'
         env:
           _R_CHECK_CRAN_INCOMING_: false
         run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check")
diff --git a/DESCRIPTION b/DESCRIPTION
index f30c3e96f..45d1100af 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,5 +1,5 @@
 Package: AMR
-Version: 1.4.0.9023
+Version: 1.4.0.9024
 Date: 2020-11-17
 Title: Antimicrobial Resistance Analysis
 Authors@R: c(
diff --git a/NAMESPACE b/NAMESPACE
index 7605a88e3..1467837be 100755
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -152,6 +152,7 @@ export(is.mic)
 export(is.mo)
 export(is.rsi)
 export(is.rsi.eligible)
+export(is_new_episode)
 export(key_antibiotics)
 export(key_antibiotics_equal)
 export(kurtosis)
diff --git a/NEWS.md b/NEWS.md
index a80c8cb24..5b99ed0a1 100755
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,7 +1,15 @@
-# AMR 1.4.0.9023
+# AMR 1.4.0.9024
 ## <small>Last updated: 17 November 2020</small>
 
 ### New
+* Function `is_new_episode()` to determine patient episodes which are not necessarily based on microorganisms. It also supports grouped variables with e.g. `mutate()` and `summarise()` of the `dplyr` package:
+  ```r
+  example_isolates %>%
+    group_by(hospital_id) %>%
+    summarise(patients = n_distinct(patient_id),
+              n_episodes_365 = sum(is_new_episode(episode_days = 365)),
+              n_episodes_60 = sum(is_new_episode(episode_days = 60)))
+  ```
 * Functions `mo_is_gram_negative()` and `mo_is_gram_positive()` as wrappers around `mo_gramstain()`. They always return `TRUE` or `FALSE` (except when the input is `NA` or the MO code is `UNKNOWN`), thus always return `FALSE` for species outside the taxonomic kingdom of Bacteria. If you have the `dplyr` package installed, they can even determine the column with microorganisms themselves when used inside `dplyr` verbs:
   ```r
   example_isolates %>%
diff --git a/R/aa_helper_functions.R b/R/aa_helper_functions.R
index e53e1650b..2c70d9eb0 100755
--- a/R/aa_helper_functions.R
+++ b/R/aa_helper_functions.R
@@ -139,9 +139,13 @@ check_dataset_integrity <- function() {
 }
 
 search_type_in_df <- function(x, type, info = TRUE) {
+  meet_criteria(x, allow_class = "data.frame")
+  meet_criteria(type, allow_class = "character", has_length = 1)
+  
   # try to find columns based on type
   found <- NULL
 
+  # remove attributes from other packages
   x <- as.data.frame(x, stringsAsFactors = FALSE)
   colnames(x) <- trimws(colnames(x))
 
diff --git a/R/first_isolate.R b/R/first_isolate.R
index 7f537074b..99557b250 100755
--- a/R/first_isolate.R
+++ b/R/first_isolate.R
@@ -25,10 +25,10 @@
 
 #' Determine first (weighted) isolates
 #'
-#' Determine first (weighted) isolates of all microorganisms of every patient per episode and (if needed) per specimen type.
+#' Determine first (weighted) isolates of all microorganisms of every patient per episode and (if needed) per specimen type. To determine patient episodes not necessarily based on microorganisms, use [is_new_episode()] that also supports grouping with the `dplyr` package, see *Examples*.
 #' @inheritSection lifecycle Stable lifecycle
-#' @param x a [data.frame] containing isolates.
-#' @param col_date column name of the result date (or date that is was received on the lab), defaults to the first column of with a date class
+#' @param x,.data a [data.frame] containing isolates.
+#' @param col_date column name of the result date (or date that it was received at the lab), defaults to the first column with a date class
 #' @param col_patient_id column name of the unique IDs of the patients, defaults to the first column that starts with 'patient' or 'patid' (case insensitive)
 #' @param col_mo column name of the IDs of the microorganisms (see [as.mo()]), defaults to the first column of class [`mo`]. Values will be coerced using [as.mo()].
 #' @param col_testcode column name of the test codes. Use `col_testcode = NULL` to **not** exclude certain test codes (like test codes for screening). In that case `testcodes_exclude` will be ignored.
@@ -45,17 +45,26 @@
 #' @param info print progress
 #' @param include_unknown logical to determine whether 'unknown' microorganisms should be included too, i.e. microbial code `"UNKNOWN"`, which defaults to `FALSE`. For WHONET users, this means that all records with organism code `"con"` (*contamination*) will be excluded at default. Isolates with a microbial ID of `NA` will always be excluded as first isolate.
 #' @param ... parameters passed on to the [first_isolate()] function
-#' @details **WHY THIS IS SO IMPORTANT** \cr
+#' @details The [is_new_episode()] function is a wrapper around the [first_isolate()] function and can be used for data sets without isolates to just determine patient episodes based on any combination of grouping variables (using `dplyr`), please see *Examples*. Since it runs [first_isolate()] for every group, it is quite slow.
+#' 
+#' All isolates with a microbial ID of `NA` will be excluded as first isolate.
+#' 
+#' ### Why this is so important
 #' To conduct an analysis of antimicrobial resistance, you should only include the first isolate of every patient per episode [(ref)](https:/pubmed.ncbi.nlm.nih.gov/17304462/). If you would not do this, you could easily get an overestimate or underestimate of the resistance of an antibiotic. Imagine that a patient was admitted with an MRSA and that it was found in 5 different blood cultures the following week. The resistance percentage of oxacillin of all *S. aureus* isolates would be overestimated, because you included this MRSA more than once. It would be [selection bias](https://en.wikipedia.org/wiki/Selection_bias).
 #'
-#' All isolates with a microbial ID of `NA` will be excluded as first isolate.
+#' ### `filter_*()` shortcuts
 #'
-#' The functions [filter_first_isolate()] and [filter_first_weighted_isolate()] are helper functions to quickly filter on first isolates. The function [filter_first_isolate()] is essentially equal to either:
+#' The functions [filter_first_isolate()] and [filter_first_weighted_isolate()] are helper functions to quickly filter on first isolates.
+#' 
+#' The function [filter_first_isolate()] is essentially equal to either:
+#' 
 #' ```
 #'   x[first_isolate(x, ...), ]
 #'   x %>% filter(first_isolate(x, ...))
 #' ```
+#' 
 #' The function [filter_first_weighted_isolate()] is essentially equal to:
+#' 
 #' ```
 #'   x %>%
 #'     mutate(keyab = key_antibiotics(.)) %>%
@@ -89,21 +98,22 @@
 #' # basic filtering on first isolates
 #' example_isolates[first_isolate(example_isolates), ]
 #' 
+#' # filtering based on isolates ----------------------------------------------
 #' \donttest{
 #' if (require("dplyr")) {
-#'   # Filter on first isolates:
+#'   # filter on first isolates:
 #'   example_isolates %>%
 #'     mutate(first_isolate = first_isolate(.)) %>%
 #'     filter(first_isolate == TRUE)
 #'  
-#'   # Short-hand versions:
+#'   # short-hand versions:
 #'   example_isolates %>%
 #'     filter_first_isolate()
 #'     
 #'   example_isolates %>%
 #'     filter_first_weighted_isolate()
 #'   
-#'   # Now let's see if first isolates matter:
+#'   # now let's see if first isolates matter:
 #'   A <- example_isolates %>%
 #'     group_by(hospital_id) %>%
 #'     summarise(count = n_rsi(GEN),            # gentamicin availability
@@ -120,6 +130,42 @@
 #'   # Gentamicin resistance in hospital D appears to be 3.7% higher than
 #'   # when you (erroneously) would have used all isolates for analysis.
 #' }
+#' 
+#' # filtering based on any other condition -----------------------------------
+#' 
+#' if (require("dplyr")) {
+#'   # is_new_episode() can be used in dplyr verbs to determine patient
+#'   # episodes based on any (combination of) grouping variables:
+#'   example_isolates %>%
+#'     mutate(condition = sample(x = c("A", "B", "C"), 
+#'                               size = 2000,
+#'                               replace = TRUE)) %>% 
+#'     group_by(condition) %>%
+#'     mutate(new_episode = is_new_episode())
+#'   
+#'   example_isolates %>%
+#'     group_by(hospital_id) %>% 
+#'     summarise(patients = n_distinct(patient_id),
+#'               n_episodes_365 = sum(is_new_episode(episode_days = 365)),
+#'               n_episodes_60  = sum(is_new_episode(episode_days = 60)),
+#'               n_episodes_30  = sum(is_new_episode(episode_days = 30)))
+#'     
+#'     
+#'   # grouping on microorganisms leads to the same results as first_isolate():
+#'   x <- example_isolates %>%
+#'     filter_first_isolate(include_unknown = TRUE)
+#'     
+#'   y <- example_isolates %>%
+#'     group_by(mo) %>%
+#'     filter(is_new_episode())
+#' 
+#'   identical(x$patient_id, y$patient_id)
+#'   
+#'   # but now you can group on isolates and many more:
+#'   example_isolates %>%
+#'     group_by(mo, hospital_id, ward_icu) %>%
+#'     mutate(flag_episode = is_new_episode())
+#' }
 #' }
 first_isolate <- function(x,
                           col_date = NULL,
@@ -139,7 +185,7 @@ first_isolate <- function(x,
                           info = interactive(),
                           include_unknown = FALSE,
                           ...) {
-  meet_criteria(x, allow_class = "data.frame")
+  meet_criteria(x, allow_class = "data.frame") # also checks dimensions to be >0
   meet_criteria(col_date, allow_class = "character", has_length = 1, allow_NULL = TRUE, is_in = colnames(x))
   meet_criteria(col_patient_id, allow_class = "character", has_length = 1, allow_NULL = TRUE, is_in = colnames(x))
   meet_criteria(col_mo, allow_class = "character", has_length = 1, allow_NULL = TRUE, is_in = colnames(x))
@@ -175,13 +221,10 @@ first_isolate <- function(x,
     }
   }
   
-  stop_ifnot(is.data.frame(x), "`x` must be a data.frame")
-  stop_if(any(dim(x) == 0), "`x` must contain rows and columns")
-  
   # remove data.table, grouping from tibbles, etc.
   x <- as.data.frame(x, stringsAsFactors = FALSE)
   
-  # try to find columns based on type
+ # try to find columns based on type
   # -- mo
   if (is.null(col_mo)) {
     col_mo <- search_type_in_df(x = x, type = "mo")
@@ -299,13 +342,32 @@ first_isolate <- function(x,
     )
   }
   
-  # no isolates found
+  # speed up - return immediately if obvious
   if (abs(row.start) == Inf | abs(row.end) == Inf) {
     if (info == TRUE) {
-      message_("=> Found ", font_bold("no isolates"), as_note = FALSE)
+      message_("=> Found ", font_bold("no isolates"),
+               add_fn = font_black, 
+               as_note = FALSE)
     }
     return(rep(FALSE, nrow(x)))
   }
+  if (row.start == row.end) {
+    if (info == TRUE) {
+      message_("=> Found ", font_bold("1 isolate"), ", as the data only contained 1 row", 
+               add_fn = font_black,
+               as_note = FALSE)
+    }
+    return(TRUE)
+  }
+  if (length(c(row.start:row.end)) == pm_n_distinct(x[c(row.start:row.end), col_mo, drop = TRUE])) {
+    if (info == TRUE) {
+      message_("=> Found ", font_bold(paste(length(c(row.start:row.end)), "isolates")),
+               ", as all isolates were different microorganisms",
+               add_fn = font_black,
+               as_note = FALSE)
+    }
+    return(rep(TRUE, length(c(row.start:row.end))))
+  }
   
   # did find some isolates - add new index numbers of rows
   x$newvar_row_index_sorted <- seq_len(nrow(x))
@@ -511,7 +573,66 @@ filter_first_weighted_isolate <- function(x,
   subset(x, first_isolate(x = y,
                           col_date = col_date,
                           col_patient_id = col_patient_id,
-                          col_mo = col_mo,
-                          col_keyantibiotics = col_keyantibiotics,
                           ...))
 }
+
+#' @rdname first_isolate
+#' @export
+is_new_episode <- function(.data,
+                           episode_days = 365,
+                           col_date = NULL,
+                           col_patient_id = NULL) {
+  # Wrapper around first_isolate(): every row gets the same dummy microorganism,
+  # so the returned value only reflects patient episodes of `episode_days` days.
+  if (missing(.data)) {
+    # look it up - this also supports grouping variables
+    cur_data <- import_fn("cur_data", "dplyr", error_on_fail = FALSE)
+    if (is.null(cur_data)) {
+      stop_("parameter '.data' not set.")
+    }
+    .data <- cur_data()
+  }
+  meet_criteria(.data, allow_class = "data.frame") # also checks dimensions to be >0
+  meet_criteria(col_date, allow_class = "character", has_length = 1, allow_NULL = TRUE, is_in = colnames(.data))
+  meet_criteria(col_patient_id, allow_class = "character", has_length = 1, allow_NULL = TRUE, is_in = colnames(.data))
+  meet_criteria(episode_days, allow_class = c("numeric", "integer"), has_length = 1)
+
+  # get i'th ID of group, so notices will only be thrown once; any error from
+  # cur_group_id() (e.g. outside a dplyr verb) is treated as being the first group
+  cur_group_id <- import_fn("cur_group_id", "dplyr", error_on_fail = FALSE)
+  first_group <- tryCatch(is.null(cur_group_id) || cur_group_id() == 1,
+                          error = function(e) TRUE)
+
+  # try to find columns based on type
+  # -- date
+  if (is.null(col_date)) {
+    col_date <- search_type_in_df(x = .data, type = "date", info = first_group)
+    stop_if(is.null(col_date), "`col_date` must be set")
+  }
+
+  # -- patient id
+  if (is.null(col_patient_id)) {
+    if (all(c("First name", "Last name", "Sex") %in% colnames(.data))) {
+      # WHONET support
+      .data$patient_id <- paste(.data$`First name`, .data$`Last name`, .data$Sex)
+      col_patient_id <- "patient_id"
+      # reuse `first_group`, as its tryCatch() guards the cur_group_id() call
+      if (first_group) {
+        message_("Using combined columns `", font_bold("First name"), "`, `", font_bold("Last name"), "` and `", font_bold("Sex"), "` as input for `col_patient_id`")
+      }
+    } else {
+      col_patient_id <- search_type_in_df(x = .data, type = "patient_id", info = first_group)
+    }
+    stop_if(is.null(col_patient_id), "`col_patient_id` must be set")
+  }
+
+  # create any random mo, so first isolates can be calculated
+  .data$a94a8fe5 <- as.mo("Escherichia coli")
+
+  first_isolate(.data,
+                col_date = col_date,
+                col_patient_id = col_patient_id,
+                episode_days = episode_days,
+                col_mo = "a94a8fe5",
+                info = FALSE)
+}
diff --git a/docs/404.html b/docs/404.html
index 366719f6a..edc34440f 100644
--- a/docs/404.html
+++ b/docs/404.html
@@ -81,7 +81,7 @@
       </button>
       <span class="navbar-brand">
         <a class="navbar-link" href="https://msberends.github.io/AMR//index.html">AMR (for R)</a>
-        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.4.0.9023</span>
+        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.4.0.9024</span>
       </span>
     </div>
 
diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html
index 05ebc80cb..8c61edb99 100644
--- a/docs/LICENSE-text.html
+++ b/docs/LICENSE-text.html
@@ -81,7 +81,7 @@
       </button>
       <span class="navbar-brand">
         <a class="navbar-link" href="index.html">AMR (for R)</a>
-        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.4.0.9023</span>
+        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.4.0.9024</span>
       </span>
     </div>
 
diff --git a/docs/articles/index.html b/docs/articles/index.html
index b815de3d1..0e812de69 100644
--- a/docs/articles/index.html
+++ b/docs/articles/index.html
@@ -81,7 +81,7 @@
       </button>
       <span class="navbar-brand">
         <a class="navbar-link" href="../index.html">AMR (for R)</a>
-        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.4.0.9023</span>
+        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.4.0.9024</span>
       </span>
     </div>
 
diff --git a/docs/authors.html b/docs/authors.html
index c8229f1c3..cf09397b8 100644
--- a/docs/authors.html
+++ b/docs/authors.html
@@ -81,7 +81,7 @@
       </button>
       <span class="navbar-brand">
         <a class="navbar-link" href="index.html">AMR (for R)</a>
-        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.4.0.9023</span>
+        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.4.0.9024</span>
       </span>
     </div>
 
diff --git a/docs/index.html b/docs/index.html
index 07cdd4a81..60f4a7ffb 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -43,7 +43,7 @@
       </button>
       <span class="navbar-brand">
         <a class="navbar-link" href="index.html">AMR (for R)</a>
-        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.4.0.9023</span>
+        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.4.0.9024</span>
       </span>
     </div>
 
diff --git a/docs/news/index.html b/docs/news/index.html
index fc57574e6..13e22ebe2 100644
--- a/docs/news/index.html
+++ b/docs/news/index.html
@@ -81,7 +81,7 @@
       </button>
       <span class="navbar-brand">
         <a class="navbar-link" href="../index.html">AMR (for R)</a>
-        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.4.0.9023</span>
+        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.4.0.9024</span>
       </span>
     </div>
 
@@ -236,9 +236,9 @@
       <small>Source: <a href='https://github.com/msberends/AMR/blob/master/NEWS.md'><code>NEWS.md</code></a></small>
     </div>
 
-    <div id="amr-1409023" class="section level1">
-<h1 class="page-header" data-toc-text="1.4.0.9023">
-<a href="#amr-1409023" class="anchor"></a>AMR 1.4.0.9023<small> Unreleased </small>
+    <div id="amr-1409024" class="section level1">
+<h1 class="page-header" data-toc-text="1.4.0.9024">
+<a href="#amr-1409024" class="anchor"></a>AMR 1.4.0.9024<small> Unreleased </small>
 </h1>
 <div id="last-updated-17-november-2020" class="section level2">
 <h2 class="hasAnchor">
@@ -248,6 +248,7 @@
 <h3 class="hasAnchor">
 <a href="#new" class="anchor"></a>New</h3>
 <ul>
+<li><p>Function <code><a href="../reference/first_isolate.html">is_new_episode()</a></code> to determine patient episodes which are not necessarily based on microorganisms. It also supports grouped variables with e.g. <code><a href="https://dplyr.tidyverse.org/reference/mutate.html">mutate()</a></code> and <code><a href="https://dplyr.tidyverse.org/reference/summarise.html">summarise()</a></code> of the <code>dplyr</code> package: <code>r  example_isolates %&gt;%    group_by(hospital_id) %&gt;%     summarise(patients = n_distinct(patient_id),              n_episodes_365 = sum(is_new_episode(episode_days = 365)),              n_episodes_60 = sum(is_new_episode(episode_days = 60)))</code></p></li>
 <li>
 <p>Functions <code><a href="../reference/mo_property.html">mo_is_gram_negative()</a></code> and <code><a href="../reference/mo_property.html">mo_is_gram_positive()</a></code> as wrappers around <code><a href="../reference/mo_property.html">mo_gramstain()</a></code>. They always return <code>TRUE</code> or <code>FALSE</code> (except when the input is <code>NA</code> or the MO code is <code>UNKNOWN</code>), thus always return <code>FALSE</code> for species outside the taxonomic kingdom of Bacteria. If you have the <code>dplyr</code> package installed, they can even determine the column with microorganisms themselves when used inside <code>dplyr</code> verbs:</p>
 <div class="sourceCode" id="cb1"><pre class="downlit">
diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml
index 3618acfe1..4884f74af 100644
--- a/docs/pkgdown.yml
+++ b/docs/pkgdown.yml
@@ -12,7 +12,7 @@ articles:
   datasets: datasets.html
   resistance_predict: resistance_predict.html
   welcome_to_AMR: welcome_to_AMR.html
-last_built: 2020-11-17T10:53Z
+last_built: 2020-11-17T15:56Z
 urls:
   reference: https://msberends.github.io/AMR//reference
   article: https://msberends.github.io/AMR//articles
diff --git a/docs/reference/first_isolate.html b/docs/reference/first_isolate.html
index a8c060b95..1dcba621e 100644
--- a/docs/reference/first_isolate.html
+++ b/docs/reference/first_isolate.html
@@ -49,7 +49,7 @@
   <script src="../extra.js"></script>
 
 <meta property="og:title" content="Determine first (weighted) isolates — first_isolate" />
-<meta property="og:description" content="Determine first (weighted) isolates of all microorganisms of every patient per episode and (if needed) per specimen type." />
+<meta property="og:description" content="Determine first (weighted) isolates of all microorganisms of every patient per episode and (if needed) per specimen type. To determine patient episodes not necessarily based on microorganisms, use is_new_episode() that also supports grouping with the dplyr package, see Examples." />
 <meta property="og:image" content="https://msberends.github.io/AMR/logo.png" />
 
 
@@ -82,7 +82,7 @@
       </button>
       <span class="navbar-brand">
         <a class="navbar-link" href="../index.html">AMR (for R)</a>
-        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.4.0.9000</span>
+        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.4.0.9024</span>
       </span>
     </div>
 
@@ -239,7 +239,7 @@
     </div>
 
     <div class="ref-description">
-    <p>Determine first (weighted) isolates of all microorganisms of every patient per episode and (if needed) per specimen type.</p>
+    <p>Determine first (weighted) isolates of all microorganisms of every patient per episode and (if needed) per specimen type. To determine patient episodes not necessarily based on microorganisms, use <code>is_new_episode()</code> that also supports grouping with the <code>dplyr</code> package, see <em>Examples</em>.</p>
     </div>
 
     <pre class="usage"><span class='fu'>first_isolate</span><span class='op'>(</span>
@@ -278,18 +278,25 @@
   col_mo <span class='op'>=</span> <span class='cn'>NULL</span>,
   col_keyantibiotics <span class='op'>=</span> <span class='cn'>NULL</span>,
   <span class='va'>...</span>
+<span class='op'>)</span>
+
+<span class='fu'>is_new_episode</span><span class='op'>(</span>
+  <span class='va'>.data</span>,
+  episode_days <span class='op'>=</span> <span class='fl'>365</span>,
+  col_date <span class='op'>=</span> <span class='cn'>NULL</span>,
+  col_patient_id <span class='op'>=</span> <span class='cn'>NULL</span>
 <span class='op'>)</span></pre>
 
     <h2 class="hasAnchor" id="arguments"><a class="anchor" href="#arguments"></a>Arguments</h2>
     <table class="ref-arguments">
     <colgroup><col class="name" /><col class="desc" /></colgroup>
     <tr>
-      <th>x</th>
+      <th>x, .data</th>
       <td><p>a <a href='https://rdrr.io/r/base/data.frame.html'>data.frame</a> containing isolates.</p></td>
     </tr>
     <tr>
       <th>col_date</th>
-      <td><p>column name of the result date (or date that is was received on the lab), defaults to the first column of with a date class</p></td>
+      <td><p>column name of the result date (or date that it was received at the lab), defaults to the first column with a date class</p></td>
     </tr>
     <tr>
       <th>col_patient_id</th>
@@ -366,10 +373,17 @@
     <p>A <code><a href='https://rdrr.io/r/base/logical.html'>logical</a></code> vector</p>
     <h2 class="hasAnchor" id="details"><a class="anchor" href="#details"></a>Details</h2>
 
-    <p><strong>WHY THIS IS SO IMPORTANT</strong> <br />
-To conduct an analysis of antimicrobial resistance, you should only include the first isolate of every patient per episode <a href='https:/pubmed.ncbi.nlm.nih.gov/17304462/'>(ref)</a>. If you would not do this, you could easily get an overestimate or underestimate of the resistance of an antibiotic. Imagine that a patient was admitted with an MRSA and that it was found in 5 different blood cultures the following week. The resistance percentage of oxacillin of all <em>S. aureus</em> isolates would be overestimated, because you included this MRSA more than once. It would be <a href='https://en.wikipedia.org/wiki/Selection_bias'>selection bias</a>.</p>
-<p>All isolates with a microbial ID of <code>NA</code> will be excluded as first isolate.</p>
-<p>The functions <code>filter_first_isolate()</code> and <code>filter_first_weighted_isolate()</code> are helper functions to quickly filter on first isolates. The function <code>filter_first_isolate()</code> is essentially equal to either:</p><pre>  <span class='va'>x</span><span class='op'>[</span><span class='fu'>first_isolate</span><span class='op'>(</span><span class='va'>x</span>, <span class='va'>...</span><span class='op'>)</span>, <span class='op'>]</span>
+    <p>The <code>is_new_episode()</code> function is a wrapper around the <code>first_isolate()</code> function and can be used for data sets without isolates to just determine patient episodes based on any combination of grouping variables (using <code>dplyr</code>), please see <em>Examples</em>. Since it runs <code>first_isolate()</code> for every group, it is quite slow.</p>
+<p>All isolates with a microbial ID of <code>NA</code> will be excluded as first isolate.</p><h3 class='hasAnchor' id='arguments'><a class='anchor' href='#arguments'></a>Why this is so important</h3>
+
+
+<p>To conduct an analysis of antimicrobial resistance, you should only include the first isolate of every patient per episode <a href='https:/pubmed.ncbi.nlm.nih.gov/17304462/'>(ref)</a>. If you would not do this, you could easily get an overestimate or underestimate of the resistance of an antibiotic. Imagine that a patient was admitted with an MRSA and that it was found in 5 different blood cultures the following week. The resistance percentage of oxacillin of all <em>S. aureus</em> isolates would be overestimated, because you included this MRSA more than once. It would be <a href='https://en.wikipedia.org/wiki/Selection_bias'>selection bias</a>.</p>
+
+<h3 class='hasAnchor' id='arguments'><a class='anchor' href='#arguments'></a><code>filter_*()</code> shortcuts</h3>
+
+
+<p>The functions <code>filter_first_isolate()</code> and <code>filter_first_weighted_isolate()</code> are helper functions to quickly filter on first isolates.</p>
+<p>The function <code>filter_first_isolate()</code> is essentially equal to either:</p><pre>  <span class='va'>x</span><span class='op'>[</span><span class='fu'>first_isolate</span><span class='op'>(</span><span class='va'>x</span>, <span class='va'>...</span><span class='op'>)</span>, <span class='op'>]</span>
   <span class='va'>x</span> <span class='op'>%&gt;%</span> <span class='fu'><a href='https://dplyr.tidyverse.org/reference/filter.html'>filter</a></span><span class='op'>(</span><span class='fu'>first_isolate</span><span class='op'>(</span><span class='va'>x</span>, <span class='va'>...</span><span class='op'>)</span><span class='op'>)</span>
 </pre>
 
@@ -381,6 +395,7 @@ To conduct an analysis of antimicrobial resistance, you should only include the
     <span class='fu'><a href='https://dplyr.tidyverse.org/reference/select.html'>select</a></span><span class='op'>(</span><span class='op'>-</span><span class='va'>only_weighted_firsts</span>, <span class='op'>-</span><span class='va'>keyab</span><span class='op'>)</span>
 </pre>
 
+
     <h2 class="hasAnchor" id="key-antibiotics"><a class="anchor" href="#key-antibiotics"></a>Key antibiotics</h2>
 
     
@@ -415,21 +430,22 @@ The <a href='lifecycle.html'>lifecycle</a> of this function is <strong>stable</s
 <span class='co'># basic filtering on first isolates</span>
 <span class='va'>example_isolates</span><span class='op'>[</span><span class='fu'>first_isolate</span><span class='op'>(</span><span class='va'>example_isolates</span><span class='op'>)</span>, <span class='op'>]</span>
 
+<span class='co'># filtering based on isolates ----------------------------------------------</span>
 <span class='co'># \donttest{</span>
 <span class='kw'>if</span> <span class='op'>(</span><span class='kw'><a href='https://rdrr.io/r/base/library.html'>require</a></span><span class='op'>(</span><span class='st'><a href='https://dplyr.tidyverse.org'>"dplyr"</a></span><span class='op'>)</span><span class='op'>)</span> <span class='op'>{</span>
-  <span class='co'># Filter on first isolates:</span>
+  <span class='co'># filter on first isolates:</span>
   <span class='va'>example_isolates</span> <span class='op'>%&gt;%</span>
     <span class='fu'><a href='https://dplyr.tidyverse.org/reference/mutate.html'>mutate</a></span><span class='op'>(</span>first_isolate <span class='op'>=</span> <span class='fu'>first_isolate</span><span class='op'>(</span><span class='va'>.</span><span class='op'>)</span><span class='op'>)</span> <span class='op'>%&gt;%</span>
     <span class='fu'><a href='https://dplyr.tidyverse.org/reference/filter.html'>filter</a></span><span class='op'>(</span><span class='va'>first_isolate</span> <span class='op'>==</span> <span class='cn'>TRUE</span><span class='op'>)</span>
  
-  <span class='co'># Short-hand versions:</span>
+  <span class='co'># short-hand versions:</span>
   <span class='va'>example_isolates</span> <span class='op'>%&gt;%</span>
     <span class='fu'>filter_first_isolate</span><span class='op'>(</span><span class='op'>)</span>
     
   <span class='va'>example_isolates</span> <span class='op'>%&gt;%</span>
     <span class='fu'>filter_first_weighted_isolate</span><span class='op'>(</span><span class='op'>)</span>
   
-  <span class='co'># Now let's see if first isolates matter:</span>
+  <span class='co'># now let's see if first isolates matter:</span>
   <span class='va'>A</span> <span class='op'>&lt;-</span> <span class='va'>example_isolates</span> <span class='op'>%&gt;%</span>
     <span class='fu'><a href='https://dplyr.tidyverse.org/reference/group_by.html'>group_by</a></span><span class='op'>(</span><span class='va'>hospital_id</span><span class='op'>)</span> <span class='op'>%&gt;%</span>
     <span class='fu'><a href='https://dplyr.tidyverse.org/reference/summarise.html'>summarise</a></span><span class='op'>(</span>count <span class='op'>=</span> <span class='fu'><a href='count.html'>n_rsi</a></span><span class='op'>(</span><span class='va'>GEN</span><span class='op'>)</span>,            <span class='co'># gentamicin availability</span>
@@ -446,6 +462,42 @@ The <a href='lifecycle.html'>lifecycle</a> of this function is <strong>stable</s
   <span class='co'># Gentamicin resistance in hospital D appears to be 3.7% higher than</span>
   <span class='co'># when you (erroneously) would have used all isolates for analysis.</span>
 <span class='op'>}</span>
+
+<span class='co'># filtering based on any other condition -----------------------------------</span>
+
+<span class='kw'>if</span> <span class='op'>(</span><span class='kw'><a href='https://rdrr.io/r/base/library.html'>require</a></span><span class='op'>(</span><span class='st'><a href='https://dplyr.tidyverse.org'>"dplyr"</a></span><span class='op'>)</span><span class='op'>)</span> <span class='op'>{</span>
+  <span class='co'># is_new_episode() can be used in dplyr verbs to determine patient</span>
+  <span class='co'># episodes based on any (combination of) grouping variables:</span>
+  <span class='va'>example_isolates</span> <span class='op'>%&gt;%</span>
+    <span class='fu'><a href='https://dplyr.tidyverse.org/reference/mutate.html'>mutate</a></span><span class='op'>(</span>condition <span class='op'>=</span> <span class='fu'><a href='https://rdrr.io/r/base/sample.html'>sample</a></span><span class='op'>(</span>x <span class='op'>=</span> <span class='fu'><a href='https://rdrr.io/r/base/c.html'>c</a></span><span class='op'>(</span><span class='st'>"A"</span>, <span class='st'>"B"</span>, <span class='st'>"C"</span><span class='op'>)</span>, 
+                              size <span class='op'>=</span> <span class='fl'>2000</span>,
+                              replace <span class='op'>=</span> <span class='cn'>TRUE</span><span class='op'>)</span><span class='op'>)</span> <span class='op'>%&gt;%</span> 
+    <span class='fu'><a href='https://dplyr.tidyverse.org/reference/group_by.html'>group_by</a></span><span class='op'>(</span><span class='va'>condition</span><span class='op'>)</span> <span class='op'>%&gt;%</span>
+    <span class='fu'><a href='https://dplyr.tidyverse.org/reference/mutate.html'>mutate</a></span><span class='op'>(</span>new_episode <span class='op'>=</span> <span class='fu'>is_new_episode</span><span class='op'>(</span><span class='op'>)</span><span class='op'>)</span>
+  
+  <span class='va'>example_isolates</span> <span class='op'>%&gt;%</span>
+    <span class='fu'><a href='https://dplyr.tidyverse.org/reference/group_by.html'>group_by</a></span><span class='op'>(</span><span class='va'>hospital_id</span><span class='op'>)</span> <span class='op'>%&gt;%</span> 
+    <span class='fu'><a href='https://dplyr.tidyverse.org/reference/summarise.html'>summarise</a></span><span class='op'>(</span>patients <span class='op'>=</span> <span class='fu'><a href='https://dplyr.tidyverse.org/reference/n_distinct.html'>n_distinct</a></span><span class='op'>(</span><span class='va'>patient_id</span><span class='op'>)</span>,
+              n_episodes_365 <span class='op'>=</span> <span class='fu'><a href='https://rdrr.io/r/base/sum.html'>sum</a></span><span class='op'>(</span><span class='fu'>is_new_episode</span><span class='op'>(</span>episode_days <span class='op'>=</span> <span class='fl'>365</span><span class='op'>)</span><span class='op'>)</span>,
+              n_episodes_60  <span class='op'>=</span> <span class='fu'><a href='https://rdrr.io/r/base/sum.html'>sum</a></span><span class='op'>(</span><span class='fu'>is_new_episode</span><span class='op'>(</span>episode_days <span class='op'>=</span> <span class='fl'>60</span><span class='op'>)</span><span class='op'>)</span>,
+              n_episodes_30  <span class='op'>=</span> <span class='fu'><a href='https://rdrr.io/r/base/sum.html'>sum</a></span><span class='op'>(</span><span class='fu'>is_new_episode</span><span class='op'>(</span>episode_days <span class='op'>=</span> <span class='fl'>30</span><span class='op'>)</span><span class='op'>)</span><span class='op'>)</span>
+    
+    
+  <span class='co'># grouping on microorganisms leads to the same results as first_isolate():</span>
+  <span class='va'>x</span> <span class='op'>&lt;-</span> <span class='va'>example_isolates</span> <span class='op'>%&gt;%</span>
+    <span class='fu'>filter_first_isolate</span><span class='op'>(</span>include_unknown <span class='op'>=</span> <span class='cn'>TRUE</span><span class='op'>)</span>
+    
+  <span class='va'>y</span> <span class='op'>&lt;-</span> <span class='va'>example_isolates</span> <span class='op'>%&gt;%</span>
+    <span class='fu'><a href='https://dplyr.tidyverse.org/reference/group_by.html'>group_by</a></span><span class='op'>(</span><span class='va'>mo</span><span class='op'>)</span> <span class='op'>%&gt;%</span>
+    <span class='fu'><a href='https://dplyr.tidyverse.org/reference/filter.html'>filter</a></span><span class='op'>(</span><span class='fu'>is_new_episode</span><span class='op'>(</span><span class='op'>)</span><span class='op'>)</span>
+
+  <span class='fu'><a href='https://rdrr.io/r/base/identical.html'>identical</a></span><span class='op'>(</span><span class='va'>x</span><span class='op'>$</span><span class='va'>patient_id</span>, <span class='va'>y</span><span class='op'>$</span><span class='va'>patient_id</span><span class='op'>)</span>
+  
+  <span class='co'># but now you can group on isolates and many more:</span>
+  <span class='va'>example_isolates</span> <span class='op'>%&gt;%</span>
+    <span class='fu'><a href='https://dplyr.tidyverse.org/reference/group_by.html'>group_by</a></span><span class='op'>(</span><span class='va'>mo</span>, <span class='va'>hospital_id</span>, <span class='va'>ward_icu</span><span class='op'>)</span> <span class='op'>%&gt;%</span>
+    <span class='fu'><a href='https://dplyr.tidyverse.org/reference/mutate.html'>mutate</a></span><span class='op'>(</span>flag_episode <span class='op'>=</span> <span class='fu'>is_new_episode</span><span class='op'>(</span><span class='op'>)</span><span class='op'>)</span>
+<span class='op'>}</span>
 <span class='co'># }</span>
 </pre>
   </div>
diff --git a/docs/reference/index.html b/docs/reference/index.html
index 05da0206c..d6a81d37c 100644
--- a/docs/reference/index.html
+++ b/docs/reference/index.html
@@ -81,7 +81,7 @@
       </button>
       <span class="navbar-brand">
         <a class="navbar-link" href="../index.html">AMR (for R)</a>
-        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.4.0.9023</span>
+        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.4.0.9024</span>
       </span>
     </div>
 
@@ -478,7 +478,7 @@
       </tr><tr>
         
         <td>
-          <p><code><a href="first_isolate.html">first_isolate()</a></code> <code><a href="first_isolate.html">filter_first_isolate()</a></code> <code><a href="first_isolate.html">filter_first_weighted_isolate()</a></code> </p>
+          <p><code><a href="first_isolate.html">first_isolate()</a></code> <code><a href="first_isolate.html">filter_first_isolate()</a></code> <code><a href="first_isolate.html">filter_first_weighted_isolate()</a></code> <code><a href="first_isolate.html">is_new_episode()</a></code> </p>
         </td>
         <td><p>Determine first (weighted) isolates</p></td>
       </tr><tr>
diff --git a/docs/survey.html b/docs/survey.html
index 2ec337622..2d50307e4 100644
--- a/docs/survey.html
+++ b/docs/survey.html
@@ -81,7 +81,7 @@
       </button>
       <span class="navbar-brand">
         <a class="navbar-link" href="index.html">AMR (for R)</a>
-        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.4.0.9023</span>
+        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.4.0.9024</span>
       </span>
     </div>
 
diff --git a/man/first_isolate.Rd b/man/first_isolate.Rd
index 18c0c9a3f..4e0f18f3b 100755
--- a/man/first_isolate.Rd
+++ b/man/first_isolate.Rd
@@ -4,6 +4,7 @@
 \alias{first_isolate}
 \alias{filter_first_isolate}
 \alias{filter_first_weighted_isolate}
+\alias{is_new_episode}
 \title{Determine first (weighted) isolates}
 \source{
 Methodology of this function is strictly based on:
@@ -48,11 +49,18 @@ filter_first_weighted_isolate(
   col_keyantibiotics = NULL,
   ...
 )
+
+is_new_episode(
+  .data,
+  episode_days = 365,
+  col_date = NULL,
+  col_patient_id = NULL
+)
 }
 \arguments{
-\item{x}{a \link{data.frame} containing isolates.}
+\item{x, .data}{a \link{data.frame} containing isolates.}
 
-\item{col_date}{column name of the result date (or date that is was received on the lab), defaults to the first column of with a date class}
+\item{col_date}{column name of the result date (or date that it was received at the lab), defaults to the first column with a date class}
 
 \item{col_patient_id}{column name of the unique IDs of the patients, defaults to the first column that starts with 'patient' or 'patid' (case insensitive)}
 
@@ -90,15 +98,22 @@ filter_first_weighted_isolate(
 A \code{\link{logical}} vector
 }
 \description{
-Determine first (weighted) isolates of all microorganisms of every patient per episode and (if needed) per specimen type.
+Determine first (weighted) isolates of all microorganisms of every patient per episode and (if needed) per specimen type. To determine patient episodes not necessarily based on microorganisms, use \code{\link[=is_new_episode]{is_new_episode()}} that also supports grouping with the \code{dplyr} package, see \emph{Examples}.
 }
 \details{
-\strong{WHY THIS IS SO IMPORTANT} \cr
-To conduct an analysis of antimicrobial resistance, you should only include the first isolate of every patient per episode \href{https:/pubmed.ncbi.nlm.nih.gov/17304462/}{(ref)}. If you would not do this, you could easily get an overestimate or underestimate of the resistance of an antibiotic. Imagine that a patient was admitted with an MRSA and that it was found in 5 different blood cultures the following week. The resistance percentage of oxacillin of all \emph{S. aureus} isolates would be overestimated, because you included this MRSA more than once. It would be \href{https://en.wikipedia.org/wiki/Selection_bias}{selection bias}.
+The \code{\link[=is_new_episode]{is_new_episode()}} function is a wrapper around the \code{\link[=first_isolate]{first_isolate()}} function and can be used for data sets without isolates to just determine patient episodes based on any combination of grouping variables (using \code{dplyr}); please see \emph{Examples}. Since it runs \code{\link[=first_isolate]{first_isolate()}} for every group, it is quite slow.
 
 All isolates with a microbial ID of \code{NA} will be excluded as first isolate.
+\subsection{Why this is so important}{
 
-The functions \code{\link[=filter_first_isolate]{filter_first_isolate()}} and \code{\link[=filter_first_weighted_isolate]{filter_first_weighted_isolate()}} are helper functions to quickly filter on first isolates. The function \code{\link[=filter_first_isolate]{filter_first_isolate()}} is essentially equal to either:\preformatted{  x[first_isolate(x, ...), ]
+To conduct an analysis of antimicrobial resistance, you should only include the first isolate of every patient per episode \href{https://pubmed.ncbi.nlm.nih.gov/17304462/}{(ref)}. If you would not do this, you could easily get an overestimate or underestimate of the resistance of an antibiotic. Imagine that a patient was admitted with an MRSA and that it was found in 5 different blood cultures the following week. The resistance percentage of oxacillin of all \emph{S. aureus} isolates would be overestimated, because you included this MRSA more than once. It would be \href{https://en.wikipedia.org/wiki/Selection_bias}{selection bias}.
+}
+
+\subsection{\verb{filter_*()} shortcuts}{
+
+The functions \code{\link[=filter_first_isolate]{filter_first_isolate()}} and \code{\link[=filter_first_weighted_isolate]{filter_first_weighted_isolate()}} are helper functions to quickly filter on first isolates.
+
+The function \code{\link[=filter_first_isolate]{filter_first_isolate()}} is essentially equal to either:\preformatted{  x[first_isolate(x, ...), ]
   x \%>\% filter(first_isolate(x, ...))
 }
 
@@ -110,6 +125,7 @@ The function \code{\link[=filter_first_weighted_isolate]{filter_first_weighted_i
     select(-only_weighted_firsts, -keyab)
 }
 }
+}
 \section{Key antibiotics}{
 
 There are two ways to determine whether isolates can be included as first \emph{weighted} isolates which will give generally the same results:
@@ -143,21 +159,22 @@ On our website \url{https://msberends.github.io/AMR/} you can find \href{https:/
 # basic filtering on first isolates
 example_isolates[first_isolate(example_isolates), ]
 
+# filtering based on isolates ----------------------------------------------
 \donttest{
 if (require("dplyr")) {
-  # Filter on first isolates:
+  # filter on first isolates:
   example_isolates \%>\%
     mutate(first_isolate = first_isolate(.)) \%>\%
     filter(first_isolate == TRUE)
  
-  # Short-hand versions:
+  # short-hand versions:
   example_isolates \%>\%
     filter_first_isolate()
     
   example_isolates \%>\%
     filter_first_weighted_isolate()
   
-  # Now let's see if first isolates matter:
+  # now let's see if first isolates matter:
   A <- example_isolates \%>\%
     group_by(hospital_id) \%>\%
     summarise(count = n_rsi(GEN),            # gentamicin availability
@@ -174,6 +191,42 @@ if (require("dplyr")) {
   # Gentamicin resistance in hospital D appears to be 3.7\% higher than
   # when you (erroneously) would have used all isolates for analysis.
 }
+
+# filtering based on any other condition -----------------------------------
+
+if (require("dplyr")) {
+  # is_new_episode() can be used in dplyr verbs to determine patient
+  # episodes based on any (combination of) grouping variables:
+  example_isolates \%>\%
+    mutate(condition = sample(x = c("A", "B", "C"), 
+                              size = 2000,
+                              replace = TRUE)) \%>\% 
+    group_by(condition) \%>\%
+    mutate(new_episode = is_new_episode())
+  
+  example_isolates \%>\%
+    group_by(hospital_id) \%>\% 
+    summarise(patients = n_distinct(patient_id),
+              n_episodes_365 = sum(is_new_episode(episode_days = 365)),
+              n_episodes_60  = sum(is_new_episode(episode_days = 60)),
+              n_episodes_30  = sum(is_new_episode(episode_days = 30)))
+    
+    
+  # grouping on microorganisms leads to the same results as first_isolate():
+  x <- example_isolates \%>\%
+    filter_first_isolate(include_unknown = TRUE)
+    
+  y <- example_isolates \%>\%
+    group_by(mo) \%>\%
+    filter(is_new_episode())
+
+  identical(x$patient_id, y$patient_id)
+  
+  # but now you can group on isolates and many more:
+  example_isolates \%>\%
+    group_by(mo, hospital_id, ward_icu) \%>\%
+    mutate(flag_episode = is_new_episode())
+}
 }
 }
 \seealso{
diff --git a/tests/testthat/test-_misc.R b/tests/testthat/test-_misc.R
index 9d1ef03d5..b93097ce5 100755
--- a/tests/testthat/test-_misc.R
+++ b/tests/testthat/test-_misc.R
@@ -54,40 +54,3 @@ test_that("looking up ab columns works", {
   expect_warning(get_column_abx(dplyr::rename(example_isolates, thisone = AMX), amox = "thisone", tmp = "thisone", verbose = TRUE))
   expect_warning(get_column_abx(dplyr::rename(example_isolates, thisone = AMX), amox = "thisone", tmp = "thisone", verbose = FALSE))
 })
-
-test_that("imports work", {
-  skip_on_cran()
-  
-  import_functions <- c(
-    "anti_join" = "dplyr",
-    "cur_column" = "dplyr",
-    "freq.default" = "cleaner",
-    "full_join" = "dplyr",
-    "has_internet" = "curl",
-    "html_attr" = "rvest",
-    "html_children" = "rvest",
-    "html_node" = "rvest",
-    "html_nodes" = "rvest",
-    "html_table" = "rvest",
-    "html_text" = "rvest",
-    "inline_hist" = "skimr",
-    "inner_join" = "dplyr",
-    "insertText" = "rstudioapi",
-    "left_join" = "dplyr",
-    "new_pillar_shaft_simple" = "pillar",
-    "peek_mask" = "dplyr",
-    "peek_vars" = "tidyselect",
-    "read_excel" = "readxl",
-    "read_html" = "xml2",
-    "right_join" = "dplyr",
-    "semi_join" = "dplyr",
-    "sfl" = "skimr",
-    "showQuestion" = "rstudioapi")
-  
-  for (i in seq_len(length(import_functions))) {
-    fn <- names(import_functions)[i]
-    pkg <- unname(import_functions[i])
-    expect(!is.null(import_fn(name = fn, pkg = pkg, error_on_fail = FALSE)),
-           failure_message = paste0("Function ", pkg, "::", fn, "() does not exist"))
-  }
-})
diff --git a/tests/testthat/test-first_isolate.R b/tests/testthat/test-first_isolate.R
index 27f9009a6..3245f93ce 100755
--- a/tests/testthat/test-first_isolate.R
+++ b/tests/testthat/test-first_isolate.R
@@ -200,4 +200,15 @@ test_that("first isolates work", {
   expect_identical(filter_first_weighted_isolate(example_isolates),
                    subset(example_isolates, first_isolate(ex)))
   
+  # notice that all mo's are distinct, so all are TRUE
+  expect_true(all(example_isolates %pm>%
+                    pm_distinct(mo, .keep_all = TRUE) %pm>%
+                    first_isolate() == TRUE))
+  
+  library(dplyr)
+  # is_new_episode
+  old <- example_isolates %>% mutate(out = first_isolate(., include_unknown = TRUE))
+  new <- example_isolates %>% group_by(mo) %>% mutate(out = is_new_episode())
+  expect_identical(which(old$out), which(new$out))
+
 })
diff --git a/tests/testthat/test-zzz.R b/tests/testthat/test-zzz.R
new file mode 100644
index 000000000..2ea56e8f4
--- /dev/null
+++ b/tests/testthat/test-zzz.R
@@ -0,0 +1,69 @@
+# ==================================================================== #
+# TITLE                                                                #
+# Antimicrobial Resistance (AMR) Analysis for R                        #
+#                                                                      #
+# SOURCE                                                               #
+# https://github.com/msberends/AMR                                     #
+#                                                                      #
+# LICENCE                                                              #
+# (c) 2018-2020 Berends MS, Luz CF et al.                              #
+# Developed at the University of Groningen, the Netherlands, in        #
+# collaboration with non-profit organisations Certe Medical            #
+# Diagnostics & Advice, and University Medical Center Groningen.       # 
+#                                                                      #
+# This R package is free software; you can freely use and distribute   #
+# it for both personal and commercial purposes under the terms of the  #
+# GNU General Public License version 2.0 (GNU GPL-2), as published by  #
+# the Free Software Foundation.                                        #
+# We created this package for both routine data analysis and academic  #
+# research and it was publicly released in the hope that it will be    #
+# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY.              #
+#                                                                      #
+# Visit our website for the full manual and a complete tutorial about  #
+# how to conduct AMR analysis: https://msberends.github.io/AMR/        #
+# ==================================================================== #
+
+context("zzz.R")
+
+# each listed function must be resolvable via import_fn() from its mapped package
+test_that("imports work", {
+  skip_on_cran()
+  
+  # names are the function names, values the packages expected to provide them
+  import_functions <- c(
+    "anti_join" = "dplyr",
+    "cur_column" = "dplyr",
+    "cur_data" = "dplyr",
+    "document_position" = "rstudioapi",
+    "document_range" = "rstudioapi",
+    "freq.default" = "cleaner",
+    "full_join" = "dplyr",
+    "getSourceEditorContext" = "rstudioapi",
+    "has_internet" = "curl",
+    "html_attr" = "rvest",
+    "html_children" = "rvest",
+    "html_node" = "rvest",
+    "html_nodes" = "rvest",
+    "html_table" = "rvest",
+    "html_text" = "rvest",
+    "inline_hist" = "skimr",
+    "inner_join" = "dplyr",
+    "insertText" = "rstudioapi",
+    "left_join" = "dplyr",
+    "new_pillar_shaft_simple" = "pillar",
+    "peek_mask" = "dplyr",
+    "peek_vars" = "tidyselect",
+    "read_excel" = "readxl",
+    "read_html" = "xml2",
+    "right_join" = "dplyr",
+    "semi_join" = "dplyr",
+    "sfl" = "skimr",
+    "showQuestion" = "rstudioapi")
+  
+  for (i in seq_along(import_functions)) {
+    fn <- names(import_functions)[i]
+    pkg <- unname(import_functions[i])
+    expect(!is.null(import_fn(name = fn, pkg = pkg, error_on_fail = FALSE)),
+           failure_message = paste0("Function ", pkg, "::", fn, "() does not exist"))
+  }
+})