(v1.4.0.9024) is_new_episode()

This commit is contained in:
dr. M.S. (Matthijs) Berends 2020-11-17 16:57:41 +01:00
parent 0800d33228
commit 363218da7e
20 changed files with 379 additions and 94 deletions

View File

@ -64,6 +64,7 @@ jobs:
- {os: ubuntu-16.04, r: '3.5', allowfail: false, rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"}
- {os: ubuntu-16.04, r: '3.4', allowfail: true, rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"}
- {os: ubuntu-16.04, r: '3.3', allowfail: true, rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"}
- {os: ubuntu-16.04, r: '3.2', allowfail: true, rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"}
# older R versions cannot be tested, since tidyverse only supports last 4 R x.x versions
env:
R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
@ -120,14 +121,15 @@ jobs:
shell: Rscript {0}
- name: Check on older R versions
if: matrix.config.r == '3.3'
# no vignettes here, since they rely on R 3.3 and higher
if: matrix.config.r == '3.2'
env:
_R_CHECK_CRAN_INCOMING_: false
run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran", "--no-build-vignettes" , "--ignore-vignettes"), error_on = "warning", check_dir = "check")
run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran", "--ignore-vignettes"), build_args = "--no-build-vignettes" , error_on = "warning", check_dir = "check")
shell: Rscript {0}
- name: Check on newer R versions
if: matrix.config.r != '3.3'
if: matrix.config.r != '3.2'
env:
_R_CHECK_CRAN_INCOMING_: false
run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check")

View File

@ -1,5 +1,5 @@
Package: AMR
Version: 1.4.0.9023
Version: 1.4.0.9024
Date: 2020-11-17
Title: Antimicrobial Resistance Analysis
Authors@R: c(

View File

@ -152,6 +152,7 @@ export(is.mic)
export(is.mo)
export(is.rsi)
export(is.rsi.eligible)
export(is_new_episode)
export(key_antibiotics)
export(key_antibiotics_equal)
export(kurtosis)

10
NEWS.md
View File

@ -1,7 +1,15 @@
# AMR 1.4.0.9023
# AMR 1.4.0.9024
## <small>Last updated: 17 November 2020</small>
### New
* Function `is_new_episode()` to determine patient episodes which are not necessarily based on microorganisms. It also supports grouped variables with e.g. `mutate()` and `summarise()` of the `dplyr` package:
```r
example_isolates %>%
group_by(hospital_id) %>%
summarise(patients = n_distinct(patient_id),
n_episodes_365 = sum(is_new_episode(episode_days = 365)),
n_episodes_60 = sum(is_new_episode(episode_days = 60)))
```
* Functions `mo_is_gram_negative()` and `mo_is_gram_positive()` as wrappers around `mo_gramstain()`. They always return `TRUE` or `FALSE` (except when the input is `NA` or the MO code is `UNKNOWN`), thus always return `FALSE` for species outside the taxonomic kingdom of Bacteria. If you have the `dplyr` package installed, they can even determine the column with microorganisms themselves when used inside `dplyr` verbs:
```r
example_isolates %>%

View File

@ -139,9 +139,13 @@ check_dataset_integrity <- function() {
}
search_type_in_df <- function(x, type, info = TRUE) {
meet_criteria(x, allow_class = "data.frame")
meet_criteria(type, allow_class = "character", has_length = 1)
# try to find columns based on type
found <- NULL
# remove attributes from other packages
x <- as.data.frame(x, stringsAsFactors = FALSE)
colnames(x) <- trimws(colnames(x))

View File

@ -25,10 +25,10 @@
#' Determine first (weighted) isolates
#'
#' Determine first (weighted) isolates of all microorganisms of every patient per episode and (if needed) per specimen type.
#' Determine first (weighted) isolates of all microorganisms of every patient per episode and (if needed) per specimen type. To determine patient episodes not necessarily based on microorganisms, use [is_new_episode()] that also supports grouping with the `dplyr` package, see *Examples*.
#' @inheritSection lifecycle Stable lifecycle
#' @param x a [data.frame] containing isolates.
#' @param col_date column name of the result date (or date that is was received on the lab), defaults to the first column of with a date class
#' @param x,.data a [data.frame] containing isolates.
#' @param col_date column name of the result date (or the date that it was received at the lab), defaults to the first column with a date class
#' @param col_patient_id column name of the unique IDs of the patients, defaults to the first column that starts with 'patient' or 'patid' (case insensitive)
#' @param col_mo column name of the IDs of the microorganisms (see [as.mo()]), defaults to the first column of class [`mo`]. Values will be coerced using [as.mo()].
#' @param col_testcode column name of the test codes. Use `col_testcode = NULL` to **not** exclude certain test codes (like test codes for screening). In that case `testcodes_exclude` will be ignored.
@ -45,17 +45,26 @@
#' @param info print progress
#' @param include_unknown logical to determine whether 'unknown' microorganisms should be included too, i.e. microbial code `"UNKNOWN"`, which defaults to `FALSE`. For WHONET users, this means that all records with organism code `"con"` (*contamination*) will be excluded at default. Isolates with a microbial ID of `NA` will always be excluded as first isolate.
#' @param ... parameters passed on to the [first_isolate()] function
#' @details **WHY THIS IS SO IMPORTANT** \cr
#' @details The [is_new_episode()] function is a wrapper around the [first_isolate()] function and can be used for data sets without isolates to just determine patient episodes based on any combination of grouping variables (using `dplyr`), please see *Examples*. Since it runs [first_isolate()] for every group, it is quite slow.
#'
#' All isolates with a microbial ID of `NA` will be excluded as first isolate.
#'
#' ### Why this is so important
#' To conduct an analysis of antimicrobial resistance, you should only include the first isolate of every patient per episode [(ref)](https://pubmed.ncbi.nlm.nih.gov/17304462/). If you do not do this, you could easily get an overestimate or underestimate of the resistance of an antibiotic. Imagine that a patient was admitted with an MRSA and that it was found in 5 different blood cultures the following week. The resistance percentage of oxacillin of all *S. aureus* isolates would be overestimated, because you included this MRSA more than once. It would be [selection bias](https://en.wikipedia.org/wiki/Selection_bias).
#'
#' All isolates with a microbial ID of `NA` will be excluded as first isolate.
#' ### `filter_*()` shortcuts
#'
#' The functions [filter_first_isolate()] and [filter_first_weighted_isolate()] are helper functions to quickly filter on first isolates. The function [filter_first_isolate()] is essentially equal to either:
#' The functions [filter_first_isolate()] and [filter_first_weighted_isolate()] are helper functions to quickly filter on first isolates.
#'
#' The function [filter_first_isolate()] is essentially equal to either:
#'
#' ```
#' x[first_isolate(x, ...), ]
#' x %>% filter(first_isolate(x, ...))
#' ```
#'
#' The function [filter_first_weighted_isolate()] is essentially equal to:
#'
#' ```
#' x %>%
#' mutate(keyab = key_antibiotics(.)) %>%
@ -89,21 +98,22 @@
#' # basic filtering on first isolates
#' example_isolates[first_isolate(example_isolates), ]
#'
#' # filtering based on isolates ----------------------------------------------
#' \donttest{
#' if (require("dplyr")) {
#' # Filter on first isolates:
#' # filter on first isolates:
#' example_isolates %>%
#' mutate(first_isolate = first_isolate(.)) %>%
#' filter(first_isolate == TRUE)
#'
#' # Short-hand versions:
#' # short-hand versions:
#' example_isolates %>%
#' filter_first_isolate()
#'
#' example_isolates %>%
#' filter_first_weighted_isolate()
#'
#' # Now let's see if first isolates matter:
#' # now let's see if first isolates matter:
#' A <- example_isolates %>%
#' group_by(hospital_id) %>%
#' summarise(count = n_rsi(GEN), # gentamicin availability
@ -120,6 +130,42 @@
#' # Gentamicin resistance in hospital D appears to be 3.7% higher than
#' # when you (erroneously) would have used all isolates for analysis.
#' }
#'
#' # filtering based on any other condition -----------------------------------
#'
#' if (require("dplyr")) {
#' # is_new_episode() can be used in dplyr verbs to determine patient
#' # episodes based on any (combination of) grouping variables:
#' example_isolates %>%
#' mutate(condition = sample(x = c("A", "B", "C"),
#' size = 2000,
#' replace = TRUE)) %>%
#' group_by(condition) %>%
#' mutate(new_episode = is_new_episode())
#'
#' example_isolates %>%
#' group_by(hospital_id) %>%
#' summarise(patients = n_distinct(patient_id),
#' n_episodes_365 = sum(is_new_episode(episode_days = 365)),
#' n_episodes_60 = sum(is_new_episode(episode_days = 60)),
#' n_episodes_30 = sum(is_new_episode(episode_days = 30)))
#'
#'
#' # grouping on microorganisms leads to the same results as first_isolate():
#' x <- example_isolates %>%
#' filter_first_isolate(include_unknown = TRUE)
#'
#' y <- example_isolates %>%
#' group_by(mo) %>%
#' filter(is_new_episode())
#'
#' identical(x$patient_id, y$patient_id)
#'
#' # but now you can group on isolates and many more:
#' example_isolates %>%
#' group_by(mo, hospital_id, ward_icu) %>%
#' mutate(flag_episode = is_new_episode())
#' }
#' }
first_isolate <- function(x,
col_date = NULL,
@ -139,7 +185,7 @@ first_isolate <- function(x,
info = interactive(),
include_unknown = FALSE,
...) {
meet_criteria(x, allow_class = "data.frame")
meet_criteria(x, allow_class = "data.frame") # also checks dimensions to be >0
meet_criteria(col_date, allow_class = "character", has_length = 1, allow_NULL = TRUE, is_in = colnames(x))
meet_criteria(col_patient_id, allow_class = "character", has_length = 1, allow_NULL = TRUE, is_in = colnames(x))
meet_criteria(col_mo, allow_class = "character", has_length = 1, allow_NULL = TRUE, is_in = colnames(x))
@ -175,13 +221,10 @@ first_isolate <- function(x,
}
}
stop_ifnot(is.data.frame(x), "`x` must be a data.frame")
stop_if(any(dim(x) == 0), "`x` must contain rows and columns")
# remove data.table, grouping from tibbles, etc.
x <- as.data.frame(x, stringsAsFactors = FALSE)
# try to find columns based on type
# try to find columns based on type
# -- mo
if (is.null(col_mo)) {
col_mo <- search_type_in_df(x = x, type = "mo")
@ -299,13 +342,32 @@ first_isolate <- function(x,
)
}
# no isolates found
# speed up - return immediately if obvious
if (abs(row.start) == Inf | abs(row.end) == Inf) {
if (info == TRUE) {
message_("=> Found ", font_bold("no isolates"), as_note = FALSE)
message_("=> Found ", font_bold("no isolates"),
add_fn = font_black,
as_note = FALSE)
}
return(rep(FALSE, nrow(x)))
}
if (row.start == row.end) {
if (info == TRUE) {
message_("=> Found ", font_bold("1 isolate"), ", as the data only contained 1 row",
add_fn = font_black,
as_note = FALSE)
}
return(TRUE)
}
if (length(c(row.start:row.end)) == pm_n_distinct(x[c(row.start:row.end), col_mo, drop = TRUE])) {
if (info == TRUE) {
message_("=> Found ", font_bold(paste(length(c(row.start:row.end)), "isolates")),
", as all isolates were different microorganisms",
add_fn = font_black,
as_note = FALSE)
}
return(rep(TRUE, length(c(row.start:row.end))))
}
# did find some isolates - add new index numbers of rows
x$newvar_row_index_sorted <- seq_len(nrow(x))
@ -511,7 +573,66 @@ filter_first_weighted_isolate <- function(x,
subset(x, first_isolate(x = y,
col_date = col_date,
col_patient_id = col_patient_id,
col_mo = col_mo,
col_keyantibiotics = col_keyantibiotics,
...))
}
#' @rdname first_isolate
#' @export
is_new_episode <- function(.data,
                           episode_days = 365,
                           col_date = NULL,
                           col_patient_id = NULL) {
  # Determine patient episodes by delegating to first_isolate() with a
  # constant placeholder microorganism column.
  #
  # .data           a data.frame; when missing, it is taken from
  #                 dplyr::cur_data(), which is what makes this usable inside
  #                 (grouped) dplyr verbs
  # episode_days    length-1 number, passed on to first_isolate()
  # col_date        column name of the date; searched for with
  #                 search_type_in_df() when NULL
  # col_patient_id  column name of the patient IDs; searched for with
  #                 search_type_in_df() when NULL
  #
  # Returns whatever first_isolate() returns for these arguments.

  if (missing(.data)) {
    # look it up - this also supports grouping variables
    cur_data <- import_fn("cur_data", "dplyr", error_on_fail = FALSE)
    if (is.null(cur_data)) {
      stop_("parameter '.data' not set.")
    }
    .data <- cur_data()
  }

  meet_criteria(.data, allow_class = "data.frame") # also checks dimensions to be >0
  # validate the column names against `.data` - this function has no `x`, so
  # referring to `colnames(x)` here would fail (or pick up an unrelated `x`)
  # as soon as a column name is supplied
  meet_criteria(col_date, allow_class = "character", has_length = 1, allow_NULL = TRUE, is_in = colnames(.data))
  meet_criteria(col_patient_id, allow_class = "character", has_length = 1, allow_NULL = TRUE, is_in = colnames(.data))
  meet_criteria(episode_days, allow_class = c("numeric", "integer"), has_length = 1)

  # get i'th ID of group, so notices will only be thrown once;
  # any error from cur_group_id() (presumably when there is no dplyr data
  # context) is treated as "this is the first group"
  cur_group_id <- import_fn("cur_group_id", "dplyr", error_on_fail = FALSE)
  first_group <- tryCatch(is.null(cur_group_id) || cur_group_id() == 1,
                          error = function(e) TRUE)

  # try to find columns based on type
  # -- date
  if (is.null(col_date)) {
    col_date <- search_type_in_df(x = .data,
                                  type = "date",
                                  info = first_group)
    stop_if(is.null(col_date), "`col_date` must be set")
  }

  # -- patient id
  if (is.null(col_patient_id)) {
    if (all(c("First name", "Last name", "Sex") %in% colnames(.data))) {
      # WHONET support
      .data$patient_id <- paste(.data$`First name`, .data$`Last name`, .data$Sex)
      col_patient_id <- "patient_id"
      # reuse the guarded `first_group` flag instead of calling
      # cur_group_id() again without error handling
      if (first_group) {
        message_("Using combined columns `", font_bold("First name"), "`, `", font_bold("Last name"), "` and `", font_bold("Sex"), "` as input for `col_patient_id`")
      }
    } else {
      col_patient_id <- search_type_in_df(x = .data,
                                          type = "patient_id",
                                          info = first_group)
    }
    stop_if(is.null(col_patient_id), "`col_patient_id` must be set")
  }

  # create any random mo, so first isolates can be calculated;
  # the column is constant across all rows of `.data`
  .data$a94a8fe5 <- as.mo("Escherichia coli")

  first_isolate(.data,
                col_date = col_date,
                col_patient_id = col_patient_id,
                episode_days = episode_days,
                col_mo = "a94a8fe5",
                info = FALSE)
}

View File

@ -81,7 +81,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="https://msberends.github.io/AMR//index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.4.0.9023</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.4.0.9024</span>
</span>
</div>

View File

@ -81,7 +81,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.4.0.9023</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.4.0.9024</span>
</span>
</div>

View File

@ -81,7 +81,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.4.0.9023</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.4.0.9024</span>
</span>
</div>

View File

@ -81,7 +81,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.4.0.9023</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.4.0.9024</span>
</span>
</div>

View File

@ -43,7 +43,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.4.0.9023</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.4.0.9024</span>
</span>
</div>

View File

@ -81,7 +81,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.4.0.9023</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.4.0.9024</span>
</span>
</div>
@ -236,9 +236,9 @@
<small>Source: <a href='https://github.com/msberends/AMR/blob/master/NEWS.md'><code>NEWS.md</code></a></small>
</div>
<div id="amr-1409023" class="section level1">
<h1 class="page-header" data-toc-text="1.4.0.9023">
<a href="#amr-1409023" class="anchor"></a>AMR 1.4.0.9023<small> Unreleased </small>
<div id="amr-1409024" class="section level1">
<h1 class="page-header" data-toc-text="1.4.0.9024">
<a href="#amr-1409024" class="anchor"></a>AMR 1.4.0.9024<small> Unreleased </small>
</h1>
<div id="last-updated-17-november-2020" class="section level2">
<h2 class="hasAnchor">
@ -248,6 +248,7 @@
<h3 class="hasAnchor">
<a href="#new" class="anchor"></a>New</h3>
<ul>
<li><p>Function <code><a href="../reference/first_isolate.html">is_new_episode()</a></code> to determine patient episodes which are not necessarily based on microorganisms. It also supports grouped variables with e.g. <code><a href="https://dplyr.tidyverse.org/reference/mutate.html">mutate()</a></code> and <code><a href="https://dplyr.tidyverse.org/reference/summarise.html">summarise()</a></code> of the <code>dplyr</code> package: <code>r example_isolates %&gt;% group_by(hospital_id) %&gt;% summarise(patients = n_distinct(patient_id), n_episodes_365 = sum(is_new_episode(episode_days = 365)), n_episodes_60 = sum(is_new_episode(episode_days = 60)))</code></p></li>
<li>
<p>Functions <code><a href="../reference/mo_property.html">mo_is_gram_negative()</a></code> and <code><a href="../reference/mo_property.html">mo_is_gram_positive()</a></code> as wrappers around <code><a href="../reference/mo_property.html">mo_gramstain()</a></code>. They always return <code>TRUE</code> or <code>FALSE</code> (except when the input is <code>NA</code> or the MO code is <code>UNKNOWN</code>), thus always return <code>FALSE</code> for species outside the taxonomic kingdom of Bacteria. If you have the <code>dplyr</code> package installed, they can even determine the column with microorganisms themselves when used inside <code>dplyr</code> verbs:</p>
<div class="sourceCode" id="cb1"><pre class="downlit">

View File

@ -12,7 +12,7 @@ articles:
datasets: datasets.html
resistance_predict: resistance_predict.html
welcome_to_AMR: welcome_to_AMR.html
last_built: 2020-11-17T10:53Z
last_built: 2020-11-17T15:56Z
urls:
reference: https://msberends.github.io/AMR//reference
article: https://msberends.github.io/AMR//articles

View File

@ -49,7 +49,7 @@
<script src="../extra.js"></script>
<meta property="og:title" content="Determine first (weighted) isolates — first_isolate" />
<meta property="og:description" content="Determine first (weighted) isolates of all microorganisms of every patient per episode and (if needed) per specimen type." />
<meta property="og:description" content="Determine first (weighted) isolates of all microorganisms of every patient per episode and (if needed) per specimen type. To determine patient episodes not necessarily based on microorganisms, use is_new_episode() that also supports grouping with the dplyr package, see Examples." />
<meta property="og:image" content="https://msberends.github.io/AMR/logo.png" />
@ -82,7 +82,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.4.0.9000</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.4.0.9024</span>
</span>
</div>
@ -239,7 +239,7 @@
</div>
<div class="ref-description">
<p>Determine first (weighted) isolates of all microorganisms of every patient per episode and (if needed) per specimen type.</p>
<p>Determine first (weighted) isolates of all microorganisms of every patient per episode and (if needed) per specimen type. To determine patient episodes not necessarily based on microorganisms, use <code>is_new_episode()</code> that also supports grouping with the <code>dplyr</code> package, see <em>Examples</em>.</p>
</div>
<pre class="usage"><span class='fu'>first_isolate</span><span class='op'>(</span>
@ -278,18 +278,25 @@
col_mo <span class='op'>=</span> <span class='cn'>NULL</span>,
col_keyantibiotics <span class='op'>=</span> <span class='cn'>NULL</span>,
<span class='va'>...</span>
<span class='op'>)</span>
<span class='fu'>is_new_episode</span><span class='op'>(</span>
<span class='va'>.data</span>,
episode_days <span class='op'>=</span> <span class='fl'>365</span>,
col_date <span class='op'>=</span> <span class='cn'>NULL</span>,
col_patient_id <span class='op'>=</span> <span class='cn'>NULL</span>
<span class='op'>)</span></pre>
<h2 class="hasAnchor" id="arguments"><a class="anchor" href="#arguments"></a>Arguments</h2>
<table class="ref-arguments">
<colgroup><col class="name" /><col class="desc" /></colgroup>
<tr>
<th>x</th>
<th>x, .data</th>
<td><p>a <a href='https://rdrr.io/r/base/data.frame.html'>data.frame</a> containing isolates.</p></td>
</tr>
<tr>
<th>col_date</th>
<td><p>column name of the result date (or date that is was received on the lab), defaults to the first column of with a date class</p></td>
<td><p>column name of the result date (or date that is was received on the lab), defaults to the first column with a date class</p></td>
</tr>
<tr>
<th>col_patient_id</th>
@ -366,10 +373,17 @@
<p>A <code><a href='https://rdrr.io/r/base/logical.html'>logical</a></code> vector</p>
<h2 class="hasAnchor" id="details"><a class="anchor" href="#details"></a>Details</h2>
<p><strong>WHY THIS IS SO IMPORTANT</strong> <br />
To conduct an analysis of antimicrobial resistance, you should only include the first isolate of every patient per episode <a href='https:/pubmed.ncbi.nlm.nih.gov/17304462/'>(ref)</a>. If you would not do this, you could easily get an overestimate or underestimate of the resistance of an antibiotic. Imagine that a patient was admitted with an MRSA and that it was found in 5 different blood cultures the following week. The resistance percentage of oxacillin of all <em>S. aureus</em> isolates would be overestimated, because you included this MRSA more than once. It would be <a href='https://en.wikipedia.org/wiki/Selection_bias'>selection bias</a>.</p>
<p>All isolates with a microbial ID of <code>NA</code> will be excluded as first isolate.</p>
<p>The functions <code>filter_first_isolate()</code> and <code>filter_first_weighted_isolate()</code> are helper functions to quickly filter on first isolates. The function <code>filter_first_isolate()</code> is essentially equal to either:</p><pre> <span class='va'>x</span><span class='op'>[</span><span class='fu'>first_isolate</span><span class='op'>(</span><span class='va'>x</span>, <span class='va'>...</span><span class='op'>)</span>, <span class='op'>]</span>
<p>The <code>is_new_episode()</code> function is a wrapper around the <code>first_isolate()</code> function and can be used for data sets without isolates to just determine patient episodes based on any combination of grouping variables (using <code>dplyr</code>), please see <em>Examples</em>. Since it runs <code>first_isolate()</code> for every group, it is quite slow.</p>
<p>All isolates with a microbial ID of <code>NA</code> will be excluded as first isolate.</p><h3 class='hasAnchor' id='arguments'><a class='anchor' href='#arguments'></a>Why this is so important</h3>
<p>To conduct an analysis of antimicrobial resistance, you should only include the first isolate of every patient per episode <a href='https:/pubmed.ncbi.nlm.nih.gov/17304462/'>(ref)</a>. If you would not do this, you could easily get an overestimate or underestimate of the resistance of an antibiotic. Imagine that a patient was admitted with an MRSA and that it was found in 5 different blood cultures the following week. The resistance percentage of oxacillin of all <em>S. aureus</em> isolates would be overestimated, because you included this MRSA more than once. It would be <a href='https://en.wikipedia.org/wiki/Selection_bias'>selection bias</a>.</p>
<h3 class='hasAnchor' id='arguments'><a class='anchor' href='#arguments'></a><code>filter_*()</code> shortcuts</h3>
<p>The functions <code>filter_first_isolate()</code> and <code>filter_first_weighted_isolate()</code> are helper functions to quickly filter on first isolates.</p>
<p>The function <code>filter_first_isolate()</code> is essentially equal to either:</p><pre> <span class='va'>x</span><span class='op'>[</span><span class='fu'>first_isolate</span><span class='op'>(</span><span class='va'>x</span>, <span class='va'>...</span><span class='op'>)</span>, <span class='op'>]</span>
<span class='va'>x</span> <span class='op'>%&gt;%</span> <span class='fu'><a href='https://dplyr.tidyverse.org/reference/filter.html'>filter</a></span><span class='op'>(</span><span class='fu'>first_isolate</span><span class='op'>(</span><span class='va'>x</span>, <span class='va'>...</span><span class='op'>)</span><span class='op'>)</span>
</pre>
@ -381,6 +395,7 @@ To conduct an analysis of antimicrobial resistance, you should only include the
<span class='fu'><a href='https://dplyr.tidyverse.org/reference/select.html'>select</a></span><span class='op'>(</span><span class='op'>-</span><span class='va'>only_weighted_firsts</span>, <span class='op'>-</span><span class='va'>keyab</span><span class='op'>)</span>
</pre>
<h2 class="hasAnchor" id="key-antibiotics"><a class="anchor" href="#key-antibiotics"></a>Key antibiotics</h2>
@ -415,21 +430,22 @@ The <a href='lifecycle.html'>lifecycle</a> of this function is <strong>stable</s
<span class='co'># basic filtering on first isolates</span>
<span class='va'>example_isolates</span><span class='op'>[</span><span class='fu'>first_isolate</span><span class='op'>(</span><span class='va'>example_isolates</span><span class='op'>)</span>, <span class='op'>]</span>
<span class='co'># filtering based on isolates ----------------------------------------------</span>
<span class='co'># \donttest{</span>
<span class='kw'>if</span> <span class='op'>(</span><span class='kw'><a href='https://rdrr.io/r/base/library.html'>require</a></span><span class='op'>(</span><span class='st'><a href='https://dplyr.tidyverse.org'>"dplyr"</a></span><span class='op'>)</span><span class='op'>)</span> <span class='op'>{</span>
<span class='co'># Filter on first isolates:</span>
<span class='co'># filter on first isolates:</span>
<span class='va'>example_isolates</span> <span class='op'>%&gt;%</span>
<span class='fu'><a href='https://dplyr.tidyverse.org/reference/mutate.html'>mutate</a></span><span class='op'>(</span>first_isolate <span class='op'>=</span> <span class='fu'>first_isolate</span><span class='op'>(</span><span class='va'>.</span><span class='op'>)</span><span class='op'>)</span> <span class='op'>%&gt;%</span>
<span class='fu'><a href='https://dplyr.tidyverse.org/reference/filter.html'>filter</a></span><span class='op'>(</span><span class='va'>first_isolate</span> <span class='op'>==</span> <span class='cn'>TRUE</span><span class='op'>)</span>
<span class='co'># Short-hand versions:</span>
<span class='co'># short-hand versions:</span>
<span class='va'>example_isolates</span> <span class='op'>%&gt;%</span>
<span class='fu'>filter_first_isolate</span><span class='op'>(</span><span class='op'>)</span>
<span class='va'>example_isolates</span> <span class='op'>%&gt;%</span>
<span class='fu'>filter_first_weighted_isolate</span><span class='op'>(</span><span class='op'>)</span>
<span class='co'># Now let's see if first isolates matter:</span>
<span class='co'># now let's see if first isolates matter:</span>
<span class='va'>A</span> <span class='op'>&lt;-</span> <span class='va'>example_isolates</span> <span class='op'>%&gt;%</span>
<span class='fu'><a href='https://dplyr.tidyverse.org/reference/group_by.html'>group_by</a></span><span class='op'>(</span><span class='va'>hospital_id</span><span class='op'>)</span> <span class='op'>%&gt;%</span>
<span class='fu'><a href='https://dplyr.tidyverse.org/reference/summarise.html'>summarise</a></span><span class='op'>(</span>count <span class='op'>=</span> <span class='fu'><a href='count.html'>n_rsi</a></span><span class='op'>(</span><span class='va'>GEN</span><span class='op'>)</span>, <span class='co'># gentamicin availability</span>
@ -446,6 +462,42 @@ The <a href='lifecycle.html'>lifecycle</a> of this function is <strong>stable</s
<span class='co'># Gentamicin resistance in hospital D appears to be 3.7% higher than</span>
<span class='co'># when you (erroneously) would have used all isolates for analysis.</span>
<span class='op'>}</span>
<span class='co'># filtering based on any other condition -----------------------------------</span>
<span class='kw'>if</span> <span class='op'>(</span><span class='kw'><a href='https://rdrr.io/r/base/library.html'>require</a></span><span class='op'>(</span><span class='st'><a href='https://dplyr.tidyverse.org'>"dplyr"</a></span><span class='op'>)</span><span class='op'>)</span> <span class='op'>{</span>
<span class='co'># is_new_episode() can be used in dplyr verbs to determine patient</span>
<span class='co'># episodes based on any (combination of) grouping variables:</span>
<span class='va'>example_isolates</span> <span class='op'>%&gt;%</span>
<span class='fu'><a href='https://dplyr.tidyverse.org/reference/mutate.html'>mutate</a></span><span class='op'>(</span>condition <span class='op'>=</span> <span class='fu'><a href='https://rdrr.io/r/base/sample.html'>sample</a></span><span class='op'>(</span>x <span class='op'>=</span> <span class='fu'><a href='https://rdrr.io/r/base/c.html'>c</a></span><span class='op'>(</span><span class='st'>"A"</span>, <span class='st'>"B"</span>, <span class='st'>"C"</span><span class='op'>)</span>,
size <span class='op'>=</span> <span class='fl'>2000</span>,
replace <span class='op'>=</span> <span class='cn'>TRUE</span><span class='op'>)</span><span class='op'>)</span> <span class='op'>%&gt;%</span>
<span class='fu'><a href='https://dplyr.tidyverse.org/reference/group_by.html'>group_by</a></span><span class='op'>(</span><span class='va'>condition</span><span class='op'>)</span> <span class='op'>%&gt;%</span>
<span class='fu'><a href='https://dplyr.tidyverse.org/reference/mutate.html'>mutate</a></span><span class='op'>(</span>new_episode <span class='op'>=</span> <span class='fu'>is_new_episode</span><span class='op'>(</span><span class='op'>)</span><span class='op'>)</span>
<span class='va'>example_isolates</span> <span class='op'>%&gt;%</span>
<span class='fu'><a href='https://dplyr.tidyverse.org/reference/group_by.html'>group_by</a></span><span class='op'>(</span><span class='va'>hospital_id</span><span class='op'>)</span> <span class='op'>%&gt;%</span>
<span class='fu'><a href='https://dplyr.tidyverse.org/reference/summarise.html'>summarise</a></span><span class='op'>(</span>patients <span class='op'>=</span> <span class='fu'><a href='https://dplyr.tidyverse.org/reference/n_distinct.html'>n_distinct</a></span><span class='op'>(</span><span class='va'>patient_id</span><span class='op'>)</span>,
n_episodes_365 <span class='op'>=</span> <span class='fu'><a href='https://rdrr.io/r/base/sum.html'>sum</a></span><span class='op'>(</span><span class='fu'>is_new_episode</span><span class='op'>(</span>episode_days <span class='op'>=</span> <span class='fl'>365</span><span class='op'>)</span><span class='op'>)</span>,
n_episodes_60 <span class='op'>=</span> <span class='fu'><a href='https://rdrr.io/r/base/sum.html'>sum</a></span><span class='op'>(</span><span class='fu'>is_new_episode</span><span class='op'>(</span>episode_days <span class='op'>=</span> <span class='fl'>60</span><span class='op'>)</span><span class='op'>)</span>,
n_episodes_30 <span class='op'>=</span> <span class='fu'><a href='https://rdrr.io/r/base/sum.html'>sum</a></span><span class='op'>(</span><span class='fu'>is_new_episode</span><span class='op'>(</span>episode_days <span class='op'>=</span> <span class='fl'>30</span><span class='op'>)</span><span class='op'>)</span><span class='op'>)</span>
<span class='co'># grouping on microorganisms leads to the same results as first_isolate():</span>
<span class='va'>x</span> <span class='op'>&lt;-</span> <span class='va'>example_isolates</span> <span class='op'>%&gt;%</span>
<span class='fu'>filter_first_isolate</span><span class='op'>(</span>include_unknown <span class='op'>=</span> <span class='cn'>TRUE</span><span class='op'>)</span>
<span class='va'>y</span> <span class='op'>&lt;-</span> <span class='va'>example_isolates</span> <span class='op'>%&gt;%</span>
<span class='fu'><a href='https://dplyr.tidyverse.org/reference/group_by.html'>group_by</a></span><span class='op'>(</span><span class='va'>mo</span><span class='op'>)</span> <span class='op'>%&gt;%</span>
<span class='fu'><a href='https://dplyr.tidyverse.org/reference/filter.html'>filter</a></span><span class='op'>(</span><span class='fu'>is_new_episode</span><span class='op'>(</span><span class='op'>)</span><span class='op'>)</span>
<span class='fu'><a href='https://rdrr.io/r/base/identical.html'>identical</a></span><span class='op'>(</span><span class='va'>x</span><span class='op'>$</span><span class='va'>patient_id</span>, <span class='va'>y</span><span class='op'>$</span><span class='va'>patient_id</span><span class='op'>)</span>
<span class='co'># but now you can group on isolates and many more:</span>
<span class='va'>example_isolates</span> <span class='op'>%&gt;%</span>
<span class='fu'><a href='https://dplyr.tidyverse.org/reference/group_by.html'>group_by</a></span><span class='op'>(</span><span class='va'>mo</span>, <span class='va'>hospital_id</span>, <span class='va'>ward_icu</span><span class='op'>)</span> <span class='op'>%&gt;%</span>
<span class='fu'><a href='https://dplyr.tidyverse.org/reference/mutate.html'>mutate</a></span><span class='op'>(</span>flag_episode <span class='op'>=</span> <span class='fu'>is_new_episode</span><span class='op'>(</span><span class='op'>)</span><span class='op'>)</span>
<span class='op'>}</span>
<span class='co'># }</span>
</pre>
</div>

View File

@ -81,7 +81,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.4.0.9023</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.4.0.9024</span>
</span>
</div>
@ -478,7 +478,7 @@
</tr><tr>
<td>
<p><code><a href="first_isolate.html">first_isolate()</a></code> <code><a href="first_isolate.html">filter_first_isolate()</a></code> <code><a href="first_isolate.html">filter_first_weighted_isolate()</a></code> </p>
<p><code><a href="first_isolate.html">first_isolate()</a></code> <code><a href="first_isolate.html">filter_first_isolate()</a></code> <code><a href="first_isolate.html">filter_first_weighted_isolate()</a></code> <code><a href="first_isolate.html">is_new_episode()</a></code> </p>
</td>
<td><p>Determine first (weighted) isolates</p></td>
</tr><tr>

View File

@ -81,7 +81,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.4.0.9023</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.4.0.9024</span>
</span>
</div>

View File

@ -4,6 +4,7 @@
\alias{first_isolate}
\alias{filter_first_isolate}
\alias{filter_first_weighted_isolate}
\alias{is_new_episode}
\title{Determine first (weighted) isolates}
\source{
Methodology of this function is strictly based on:
@ -48,11 +49,18 @@ filter_first_weighted_isolate(
col_keyantibiotics = NULL,
...
)
is_new_episode(
.data,
episode_days = 365,
col_date = NULL,
col_patient_id = NULL
)
}
\arguments{
\item{x}{a \link{data.frame} containing isolates.}
\item{x, .data}{a \link{data.frame} containing isolates.}
\item{col_date}{column name of the result date (or date that is was received on the lab), defaults to the first column of with a date class}
\item{col_date}{column name of the result date (or the date that it was received at the lab), defaults to the first column with a date class}
\item{col_patient_id}{column name of the unique IDs of the patients, defaults to the first column that starts with 'patient' or 'patid' (case insensitive)}
@ -90,15 +98,22 @@ filter_first_weighted_isolate(
A \code{\link{logical}} vector
}
\description{
Determine first (weighted) isolates of all microorganisms of every patient per episode and (if needed) per specimen type.
Determine first (weighted) isolates of all microorganisms of every patient per episode and (if needed) per specimen type. To determine patient episodes not necessarily based on microorganisms, use \code{\link[=is_new_episode]{is_new_episode()}} that also supports grouping with the \code{dplyr} package, see \emph{Examples}.
}
\details{
\strong{WHY THIS IS SO IMPORTANT} \cr
To conduct an analysis of antimicrobial resistance, you should only include the first isolate of every patient per episode \href{https://pubmed.ncbi.nlm.nih.gov/17304462/}{(ref)}. If you would not do this, you could easily get an overestimate or underestimate of the resistance of an antibiotic. Imagine that a patient was admitted with an MRSA and that it was found in 5 different blood cultures the following week. The resistance percentage of oxacillin of all \emph{S. aureus} isolates would be overestimated, because you included this MRSA more than once. It would be \href{https://en.wikipedia.org/wiki/Selection_bias}{selection bias}.
The \code{\link[=is_new_episode]{is_new_episode()}} function is a wrapper around the \code{\link[=first_isolate]{first_isolate()}} function and can be used for data sets without isolates to just determine patient episodes based on any combination of grouping variables (using \code{dplyr}), please see \emph{Examples}. Since it runs \code{\link[=first_isolate]{first_isolate()}} for every group, it is quite slow.
All isolates with a microbial ID of \code{NA} will be excluded as first isolate.
\subsection{Why this is so important}{
The functions \code{\link[=filter_first_isolate]{filter_first_isolate()}} and \code{\link[=filter_first_weighted_isolate]{filter_first_weighted_isolate()}} are helper functions to quickly filter on first isolates. The function \code{\link[=filter_first_isolate]{filter_first_isolate()}} is essentially equal to either:\preformatted{ x[first_isolate(x, ...), ]
To conduct an analysis of antimicrobial resistance, you should only include the first isolate of every patient per episode \href{https://pubmed.ncbi.nlm.nih.gov/17304462/}{(ref)}. If you would not do this, you could easily get an overestimate or underestimate of the resistance of an antibiotic. Imagine that a patient was admitted with an MRSA and that it was found in 5 different blood cultures the following week. The resistance percentage of oxacillin of all \emph{S. aureus} isolates would be overestimated, because you included this MRSA more than once. It would be \href{https://en.wikipedia.org/wiki/Selection_bias}{selection bias}.
}
\subsection{\verb{filter_*()} shortcuts}{
The functions \code{\link[=filter_first_isolate]{filter_first_isolate()}} and \code{\link[=filter_first_weighted_isolate]{filter_first_weighted_isolate()}} are helper functions to quickly filter on first isolates.
The function \code{\link[=filter_first_isolate]{filter_first_isolate()}} is essentially equal to either:\preformatted{ x[first_isolate(x, ...), ]
x \%>\% filter(first_isolate(x, ...))
}
@ -110,6 +125,7 @@ The function \code{\link[=filter_first_weighted_isolate]{filter_first_weighted_i
select(-only_weighted_firsts, -keyab)
}
}
}
\section{Key antibiotics}{
There are two ways to determine whether isolates can be included as first \emph{weighted} isolates which will give generally the same results:
@ -143,21 +159,22 @@ On our website \url{https://msberends.github.io/AMR/} you can find \href{https:/
# basic filtering on first isolates
example_isolates[first_isolate(example_isolates), ]
# filtering based on isolates ----------------------------------------------
\donttest{
if (require("dplyr")) {
# Filter on first isolates:
# filter on first isolates:
example_isolates \%>\%
mutate(first_isolate = first_isolate(.)) \%>\%
filter(first_isolate == TRUE)
# Short-hand versions:
# short-hand versions:
example_isolates \%>\%
filter_first_isolate()
example_isolates \%>\%
filter_first_weighted_isolate()
# Now let's see if first isolates matter:
# now let's see if first isolates matter:
A <- example_isolates \%>\%
group_by(hospital_id) \%>\%
summarise(count = n_rsi(GEN), # gentamicin availability
@ -174,6 +191,42 @@ if (require("dplyr")) {
# Gentamicin resistance in hospital D appears to be 3.7\% higher than
# when you (erroneously) would have used all isolates for analysis.
}
# filtering based on any other condition -----------------------------------
if (require("dplyr")) {
# is_new_episode() can be used in dplyr verbs to determine patient
# episodes based on any (combination of) grouping variables:
example_isolates \%>\%
mutate(condition = sample(x = c("A", "B", "C"),
size = 2000,
replace = TRUE)) \%>\%
group_by(condition) \%>\%
mutate(new_episode = is_new_episode())
example_isolates \%>\%
group_by(hospital_id) \%>\%
summarise(patients = n_distinct(patient_id),
n_episodes_365 = sum(is_new_episode(episode_days = 365)),
n_episodes_60 = sum(is_new_episode(episode_days = 60)),
n_episodes_30 = sum(is_new_episode(episode_days = 30)))
# grouping on microorganisms leads to the same results as first_isolate():
x <- example_isolates \%>\%
filter_first_isolate(include_unknown = TRUE)
y <- example_isolates \%>\%
group_by(mo) \%>\%
filter(is_new_episode())
identical(x$patient_id, y$patient_id)
# but now you can group on isolates and many more:
example_isolates \%>\%
group_by(mo, hospital_id, ward_icu) \%>\%
mutate(flag_episode = is_new_episode())
}
}
}
\seealso{

View File

@ -54,40 +54,3 @@ test_that("looking up ab columns works", {
expect_warning(get_column_abx(dplyr::rename(example_isolates, thisone = AMX), amox = "thisone", tmp = "thisone", verbose = TRUE))
expect_warning(get_column_abx(dplyr::rename(example_isolates, thisone = AMX), amox = "thisone", tmp = "thisone", verbose = FALSE))
})
test_that("imports work", {
  skip_on_cran()

  # Lookup table: function name -> package expected to provide it.
  import_functions <- c(
    "anti_join" = "dplyr",
    "cur_column" = "dplyr",
    "freq.default" = "cleaner",
    "full_join" = "dplyr",
    "has_internet" = "curl",
    "html_attr" = "rvest",
    "html_children" = "rvest",
    "html_node" = "rvest",
    "html_nodes" = "rvest",
    "html_table" = "rvest",
    "html_text" = "rvest",
    "inline_hist" = "skimr",
    "inner_join" = "dplyr",
    "insertText" = "rstudioapi",
    "left_join" = "dplyr",
    "new_pillar_shaft_simple" = "pillar",
    "peek_mask" = "dplyr",
    "peek_vars" = "tidyselect",
    "read_excel" = "readxl",
    "read_html" = "xml2",
    "right_join" = "dplyr",
    "semi_join" = "dplyr",
    "sfl" = "skimr",
    "showQuestion" = "rstudioapi"
  )

  # Every name in the table is unique, so walking the names visits each
  # entry exactly once, in the same order as an index loop would.
  for (fn in names(import_functions)) {
    pkg <- import_functions[[fn]]
    found <- import_fn(name = fn, pkg = pkg, error_on_fail = FALSE)
    # a NULL result from import_fn() is reported as a test failure
    expect(
      !is.null(found),
      failure_message = paste0("Function ", pkg, "::", fn, "() does not exist")
    )
  }
})

View File

@ -200,4 +200,15 @@ test_that("first isolates work", {
expect_identical(filter_first_weighted_isolate(example_isolates),
subset(example_isolates, first_isolate(ex)))
# notice that all mo's are distinct, so all are TRUE
expect_true(all(example_isolates %pm>%
pm_distinct(mo, .keep_all = TRUE) %pm>%
first_isolate() == TRUE))
library(dplyr)
# is_new_episode
old <- example_isolates %>% mutate(out = first_isolate(., include_unknown = TRUE))
new <- example_isolates %>% group_by(mo) %>% mutate(out = is_new_episode())
expect_identical(which(old$out), which(new$out))
})

69
tests/testthat/test-zzz.R Normal file
View File

@ -0,0 +1,69 @@
# ==================================================================== #
# TITLE #
# Antimicrobial Resistance (AMR) Analysis for R #
# #
# SOURCE #
# https://github.com/msberends/AMR #
# #
# LICENCE #
# (c) 2018-2020 Berends MS, Luz CF et al. #
# Developed at the University of Groningen, the Netherlands, in #
# collaboration with non-profit organisations Certe Medical #
# Diagnostics & Advice, and University Medical Center Groningen. #
# #
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
# the Free Software Foundation. #
# We created this package for both routine data analysis and academic #
# research and it was publicly released in the hope that it will be #
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
# #
# Visit our website for the full manual and a complete tutorial about #
# how to conduct AMR analysis: https://msberends.github.io/AMR/ #
# ==================================================================== #
context("zzz.R")
test_that("imports work", {
  # Checks that import_fn() can find each listed function in the package it is
  # expected to come from; a NULL result is reported as a test failure.
  skip_on_cran()

  # Lookup table: function name -> package expected to provide it.
  # Each function is listed once; repeated entries would only re-run the
  # same check.
  import_functions <- c(
    "anti_join" = "dplyr",
    "cur_column" = "dplyr",
    "cur_data" = "dplyr",
    "document_position" = "rstudioapi",
    "document_range" = "rstudioapi",
    "freq.default" = "cleaner",
    "full_join" = "dplyr",
    "getSourceEditorContext" = "rstudioapi",
    "has_internet" = "curl",
    "html_attr" = "rvest",
    "html_children" = "rvest",
    "html_node" = "rvest",
    "html_nodes" = "rvest",
    "html_table" = "rvest",
    "html_text" = "rvest",
    "inline_hist" = "skimr",
    "inner_join" = "dplyr",
    "insertText" = "rstudioapi",
    "left_join" = "dplyr",
    "new_pillar_shaft_simple" = "pillar",
    "peek_mask" = "dplyr",
    "peek_vars" = "tidyselect",
    "read_excel" = "readxl",
    "read_html" = "xml2",
    "right_join" = "dplyr",
    "semi_join" = "dplyr",
    "sfl" = "skimr",
    "showQuestion" = "rstudioapi"
  )

  for (i in seq_along(import_functions)) {
    fn <- names(import_functions)[i]
    pkg <- unname(import_functions[i])
    expect(
      !is.null(import_fn(name = fn, pkg = pkg, error_on_fail = FALSE)),
      failure_message = paste0("Function ", pkg, "::", fn, "() does not exist")
    )
  }
})