mirror of https://github.com/msberends/AMR.git
(v1.4.0.9024) is_new_episode()
This commit is contained in:
parent
0800d33228
commit
363218da7e
|
@ -64,6 +64,7 @@ jobs:
|
|||
- {os: ubuntu-16.04, r: '3.5', allowfail: false, rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"}
|
||||
- {os: ubuntu-16.04, r: '3.4', allowfail: true, rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"}
|
||||
- {os: ubuntu-16.04, r: '3.3', allowfail: true, rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"}
|
||||
- {os: ubuntu-16.04, r: '3.2', allowfail: true, rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"}
|
||||
# older R versions cannot be tested, since tidyverse only supports last 4 R x.x versions
|
||||
env:
|
||||
R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
|
||||
|
@ -120,14 +121,15 @@ jobs:
|
|||
shell: Rscript {0}
|
||||
|
||||
- name: Check on older R versions
|
||||
if: matrix.config.r == '3.3'
|
||||
# no vignettes here, since they rely on R 3.3 and higher
|
||||
if: matrix.config.r == '3.2'
|
||||
env:
|
||||
_R_CHECK_CRAN_INCOMING_: false
|
||||
run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran", "--no-build-vignettes" , "--ignore-vignettes"), error_on = "warning", check_dir = "check")
|
||||
run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran", "--ignore-vignettes"), build_args = "--no-build-vignettes" , error_on = "warning", check_dir = "check")
|
||||
shell: Rscript {0}
|
||||
|
||||
- name: Check on newer R versions
|
||||
if: matrix.config.r != '3.3'
|
||||
if: matrix.config.r != '3.2'
|
||||
env:
|
||||
_R_CHECK_CRAN_INCOMING_: false
|
||||
run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check")
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
Package: AMR
|
||||
Version: 1.4.0.9023
|
||||
Version: 1.4.0.9024
|
||||
Date: 2020-11-17
|
||||
Title: Antimicrobial Resistance Analysis
|
||||
Authors@R: c(
|
||||
|
|
|
@ -152,6 +152,7 @@ export(is.mic)
|
|||
export(is.mo)
|
||||
export(is.rsi)
|
||||
export(is.rsi.eligible)
|
||||
export(is_new_episode)
|
||||
export(key_antibiotics)
|
||||
export(key_antibiotics_equal)
|
||||
export(kurtosis)
|
||||
|
|
10
NEWS.md
10
NEWS.md
|
@ -1,7 +1,15 @@
|
|||
# AMR 1.4.0.9023
|
||||
# AMR 1.4.0.9024
|
||||
## <small>Last updated: 17 November 2020</small>
|
||||
|
||||
### New
|
||||
* Function `is_new_episode()` to determine patient episodes which are not necessarily based on microorganisms. It also supports grouped variables with e.g. `mutate()` and `summarise()` of the `dplyr` package:
|
||||
```r
|
||||
example_isolates %>%
|
||||
group_by(hospital_id) %>%
|
||||
summarise(patients = n_distinct(patient_id),
|
||||
n_episodes_365 = sum(is_new_episode(episode_days = 365)),
|
||||
n_episodes_60 = sum(is_new_episode(episode_days = 60)))
|
||||
```
|
||||
* Functions `mo_is_gram_negative()` and `mo_is_gram_positive()` as wrappers around `mo_gramstain()`. They always return `TRUE` or `FALSE` (except when the input is `NA` or the MO code is `UNKNOWN`), thus always return `FALSE` for species outside the taxonomic kingdom of Bacteria. If you have the `dplyr` package installed, they can even determine the column with microorganisms themselves when used inside `dplyr` verbs:
|
||||
```r
|
||||
example_isolates %>%
|
||||
|
|
|
@ -139,9 +139,13 @@ check_dataset_integrity <- function() {
|
|||
}
|
||||
|
||||
search_type_in_df <- function(x, type, info = TRUE) {
|
||||
meet_criteria(x, allow_class = "data.frame")
|
||||
meet_criteria(type, allow_class = "character", has_length = 1)
|
||||
|
||||
# try to find columns based on type
|
||||
found <- NULL
|
||||
|
||||
# remove attributes from other packages
|
||||
x <- as.data.frame(x, stringsAsFactors = FALSE)
|
||||
colnames(x) <- trimws(colnames(x))
|
||||
|
||||
|
|
|
@ -25,10 +25,10 @@
|
|||
|
||||
#' Determine first (weighted) isolates
|
||||
#'
|
||||
#' Determine first (weighted) isolates of all microorganisms of every patient per episode and (if needed) per specimen type.
|
||||
#' Determine first (weighted) isolates of all microorganisms of every patient per episode and (if needed) per specimen type. To determine patient episodes not necessarily based on microorganisms, use [is_new_episode()] that also supports grouping with the `dplyr` package, see *Examples*.
|
||||
#' @inheritSection lifecycle Stable lifecycle
|
||||
#' @param x a [data.frame] containing isolates.
|
||||
#' @param col_date column name of the result date (or date that is was received on the lab), defaults to the first column of with a date class
|
||||
#' @param x,.data a [data.frame] containing isolates.
|
||||
#' @param col_date column name of the result date (or date that it was received at the lab), defaults to the first column with a date class
|
||||
#' @param col_patient_id column name of the unique IDs of the patients, defaults to the first column that starts with 'patient' or 'patid' (case insensitive)
|
||||
#' @param col_mo column name of the IDs of the microorganisms (see [as.mo()]), defaults to the first column of class [`mo`]. Values will be coerced using [as.mo()].
|
||||
#' @param col_testcode column name of the test codes. Use `col_testcode = NULL` to **not** exclude certain test codes (like test codes for screening). In that case `testcodes_exclude` will be ignored.
|
||||
|
@ -45,17 +45,26 @@
|
|||
#' @param info print progress
|
||||
#' @param include_unknown logical to determine whether 'unknown' microorganisms should be included too, i.e. microbial code `"UNKNOWN"`, which defaults to `FALSE`. For WHONET users, this means that all records with organism code `"con"` (*contamination*) will be excluded by default. Isolates with a microbial ID of `NA` will always be excluded as first isolate.
|
||||
#' @param ... parameters passed on to the [first_isolate()] function
|
||||
#' @details **WHY THIS IS SO IMPORTANT** \cr
|
||||
#' To conduct an analysis of antimicrobial resistance, you should only include the first isolate of every patient per episode [(ref)](https:/pubmed.ncbi.nlm.nih.gov/17304462/). If you would not do this, you could easily get an overestimate or underestimate of the resistance of an antibiotic. Imagine that a patient was admitted with an MRSA and that it was found in 5 different blood cultures the following week. The resistance percentage of oxacillin of all *S. aureus* isolates would be overestimated, because you included this MRSA more than once. It would be [selection bias](https://en.wikipedia.org/wiki/Selection_bias).
|
||||
#' @details The [is_new_episode()] function is a wrapper around the [first_isolate()] function and can be used for data sets without isolates to just determine patient episodes based on any combination of grouping variables (using `dplyr`), please see *Examples*. Since it runs [first_isolate()] for every group, it is quite slow.
|
||||
#'
|
||||
#' All isolates with a microbial ID of `NA` will be excluded as first isolate.
|
||||
#'
|
||||
#' The functions [filter_first_isolate()] and [filter_first_weighted_isolate()] are helper functions to quickly filter on first isolates. The function [filter_first_isolate()] is essentially equal to either:
|
||||
#' ### Why this is so important
|
||||
#' To conduct an analysis of antimicrobial resistance, you should only include the first isolate of every patient per episode [(ref)](https://pubmed.ncbi.nlm.nih.gov/17304462/). If you would not do this, you could easily get an overestimate or underestimate of the resistance of an antibiotic. Imagine that a patient was admitted with an MRSA and that it was found in 5 different blood cultures the following week. The resistance percentage of oxacillin of all *S. aureus* isolates would be overestimated, because you included this MRSA more than once. It would be [selection bias](https://en.wikipedia.org/wiki/Selection_bias).
|
||||
#'
|
||||
#' ### `filter_*()` shortcuts
|
||||
#'
|
||||
#' The functions [filter_first_isolate()] and [filter_first_weighted_isolate()] are helper functions to quickly filter on first isolates.
|
||||
#'
|
||||
#' The function [filter_first_isolate()] is essentially equal to either:
|
||||
#'
|
||||
#' ```
|
||||
#' x[first_isolate(x, ...), ]
|
||||
#' x %>% filter(first_isolate(x, ...))
|
||||
#' ```
|
||||
#'
|
||||
#' The function [filter_first_weighted_isolate()] is essentially equal to:
|
||||
#'
|
||||
#' ```
|
||||
#' x %>%
|
||||
#' mutate(keyab = key_antibiotics(.)) %>%
|
||||
|
@ -89,21 +98,22 @@
|
|||
#' # basic filtering on first isolates
|
||||
#' example_isolates[first_isolate(example_isolates), ]
|
||||
#'
|
||||
#' # filtering based on isolates ----------------------------------------------
|
||||
#' \donttest{
|
||||
#' if (require("dplyr")) {
|
||||
#' # Filter on first isolates:
|
||||
#' # filter on first isolates:
|
||||
#' example_isolates %>%
|
||||
#' mutate(first_isolate = first_isolate(.)) %>%
|
||||
#' filter(first_isolate == TRUE)
|
||||
#'
|
||||
#' # Short-hand versions:
|
||||
#' # short-hand versions:
|
||||
#' example_isolates %>%
|
||||
#' filter_first_isolate()
|
||||
#'
|
||||
#' example_isolates %>%
|
||||
#' filter_first_weighted_isolate()
|
||||
#'
|
||||
#' # Now let's see if first isolates matter:
|
||||
#' # now let's see if first isolates matter:
|
||||
#' A <- example_isolates %>%
|
||||
#' group_by(hospital_id) %>%
|
||||
#' summarise(count = n_rsi(GEN), # gentamicin availability
|
||||
|
@ -120,6 +130,42 @@
|
|||
#' # Gentamicin resistance in hospital D appears to be 3.7% higher than
|
||||
#' # when you (erroneously) would have used all isolates for analysis.
|
||||
#' }
|
||||
#'
|
||||
#' # filtering based on any other condition -----------------------------------
|
||||
#'
|
||||
#' if (require("dplyr")) {
|
||||
#' # is_new_episode() can be used in dplyr verbs to determine patient
|
||||
#' # episodes based on any (combination of) grouping variables:
|
||||
#' example_isolates %>%
|
||||
#' mutate(condition = sample(x = c("A", "B", "C"),
|
||||
#' size = 2000,
|
||||
#' replace = TRUE)) %>%
|
||||
#' group_by(condition) %>%
|
||||
#' mutate(new_episode = is_new_episode())
|
||||
#'
|
||||
#' example_isolates %>%
|
||||
#' group_by(hospital_id) %>%
|
||||
#' summarise(patients = n_distinct(patient_id),
|
||||
#' n_episodes_365 = sum(is_new_episode(episode_days = 365)),
|
||||
#' n_episodes_60 = sum(is_new_episode(episode_days = 60)),
|
||||
#' n_episodes_30 = sum(is_new_episode(episode_days = 30)))
|
||||
#'
|
||||
#'
|
||||
#' # grouping on microorganisms leads to the same results as first_isolate():
|
||||
#' x <- example_isolates %>%
|
||||
#' filter_first_isolate(include_unknown = TRUE)
|
||||
#'
|
||||
#' y <- example_isolates %>%
|
||||
#' group_by(mo) %>%
|
||||
#' filter(is_new_episode())
|
||||
#'
|
||||
#' identical(x$patient_id, y$patient_id)
|
||||
#'
|
||||
#' # but now you can group on isolates and many more:
|
||||
#' example_isolates %>%
|
||||
#' group_by(mo, hospital_id, ward_icu) %>%
|
||||
#' mutate(flag_episode = is_new_episode())
|
||||
#' }
|
||||
#' }
|
||||
first_isolate <- function(x,
|
||||
col_date = NULL,
|
||||
|
@ -139,7 +185,7 @@ first_isolate <- function(x,
|
|||
info = interactive(),
|
||||
include_unknown = FALSE,
|
||||
...) {
|
||||
meet_criteria(x, allow_class = "data.frame")
|
||||
meet_criteria(x, allow_class = "data.frame") # also checks dimensions to be >0
|
||||
meet_criteria(col_date, allow_class = "character", has_length = 1, allow_NULL = TRUE, is_in = colnames(x))
|
||||
meet_criteria(col_patient_id, allow_class = "character", has_length = 1, allow_NULL = TRUE, is_in = colnames(x))
|
||||
meet_criteria(col_mo, allow_class = "character", has_length = 1, allow_NULL = TRUE, is_in = colnames(x))
|
||||
|
@ -175,13 +221,10 @@ first_isolate <- function(x,
|
|||
}
|
||||
}
|
||||
|
||||
stop_ifnot(is.data.frame(x), "`x` must be a data.frame")
|
||||
stop_if(any(dim(x) == 0), "`x` must contain rows and columns")
|
||||
|
||||
# remove data.table, grouping from tibbles, etc.
|
||||
x <- as.data.frame(x, stringsAsFactors = FALSE)
|
||||
|
||||
# try to find columns based on type
|
||||
# try to find columns based on type
|
||||
# -- mo
|
||||
if (is.null(col_mo)) {
|
||||
col_mo <- search_type_in_df(x = x, type = "mo")
|
||||
|
@ -299,13 +342,32 @@ first_isolate <- function(x,
|
|||
)
|
||||
}
|
||||
|
||||
# no isolates found
|
||||
# speed up - return immediately if obvious
|
||||
if (abs(row.start) == Inf | abs(row.end) == Inf) {
|
||||
if (info == TRUE) {
|
||||
message_("=> Found ", font_bold("no isolates"), as_note = FALSE)
|
||||
message_("=> Found ", font_bold("no isolates"),
|
||||
add_fn = font_black,
|
||||
as_note = FALSE)
|
||||
}
|
||||
return(rep(FALSE, nrow(x)))
|
||||
}
|
||||
if (row.start == row.end) {
|
||||
if (info == TRUE) {
|
||||
message_("=> Found ", font_bold("1 isolate"), ", as the data only contained 1 row",
|
||||
add_fn = font_black,
|
||||
as_note = FALSE)
|
||||
}
|
||||
return(TRUE)
|
||||
}
|
||||
if (length(c(row.start:row.end)) == pm_n_distinct(x[c(row.start:row.end), col_mo, drop = TRUE])) {
|
||||
if (info == TRUE) {
|
||||
message_("=> Found ", font_bold(paste(length(c(row.start:row.end)), "isolates")),
|
||||
", as all isolates were different microorganisms",
|
||||
add_fn = font_black,
|
||||
as_note = FALSE)
|
||||
}
|
||||
return(rep(TRUE, length(c(row.start:row.end))))
|
||||
}
|
||||
|
||||
# did find some isolates - add new index numbers of rows
|
||||
x$newvar_row_index_sorted <- seq_len(nrow(x))
|
||||
|
@ -511,7 +573,66 @@ filter_first_weighted_isolate <- function(x,
|
|||
subset(x, first_isolate(x = y,
|
||||
col_date = col_date,
|
||||
col_patient_id = col_patient_id,
|
||||
col_mo = col_mo,
|
||||
col_keyantibiotics = col_keyantibiotics,
|
||||
...))
|
||||
}
|
||||
|
||||
#' @rdname first_isolate
|
||||
#' @export
|
||||
is_new_episode <- function(.data,
                           episode_days = 365,
                           col_date = NULL,
                           col_patient_id = NULL) {
  # Determine patient episodes without requiring a microorganism column.
  #
  # This is a thin wrapper around first_isolate(): a constant placeholder
  # microorganism is added to the data, so that the result of first_isolate()
  # depends only on patient, date and `episode_days`. The value returned is
  # whatever first_isolate() returns for that input.
  #
  # .data           data.frame; when missing, it is looked up with
  #                 dplyr::cur_data(), which makes the function usable inside
  #                 (grouped) dplyr verbs
  # episode_days    single number, passed on to first_isolate()
  # col_date        column name with dates, or NULL to search for it
  # col_patient_id  column name with patient IDs, or NULL to search for it

  if (missing(.data)) {
    # look it up - this also supports grouping variables
    cur_data <- import_fn("cur_data", "dplyr", error_on_fail = FALSE)
    if (is.null(cur_data)) {
      stop_("parameter '.data' not set.")
    }
    .data <- cur_data()
  }

  meet_criteria(.data, allow_class = "data.frame") # also checks dimensions to be >0
  # validate column names against `.data`; there is no `x` in this function
  meet_criteria(col_date, allow_class = "character", has_length = 1, allow_NULL = TRUE, is_in = colnames(.data))
  meet_criteria(col_patient_id, allow_class = "character", has_length = 1, allow_NULL = TRUE, is_in = colnames(.data))
  meet_criteria(episode_days, allow_class = c("numeric", "integer"), has_length = 1)

  # get i'th ID of group, so notices will only be thrown once;
  # cur_group_id() fails outside of a dplyr verb, in which case we treat the
  # call as the first (and only) group
  cur_group_id <- import_fn("cur_group_id", "dplyr", error_on_fail = FALSE)
  first_group <- tryCatch(is.null(cur_group_id) || cur_group_id() == 1,
                          error = function(e) TRUE)

  # try to find columns based on type
  # -- date
  if (is.null(col_date)) {
    col_date <- search_type_in_df(x = .data,
                                  type = "date",
                                  info = first_group)
    stop_if(is.null(col_date), "`col_date` must be set")
  }

  # -- patient id
  if (is.null(col_patient_id)) {
    if (all(c("First name", "Last name", "Sex") %in% colnames(.data))) {
      # WHONET support
      .data$patient_id <- paste(.data$`First name`, .data$`Last name`, .data$Sex)
      col_patient_id <- "patient_id"
      # reuse the guarded `first_group` instead of calling cur_group_id()
      # again, which would throw when used outside of a dplyr verb
      if (first_group) {
        message_("Using combined columns `", font_bold("First name"), "`, `", font_bold("Last name"), "` and `", font_bold("Sex"), "` as input for `col_patient_id`")
      }
    } else {
      col_patient_id <- search_type_in_df(x = .data,
                                          type = "patient_id",
                                          info = first_group)
    }
    stop_if(is.null(col_patient_id), "`col_patient_id` must be set")
  }

  # create any random mo, so first isolates can be calculated
  .data$a94a8fe5 <- as.mo("Escherichia coli")

  first_isolate(.data,
                col_date = col_date,
                col_patient_id = col_patient_id,
                episode_days = episode_days,
                col_mo = "a94a8fe5",
                info = FALSE)
}
|
||||
|
|
|
@ -81,7 +81,7 @@
|
|||
</button>
|
||||
<span class="navbar-brand">
|
||||
<a class="navbar-link" href="https://msberends.github.io/AMR//index.html">AMR (for R)</a>
|
||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.4.0.9023</span>
|
||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.4.0.9024</span>
|
||||
</span>
|
||||
</div>
|
||||
|
||||
|
|
|
@ -81,7 +81,7 @@
|
|||
</button>
|
||||
<span class="navbar-brand">
|
||||
<a class="navbar-link" href="index.html">AMR (for R)</a>
|
||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.4.0.9023</span>
|
||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.4.0.9024</span>
|
||||
</span>
|
||||
</div>
|
||||
|
||||
|
|
|
@ -81,7 +81,7 @@
|
|||
</button>
|
||||
<span class="navbar-brand">
|
||||
<a class="navbar-link" href="../index.html">AMR (for R)</a>
|
||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.4.0.9023</span>
|
||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.4.0.9024</span>
|
||||
</span>
|
||||
</div>
|
||||
|
||||
|
|
|
@ -81,7 +81,7 @@
|
|||
</button>
|
||||
<span class="navbar-brand">
|
||||
<a class="navbar-link" href="index.html">AMR (for R)</a>
|
||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.4.0.9023</span>
|
||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.4.0.9024</span>
|
||||
</span>
|
||||
</div>
|
||||
|
||||
|
|
|
@ -43,7 +43,7 @@
|
|||
</button>
|
||||
<span class="navbar-brand">
|
||||
<a class="navbar-link" href="index.html">AMR (for R)</a>
|
||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.4.0.9023</span>
|
||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.4.0.9024</span>
|
||||
</span>
|
||||
</div>
|
||||
|
||||
|
|
|
@ -81,7 +81,7 @@
|
|||
</button>
|
||||
<span class="navbar-brand">
|
||||
<a class="navbar-link" href="../index.html">AMR (for R)</a>
|
||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.4.0.9023</span>
|
||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.4.0.9024</span>
|
||||
</span>
|
||||
</div>
|
||||
|
||||
|
@ -236,9 +236,9 @@
|
|||
<small>Source: <a href='https://github.com/msberends/AMR/blob/master/NEWS.md'><code>NEWS.md</code></a></small>
|
||||
</div>
|
||||
|
||||
<div id="amr-1409023" class="section level1">
|
||||
<h1 class="page-header" data-toc-text="1.4.0.9023">
|
||||
<a href="#amr-1409023" class="anchor"></a>AMR 1.4.0.9023<small> Unreleased </small>
|
||||
<div id="amr-1409024" class="section level1">
|
||||
<h1 class="page-header" data-toc-text="1.4.0.9024">
|
||||
<a href="#amr-1409024" class="anchor"></a>AMR 1.4.0.9024<small> Unreleased </small>
|
||||
</h1>
|
||||
<div id="last-updated-17-november-2020" class="section level2">
|
||||
<h2 class="hasAnchor">
|
||||
|
@ -248,6 +248,7 @@
|
|||
<h3 class="hasAnchor">
|
||||
<a href="#new" class="anchor"></a>New</h3>
|
||||
<ul>
|
||||
<li><p>Function <code><a href="../reference/first_isolate.html">is_new_episode()</a></code> to determine patient episodes which are not necessarily based on microorganisms. It also supports grouped variables with e.g. <code><a href="https://dplyr.tidyverse.org/reference/mutate.html">mutate()</a></code> and <code><a href="https://dplyr.tidyverse.org/reference/summarise.html">summarise()</a></code> of the <code>dplyr</code> package: <code>r example_isolates %>% group_by(hospital_id) %>% summarise(patients = n_distinct(patient_id), n_episodes_365 = sum(is_new_episode(episode_days = 365)), n_episodes_60 = sum(is_new_episode(episode_days = 60)))</code></p></li>
|
||||
<li>
|
||||
<p>Functions <code><a href="../reference/mo_property.html">mo_is_gram_negative()</a></code> and <code><a href="../reference/mo_property.html">mo_is_gram_positive()</a></code> as wrappers around <code><a href="../reference/mo_property.html">mo_gramstain()</a></code>. They always return <code>TRUE</code> or <code>FALSE</code> (except when the input is <code>NA</code> or the MO code is <code>UNKNOWN</code>), thus always return <code>FALSE</code> for species outside the taxonomic kingdom of Bacteria. If you have the <code>dplyr</code> package installed, they can even determine the column with microorganisms themselves when used inside <code>dplyr</code> verbs:</p>
|
||||
<div class="sourceCode" id="cb1"><pre class="downlit">
|
||||
|
|
|
@ -12,7 +12,7 @@ articles:
|
|||
datasets: datasets.html
|
||||
resistance_predict: resistance_predict.html
|
||||
welcome_to_AMR: welcome_to_AMR.html
|
||||
last_built: 2020-11-17T10:53Z
|
||||
last_built: 2020-11-17T15:56Z
|
||||
urls:
|
||||
reference: https://msberends.github.io/AMR//reference
|
||||
article: https://msberends.github.io/AMR//articles
|
||||
|
|
|
@ -49,7 +49,7 @@
|
|||
<script src="../extra.js"></script>
|
||||
|
||||
<meta property="og:title" content="Determine first (weighted) isolates — first_isolate" />
|
||||
<meta property="og:description" content="Determine first (weighted) isolates of all microorganisms of every patient per episode and (if needed) per specimen type." />
|
||||
<meta property="og:description" content="Determine first (weighted) isolates of all microorganisms of every patient per episode and (if needed) per specimen type. To determine patient episodes not necessarily based on microorganisms, use is_new_episode() that also supports grouping with the dplyr package, see Examples." />
|
||||
<meta property="og:image" content="https://msberends.github.io/AMR/logo.png" />
|
||||
|
||||
|
||||
|
@ -82,7 +82,7 @@
|
|||
</button>
|
||||
<span class="navbar-brand">
|
||||
<a class="navbar-link" href="../index.html">AMR (for R)</a>
|
||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.4.0.9000</span>
|
||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.4.0.9024</span>
|
||||
</span>
|
||||
</div>
|
||||
|
||||
|
@ -239,7 +239,7 @@
|
|||
</div>
|
||||
|
||||
<div class="ref-description">
|
||||
<p>Determine first (weighted) isolates of all microorganisms of every patient per episode and (if needed) per specimen type.</p>
|
||||
<p>Determine first (weighted) isolates of all microorganisms of every patient per episode and (if needed) per specimen type. To determine patient episodes not necessarily based on microorganisms, use <code>is_new_episode()</code> that also supports grouping with the <code>dplyr</code> package, see <em>Examples</em>.</p>
|
||||
</div>
|
||||
|
||||
<pre class="usage"><span class='fu'>first_isolate</span><span class='op'>(</span>
|
||||
|
@ -278,18 +278,25 @@
|
|||
col_mo <span class='op'>=</span> <span class='cn'>NULL</span>,
|
||||
col_keyantibiotics <span class='op'>=</span> <span class='cn'>NULL</span>,
|
||||
<span class='va'>...</span>
|
||||
<span class='op'>)</span>
|
||||
|
||||
<span class='fu'>is_new_episode</span><span class='op'>(</span>
|
||||
<span class='va'>.data</span>,
|
||||
episode_days <span class='op'>=</span> <span class='fl'>365</span>,
|
||||
col_date <span class='op'>=</span> <span class='cn'>NULL</span>,
|
||||
col_patient_id <span class='op'>=</span> <span class='cn'>NULL</span>
|
||||
<span class='op'>)</span></pre>
|
||||
|
||||
<h2 class="hasAnchor" id="arguments"><a class="anchor" href="#arguments"></a>Arguments</h2>
|
||||
<table class="ref-arguments">
|
||||
<colgroup><col class="name" /><col class="desc" /></colgroup>
|
||||
<tr>
|
||||
<th>x</th>
|
||||
<th>x, .data</th>
|
||||
<td><p>a <a href='https://rdrr.io/r/base/data.frame.html'>data.frame</a> containing isolates.</p></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>col_date</th>
|
||||
<td><p>column name of the result date (or date that is was received on the lab), defaults to the first column of with a date class</p></td>
|
||||
<td><p>column name of the result date (or date that is was received on the lab), defaults to the first column with a date class</p></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>col_patient_id</th>
|
||||
|
@ -366,10 +373,17 @@
|
|||
<p>A <code><a href='https://rdrr.io/r/base/logical.html'>logical</a></code> vector</p>
|
||||
<h2 class="hasAnchor" id="details"><a class="anchor" href="#details"></a>Details</h2>
|
||||
|
||||
<p><strong>WHY THIS IS SO IMPORTANT</strong> <br />
|
||||
To conduct an analysis of antimicrobial resistance, you should only include the first isolate of every patient per episode <a href='https:/pubmed.ncbi.nlm.nih.gov/17304462/'>(ref)</a>. If you would not do this, you could easily get an overestimate or underestimate of the resistance of an antibiotic. Imagine that a patient was admitted with an MRSA and that it was found in 5 different blood cultures the following week. The resistance percentage of oxacillin of all <em>S. aureus</em> isolates would be overestimated, because you included this MRSA more than once. It would be <a href='https://en.wikipedia.org/wiki/Selection_bias'>selection bias</a>.</p>
|
||||
<p>All isolates with a microbial ID of <code>NA</code> will be excluded as first isolate.</p>
|
||||
<p>The functions <code>filter_first_isolate()</code> and <code>filter_first_weighted_isolate()</code> are helper functions to quickly filter on first isolates. The function <code>filter_first_isolate()</code> is essentially equal to either:</p><pre> <span class='va'>x</span><span class='op'>[</span><span class='fu'>first_isolate</span><span class='op'>(</span><span class='va'>x</span>, <span class='va'>...</span><span class='op'>)</span>, <span class='op'>]</span>
|
||||
<p>The <code>is_new_episode()</code> function is a wrapper around the <code>first_isolate()</code> function and can be used for data sets without isolates to just determine patient episodes based on any combination of grouping variables (using <code>dplyr</code>), please see <em>Examples</em>. Since it runs <code>first_isolate()</code> for every group, it is quite slow.</p>
|
||||
<p>All isolates with a microbial ID of <code>NA</code> will be excluded as first isolate.</p><h3 class='hasAnchor' id='arguments'><a class='anchor' href='#arguments'></a>Why this is so important</h3>
|
||||
|
||||
|
||||
<p>To conduct an analysis of antimicrobial resistance, you should only include the first isolate of every patient per episode <a href='https:/pubmed.ncbi.nlm.nih.gov/17304462/'>(ref)</a>. If you would not do this, you could easily get an overestimate or underestimate of the resistance of an antibiotic. Imagine that a patient was admitted with an MRSA and that it was found in 5 different blood cultures the following week. The resistance percentage of oxacillin of all <em>S. aureus</em> isolates would be overestimated, because you included this MRSA more than once. It would be <a href='https://en.wikipedia.org/wiki/Selection_bias'>selection bias</a>.</p>
|
||||
|
||||
<h3 class='hasAnchor' id='arguments'><a class='anchor' href='#arguments'></a><code>filter_*()</code> shortcuts</h3>
|
||||
|
||||
|
||||
<p>The functions <code>filter_first_isolate()</code> and <code>filter_first_weighted_isolate()</code> are helper functions to quickly filter on first isolates.</p>
|
||||
<p>The function <code>filter_first_isolate()</code> is essentially equal to either:</p><pre> <span class='va'>x</span><span class='op'>[</span><span class='fu'>first_isolate</span><span class='op'>(</span><span class='va'>x</span>, <span class='va'>...</span><span class='op'>)</span>, <span class='op'>]</span>
|
||||
<span class='va'>x</span> <span class='op'>%>%</span> <span class='fu'><a href='https://dplyr.tidyverse.org/reference/filter.html'>filter</a></span><span class='op'>(</span><span class='fu'>first_isolate</span><span class='op'>(</span><span class='va'>x</span>, <span class='va'>...</span><span class='op'>)</span><span class='op'>)</span>
|
||||
</pre>
|
||||
|
||||
|
@ -381,6 +395,7 @@ To conduct an analysis of antimicrobial resistance, you should only include the
|
|||
<span class='fu'><a href='https://dplyr.tidyverse.org/reference/select.html'>select</a></span><span class='op'>(</span><span class='op'>-</span><span class='va'>only_weighted_firsts</span>, <span class='op'>-</span><span class='va'>keyab</span><span class='op'>)</span>
|
||||
</pre>
|
||||
|
||||
|
||||
<h2 class="hasAnchor" id="key-antibiotics"><a class="anchor" href="#key-antibiotics"></a>Key antibiotics</h2>
|
||||
|
||||
|
||||
|
@ -415,21 +430,22 @@ The <a href='lifecycle.html'>lifecycle</a> of this function is <strong>stable</s
|
|||
<span class='co'># basic filtering on first isolates</span>
|
||||
<span class='va'>example_isolates</span><span class='op'>[</span><span class='fu'>first_isolate</span><span class='op'>(</span><span class='va'>example_isolates</span><span class='op'>)</span>, <span class='op'>]</span>
|
||||
|
||||
<span class='co'># filtering based on isolates ----------------------------------------------</span>
|
||||
<span class='co'># \donttest{</span>
|
||||
<span class='kw'>if</span> <span class='op'>(</span><span class='kw'><a href='https://rdrr.io/r/base/library.html'>require</a></span><span class='op'>(</span><span class='st'><a href='https://dplyr.tidyverse.org'>"dplyr"</a></span><span class='op'>)</span><span class='op'>)</span> <span class='op'>{</span>
|
||||
<span class='co'># Filter on first isolates:</span>
|
||||
<span class='co'># filter on first isolates:</span>
|
||||
<span class='va'>example_isolates</span> <span class='op'>%>%</span>
|
||||
<span class='fu'><a href='https://dplyr.tidyverse.org/reference/mutate.html'>mutate</a></span><span class='op'>(</span>first_isolate <span class='op'>=</span> <span class='fu'>first_isolate</span><span class='op'>(</span><span class='va'>.</span><span class='op'>)</span><span class='op'>)</span> <span class='op'>%>%</span>
|
||||
<span class='fu'><a href='https://dplyr.tidyverse.org/reference/filter.html'>filter</a></span><span class='op'>(</span><span class='va'>first_isolate</span> <span class='op'>==</span> <span class='cn'>TRUE</span><span class='op'>)</span>
|
||||
|
||||
<span class='co'># Short-hand versions:</span>
|
||||
<span class='co'># short-hand versions:</span>
|
||||
<span class='va'>example_isolates</span> <span class='op'>%>%</span>
|
||||
<span class='fu'>filter_first_isolate</span><span class='op'>(</span><span class='op'>)</span>
|
||||
|
||||
<span class='va'>example_isolates</span> <span class='op'>%>%</span>
|
||||
<span class='fu'>filter_first_weighted_isolate</span><span class='op'>(</span><span class='op'>)</span>
|
||||
|
||||
<span class='co'># Now let's see if first isolates matter:</span>
|
||||
<span class='co'># now let's see if first isolates matter:</span>
|
||||
<span class='va'>A</span> <span class='op'><-</span> <span class='va'>example_isolates</span> <span class='op'>%>%</span>
|
||||
<span class='fu'><a href='https://dplyr.tidyverse.org/reference/group_by.html'>group_by</a></span><span class='op'>(</span><span class='va'>hospital_id</span><span class='op'>)</span> <span class='op'>%>%</span>
|
||||
<span class='fu'><a href='https://dplyr.tidyverse.org/reference/summarise.html'>summarise</a></span><span class='op'>(</span>count <span class='op'>=</span> <span class='fu'><a href='count.html'>n_rsi</a></span><span class='op'>(</span><span class='va'>GEN</span><span class='op'>)</span>, <span class='co'># gentamicin availability</span>
|
||||
|
@ -446,6 +462,42 @@ The <a href='lifecycle.html'>lifecycle</a> of this function is <strong>stable</s
|
|||
<span class='co'># Gentamicin resistance in hospital D appears to be 3.7% higher than</span>
|
||||
<span class='co'># when you (erroneously) would have used all isolates for analysis.</span>
|
||||
<span class='op'>}</span>
|
||||
|
||||
<span class='co'># filtering based on any other condition -----------------------------------</span>
|
||||
|
||||
<span class='kw'>if</span> <span class='op'>(</span><span class='kw'><a href='https://rdrr.io/r/base/library.html'>require</a></span><span class='op'>(</span><span class='st'><a href='https://dplyr.tidyverse.org'>"dplyr"</a></span><span class='op'>)</span><span class='op'>)</span> <span class='op'>{</span>
|
||||
<span class='co'># is_new_episode() can be used in dplyr verbs to determine patient</span>
|
||||
<span class='co'># episodes based on any (combination of) grouping variables:</span>
|
||||
<span class='va'>example_isolates</span> <span class='op'>%>%</span>
|
||||
<span class='fu'><a href='https://dplyr.tidyverse.org/reference/mutate.html'>mutate</a></span><span class='op'>(</span>condition <span class='op'>=</span> <span class='fu'><a href='https://rdrr.io/r/base/sample.html'>sample</a></span><span class='op'>(</span>x <span class='op'>=</span> <span class='fu'><a href='https://rdrr.io/r/base/c.html'>c</a></span><span class='op'>(</span><span class='st'>"A"</span>, <span class='st'>"B"</span>, <span class='st'>"C"</span><span class='op'>)</span>,
|
||||
size <span class='op'>=</span> <span class='fl'>2000</span>,
|
||||
replace <span class='op'>=</span> <span class='cn'>TRUE</span><span class='op'>)</span><span class='op'>)</span> <span class='op'>%>%</span>
|
||||
<span class='fu'><a href='https://dplyr.tidyverse.org/reference/group_by.html'>group_by</a></span><span class='op'>(</span><span class='va'>condition</span><span class='op'>)</span> <span class='op'>%>%</span>
|
||||
<span class='fu'><a href='https://dplyr.tidyverse.org/reference/mutate.html'>mutate</a></span><span class='op'>(</span>new_episode <span class='op'>=</span> <span class='fu'>is_new_episode</span><span class='op'>(</span><span class='op'>)</span><span class='op'>)</span>
|
||||
|
||||
<span class='va'>example_isolates</span> <span class='op'>%>%</span>
|
||||
<span class='fu'><a href='https://dplyr.tidyverse.org/reference/group_by.html'>group_by</a></span><span class='op'>(</span><span class='va'>hospital_id</span><span class='op'>)</span> <span class='op'>%>%</span>
|
||||
<span class='fu'><a href='https://dplyr.tidyverse.org/reference/summarise.html'>summarise</a></span><span class='op'>(</span>patients <span class='op'>=</span> <span class='fu'><a href='https://dplyr.tidyverse.org/reference/n_distinct.html'>n_distinct</a></span><span class='op'>(</span><span class='va'>patient_id</span><span class='op'>)</span>,
|
||||
n_episodes_365 <span class='op'>=</span> <span class='fu'><a href='https://rdrr.io/r/base/sum.html'>sum</a></span><span class='op'>(</span><span class='fu'>is_new_episode</span><span class='op'>(</span>episode_days <span class='op'>=</span> <span class='fl'>365</span><span class='op'>)</span><span class='op'>)</span>,
|
||||
n_episodes_60 <span class='op'>=</span> <span class='fu'><a href='https://rdrr.io/r/base/sum.html'>sum</a></span><span class='op'>(</span><span class='fu'>is_new_episode</span><span class='op'>(</span>episode_days <span class='op'>=</span> <span class='fl'>60</span><span class='op'>)</span><span class='op'>)</span>,
|
||||
n_episodes_30 <span class='op'>=</span> <span class='fu'><a href='https://rdrr.io/r/base/sum.html'>sum</a></span><span class='op'>(</span><span class='fu'>is_new_episode</span><span class='op'>(</span>episode_days <span class='op'>=</span> <span class='fl'>30</span><span class='op'>)</span><span class='op'>)</span><span class='op'>)</span>
|
||||
|
||||
|
||||
<span class='co'># grouping on microorganisms leads to the same results as first_isolate():</span>
|
||||
<span class='va'>x</span> <span class='op'><-</span> <span class='va'>example_isolates</span> <span class='op'>%>%</span>
|
||||
<span class='fu'>filter_first_isolate</span><span class='op'>(</span>include_unknown <span class='op'>=</span> <span class='cn'>TRUE</span><span class='op'>)</span>
|
||||
|
||||
<span class='va'>y</span> <span class='op'><-</span> <span class='va'>example_isolates</span> <span class='op'>%>%</span>
|
||||
<span class='fu'><a href='https://dplyr.tidyverse.org/reference/group_by.html'>group_by</a></span><span class='op'>(</span><span class='va'>mo</span><span class='op'>)</span> <span class='op'>%>%</span>
|
||||
<span class='fu'><a href='https://dplyr.tidyverse.org/reference/filter.html'>filter</a></span><span class='op'>(</span><span class='fu'>is_new_episode</span><span class='op'>(</span><span class='op'>)</span><span class='op'>)</span>
|
||||
|
||||
<span class='fu'><a href='https://rdrr.io/r/base/identical.html'>identical</a></span><span class='op'>(</span><span class='va'>x</span><span class='op'>$</span><span class='va'>patient_id</span>, <span class='va'>y</span><span class='op'>$</span><span class='va'>patient_id</span><span class='op'>)</span>
|
||||
|
||||
<span class='co'># but now you can group on isolates and many more:</span>
|
||||
<span class='va'>example_isolates</span> <span class='op'>%>%</span>
|
||||
<span class='fu'><a href='https://dplyr.tidyverse.org/reference/group_by.html'>group_by</a></span><span class='op'>(</span><span class='va'>mo</span>, <span class='va'>hospital_id</span>, <span class='va'>ward_icu</span><span class='op'>)</span> <span class='op'>%>%</span>
|
||||
<span class='fu'><a href='https://dplyr.tidyverse.org/reference/mutate.html'>mutate</a></span><span class='op'>(</span>flag_episode <span class='op'>=</span> <span class='fu'>is_new_episode</span><span class='op'>(</span><span class='op'>)</span><span class='op'>)</span>
|
||||
<span class='op'>}</span>
|
||||
<span class='co'># }</span>
|
||||
</pre>
|
||||
</div>
|
||||
|
|
|
@ -81,7 +81,7 @@
|
|||
</button>
|
||||
<span class="navbar-brand">
|
||||
<a class="navbar-link" href="../index.html">AMR (for R)</a>
|
||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.4.0.9023</span>
|
||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.4.0.9024</span>
|
||||
</span>
|
||||
</div>
|
||||
|
||||
|
@ -478,7 +478,7 @@
|
|||
</tr><tr>
|
||||
|
||||
<td>
|
||||
<p><code><a href="first_isolate.html">first_isolate()</a></code> <code><a href="first_isolate.html">filter_first_isolate()</a></code> <code><a href="first_isolate.html">filter_first_weighted_isolate()</a></code> </p>
|
||||
<p><code><a href="first_isolate.html">first_isolate()</a></code> <code><a href="first_isolate.html">filter_first_isolate()</a></code> <code><a href="first_isolate.html">filter_first_weighted_isolate()</a></code> <code><a href="first_isolate.html">is_new_episode()</a></code> </p>
|
||||
</td>
|
||||
<td><p>Determine first (weighted) isolates</p></td>
|
||||
</tr><tr>
|
||||
|
|
|
@ -81,7 +81,7 @@
|
|||
</button>
|
||||
<span class="navbar-brand">
|
||||
<a class="navbar-link" href="index.html">AMR (for R)</a>
|
||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.4.0.9023</span>
|
||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.4.0.9024</span>
|
||||
</span>
|
||||
</div>
|
||||
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
\alias{first_isolate}
|
||||
\alias{filter_first_isolate}
|
||||
\alias{filter_first_weighted_isolate}
|
||||
\alias{is_new_episode}
|
||||
\title{Determine first (weighted) isolates}
|
||||
\source{
|
||||
Methodology of this function is strictly based on:
|
||||
|
@ -48,11 +49,18 @@ filter_first_weighted_isolate(
|
|||
col_keyantibiotics = NULL,
|
||||
...
|
||||
)
|
||||
|
||||
is_new_episode(
|
||||
.data,
|
||||
episode_days = 365,
|
||||
col_date = NULL,
|
||||
col_patient_id = NULL
|
||||
)
|
||||
}
|
||||
\arguments{
|
||||
\item{x}{a \link{data.frame} containing isolates.}
|
||||
\item{x, .data}{a \link{data.frame} containing isolates.}
|
||||
|
||||
\item{col_date}{column name of the result date (or date that is was received on the lab), defaults to the first column of with a date class}
|
||||
\item{col_date}{column name of the result date (or date that it was received at the lab), defaults to the first column with a date class}
|
||||
|
||||
\item{col_patient_id}{column name of the unique IDs of the patients, defaults to the first column that starts with 'patient' or 'patid' (case insensitive)}
|
||||
|
||||
|
@ -90,15 +98,22 @@ filter_first_weighted_isolate(
|
|||
A \code{\link{logical}} vector
|
||||
}
|
||||
\description{
|
||||
Determine first (weighted) isolates of all microorganisms of every patient per episode and (if needed) per specimen type.
|
||||
Determine first (weighted) isolates of all microorganisms of every patient per episode and (if needed) per specimen type. To determine patient episodes not necessarily based on microorganisms, use \code{\link[=is_new_episode]{is_new_episode()}}, which also supports grouping with the \code{dplyr} package; see \emph{Examples}.
|
||||
}
|
||||
\details{
|
||||
\strong{WHY THIS IS SO IMPORTANT} \cr
|
||||
To conduct an analysis of antimicrobial resistance, you should only include the first isolate of every patient per episode \href{https:/pubmed.ncbi.nlm.nih.gov/17304462/}{(ref)}. If you would not do this, you could easily get an overestimate or underestimate of the resistance of an antibiotic. Imagine that a patient was admitted with an MRSA and that it was found in 5 different blood cultures the following week. The resistance percentage of oxacillin of all \emph{S. aureus} isolates would be overestimated, because you included this MRSA more than once. It would be \href{https://en.wikipedia.org/wiki/Selection_bias}{selection bias}.
|
||||
The \code{\link[=is_new_episode]{is_new_episode()}} function is a wrapper around the \code{\link[=first_isolate]{first_isolate()}} function and can be used for data sets without isolates to just determine patient episodes based on any combination of grouping variables (using \code{dplyr}); please see \emph{Examples}. Since it runs \code{\link[=first_isolate]{first_isolate()}} for every group, it is quite slow.
|
||||
|
||||
All isolates with a microbial ID of \code{NA} will be excluded as first isolate.
|
||||
\subsection{Why this is so important}{
|
||||
|
||||
The functions \code{\link[=filter_first_isolate]{filter_first_isolate()}} and \code{\link[=filter_first_weighted_isolate]{filter_first_weighted_isolate()}} are helper functions to quickly filter on first isolates. The function \code{\link[=filter_first_isolate]{filter_first_isolate()}} is essentially equal to either:\preformatted{ x[first_isolate(x, ...), ]
|
||||
To conduct an analysis of antimicrobial resistance, you should only include the first isolate of every patient per episode \href{https://pubmed.ncbi.nlm.nih.gov/17304462/}{(ref)}. If you would not do this, you could easily get an overestimate or underestimate of the resistance of an antibiotic. Imagine that a patient was admitted with an MRSA and that it was found in 5 different blood cultures the following week. The resistance percentage of oxacillin of all \emph{S. aureus} isolates would be overestimated, because you included this MRSA more than once. It would be \href{https://en.wikipedia.org/wiki/Selection_bias}{selection bias}.
|
||||
}
|
||||
|
||||
\subsection{\verb{filter_*()} shortcuts}{
|
||||
|
||||
The functions \code{\link[=filter_first_isolate]{filter_first_isolate()}} and \code{\link[=filter_first_weighted_isolate]{filter_first_weighted_isolate()}} are helper functions to quickly filter on first isolates.
|
||||
|
||||
The function \code{\link[=filter_first_isolate]{filter_first_isolate()}} is essentially equal to either:\preformatted{ x[first_isolate(x, ...), ]
|
||||
x \%>\% filter(first_isolate(x, ...))
|
||||
}
|
||||
|
||||
|
@ -110,6 +125,7 @@ The function \code{\link[=filter_first_weighted_isolate]{filter_first_weighted_i
|
|||
select(-only_weighted_firsts, -keyab)
|
||||
}
|
||||
}
|
||||
}
|
||||
\section{Key antibiotics}{
|
||||
|
||||
There are two ways to determine whether isolates can be included as first \emph{weighted} isolates which will give generally the same results:
|
||||
|
@ -143,21 +159,22 @@ On our website \url{https://msberends.github.io/AMR/} you can find \href{https:/
|
|||
# basic filtering on first isolates
|
||||
example_isolates[first_isolate(example_isolates), ]
|
||||
|
||||
# filtering based on isolates ----------------------------------------------
|
||||
\donttest{
|
||||
if (require("dplyr")) {
|
||||
# Filter on first isolates:
|
||||
# filter on first isolates:
|
||||
example_isolates \%>\%
|
||||
mutate(first_isolate = first_isolate(.)) \%>\%
|
||||
filter(first_isolate == TRUE)
|
||||
|
||||
# Short-hand versions:
|
||||
# short-hand versions:
|
||||
example_isolates \%>\%
|
||||
filter_first_isolate()
|
||||
|
||||
example_isolates \%>\%
|
||||
filter_first_weighted_isolate()
|
||||
|
||||
# Now let's see if first isolates matter:
|
||||
# now let's see if first isolates matter:
|
||||
A <- example_isolates \%>\%
|
||||
group_by(hospital_id) \%>\%
|
||||
summarise(count = n_rsi(GEN), # gentamicin availability
|
||||
|
@ -174,6 +191,42 @@ if (require("dplyr")) {
|
|||
# Gentamicin resistance in hospital D appears to be 3.7\% higher than
|
||||
# when you (erroneously) would have used all isolates for analysis.
|
||||
}
|
||||
|
||||
# filtering based on any other condition -----------------------------------
|
||||
|
||||
if (require("dplyr")) {
|
||||
# is_new_episode() can be used in dplyr verbs to determine patient
|
||||
# episodes based on any (combination of) grouping variables:
|
||||
example_isolates \%>\%
|
||||
mutate(condition = sample(x = c("A", "B", "C"),
|
||||
size = 2000,
|
||||
replace = TRUE)) \%>\%
|
||||
group_by(condition) \%>\%
|
||||
mutate(new_episode = is_new_episode())
|
||||
|
||||
example_isolates \%>\%
|
||||
group_by(hospital_id) \%>\%
|
||||
summarise(patients = n_distinct(patient_id),
|
||||
n_episodes_365 = sum(is_new_episode(episode_days = 365)),
|
||||
n_episodes_60 = sum(is_new_episode(episode_days = 60)),
|
||||
n_episodes_30 = sum(is_new_episode(episode_days = 30)))
|
||||
|
||||
|
||||
# grouping on microorganisms leads to the same results as first_isolate():
|
||||
x <- example_isolates \%>\%
|
||||
filter_first_isolate(include_unknown = TRUE)
|
||||
|
||||
y <- example_isolates \%>\%
|
||||
group_by(mo) \%>\%
|
||||
filter(is_new_episode())
|
||||
|
||||
identical(x$patient_id, y$patient_id)
|
||||
|
||||
# but now you can group on isolates and many more:
|
||||
example_isolates \%>\%
|
||||
group_by(mo, hospital_id, ward_icu) \%>\%
|
||||
mutate(flag_episode = is_new_episode())
|
||||
}
|
||||
}
|
||||
}
|
||||
\seealso{
|
||||
|
|
|
@ -54,40 +54,3 @@ test_that("looking up ab columns works", {
|
|||
expect_warning(get_column_abx(dplyr::rename(example_isolates, thisone = AMX), amox = "thisone", tmp = "thisone", verbose = TRUE))
|
||||
expect_warning(get_column_abx(dplyr::rename(example_isolates, thisone = AMX), amox = "thisone", tmp = "thisone", verbose = FALSE))
|
||||
})
|
||||
|
||||
test_that("imports work", {
|
||||
skip_on_cran()
|
||||
|
||||
import_functions <- c(
|
||||
"anti_join" = "dplyr",
|
||||
"cur_column" = "dplyr",
|
||||
"freq.default" = "cleaner",
|
||||
"full_join" = "dplyr",
|
||||
"has_internet" = "curl",
|
||||
"html_attr" = "rvest",
|
||||
"html_children" = "rvest",
|
||||
"html_node" = "rvest",
|
||||
"html_nodes" = "rvest",
|
||||
"html_table" = "rvest",
|
||||
"html_text" = "rvest",
|
||||
"inline_hist" = "skimr",
|
||||
"inner_join" = "dplyr",
|
||||
"insertText" = "rstudioapi",
|
||||
"left_join" = "dplyr",
|
||||
"new_pillar_shaft_simple" = "pillar",
|
||||
"peek_mask" = "dplyr",
|
||||
"peek_vars" = "tidyselect",
|
||||
"read_excel" = "readxl",
|
||||
"read_html" = "xml2",
|
||||
"right_join" = "dplyr",
|
||||
"semi_join" = "dplyr",
|
||||
"sfl" = "skimr",
|
||||
"showQuestion" = "rstudioapi")
|
||||
|
||||
for (i in seq_len(length(import_functions))) {
|
||||
fn <- names(import_functions)[i]
|
||||
pkg <- unname(import_functions[i])
|
||||
expect(!is.null(import_fn(name = fn, pkg = pkg, error_on_fail = FALSE)),
|
||||
failure_message = paste0("Function ", pkg, "::", fn, "() does not exist"))
|
||||
}
|
||||
})
|
||||
|
|
|
@ -200,4 +200,15 @@ test_that("first isolates work", {
|
|||
expect_identical(filter_first_weighted_isolate(example_isolates),
|
||||
subset(example_isolates, first_isolate(ex)))
|
||||
|
||||
# notice that all mo's are distinct, so all are TRUE
|
||||
expect_true(all(example_isolates %pm>%
|
||||
pm_distinct(mo, .keep_all = TRUE) %pm>%
|
||||
first_isolate() == TRUE))
|
||||
|
||||
library(dplyr)
|
||||
# is_new_episode
|
||||
old <- example_isolates %>% mutate(out = first_isolate(., include_unknown = TRUE))
|
||||
new <- example_isolates %>% group_by(mo) %>% mutate(out = is_new_episode())
|
||||
expect_identical(which(old$out), which(new$out))
|
||||
|
||||
})
|
||||
|
|
|
@ -0,0 +1,69 @@
|
|||
# ==================================================================== #
|
||||
# TITLE #
|
||||
# Antimicrobial Resistance (AMR) Analysis for R #
|
||||
# #
|
||||
# SOURCE #
|
||||
# https://github.com/msberends/AMR #
|
||||
# #
|
||||
# LICENCE #
|
||||
# (c) 2018-2020 Berends MS, Luz CF et al. #
|
||||
# Developed at the University of Groningen, the Netherlands, in #
|
||||
# collaboration with non-profit organisations Certe Medical #
|
||||
# Diagnostics & Advice, and University Medical Center Groningen. #
|
||||
# #
|
||||
# This R package is free software; you can freely use and distribute #
|
||||
# it for both personal and commercial purposes under the terms of the #
|
||||
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
|
||||
# the Free Software Foundation. #
|
||||
# We created this package for both routine data analysis and academic #
|
||||
# research and it was publicly released in the hope that it will be #
|
||||
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
|
||||
# #
|
||||
# Visit our website for the full manual and a complete tutorial about #
|
||||
# how to conduct AMR analysis: https://msberends.github.io/AMR/ #
|
||||
# ==================================================================== #
|
||||
|
||||
context("zzz.R")
|
||||
|
||||
test_that("imports work", {
|
||||
skip_on_cran()
|
||||
|
||||
import_functions <- c(
|
||||
"anti_join" = "dplyr",
|
||||
"cur_column" = "dplyr",
|
||||
"cur_data" = "dplyr",
|
||||
"document_position" = "rstudioapi",
|
||||
"document_range" = "rstudioapi",
|
||||
"freq.default" = "cleaner",
|
||||
"full_join" = "dplyr",
|
||||
"getSourceEditorContext" = "rstudioapi",
|
||||
"has_internet" = "curl",
|
||||
"html_attr" = "rvest",
|
||||
"html_children" = "rvest",
|
||||
"html_node" = "rvest",
|
||||
"html_nodes" = "rvest",
|
||||
"html_table" = "rvest",
|
||||
"html_text" = "rvest",
|
||||
"inline_hist" = "skimr",
|
||||
"inner_join" = "dplyr",
|
||||
"insertText" = "rstudioapi",
|
||||
"insertText" = "rstudioapi",
|
||||
"insertText" = "rstudioapi",
|
||||
"left_join" = "dplyr",
|
||||
"new_pillar_shaft_simple" = "pillar",
|
||||
"peek_mask" = "dplyr",
|
||||
"peek_vars" = "tidyselect",
|
||||
"read_excel" = "readxl",
|
||||
"read_html" = "xml2",
|
||||
"right_join" = "dplyr",
|
||||
"semi_join" = "dplyr",
|
||||
"sfl" = "skimr",
|
||||
"showQuestion" = "rstudioapi")
|
||||
|
||||
for (i in seq_len(length(import_functions))) {
|
||||
fn <- names(import_functions)[i]
|
||||
pkg <- unname(import_functions[i])
|
||||
expect(!is.null(import_fn(name = fn, pkg = pkg, error_on_fail = FALSE)),
|
||||
failure_message = paste0("Function ", pkg, "::", fn, "() does not exist"))
|
||||
}
|
||||
})
|
Loading…
Reference in New Issue