update to septic_patients, speed improvements

This commit is contained in:
dr. M.S. (Matthijs) Berends 2018-07-25 14:17:04 +02:00
parent 03a3cb397b
commit d9e204031d
26 changed files with 273 additions and 233 deletions

View File

@ -1,6 +1,6 @@
Package: AMR
Version: 0.2.0.9015
Date: 2018-07-23
Version: 0.2.0.9016
Date: 2018-07-25
Title: Antimicrobial Resistance Analysis
Authors@R: c(
person(

View File

@ -154,7 +154,6 @@ importFrom(utils,View)
importFrom(utils,browseVignettes)
importFrom(utils,installed.packages)
importFrom(utils,object.size)
importFrom(utils,packageDescription)
importFrom(utils,read.delim)
importFrom(utils,write.table)
importFrom(xml2,read_html)

View File

@ -2,8 +2,9 @@
#### New
* **BREAKING**: `rsi_df` was removed in favour of new functions `resistance` and `susceptibility`. Now, all functions used to calculate resistance (`resistance` and `susceptibility`) use **hybrid evaluation**. This means calculations are not done in R directly but rather in C++ using the `Rcpp` package, making them 25 to 30 times faster. The function `rsi` still works, but is deprecated.
* **BREAKING**: the methodology for determining first weighted isolates was changed. The antibiotics that are compared between isolates (call *key antibiotics*) to include more first isolates (afterwards called first *weighted* isolates) are now as follows:
* Gram-positive: amoxicillin, amoxicillin/clavlanic acid, cefuroxime, piperacillin/tazobactam, ciprofloxacin, trimethoprim/sulfamethoxazole, vancomycin, teicoplanin, tetracycline, erythromycin, oxacillin, rifampicin
* Gram-negative: amoxicillin, amoxicillin/clavlanic acid, cefuroxime, piperacillin/tazobactam, ciprofloxacin, trimethoprim/sulfamethoxazole, gentamicin, tobramycin, colistin, cefotaxime, ceftazidime, meropenem
* Universal: amoxicillin, amoxicillin/clavlanic acid, cefuroxime, piperacillin/tazobactam, ciprofloxacin, trimethoprim/sulfamethoxazole
* Gram-positive: vancomycin, teicoplanin, tetracycline, erythromycin, oxacillin, rifampicin
* Gram-negative: gentamicin, tobramycin, colistin, cefotaxime, ceftazidime, meropenem
* Functions `as.bactid` and `is.bactid` to transform/look up microbial ID's; this replaces the function `guess_bactid` but it will remain available for backwards compatibility
* For convience, new descriptive statistical functions `kurtosis` and `skewness` that are lacking in base R - they are generic functions and have support for vectors, data.frames and matrices
* Function `g.test` to perform the Χ<sup>2</sup> distributed [*G*-test](https://en.wikipedia.org/wiki/G-test), which use is the same as `chisq.test`
@ -23,6 +24,7 @@
* Possibility to globally set the default for the amount of items to print, with `options(max.print.freq = n)` where *n* is your preset value
#### Changed
* Updates version of the `setic_patients` dataset to better reflect the reality
* Pretty printing for tibbles removed as it is not really the scope of this package
* Improved speed of key antibiotics comparison for determining first isolates
* Column names for the `key_antibiotics` function are now generic: 6 for broadspectrum ABs, 6 for Gram-positive specific and 6 for Gram-negative specific ABs
@ -36,6 +38,7 @@
* `as.rsi("<=0.002; S")` will return `S`
* `as.mic("<=0.002; S")` will return `<=0.002`
* Now possible to coerce MIC values with a space between operator and value, i.e. `as.mic("<= 0.002")` now works
* Classes `rsi` and `mic` do not add the attribute `package.version` anymore
* Added `"groups"` option for `atc_property(..., property)`. It will return a vector of the ATC hierarchy as defined by the [WHO](https://www.whocc.no/atc/structure_and_principles/). The new function `atc_groups` is a convenient wrapper around this.
* Build-in host check for `atc_property` as it requires the host set by `url` to be responsive
* Improved `first_isolate` algorithm to exclude isolates where bacteria ID or genus is unavailable

View File

@ -221,7 +221,6 @@ as.bactid <- function(x) {
}
class(x) <- "bactid"
attr(x, 'package') <- 'AMR'
attr(x, 'package.version') <- packageDescription('AMR')$Version
x
}

View File

@ -21,18 +21,18 @@
#' This transforms a vector to a new class \code{rsi}, which is an ordered factor with levels \code{S < I < R}. Invalid antimicrobial interpretations will be translated as \code{NA} with a warning.
#' @rdname as.rsi
#' @param x vector
#' @return Ordered factor with new class \code{rsi} and new attributes \code{package} and \code{package.version}
#' @return Ordered factor with new class \code{rsi} and new attribute \code{package}
#' @keywords rsi
#' @export
#' @importFrom dplyr %>%
#' @importFrom utils packageDescription
#' @seealso \code{\link{as.mic}}
#' @examples
#' rsi_data <- as.rsi(c(rep("S", 474), rep("I", 36), rep("R", 370)))
#' rsi_data <- as.rsi(c(rep("S", 474), rep("I", 36), rep("R", 370), "A", "B", "C"))
#' is.rsi(rsi_data)
#'
#' # this can also coerce combined MIC/RSI values:
#' as.rsi("<= 0.002; R") # will return R
#' as.rsi("<= 0.002; S") # will return S
#'
#' plot(rsi_data) # for percentages
#' barplot(rsi_data) # for frequencies
@ -76,7 +76,6 @@ as.rsi <- function(x) {
x <- x %>% factor(levels = c("S", "I", "R"), ordered = TRUE)
class(x) <- c('rsi', 'ordered', 'factor')
attr(x, 'package') <- 'AMR'
attr(x, 'package.version') <- packageDescription('AMR')$Version
x
}
}
@ -196,21 +195,21 @@ barplot.rsi <- function(height, ...) {
#' Class 'mic'
#'
#' This transforms a vector to a new class\code{mic}, which is an ordered factor with valid MIC values as levels. Invalid MIC values will be translated as \code{NA} with a warning.
#' This transforms a vector to a new class \code{mic}, which is an ordered factor with valid MIC values as levels. Invalid MIC values will be translated as \code{NA} with a warning.
#' @rdname as.mic
#' @param x vector
#' @param na.rm a logical indicating whether missing values should be removed
#' @return Ordered factor with new class \code{mic} and new attributes \code{package} and \code{package.version}
#' @return Ordered factor with new class \code{mic} and new attribute \code{package}
#' @keywords mic
#' @export
#' @importFrom dplyr %>%
#' @importFrom utils packageDescription
#' @seealso \code{\link{as.rsi}}
#' @examples
#' mic_data <- as.mic(c(">=32", "1.0", "1", "1.00", 8, "<=0.128", "8", "16", "16"))
#' is.mic(mic_data)
#'
#' # this can also coerce combined MIC/RSI values:
#' as.mic("<=0.002; R") # will return <=0.002
#' as.mic("<=0.002; S") # will return <=0.002
#'
#' plot(mic_data)
#' barplot(mic_data)
@ -319,7 +318,6 @@ as.mic <- function(x, na.rm = FALSE) {
ordered = TRUE)
class(x) <- c('mic', 'ordered', 'factor')
attr(x, 'package') <- 'AMR'
attr(x, 'package.version') <- packageDescription('AMR')$Version
x
}
}

View File

@ -252,7 +252,7 @@
#' \item{\code{type_nl}}{Type of microorganism in Dutch, like \code{"Bacterie"} and \code{"Schimmel/gist"}}
#' \item{\code{gramstain_nl}}{Gram of microorganism in Dutch, like \code{"Negatieve staven"}}
#' }
#' @source MOLIS (LIS of Certe) - \url{https://www.certe.nl}
# source MOLIS (LIS of Certe) - \url{https://www.certe.nl}
#' @seealso \code{\link{guess_bactid}} \code{\link{antibiotics}} \code{\link{microorganisms.umcg}}
"microorganisms"
@ -264,14 +264,14 @@
#' \item{\code{mocode}}{Code of microorganism according to UMCG MMB}
#' \item{\code{bactid}}{Code of microorganism in \code{\link{microorganisms}}}
#' }
#' @source MOLIS (LIS of Certe) - \url{https://www.certe.nl} \cr \cr GLIMS (LIS of UMCG) - \url{https://www.umcg.nl}
# source MOLIS (LIS of Certe) - \url{https://www.certe.nl} \cr \cr GLIMS (LIS of UMCG) - \url{https://www.umcg.nl}
#' @seealso \code{\link{guess_bactid}} \code{\link{microorganisms}}
"microorganisms.umcg"
#' Dataset with 2000 blood culture isolates of septic patients
#'
#' An anonymised dataset containing 2000 microbial blood culture isolates with their antibiogram of septic patients found in 5 different hospitals in the Netherlands, between 2001 and 2017. This data.frame can be used to practice AMR analysis. For examples, press F1.
#' @format A data.frame with 2000 observations and 47 variables:
#' @format A data.frame with 2000 observations and 49 variables:
#' \describe{
#' \item{\code{date}}{date of receipt at the laboratory}
#' \item{\code{hospital_id}}{ID of the hospital}
@ -282,9 +282,9 @@
#' \item{\code{sex}}{sex of the patient}
#' \item{\code{patient_id}}{ID of the patient, first 10 characters of an SHA hash containing irretrievable information}
#' \item{\code{bactid}}{ID of microorganism, see \code{\link{microorganisms}}}
#' \item{\code{peni:mupi}}{38 different antibiotics with class \code{rsi} (see \code{\link{as.rsi}}); these column names occur in \code{\link{antibiotics}} and can be translated with \code{\link{abname}}}
#' \item{\code{peni:rifa}}{40 different antibiotics with class \code{rsi} (see \code{\link{as.rsi}}); these column names occur in \code{\link{antibiotics}} data set and can be translated with \code{\link{abname}}}
#' }
#' @source MOLIS (LIS of Certe) - \url{https://www.certe.nl}
# source MOLIS (LIS of Certe) - \url{https://www.certe.nl}
#' @examples
#' # ----------- #
#' # PREPARATION #
@ -304,15 +304,15 @@
#' # ANALYSIS #
#' # -------- #
#'
#' # 1. Get the amoxicillin resistance percentages
#' # of E. coli, divided by hospital:
#' # 1. Get the amoxicillin resistance percentages (p)
#' # and numbers (n) of E. coli, divided by hospital:
#'
#' my_data %>%
#' filter(bactid == "ESCCOL",
#' filter(bactid == guess_bactid("E. coli"),
#' first_isolates == TRUE) %>%
#' group_by(hospital_id) %>%
#' summarise(n = n(),
#' amoxicillin_resistance = rsi(amox))
#' summarise(n = n_rsi(amox),
#' p = resistance(amox))
#'
#'
#' # 2. Get the amoxicillin/clavulanic acid resistance
@ -322,6 +322,6 @@
#' filter(bactid == guess_bactid("E. coli"),
#' first_isolates == TRUE) %>%
#' group_by(year = format(date, "%Y")) %>%
#' summarise(n = n(),
#' amoxclav_resistance = rsi(amcl, minimum = 20))
#' summarise(n = n_rsi(amcl),
#' p = resistance(amcl, minimum = 20))
"septic_patients"

View File

@ -41,7 +41,7 @@
#' @details \strong{WHY THIS IS SO IMPORTANT} \cr
#' To conduct an analysis of antimicrobial resistance, you should only include the first isolate of every patient per episode \href{https://www.ncbi.nlm.nih.gov/pubmed/17304462}{[1]}. If you would not do this, you could easily get an overestimate or underestimate of the resistance of an antibiotic. Imagine that a patient was admitted with an MRSA and that it was found in 5 different blood cultures the following week. The resistance percentage of oxacillin of all \emph{S. aureus} isolates would be overestimated, because you included this MRSA more than once. It would be \href{https://en.wikipedia.org/wiki/Selection_bias}{selection bias}.
#' @section Key antibiotics:
#' There are two ways to determine whether isolates can be included as first \emph{weighted} isolates: \cr
#' There are two ways to determine whether isolates can be included as first \emph{weighted} isolates which will give generally the same results: \cr
#'
#' \strong{1. Using} \code{type = "keyantibiotics"} \strong{and parameter} \code{ignore_I} \cr
#' Any difference from S to R (or vice versa) will (re)select an isolate as a first weighted isolate. With \code{ignore_I = FALSE}, also differences from I to S|R (or vice versa) will lead to this. This is a reliable method and 30-35 times faster than method 2. \cr
@ -65,6 +65,24 @@
#' col_patient_id = "patient_id",
#' col_bactid = "bactid")
#'
#' # Now let's see if first isolates matter:
#' A <- my_patients %>%
#' group_by(hospital_id) %>%
#' summarise(count = n_rsi(gent), # gentamicin
#' resistance = resistance(gent))
#'
#' B <- my_patients %>%
#' filter(first_isolate == TRUE) %>%
#' group_by(hospital_id) %>%
#' summarise(count = n_rsi(gent), # gentamicin
#' resistance = resistance(gent))
#'
#' # Have a look at A and B. B is more reliable because every isolate is
#' # counted once. Gentamicin resitance in hospital D seems to be 5%
#' # higher than originally thought.
#'
#' ## OTHER EXAMPLES:
#'
#' \dontrun{
#'
#' # set key antibiotics to a new variable
@ -153,7 +171,7 @@ first_isolate <- function(tbl,
if (!is.na(col_bactid)) {
if (!tbl %>% pull(col_bactid) %>% is.bactid()) {
tbl[, col_bactid] <- tbl %>% pull(col_bactid) %>% as.bactid()
warning("Improve integrity of the `", col_bactid, "` column by transforming it with 'as.bactid'.")
}
tbl <- tbl %>% left_join_microorganisms(by = col_bactid)
col_genus <- "genus"
@ -179,7 +197,6 @@ first_isolate <- function(tbl,
filter_specimen <- ''
}
weighted.notice <- ''
# filter on specimen group and keyantibiotics when they are filled in
if (!is.na(filter_specimen) & filter_specimen != '') {
check_columns_existance(col_specimen, tbl)
@ -317,7 +334,9 @@ first_isolate <- function(tbl,
(date_lab - lag(date_lab)) + lag(days_diff),
0))
weighted.notice <- ''
if (col_keyantibiotics != '') {
weighted.notice <- 'weighted '
if (info == TRUE) {
if (type == 'keyantibiotics') {
cat('[Criteria] Inclusion based on key antibiotics, ')

View File

@ -415,7 +415,6 @@ frequency_tbl <- function(x,
class(df) <- c('frequency_tbl', class(df))
attr(df, 'package') <- 'AMR'
attr(df, 'package.version') <- packageDescription('AMR')$Version
if (markdown == TRUE) {
tbl_format <- 'markdown'
@ -567,7 +566,6 @@ print.frequency_tbl <- function(x, nmax = getOption("max.print.freq", default =
#' @export
as.data.frame.frequency_tbl <- function(x, ...) {
attr(x, 'package') <- NULL
attr(x, 'package.version') <- NULL
attr(x, 'opt') <- NULL
as.data.frame.data.frame(x, ...)
}
@ -578,7 +576,6 @@ as.data.frame.frequency_tbl <- function(x, ...) {
#' @importFrom dplyr as_tibble
as_tibble.frequency_tbl <- function(x, validate = TRUE, ..., rownames = NA) {
attr(x, 'package') <- NULL
attr(x, 'package.version') <- NULL
attr(x, 'opt') <- NULL
as_tibble(x = as.data.frame(x), validate = validate, ..., rownames = rownames)
}

View File

@ -26,8 +26,8 @@
#' df2 <- left_join_microorganisms(df, "bacteria_id")
#' colnames(df2)
inner_join_microorganisms <- function(x, by = 'bactid', suffix = c("2", ""), ...) {
if (!any(class(x) %in% c("bactid", "data.frame", "matrix"))) {
x <- data.frame(bactid = as.bactid(x), stringsAsFactors = FALSE)
if (!any(class(x) %in% c("data.frame", "matrix"))) {
x <- data.frame(bactid = as.character(x), stringsAsFactors = FALSE)
}
# no name set to `by` parameter
if (is.null(names(by))) {
@ -48,8 +48,8 @@ inner_join_microorganisms <- function(x, by = 'bactid', suffix = c("2", ""), ...
#' @rdname join
#' @export
left_join_microorganisms <- function(x, by = 'bactid', suffix = c("2", ""), ...) {
if (!any(class(x) %in% c("bactid", "data.frame", "matrix"))) {
x <- data.frame(bactid = as.bactid(x), stringsAsFactors = FALSE)
if (!any(class(x) %in% c("data.frame", "matrix"))) {
x <- data.frame(bactid = as.character(x), stringsAsFactors = FALSE)
}
# no name set to `by` parameter
if (is.null(names(by))) {
@ -70,8 +70,8 @@ left_join_microorganisms <- function(x, by = 'bactid', suffix = c("2", ""), ...)
#' @rdname join
#' @export
right_join_microorganisms <- function(x, by = 'bactid', suffix = c("2", ""), ...) {
if (!any(class(x) %in% c("bactid", "data.frame", "matrix"))) {
x <- data.frame(bactid = as.bactid(x), stringsAsFactors = FALSE)
if (!any(class(x) %in% c("data.frame", "matrix"))) {
x <- data.frame(bactid = as.character(x), stringsAsFactors = FALSE)
}
# no name set to `by` parameter
if (is.null(names(by))) {
@ -92,8 +92,8 @@ right_join_microorganisms <- function(x, by = 'bactid', suffix = c("2", ""), ...
#' @rdname join
#' @export
full_join_microorganisms <- function(x, by = 'bactid', suffix = c("2", ""), ...) {
if (!any(class(x) %in% c("bactid", "data.frame", "matrix"))) {
x <- data.frame(bactid = as.bactid(x), stringsAsFactors = FALSE)
if (!any(class(x) %in% c("data.frame", "matrix"))) {
x <- data.frame(bactid = as.character(x), stringsAsFactors = FALSE)
}
# no name set to `by` parameter
if (is.null(names(by))) {
@ -114,8 +114,8 @@ full_join_microorganisms <- function(x, by = 'bactid', suffix = c("2", ""), ...)
#' @rdname join
#' @export
semi_join_microorganisms <- function(x, by = 'bactid', ...) {
if (!any(class(x) %in% c("bactid", "data.frame", "matrix"))) {
x <- data.frame(bactid = as.bactid(x), stringsAsFactors = FALSE)
if (!any(class(x) %in% c("data.frame", "matrix"))) {
x <- data.frame(bactid = as.character(x), stringsAsFactors = FALSE)
}
# no name set to `by` parameter
if (is.null(names(by))) {
@ -132,8 +132,8 @@ semi_join_microorganisms <- function(x, by = 'bactid', ...) {
#' @rdname join
#' @export
anti_join_microorganisms <- function(x, by = 'bactid', ...) {
if (!any(class(x) %in% c("bactid", "data.frame", "matrix"))) {
x <- data.frame(bactid = as.bactid(x), stringsAsFactors = FALSE)
if (!any(class(x) %in% c("data.frame", "matrix"))) {
x <- data.frame(bactid = as.character(x), stringsAsFactors = FALSE)
}
# no name set to `by` parameter
if (is.null(names(by))) {

View File

@ -42,19 +42,26 @@
#' @importFrom dplyr %>% mutate if_else
#' @seealso \code{\link{first_isolate}}
#' @examples
#' \dontrun{
#' # septic_patients is a dataset available in the AMR package
#' ?septic_patients
#' my_patients <- septic_patients
#'
#' library(dplyr)
#' # set key antibiotics to a new variable
#' tbl$keyab <- key_antibiotics(tbl)
#' my_patients <- my_patients %>%
#' mutate(keyab = key_antibiotics(.)) %>%
#' mutate(
#' # now calculate first isolates
#' first_regular = first_isolate(., "date", "patient_id", "bactid"),
#' # and first WEIGHTED isolates
#' first_weighted = first_isolate(., "date", "patient_id", "bactid",
#' col_keyantibiotics = "keyab")
#' )
#'
#' # add regular first isolates
#' tbl$first_isolate <-
#' first_isolate(tbl)
#' # Check the difference, in this data set it results in 7% more isolates:
#' sum(my_patients$first_regular, na.rm = TRUE)
#' sum(my_patients$first_weighted, na.rm = TRUE)
#'
#' # add first WEIGHTED isolates using key antibiotics
#' tbl$first_isolate_weighed <-
#' first_isolate(tbl,
#' col_keyantibiotics = 'keyab')
#' }
#'
#' # output of the `key_antibiotics` function could be like this:
#' strainA <- "SSSRR.S.R..S"
@ -169,87 +176,80 @@ key_antibiotics_equal <- function(x,
points_threshold = 2,
info = FALSE) {
# x is active row, y is lag
type <- type[1]
if (length(x) != length(y)) {
stop('Length of `x` and `y` must be equal.')
}
# only show progress bar on points or when at least 5000 isolates
info_needed <- info == TRUE & (type == "points" | length(x) > 5000)
result <- logical(length(x))
if (type == "keyantibiotics") {
if (ignore_I == TRUE) {
# evaluation using regular expression will treat '.' as any character
# so I is actually ignored then
x <- gsub('I', '.', x, ignore.case = TRUE)
y <- gsub('I', '.', y, ignore.case = TRUE)
if (info_needed == TRUE) {
p <- dplyr::progress_estimated(length(x))
}
for (i in 1:length(x)) {
if (info_needed == TRUE) {
p$tick()$print()
}
for (i in 1:length(x)) {
if (is.na(x[i])) {
x[i] <- ''
}
if (is.na(y[i])) {
y[i] <- ''
}
if (x[i] == y[i]) {
result[i] <- TRUE
} else if (nchar(x[i]) != nchar(y[i])) {
result[i] <- FALSE
} else {
x_split <- strsplit(x[i], "")[[1]]
y_split <- strsplit(y[i], "")[[1]]
y_split[x_split == "."] <- "."
x_split[y_split == "."] <- "."
x_checkfor <- paste(x_split, collapse = "")
y_checkfor <- paste(y_split, collapse = "")
result[i] <- nchar(x[i]) == nchar(y[i]) &
(x_checkfor %like% y_checkfor |
y_checkfor %like% x_checkfor)
}
return(result)
} else {
if (type == 'keyantibiotics') {
if (type != 'points') {
stop('`', type, '` is not a valid value for type, must be "points" or "keyantibiotics". See ?first_isolate.')
}
if (ignore_I == TRUE) {
x_split[x_split == "I"] <- "."
y_split[y_split == "I"] <- "."
}
if (info == TRUE) {
p <- dplyr::progress_estimated(length(x))
}
y_split[x_split == "."] <- "."
x_split[y_split == "."] <- "."
for (i in 1:length(x)) {
if (info == TRUE) {
p$tick()$print()
}
if (is.na(x[i])) {
x[i] <- ''
}
if (is.na(y[i])) {
y[i] <- ''
}
if (nchar(x[i]) != nchar(y[i])) {
result[i] <- FALSE
} else if (x[i] == '' & y[i] == '') {
result[i] <- TRUE
} else {
x2 <- strsplit(x[i], "")[[1]]
y2 <- strsplit(y[i], "")[[1]]
result[i] <- all(x_split == y_split)
} else if (type == 'points') {
# count points for every single character:
# - no change is 0 points
# - I <-> S|R is 0.5 point
# - S|R <-> R|S is 1 point
# use the levels of as.rsi (S = 1, I = 2, R = 3)
suppressWarnings(x2 <- x2 %>% as.rsi() %>% as.double())
suppressWarnings(y2 <- y2 %>% as.rsi() %>% as.double())
suppressWarnings(x_split <- x_split %>% as.rsi() %>% as.double())
suppressWarnings(y_split <- y_split %>% as.rsi() %>% as.double())
points <- (x2 - y2) %>% abs() %>% sum(na.rm = TRUE)
result[i] <- ((points / 2) >= points_threshold)
points <- (x_split - y_split) %>% abs() %>% sum(na.rm = TRUE) / 2
result[i] <- points >= points_threshold
} else {
stop('`', type, '` is not a valid value for type, must be "points" or "keyantibiotics". See ?first_isolate.')
}
}
if (info == TRUE) {
cat('\n')
}
result
}
if (info_needed == TRUE) {
cat('\n')
}
result
}

108
README.md
View File

@ -26,9 +26,13 @@ This R package contains functions to make **microbiological, epidemiological dat
With `AMR` you can:
* Calculate the resistance (and even co-resistance) of microbial isolates with the `resistance` and `susceptibility` functions, that can also be used with the `dplyr` package (e.g. in conjunction with `summarise`)
* Predict antimicrobial resistance for the nextcoming years with the `rsi_predict` function
* Predict antimicrobial resistance for the nextcoming years with the `resistance_predict` function
* Apply [EUCAST rules to isolates](http://www.eucast.org/expert_rules_and_intrinsic_resistance/) with the `EUCAST_rules` function
* Identify first isolates of every patient [using guidelines from the CLSI](https://clsi.org/standards/products/microbiology/documents/m39/) (Clinical and Laboratory Standards Institute) with the `first_isolate` function
* You can also identify first *weighted* isolates of every patient, an adjusted version of the CLSI guideline. This takes into account key antibiotics of every strain and compares them. The following 12 antibiotics will be used as key antibiotics at default:
* Universal: amoxicillin, amoxicillin/clavlanic acid, cefuroxime, piperacillin/tazobactam, ciprofloxacin, trimethoprim/sulfamethoxazole
* Specific for Gram-positives: vancomycin, teicoplanin, tetracycline, erythromycin, oxacillin, rifampicin
* Specific for Gram-negatives: gentamicin, tobramycin, colistin, cefotaxime, ceftazidime, meropenem
* Get antimicrobial ATC properties from the WHO Collaborating Centre for Drug Statistics Methodology ([WHOCC](https://www.whocc.no/atc_ddd_methodology/who_collaborating_centre/)), to be able to:
* Translate antibiotic codes (like *AMOX*), official names (like *amoxicillin*) and even trade names (like *Amoxil* or *Trimox*) to an [ATC code](https://www.whocc.no/atc_ddd_index/?code=J01CA04&showdescription=no) (like *J01CA04*) and vice versa with the `abname` function
* Get the latest antibiotic properties like hierarchic groups and [defined daily dose](https://en.wikipedia.org/wiki/Defined_daily_dose) (DDD) with units and administration form from the WHOCC website with the `atc_property` function
@ -219,35 +223,33 @@ mydata %>% freq(myvariable)
Factors sort on item by default:
```r
septic_patients %>% freq(hospital_id)
# Frequency table of `hospital_id`
# Frequency table of `hospital_id`
# Class: factor
# Length: 2000 (of which NA: 0 = 0.0%)
# Unique: 5
# Unique: 4
#
# Item Count Percent Cum. Count Cum. Percent (Factor Level)
# --- ----- ------ -------- ----------- ------------- ---------------
# 1 A 233 11.7% 233 11.7% 1
# 2 B 583 29.1% 816 40.8% 2
# 3 C 221 11.1% 1037 51.8% 3
# 4 D 650 32.5% 1687 84.4% 4
# 5 E 313 15.7% 2000 100.0% 5
# 1 A 319 16.0% 319 16.0% 1
# 2 B 661 33.1% 980 49.0% 2
# 3 C 256 12.8% 1236 61.8% 3
# 4 D 764 38.2% 2000 100.0% 4
```
This can be changed with the `sort.count` parameter:
```r
septic_patients %>% freq(hospital_id, sort.count = TRUE)
# Frequency table of `hospital_id`
# Frequency table of `hospital_id`
# Class: factor
# Length: 2000 (of which NA: 0 = 0.0%)
# Unique: 5
# Unique: 4
#
# Item Count Percent Cum. Count Cum. Percent (Factor Level)
# --- ----- ------ -------- ----------- ------------- ---------------
# 1 D 650 32.5% 650 32.5% 4
# 2 B 583 29.1% 1233 61.7% 2
# 3 E 313 15.7% 1546 77.3% 5
# 4 A 233 11.7% 1779 88.9% 1
# 5 C 221 11.1% 2000 100.0% 3
# 1 D 764 38.2% 764 38.2% 4
# 2 B 661 33.1% 1425 71.2% 2
# 3 A 319 16.0% 1744 87.2% 1
# 4 C 256 12.8% 2000 100.0% 3
```
All other types, like numbers, characters and dates, sort on count by default:
@ -256,56 +258,56 @@ septic_patients %>% freq(date)
# Frequency table of `date`
# Class: Date
# Length: 2000 (of which NA: 0 = 0.0%)
# Unique: 1662
# Unique: 1151
#
# Oldest: 2 January 2001
# Newest: 18 October 2017 (+6133)
# Median: 6 December 2009 (~53%)
# Oldest: 2 January 2002
# Newest: 28 December 2017 (+5839)
# Median: 7 Augustus 2009 (~48%)
#
# Item Count Percent Cum. Count Cum. Percent
# --- ----------- ------ -------- ----------- -------------
# 1 2008-12-24 5 0.2% 5 0.2%
# 2 2010-12-10 4 0.2% 9 0.4%
# 3 2011-03-03 4 0.2% 13 0.6%
# 4 2013-06-24 4 0.2% 17 0.8%
# 5 2017-09-01 4 0.2% 21 1.1%
# 6 2002-09-02 3 0.2% 24 1.2%
# 7 2003-10-14 3 0.2% 27 1.4%
# 8 2004-06-25 3 0.2% 30 1.5%
# 9 2004-06-27 3 0.2% 33 1.7%
# 10 2004-10-29 3 0.2% 36 1.8%
# 11 2005-09-27 3 0.2% 39 2.0%
# 12 2006-08-01 3 0.2% 42 2.1%
# 13 2006-10-10 3 0.2% 45 2.2%
# 14 2007-11-16 3 0.2% 48 2.4%
# 15 2008-03-09 3 0.2% 51 2.5%
# [ reached getOption("max.print.freq") -- omitted 1647 entries, n = 1949 (97.5%) ]
# 1 2016-05-21 10 0.5% 10 0.5%
# 2 2004-11-15 8 0.4% 18 0.9%
# 3 2013-07-29 8 0.4% 26 1.3%
# 4 2017-06-12 8 0.4% 34 1.7%
# 5 2015-11-19 7 0.4% 41 2.1%
# 6 2005-12-22 6 0.3% 47 2.4%
# 7 2015-10-12 6 0.3% 53 2.6%
# 8 2002-05-16 5 0.2% 58 2.9%
# 9 2004-02-02 5 0.2% 63 3.1%
# 10 2004-02-18 5 0.2% 68 3.4%
# 11 2005-08-16 5 0.2% 73 3.6%
# 12 2005-09-01 5 0.2% 78 3.9%
# 13 2006-06-29 5 0.2% 83 4.2%
# 14 2007-08-10 5 0.2% 88 4.4%
# 15 2008-08-29 5 0.2% 93 4.7%
# [ reached getOption("max.print.freq") -- omitted 1136 entries, n = 1907 (95.3%) ]
```
For numeric values, some extra descriptive statistics will be calculated:
```r
freq(runif(n = 10, min = 1, max = 5))
# Frequency table
# Frequency table
# Class: numeric
# Length: 10 (of which NA: 0 = 0.0%)
# Unique: 10
#
# Mean: 2.9
# Std. dev.: 1.3 (CV: 0.43, MAD: 1.5)
# Five-Num: 1.5 | 1.7 | 2.6 | 4.0 | 4.7 (IQR: 2.3, CQV: 0.4)
#
# Mean: 3.4
# Std. dev.: 1.3 (CV: 0.38, MAD: 1.3)
# Five-Num: 1.6 | 2.0 | 3.9 | 4.7 | 4.8 (IQR: 2.7, CQV: 0.4)
# Outliers: 0
#
# Item Count Percent Cum. Count Cum. Percent
# --------- ------ -------- ----------- -------------
# 1.132033 1 10.0% 1 10.0%
# 2.226903 1 10.0% 2 20.0%
# 2.280779 1 10.0% 3 30.0%
# 2.640898 1 10.0% 4 40.0%
# 2.913462 1 10.0% 5 50.0%
# 3.364201 1 10.0% 6 60.0%
# 3.771975 1 10.0% 7 70.0%
# 3.802861 1 10.0% 8 80.0%
# 3.803547 1 10.0% 9 90.0%
# 3.985691 1 10.0% 10 100.0%
# Item Count Percent Cum. Count Cum. Percent
# --- --------- ------ -------- ----------- -------------
# 1 1.568997 1 10.0% 1 10.0%
# 2 1.993575 1 10.0% 2 20.0%
# 3 2.022348 1 10.0% 3 30.0%
# 4 2.236038 1 10.0% 4 40.0%
# 5 3.579828 1 10.0% 5 50.0%
# 6 4.178081 1 10.0% 6 60.0%
# 7 4.394818 1 10.0% 7 70.0%
# 8 4.689871 1 10.0% 8 80.0%
# 9 4.698626 1 10.0% 9 90.0%
# 10 4.751488 1 10.0% 10 100.0%
#
# Warning message:
# All observations are unique.
@ -320,7 +322,7 @@ Datasets to work with antibiotics and bacteria properties.
```r
# Dataset with 2000 random blood culture isolates from anonymised
# septic patients between 2001 and 2017 in 5 Dutch hospitals
septic_patients # A tibble: 2,000 x 47
septic_patients # A tibble: 2,000 x 49
# Dataset with ATC antibiotics codes, official names, trade names
# and DDD's (oral and parenteral)

Binary file not shown.

View File

@ -15,19 +15,22 @@ is.mic(x)
\item{na.rm}{a logical indicating whether missing values should be removed}
}
\value{
Ordered factor with new class \code{mic} and new attributes \code{package} and \code{package.version}
Ordered factor with new class \code{mic} and new attribute \code{package}
}
\description{
This transforms a vector to a new class\code{mic}, which is an ordered factor with valid MIC values as levels. Invalid MIC values will be translated as \code{NA} with a warning.
This transforms a vector to a new class \code{mic}, which is an ordered factor with valid MIC values as levels. Invalid MIC values will be translated as \code{NA} with a warning.
}
\examples{
mic_data <- as.mic(c(">=32", "1.0", "1", "1.00", 8, "<=0.128", "8", "16", "16"))
is.mic(mic_data)
# this can also coerce combined MIC/RSI values:
as.mic("<=0.002; R") # will return <=0.002
as.mic("<=0.002; S") # will return <=0.002
plot(mic_data)
barplot(mic_data)
}
\seealso{
\code{\link{as.rsi}}
}
\keyword{mic}

View File

@ -13,7 +13,7 @@ is.rsi(x)
\item{x}{vector}
}
\value{
Ordered factor with new class \code{rsi} and new attributes \code{package} and \code{package.version}
Ordered factor with new class \code{rsi} and new attribute \code{package}
}
\description{
This transforms a vector to a new class \code{rsi}, which is an ordered factor with levels \code{S < I < R}. Invalid antimicrobial interpretations will be translated as \code{NA} with a warning.
@ -24,9 +24,12 @@ rsi_data <- as.rsi(c(rep("S", 474), rep("I", 36), rep("R", 370), "A", "B", "C"))
is.rsi(rsi_data)
# this can also coerce combined MIC/RSI values:
as.rsi("<= 0.002; R") # will return R
as.rsi("<= 0.002; S") # will return S
plot(rsi_data) # for percentages
barplot(rsi_data) # for frequencies
}
\seealso{
\code{\link{as.mic}}
}
\keyword{rsi}

View File

@ -65,7 +65,7 @@ Determine first (weighted) isolates of all microorganisms of every patient per e
}
\section{Key antibiotics}{
There are two ways to determine whether isolates can be included as first \emph{weighted} isolates: \cr
There are two ways to determine whether isolates can be included as first \emph{weighted} isolates which will give generally the same results: \cr
\strong{1. Using} \code{type = "keyantibiotics"} \strong{and parameter} \code{ignore_I} \cr
Any difference from S to R (or vice versa) will (re)select an isolate as a first weighted isolate. With \code{ignore_I = FALSE}, also differences from I to S|R (or vice versa) will lead to this. This is a reliable method and 30-35 times faster than method 2. \cr
@ -85,6 +85,24 @@ my_patients$first_isolate <- my_patients \%>\%
col_patient_id = "patient_id",
col_bactid = "bactid")
# Now let's see if first isolates matter:
A <- my_patients \%>\%
group_by(hospital_id) \%>\%
summarise(count = n_rsi(gent), # gentamicin
resistance = resistance(gent))
B <- my_patients \%>\%
filter(first_isolate == TRUE) \%>\%
group_by(hospital_id) \%>\%
summarise(count = n_rsi(gent), # gentamicin
resistance = resistance(gent))
# Have a look at A and B. B is more reliable because every isolate is
# counted once. Gentamicin resitance in hospital D seems to be 5\%
# higher than originally thought.
## OTHER EXAMPLES:
\dontrun{
# set key antibiotics to a new variable

View File

@ -56,7 +56,7 @@ The function \code{key_antibiotics} returns a character vector with 12 antibioti
}
\section{Key antibiotics}{
There are two ways to determine whether isolates can be included as first \emph{weighted} isolates: \cr
There are two ways to determine whether isolates can be included as first \emph{weighted} isolates which will give generally the same results: \cr
\strong{1. Using} \code{type = "keyantibiotics"} \strong{and parameter} \code{ignore_I} \cr
Any difference from S to R (or vice versa) will (re)select an isolate as a first weighted isolate. With \code{ignore_I = FALSE}, also differences from I to S|R (or vice versa) will lead to this. This is a reliable method and 30-35 times faster than method 2. \cr
@ -66,19 +66,26 @@ The function \code{key_antibiotics} returns a character vector with 12 antibioti
}
\examples{
\dontrun{
# septic_patients is a dataset available in the AMR package
?septic_patients
my_patients <- septic_patients
library(dplyr)
# set key antibiotics to a new variable
tbl$keyab <- key_antibiotics(tbl)
my_patients <- my_patients \%>\%
mutate(keyab = key_antibiotics(.)) \%>\%
mutate(
# now calculate first isolates
first_regular = first_isolate(., "date", "patient_id", "bactid"),
# and first WEIGHTED isolates
first_weighted = first_isolate(., "date", "patient_id", "bactid",
col_keyantibiotics = "keyab")
)
# add regular first isolates
tbl$first_isolate <-
first_isolate(tbl)
# Check the difference, in this data set it results in 7\% more isolates:
sum(my_patients$first_regular, na.rm = TRUE)
sum(my_patients$first_weighted, na.rm = TRUE)
# add first WEIGHTED isolates using key antibiotics
tbl$first_isolate_weighed <-
first_isolate(tbl,
col_keyantibiotics = 'keyab')
}
# output of the `key_antibiotics` function could be like this:
strainA <- "SSSRR.S.R..S"

View File

@ -19,9 +19,6 @@
\item{\code{type_nl}}{Type of microorganism in Dutch, like \code{"Bacterie"} and \code{"Schimmel/gist"}}
\item{\code{gramstain_nl}}{Gram of microorganism in Dutch, like \code{"Negatieve staven"}}
}}
\source{
MOLIS (LIS of Certe) - \url{https://www.certe.nl}
}
\usage{
microorganisms
}

View File

@ -9,9 +9,6 @@
\item{\code{mocode}}{Code of microorganism according to UMCG MMB}
\item{\code{bactid}}{Code of microorganism in \code{\link{microorganisms}}}
}}
\source{
MOLIS (LIS of Certe) - \url{https://www.certe.nl} \cr \cr GLIMS (LIS of UMCG) - \url{https://www.umcg.nl}
}
\usage{
microorganisms.umcg
}

View File

@ -4,7 +4,7 @@
\name{septic_patients}
\alias{septic_patients}
\title{Dataset with 2000 blood culture isolates of septic patients}
\format{A data.frame with 2000 observations and 47 variables:
\format{A data.frame with 2000 observations and 49 variables:
\describe{
\item{\code{date}}{date of receipt at the laboratory}
\item{\code{hospital_id}}{ID of the hospital}
@ -15,11 +15,8 @@
\item{\code{sex}}{sex of the patient}
\item{\code{patient_id}}{ID of the patient, first 10 characters of an SHA hash containing irretrievable information}
\item{\code{bactid}}{ID of microorganism, see \code{\link{microorganisms}}}
\item{\code{peni:mupi}}{38 different antibiotics with class \code{rsi} (see \code{\link{as.rsi}}); these column names occur in \code{\link{antibiotics}} and can be translated with \code{\link{abname}}}
\item{\code{peni:rifa}}{40 different antibiotics with class \code{rsi} (see \code{\link{as.rsi}}); these column names occur in \code{\link{antibiotics}} data set and can be translated with \code{\link{abname}}}
}}
\source{
MOLIS (LIS of Certe) - \url{https://www.certe.nl}
}
\usage{
septic_patients
}
@ -45,15 +42,15 @@ my_data <- my_data \%>\%
# ANALYSIS #
# -------- #
# 1. Get the amoxicillin resistance percentages
# of E. coli, divided by hospital:
# 1. Get the amoxicillin resistance percentages (p)
# and numbers (n) of E. coli, divided by hospital:
my_data \%>\%
filter(bactid == "ESCCOL",
filter(bactid == guess_bactid("E. coli"),
first_isolates == TRUE) \%>\%
group_by(hospital_id) \%>\%
summarise(n = n(),
amoxicillin_resistance = rsi(amox))
summarise(n = n_rsi(amox),
p = resistance(amox))
# 2. Get the amoxicillin/clavulanic acid resistance
@ -63,7 +60,7 @@ my_data \%>\%
filter(bactid == guess_bactid("E. coli"),
first_isolates == TRUE) \%>\%
group_by(year = format(date, "\%Y")) \%>\%
summarise(n = n(),
amoxclav_resistance = rsi(amcl, minimum = 20))
summarise(n = n_rsi(amcl),
p = resistance(amcl, minimum = 20))
}
\keyword{datasets}

View File

@ -37,8 +37,8 @@ test_that("as.bactid works", {
select(genus) %>%
as.bactid() %>%
as.character(),
c("STC", "STC", "NEI", "STA", "STA",
"NEI", "ENT", "ENT", "ESC", "KLE"))
c("ESC", "ESC", "STA", "STA", "STA",
"STA", "STA", "STA", "STA", "STA"))
# select with two columns
expect_identical(

View File

@ -8,9 +8,9 @@ test_that("first isolates work", {
col_date = "date",
col_patient_id = "patient_id",
col_bactid = "bactid",
info = FALSE),
info = TRUE),
na.rm = TRUE),
1959)
1326)
# septic_patients contains 1962 out of 2000 first *weighted* isolates
expect_equal(
@ -24,8 +24,8 @@ test_that("first isolates work", {
type = "keyantibiotics",
info = TRUE),
na.rm = TRUE)),
1962)
# and 1997 when using points
1421)
# and 1961 when using points
expect_equal(
suppressWarnings(
sum(
@ -37,7 +37,7 @@ test_that("first isolates work", {
type = "points",
info = TRUE),
na.rm = TRUE)),
1997)
1425)
# septic_patients contains 1732 out of 2000 first non-ICU isolates
expect_equal(
@ -50,7 +50,7 @@ test_that("first isolates work", {
info = TRUE,
icu_exclude = TRUE),
na.rm = TRUE),
1732)
1171)
# set 1500 random observations to be of specimen type 'Urine'
random_rows <- sample(x = 1:2000, size = 1500, replace = FALSE)
@ -59,7 +59,7 @@ test_that("first isolates work", {
first_isolate(tbl = mutate(septic_patients,
specimen = if_else(row_number() %in% random_rows,
"Urine",
"Unknown")),
"Other")),
col_date = "date",
col_patient_id = "patient_id",
col_bactid = "bactid",
@ -74,7 +74,7 @@ test_that("first isolates work", {
first_isolate(tbl = mutate(septic_patients,
specimen = if_else(row_number() %in% random_rows,
"Urine",
"Unknown")),
"Other")),
col_date = "date",
col_patient_id = "patient_id",
col_bactid = "bactid",

View File

@ -4,8 +4,8 @@ test_that("frequency table works", {
expect_equal(nrow(freq(c(1, 1, 2, 2, 3, 3, 4, 4, 5, 5))), 5)
expect_equal(nrow(frequency_tbl(c(1, 1, 2, 2, 3, 3, 4, 4, 5, 5))), 5)
# date column of septic_patients should contain 1662 unique dates
expect_equal(nrow(freq(septic_patients$date)), 1662)
# date column of septic_patients should contain 1151 unique dates
expect_equal(nrow(freq(septic_patients$date)), 1151)
expect_equal(nrow(freq(septic_patients$date)),
length(unique(septic_patients$date)))

View File

@ -2,12 +2,12 @@ context("kurtosis.R")
test_that("kurtosis works", {
expect_equal(kurtosis(septic_patients$age),
6.423118,
3.57781,
tolerance = 0.00001)
expect_equal(unname(kurtosis(data.frame(septic_patients$age))),
6.423118,
3.57781,
tolerance = 0.00001)
expect_equal(kurtosis(matrix(septic_patients$age)),
6.423118,
3.57781,
tolerance = 0.00001)
})

View File

@ -13,7 +13,8 @@ test_that("MDRO works", {
expect_equal(outcome %>% class(), c('ordered', 'factor'))
# septic_patients should have these finding using Dutch guidelines
expect_equal(outcome %>% freq() %>% pull(count), c(3, 21))
expect_equal(outcome %>% freq() %>% pull(count),
c(2, 14)) # 2 unconfirmed, 14 positive
expect_equal(BRMO(septic_patients, info = FALSE), MDRO(septic_patients, "nl", info = FALSE))

View File

@ -1,19 +1,19 @@
context("resistance.R")
test_that("resistance works", {
# amox resistance in `septic_patients` should be around 57.56%
expect_equal(resistance(septic_patients$amox, include_I = TRUE), 0.5756, tolerance = 0.0001)
expect_equal(susceptibility(septic_patients$amox, include_I = FALSE), 1 - 0.5756, tolerance = 0.0001)
# amox resistance in `septic_patients` should be around 66.33%
expect_equal(resistance(septic_patients$amox, include_I = TRUE), 0.6633, tolerance = 0.0001)
expect_equal(susceptibility(septic_patients$amox, include_I = FALSE), 1 - 0.6633, tolerance = 0.0001)
# pita+genta susceptibility around 98.09%
expect_equal(susceptibility(septic_patients$pita,
septic_patients$gent),
0.9809,
0.9535,
tolerance = 0.0001)
expect_equal(suppressWarnings(rsi(septic_patients$pita,
septic_patients$gent,
interpretation = "S")),
0.9809,
0.9535,
tolerance = 0.0001)
# count of cases
@ -26,7 +26,7 @@ test_that("resistance works", {
combination_p = susceptibility(cipr, gent, as_percent = TRUE),
combination_n = n_rsi(cipr, gent)) %>%
pull(combination_n),
c(138, 474, 170, 464, 183))
c(202, 482, 201, 499))
expect_warning(resistance(as.character(septic_patients$amcl)))
expect_warning(susceptibility(as.character(septic_patients$amcl)))
@ -36,26 +36,26 @@ test_that("resistance works", {
})
test_that("old rsi works", {
# amox resistance in `septic_patients` should be around 53.86%
expect_equal(rsi(septic_patients$amox), 0.5756, tolerance = 0.0001)
expect_equal(rsi(septic_patients$amox), 0.5756, tolerance = 0.0001)
# amox resistance in `septic_patients` should be around 66.33%
expect_equal(rsi(septic_patients$amox), 0.6633, tolerance = 0.0001)
expect_equal(rsi(septic_patients$amox, interpretation = "S"), 1 - 0.6633, tolerance = 0.0001)
expect_equal(rsi_df(septic_patients,
ab = "amox",
info = TRUE),
0.5756,
0.6633,
tolerance = 0.0001)
# pita+genta susceptibility around 98.09%
expect_equal(rsi(septic_patients$pita,
septic_patients$gent,
interpretation = "S",
info = TRUE),
0.9809,
0.9535,
tolerance = 0.0001)
expect_equal(rsi_df(septic_patients,
ab = c("pita", "gent"),
interpretation = "S",
info = TRUE),
0.9809,
0.9535,
tolerance = 0.0001)
# more than 2 not allowed
expect_error(rsi_df(septic_patients,
@ -76,7 +76,7 @@ test_that("old rsi works", {
as_percent = TRUE, warning = FALSE),
combination_n = n_rsi(cipr, gent)) %>%
pull(combination_n),
c(138, 474, 170, 464, 183))
c(202, 482, 201, 499))
})
test_that("prediction of rsi works", {
@ -86,8 +86,8 @@ test_that("prediction of rsi works", {
col_date = "date",
info = TRUE) %>%
pull("probR")
# amox resistance will decrease using dataset `septic_patients`
expect_true(amox_R[2] > amox_R[20])
# amox resistance will increase according to data set `septic_patients`
expect_true(amox_R[3] < amox_R[20])
expect_output(rsi_predict(tbl = filter(septic_patients, bactid == "ESCCOL"),
model = "binomial",

View File

@ -2,12 +2,12 @@ context("skewness.R")
test_that("skewness works", {
expect_equal(skewness(septic_patients$age),
-1.637164,
-0.90624,
tolerance = 0.00001)
expect_equal(unname(skewness(data.frame(septic_patients$age))),
-1.637164,
-0.90624,
tolerance = 0.00001)
expect_equal(skewness(matrix(septic_patients$age)),
-1.637164,
-0.90624,
tolerance = 0.00001)
})