1
0
mirror of https://github.com/msberends/AMR.git synced 2025-07-09 20:21:55 +02:00

update to septic_patients, speed improvements

This commit is contained in:
2018-07-25 14:17:04 +02:00
parent 03a3cb397b
commit d9e204031d
26 changed files with 273 additions and 233 deletions

View File

@ -221,7 +221,6 @@ as.bactid <- function(x) {
}
class(x) <- "bactid"
attr(x, 'package') <- 'AMR'
attr(x, 'package.version') <- packageDescription('AMR')$Version
x
}

View File

@ -21,18 +21,18 @@
#' This transforms a vector to a new class \code{rsi}, which is an ordered factor with levels \code{S < I < R}. Invalid antimicrobial interpretations will be translated as \code{NA} with a warning.
#' @rdname as.rsi
#' @param x vector
#' @return Ordered factor with new class \code{rsi} and new attributes \code{package} and \code{package.version}
#' @return Ordered factor with new class \code{rsi} and new attribute \code{package}
#' @keywords rsi
#' @export
#' @importFrom dplyr %>%
#' @importFrom utils packageDescription
#' @seealso \code{\link{as.mic}}
#' @examples
#' rsi_data <- as.rsi(c(rep("S", 474), rep("I", 36), rep("R", 370)))
#' rsi_data <- as.rsi(c(rep("S", 474), rep("I", 36), rep("R", 370), "A", "B", "C"))
#' is.rsi(rsi_data)
#'
#' # this can also coerce combined MIC/RSI values:
#' as.rsi("<= 0.002; R") # will return R
#' as.rsi("<= 0.002; S") # will return S
#'
#' plot(rsi_data) # for percentages
#' barplot(rsi_data) # for frequencies
@ -76,7 +76,6 @@ as.rsi <- function(x) {
x <- x %>% factor(levels = c("S", "I", "R"), ordered = TRUE)
class(x) <- c('rsi', 'ordered', 'factor')
attr(x, 'package') <- 'AMR'
attr(x, 'package.version') <- packageDescription('AMR')$Version
x
}
}
@ -196,21 +195,21 @@ barplot.rsi <- function(height, ...) {
#' Class 'mic'
#'
#' This transforms a vector to a new class\code{mic}, which is an ordered factor with valid MIC values as levels. Invalid MIC values will be translated as \code{NA} with a warning.
#' This transforms a vector to a new class \code{mic}, which is an ordered factor with valid MIC values as levels. Invalid MIC values will be translated as \code{NA} with a warning.
#' @rdname as.mic
#' @param x vector
#' @param na.rm a logical indicating whether missing values should be removed
#' @return Ordered factor with new class \code{mic} and new attributes \code{package} and \code{package.version}
#' @return Ordered factor with new class \code{mic} and new attribute \code{package}
#' @keywords mic
#' @export
#' @importFrom dplyr %>%
#' @importFrom utils packageDescription
#' @seealso \code{\link{as.rsi}}
#' @examples
#' mic_data <- as.mic(c(">=32", "1.0", "1", "1.00", 8, "<=0.128", "8", "16", "16"))
#' is.mic(mic_data)
#'
#' # this can also coerce combined MIC/RSI values:
#' as.mic("<=0.002; R") # will return <=0.002
#' as.mic("<=0.002; S") # will return <=0.002
#'
#' plot(mic_data)
#' barplot(mic_data)
@ -319,7 +318,6 @@ as.mic <- function(x, na.rm = FALSE) {
ordered = TRUE)
class(x) <- c('mic', 'ordered', 'factor')
attr(x, 'package') <- 'AMR'
attr(x, 'package.version') <- packageDescription('AMR')$Version
x
}
}

View File

@ -252,7 +252,7 @@
#' \item{\code{type_nl}}{Type of microorganism in Dutch, like \code{"Bacterie"} and \code{"Schimmel/gist"}}
#' \item{\code{gramstain_nl}}{Gram of microorganism in Dutch, like \code{"Negatieve staven"}}
#' }
#' @source MOLIS (LIS of Certe) - \url{https://www.certe.nl}
# source MOLIS (LIS of Certe) - \url{https://www.certe.nl}
#' @seealso \code{\link{guess_bactid}} \code{\link{antibiotics}} \code{\link{microorganisms.umcg}}
"microorganisms"
@ -264,14 +264,14 @@
#' \item{\code{mocode}}{Code of microorganism according to UMCG MMB}
#' \item{\code{bactid}}{Code of microorganism in \code{\link{microorganisms}}}
#' }
#' @source MOLIS (LIS of Certe) - \url{https://www.certe.nl} \cr \cr GLIMS (LIS of UMCG) - \url{https://www.umcg.nl}
# source MOLIS (LIS of Certe) - \url{https://www.certe.nl} \cr \cr GLIMS (LIS of UMCG) - \url{https://www.umcg.nl}
#' @seealso \code{\link{guess_bactid}} \code{\link{microorganisms}}
"microorganisms.umcg"
#' Dataset with 2000 blood culture isolates of septic patients
#'
#' An anonymised dataset containing 2000 microbial blood culture isolates with their antibiogram of septic patients found in 5 different hospitals in the Netherlands, between 2001 and 2017. This data.frame can be used to practice AMR analysis. For examples, press F1.
#' @format A data.frame with 2000 observations and 47 variables:
#' @format A data.frame with 2000 observations and 49 variables:
#' \describe{
#' \item{\code{date}}{date of receipt at the laboratory}
#' \item{\code{hospital_id}}{ID of the hospital}
@ -282,9 +282,9 @@
#' \item{\code{sex}}{sex of the patient}
#' \item{\code{patient_id}}{ID of the patient, first 10 characters of an SHA hash containing irretrievable information}
#' \item{\code{bactid}}{ID of microorganism, see \code{\link{microorganisms}}}
#' \item{\code{peni:mupi}}{38 different antibiotics with class \code{rsi} (see \code{\link{as.rsi}}); these column names occur in \code{\link{antibiotics}} and can be translated with \code{\link{abname}}}
#' \item{\code{peni:rifa}}{40 different antibiotics with class \code{rsi} (see \code{\link{as.rsi}}); these column names occur in \code{\link{antibiotics}} data set and can be translated with \code{\link{abname}}}
#' }
#' @source MOLIS (LIS of Certe) - \url{https://www.certe.nl}
# source MOLIS (LIS of Certe) - \url{https://www.certe.nl}
#' @examples
#' # ----------- #
#' # PREPARATION #
@ -304,15 +304,15 @@
#' # ANALYSIS #
#' # -------- #
#'
#' # 1. Get the amoxicillin resistance percentages
#' # of E. coli, divided by hospital:
#' # 1. Get the amoxicillin resistance percentages (p)
#' # and numbers (n) of E. coli, divided by hospital:
#'
#' my_data %>%
#' filter(bactid == "ESCCOL",
#' filter(bactid == guess_bactid("E. coli"),
#' first_isolates == TRUE) %>%
#' group_by(hospital_id) %>%
#' summarise(n = n(),
#' amoxicillin_resistance = rsi(amox))
#' summarise(n = n_rsi(amox),
#' p = resistance(amox))
#'
#'
#' # 2. Get the amoxicillin/clavulanic acid resistance
@ -322,6 +322,6 @@
#' filter(bactid == guess_bactid("E. coli"),
#' first_isolates == TRUE) %>%
#' group_by(year = format(date, "%Y")) %>%
#' summarise(n = n(),
#' amoxclav_resistance = rsi(amcl, minimum = 20))
#' summarise(n = n_rsi(amcl),
#' p = resistance(amcl, minimum = 20))
"septic_patients"

View File

@ -41,7 +41,7 @@
#' @details \strong{WHY THIS IS SO IMPORTANT} \cr
#' To conduct an analysis of antimicrobial resistance, you should only include the first isolate of every patient per episode \href{https://www.ncbi.nlm.nih.gov/pubmed/17304462}{[1]}. If you would not do this, you could easily get an overestimate or underestimate of the resistance of an antibiotic. Imagine that a patient was admitted with an MRSA and that it was found in 5 different blood cultures the following week. The resistance percentage of oxacillin of all \emph{S. aureus} isolates would be overestimated, because you included this MRSA more than once. It would be \href{https://en.wikipedia.org/wiki/Selection_bias}{selection bias}.
#' @section Key antibiotics:
#' There are two ways to determine whether isolates can be included as first \emph{weighted} isolates: \cr
#' There are two ways to determine whether isolates can be included as first \emph{weighted} isolates which will give generally the same results: \cr
#'
#' \strong{1. Using} \code{type = "keyantibiotics"} \strong{and parameter} \code{ignore_I} \cr
#' Any difference from S to R (or vice versa) will (re)select an isolate as a first weighted isolate. With \code{ignore_I = FALSE}, also differences from I to S|R (or vice versa) will lead to this. This is a reliable method and 30-35 times faster than method 2. \cr
@ -65,6 +65,24 @@
#' col_patient_id = "patient_id",
#' col_bactid = "bactid")
#'
#' # Now let's see if first isolates matter:
#' A <- my_patients %>%
#' group_by(hospital_id) %>%
#' summarise(count = n_rsi(gent), # gentamicin
#' resistance = resistance(gent))
#'
#' B <- my_patients %>%
#' filter(first_isolate == TRUE) %>%
#' group_by(hospital_id) %>%
#' summarise(count = n_rsi(gent), # gentamicin
#' resistance = resistance(gent))
#'
#' # Have a look at A and B. B is more reliable because every isolate is
#' # counted once. Gentamicin resitance in hospital D seems to be 5%
#' # higher than originally thought.
#'
#' ## OTHER EXAMPLES:
#'
#' \dontrun{
#'
#' # set key antibiotics to a new variable
@ -153,7 +171,7 @@ first_isolate <- function(tbl,
if (!is.na(col_bactid)) {
if (!tbl %>% pull(col_bactid) %>% is.bactid()) {
tbl[, col_bactid] <- tbl %>% pull(col_bactid) %>% as.bactid()
warning("Improve integrity of the `", col_bactid, "` column by transforming it with 'as.bactid'.")
}
tbl <- tbl %>% left_join_microorganisms(by = col_bactid)
col_genus <- "genus"
@ -179,7 +197,6 @@ first_isolate <- function(tbl,
filter_specimen <- ''
}
weighted.notice <- ''
# filter on specimen group and keyantibiotics when they are filled in
if (!is.na(filter_specimen) & filter_specimen != '') {
check_columns_existance(col_specimen, tbl)
@ -317,7 +334,9 @@ first_isolate <- function(tbl,
(date_lab - lag(date_lab)) + lag(days_diff),
0))
weighted.notice <- ''
if (col_keyantibiotics != '') {
weighted.notice <- 'weighted '
if (info == TRUE) {
if (type == 'keyantibiotics') {
cat('[Criteria] Inclusion based on key antibiotics, ')

View File

@ -415,7 +415,6 @@ frequency_tbl <- function(x,
class(df) <- c('frequency_tbl', class(df))
attr(df, 'package') <- 'AMR'
attr(df, 'package.version') <- packageDescription('AMR')$Version
if (markdown == TRUE) {
tbl_format <- 'markdown'
@ -567,7 +566,6 @@ print.frequency_tbl <- function(x, nmax = getOption("max.print.freq", default =
#' @export
as.data.frame.frequency_tbl <- function(x, ...) {
attr(x, 'package') <- NULL
attr(x, 'package.version') <- NULL
attr(x, 'opt') <- NULL
as.data.frame.data.frame(x, ...)
}
@ -578,7 +576,6 @@ as.data.frame.frequency_tbl <- function(x, ...) {
#' @importFrom dplyr as_tibble
as_tibble.frequency_tbl <- function(x, validate = TRUE, ..., rownames = NA) {
attr(x, 'package') <- NULL
attr(x, 'package.version') <- NULL
attr(x, 'opt') <- NULL
as_tibble(x = as.data.frame(x), validate = validate, ..., rownames = rownames)
}

View File

@ -26,8 +26,8 @@
#' df2 <- left_join_microorganisms(df, "bacteria_id")
#' colnames(df2)
inner_join_microorganisms <- function(x, by = 'bactid', suffix = c("2", ""), ...) {
if (!any(class(x) %in% c("bactid", "data.frame", "matrix"))) {
x <- data.frame(bactid = as.bactid(x), stringsAsFactors = FALSE)
if (!any(class(x) %in% c("data.frame", "matrix"))) {
x <- data.frame(bactid = as.character(x), stringsAsFactors = FALSE)
}
# no name set to `by` parameter
if (is.null(names(by))) {
@ -48,8 +48,8 @@ inner_join_microorganisms <- function(x, by = 'bactid', suffix = c("2", ""), ...
#' @rdname join
#' @export
left_join_microorganisms <- function(x, by = 'bactid', suffix = c("2", ""), ...) {
if (!any(class(x) %in% c("bactid", "data.frame", "matrix"))) {
x <- data.frame(bactid = as.bactid(x), stringsAsFactors = FALSE)
if (!any(class(x) %in% c("data.frame", "matrix"))) {
x <- data.frame(bactid = as.character(x), stringsAsFactors = FALSE)
}
# no name set to `by` parameter
if (is.null(names(by))) {
@ -70,8 +70,8 @@ left_join_microorganisms <- function(x, by = 'bactid', suffix = c("2", ""), ...)
#' @rdname join
#' @export
right_join_microorganisms <- function(x, by = 'bactid', suffix = c("2", ""), ...) {
if (!any(class(x) %in% c("bactid", "data.frame", "matrix"))) {
x <- data.frame(bactid = as.bactid(x), stringsAsFactors = FALSE)
if (!any(class(x) %in% c("data.frame", "matrix"))) {
x <- data.frame(bactid = as.character(x), stringsAsFactors = FALSE)
}
# no name set to `by` parameter
if (is.null(names(by))) {
@ -92,8 +92,8 @@ right_join_microorganisms <- function(x, by = 'bactid', suffix = c("2", ""), ...
#' @rdname join
#' @export
full_join_microorganisms <- function(x, by = 'bactid', suffix = c("2", ""), ...) {
if (!any(class(x) %in% c("bactid", "data.frame", "matrix"))) {
x <- data.frame(bactid = as.bactid(x), stringsAsFactors = FALSE)
if (!any(class(x) %in% c("data.frame", "matrix"))) {
x <- data.frame(bactid = as.character(x), stringsAsFactors = FALSE)
}
# no name set to `by` parameter
if (is.null(names(by))) {
@ -114,8 +114,8 @@ full_join_microorganisms <- function(x, by = 'bactid', suffix = c("2", ""), ...)
#' @rdname join
#' @export
semi_join_microorganisms <- function(x, by = 'bactid', ...) {
if (!any(class(x) %in% c("bactid", "data.frame", "matrix"))) {
x <- data.frame(bactid = as.bactid(x), stringsAsFactors = FALSE)
if (!any(class(x) %in% c("data.frame", "matrix"))) {
x <- data.frame(bactid = as.character(x), stringsAsFactors = FALSE)
}
# no name set to `by` parameter
if (is.null(names(by))) {
@ -132,8 +132,8 @@ semi_join_microorganisms <- function(x, by = 'bactid', ...) {
#' @rdname join
#' @export
anti_join_microorganisms <- function(x, by = 'bactid', ...) {
if (!any(class(x) %in% c("bactid", "data.frame", "matrix"))) {
x <- data.frame(bactid = as.bactid(x), stringsAsFactors = FALSE)
if (!any(class(x) %in% c("data.frame", "matrix"))) {
x <- data.frame(bactid = as.character(x), stringsAsFactors = FALSE)
}
# no name set to `by` parameter
if (is.null(names(by))) {

View File

@ -42,19 +42,26 @@
#' @importFrom dplyr %>% mutate if_else
#' @seealso \code{\link{first_isolate}}
#' @examples
#' \dontrun{
#' # septic_patients is a dataset available in the AMR package
#' ?septic_patients
#' my_patients <- septic_patients
#'
#' library(dplyr)
#' # set key antibiotics to a new variable
#' tbl$keyab <- key_antibiotics(tbl)
#' my_patients <- my_patients %>%
#' mutate(keyab = key_antibiotics(.)) %>%
#' mutate(
#' # now calculate first isolates
#' first_regular = first_isolate(., "date", "patient_id", "bactid"),
#' # and first WEIGHTED isolates
#' first_weighted = first_isolate(., "date", "patient_id", "bactid",
#' col_keyantibiotics = "keyab")
#' )
#'
#' # add regular first isolates
#' tbl$first_isolate <-
#' first_isolate(tbl)
#' # Check the difference, in this data set it results in 7% more isolates:
#' sum(my_patients$first_regular, na.rm = TRUE)
#' sum(my_patients$first_weighted, na.rm = TRUE)
#'
#' # add first WEIGHTED isolates using key antibiotics
#' tbl$first_isolate_weighed <-
#' first_isolate(tbl,
#' col_keyantibiotics = 'keyab')
#' }
#'
#' # output of the `key_antibiotics` function could be like this:
#' strainA <- "SSSRR.S.R..S"
@ -169,87 +176,80 @@ key_antibiotics_equal <- function(x,
points_threshold = 2,
info = FALSE) {
# x is active row, y is lag
type <- type[1]
if (length(x) != length(y)) {
stop('Length of `x` and `y` must be equal.')
}
# only show progress bar on points or when at least 5000 isolates
info_needed <- info == TRUE & (type == "points" | length(x) > 5000)
result <- logical(length(x))
if (type == "keyantibiotics") {
if (ignore_I == TRUE) {
# evaluation using regular expression will treat '.' as any character
# so I is actually ignored then
x <- gsub('I', '.', x, ignore.case = TRUE)
y <- gsub('I', '.', y, ignore.case = TRUE)
if (info_needed == TRUE) {
p <- dplyr::progress_estimated(length(x))
}
for (i in 1:length(x)) {
if (info_needed == TRUE) {
p$tick()$print()
}
for (i in 1:length(x)) {
if (is.na(x[i])) {
x[i] <- ''
}
if (is.na(y[i])) {
y[i] <- ''
}
if (x[i] == y[i]) {
result[i] <- TRUE
} else if (nchar(x[i]) != nchar(y[i])) {
result[i] <- FALSE
} else {
x_split <- strsplit(x[i], "")[[1]]
y_split <- strsplit(y[i], "")[[1]]
y_split[x_split == "."] <- "."
x_split[y_split == "."] <- "."
x_checkfor <- paste(x_split, collapse = "")
y_checkfor <- paste(y_split, collapse = "")
result[i] <- nchar(x[i]) == nchar(y[i]) &
(x_checkfor %like% y_checkfor |
y_checkfor %like% x_checkfor)
}
return(result)
} else {
if (type == 'keyantibiotics') {
if (type != 'points') {
stop('`', type, '` is not a valid value for type, must be "points" or "keyantibiotics". See ?first_isolate.')
}
if (ignore_I == TRUE) {
x_split[x_split == "I"] <- "."
y_split[y_split == "I"] <- "."
}
if (info == TRUE) {
p <- dplyr::progress_estimated(length(x))
}
y_split[x_split == "."] <- "."
x_split[y_split == "."] <- "."
for (i in 1:length(x)) {
if (info == TRUE) {
p$tick()$print()
}
if (is.na(x[i])) {
x[i] <- ''
}
if (is.na(y[i])) {
y[i] <- ''
}
if (nchar(x[i]) != nchar(y[i])) {
result[i] <- FALSE
} else if (x[i] == '' & y[i] == '') {
result[i] <- TRUE
} else {
x2 <- strsplit(x[i], "")[[1]]
y2 <- strsplit(y[i], "")[[1]]
result[i] <- all(x_split == y_split)
} else if (type == 'points') {
# count points for every single character:
# - no change is 0 points
# - I <-> S|R is 0.5 point
# - S|R <-> R|S is 1 point
# use the levels of as.rsi (S = 1, I = 2, R = 3)
suppressWarnings(x2 <- x2 %>% as.rsi() %>% as.double())
suppressWarnings(y2 <- y2 %>% as.rsi() %>% as.double())
suppressWarnings(x_split <- x_split %>% as.rsi() %>% as.double())
suppressWarnings(y_split <- y_split %>% as.rsi() %>% as.double())
points <- (x2 - y2) %>% abs() %>% sum(na.rm = TRUE)
result[i] <- ((points / 2) >= points_threshold)
points <- (x_split - y_split) %>% abs() %>% sum(na.rm = TRUE) / 2
result[i] <- points >= points_threshold
} else {
stop('`', type, '` is not a valid value for type, must be "points" or "keyantibiotics". See ?first_isolate.')
}
}
if (info == TRUE) {
cat('\n')
}
result
}
if (info_needed == TRUE) {
cat('\n')
}
result
}