diff --git a/DESCRIPTION b/DESCRIPTION index b28deb19..55fc3de8 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR -Version: 0.2.0.9015 -Date: 2018-07-23 +Version: 0.2.0.9016 +Date: 2018-07-25 Title: Antimicrobial Resistance Analysis Authors@R: c( person( diff --git a/NAMESPACE b/NAMESPACE index 60753e49..e2927e21 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -154,7 +154,6 @@ importFrom(utils,View) importFrom(utils,browseVignettes) importFrom(utils,installed.packages) importFrom(utils,object.size) -importFrom(utils,packageDescription) importFrom(utils,read.delim) importFrom(utils,write.table) importFrom(xml2,read_html) diff --git a/NEWS.md b/NEWS.md index 850c2a17..19eae140 100755 --- a/NEWS.md +++ b/NEWS.md @@ -2,8 +2,9 @@ #### New * **BREAKING**: `rsi_df` was removed in favour of new functions `resistance` and `susceptibility`. Now, all functions used to calculate resistance (`resistance` and `susceptibility`) use **hybrid evaluation**. This means calculations are not done in R directly but rather in C++ using the `Rcpp` package, making them 25 to 30 times faster. The function `rsi` still works, but is deprecated. * **BREAKING**: the methodology for determining first weighted isolates was changed. The antibiotics that are compared between isolates (call *key antibiotics*) to include more first isolates (afterwards called first *weighted* isolates) are now as follows: - * Gram-positive: amoxicillin, amoxicillin/clavlanic acid, cefuroxime, piperacillin/tazobactam, ciprofloxacin, trimethoprim/sulfamethoxazole, vancomycin, teicoplanin, tetracycline, erythromycin, oxacillin, rifampicin - * Gram-negative: amoxicillin, amoxicillin/clavlanic acid, cefuroxime, piperacillin/tazobactam, ciprofloxacin, trimethoprim/sulfamethoxazole, gentamicin, tobramycin, colistin, cefotaxime, ceftazidime, meropenem + * Universal: amoxicillin, amoxicillin/clavlanic acid, cefuroxime, piperacillin/tazobactam, ciprofloxacin, trimethoprim/sulfamethoxazole + * Gram-positive: vancomycin, teicoplanin, tetracycline, erythromycin, oxacillin, rifampicin + * Gram-negative: gentamicin, tobramycin, colistin, cefotaxime, ceftazidime, meropenem * Functions `as.bactid` and `is.bactid` to transform/look up microbial ID's; this replaces the function `guess_bactid` but it will remain available for backwards compatibility * For convience, new descriptive statistical functions `kurtosis` and `skewness` that are lacking in base R - they are generic functions and have support for vectors, data.frames and matrices * Function `g.test` to perform the Χ2 distributed [*G*-test](https://en.wikipedia.org/wiki/G-test), which use is the same as `chisq.test` @@ -23,6 +24,7 @@ * Possibility to globally set the default for the amount of items to print, with `options(max.print.freq = n)` where *n* is your preset value #### Changed +* Updates version of the `setic_patients` dataset to better reflect the reality * Pretty printing for tibbles removed as it is not really the scope of this package * Improved speed of key antibiotics comparison for determining first isolates * Column names for the `key_antibiotics` function are now generic: 6 for broadspectrum ABs, 6 for Gram-positive specific and 6 for Gram-negative specific ABs @@ -36,6 +38,7 @@ * `as.rsi("<=0.002; S")` will return `S` * `as.mic("<=0.002; S")` will return `<=0.002` * Now possible to coerce MIC values with a space between operator and value, i.e. `as.mic("<= 0.002")` now works +* Classes `rsi` and `mic` do not add the attribute `package.version` anymore * Added `"groups"` option for `atc_property(..., property)`. It will return a vector of the ATC hierarchy as defined by the [WHO](https://www.whocc.no/atc/structure_and_principles/). The new function `atc_groups` is a convenient wrapper around this. * Build-in host check for `atc_property` as it requires the host set by `url` to be responsive * Improved `first_isolate` algorithm to exclude isolates where bacteria ID or genus is unavailable diff --git a/R/bactid.R b/R/bactid.R index 93c10e7f..c91a30c2 100644 --- a/R/bactid.R +++ b/R/bactid.R @@ -221,7 +221,6 @@ as.bactid <- function(x) { } class(x) <- "bactid" attr(x, 'package') <- 'AMR' - attr(x, 'package.version') <- packageDescription('AMR')$Version x } diff --git a/R/classes.R b/R/classes.R index 14fcf66d..e75ee0e3 100755 --- a/R/classes.R +++ b/R/classes.R @@ -21,18 +21,18 @@ #' This transforms a vector to a new class \code{rsi}, which is an ordered factor with levels \code{S < I < R}. Invalid antimicrobial interpretations will be translated as \code{NA} with a warning. #' @rdname as.rsi #' @param x vector -#' @return Ordered factor with new class \code{rsi} and new attributes \code{package} and \code{package.version} +#' @return Ordered factor with new class \code{rsi} and new attribute \code{package} #' @keywords rsi #' @export #' @importFrom dplyr %>% -#' @importFrom utils packageDescription +#' @seealso \code{\link{as.mic}} #' @examples #' rsi_data <- as.rsi(c(rep("S", 474), rep("I", 36), rep("R", 370))) #' rsi_data <- as.rsi(c(rep("S", 474), rep("I", 36), rep("R", 370), "A", "B", "C")) #' is.rsi(rsi_data) #' #' # this can also coerce combined MIC/RSI values: -#' as.rsi("<= 0.002; R") # will return R +#' as.rsi("<= 0.002; S") # will return S #' #' plot(rsi_data) # for percentages #' barplot(rsi_data) # for frequencies @@ -76,7 +76,6 @@ as.rsi <- function(x) { x <- x %>% factor(levels = c("S", "I", "R"), ordered = TRUE) class(x) <- c('rsi', 'ordered', 'factor') attr(x, 'package') <- 'AMR' - attr(x, 'package.version') <- packageDescription('AMR')$Version x } } @@ -196,21 +195,21 @@ barplot.rsi <- function(height, ...) { #' Class 'mic' #' -#' This transforms a vector to a new class\code{mic}, which is an ordered factor with valid MIC values as levels. Invalid MIC values will be translated as \code{NA} with a warning. +#' This transforms a vector to a new class \code{mic}, which is an ordered factor with valid MIC values as levels. Invalid MIC values will be translated as \code{NA} with a warning. #' @rdname as.mic #' @param x vector #' @param na.rm a logical indicating whether missing values should be removed -#' @return Ordered factor with new class \code{mic} and new attributes \code{package} and \code{package.version} +#' @return Ordered factor with new class \code{mic} and new attribute \code{package} #' @keywords mic #' @export #' @importFrom dplyr %>% -#' @importFrom utils packageDescription +#' @seealso \code{\link{as.rsi}} #' @examples #' mic_data <- as.mic(c(">=32", "1.0", "1", "1.00", 8, "<=0.128", "8", "16", "16")) #' is.mic(mic_data) #' #' # this can also coerce combined MIC/RSI values: -#' as.mic("<=0.002; R") # will return <=0.002 +#' as.mic("<=0.002; S") # will return <=0.002 #' #' plot(mic_data) #' barplot(mic_data) @@ -319,7 +318,6 @@ as.mic <- function(x, na.rm = FALSE) { ordered = TRUE) class(x) <- c('mic', 'ordered', 'factor') attr(x, 'package') <- 'AMR' - attr(x, 'package.version') <- packageDescription('AMR')$Version x } } diff --git a/R/data.R b/R/data.R index 8f6a0291..a83b798c 100755 --- a/R/data.R +++ b/R/data.R @@ -252,7 +252,7 @@ #' \item{\code{type_nl}}{Type of microorganism in Dutch, like \code{"Bacterie"} and \code{"Schimmel/gist"}} #' \item{\code{gramstain_nl}}{Gram of microorganism in Dutch, like \code{"Negatieve staven"}} #' } -#' @source MOLIS (LIS of Certe) - \url{https://www.certe.nl} +# source MOLIS (LIS of Certe) - \url{https://www.certe.nl} #' @seealso \code{\link{guess_bactid}} \code{\link{antibiotics}} \code{\link{microorganisms.umcg}} "microorganisms" @@ -264,14 +264,14 @@ #' \item{\code{mocode}}{Code of microorganism according to UMCG MMB} #' \item{\code{bactid}}{Code of microorganism in \code{\link{microorganisms}}} #' } -#' @source MOLIS (LIS of Certe) - \url{https://www.certe.nl} \cr \cr GLIMS (LIS of UMCG) - \url{https://www.umcg.nl} +# source MOLIS (LIS of Certe) - \url{https://www.certe.nl} \cr \cr GLIMS (LIS of UMCG) - \url{https://www.umcg.nl} #' @seealso \code{\link{guess_bactid}} \code{\link{microorganisms}} "microorganisms.umcg" #' Dataset with 2000 blood culture isolates of septic patients #' #' An anonymised dataset containing 2000 microbial blood culture isolates with their antibiogram of septic patients found in 5 different hospitals in the Netherlands, between 2001 and 2017. This data.frame can be used to practice AMR analysis. For examples, press F1. -#' @format A data.frame with 2000 observations and 47 variables: +#' @format A data.frame with 2000 observations and 49 variables: #' \describe{ #' \item{\code{date}}{date of receipt at the laboratory} #' \item{\code{hospital_id}}{ID of the hospital} @@ -282,9 +282,9 @@ #' \item{\code{sex}}{sex of the patient} #' \item{\code{patient_id}}{ID of the patient, first 10 characters of an SHA hash containing irretrievable information} #' \item{\code{bactid}}{ID of microorganism, see \code{\link{microorganisms}}} -#' \item{\code{peni:mupi}}{38 different antibiotics with class \code{rsi} (see \code{\link{as.rsi}}); these column names occur in \code{\link{antibiotics}} and can be translated with \code{\link{abname}}} +#' \item{\code{peni:rifa}}{40 different antibiotics with class \code{rsi} (see \code{\link{as.rsi}}); these column names occur in \code{\link{antibiotics}} data set and can be translated with \code{\link{abname}}} #' } -#' @source MOLIS (LIS of Certe) - \url{https://www.certe.nl} +# source MOLIS (LIS of Certe) - \url{https://www.certe.nl} #' @examples #' # ----------- # #' # PREPARATION # @@ -304,15 +304,15 @@ #' # ANALYSIS # #' # -------- # #' -#' # 1. Get the amoxicillin resistance percentages -#' # of E. coli, divided by hospital: +#' # 1. Get the amoxicillin resistance percentages (p) +#' # and numbers (n) of E. coli, divided by hospital: #' #' my_data %>% -#' filter(bactid == "ESCCOL", +#' filter(bactid == guess_bactid("E. coli"), #' first_isolates == TRUE) %>% #' group_by(hospital_id) %>% -#' summarise(n = n(), -#' amoxicillin_resistance = rsi(amox)) +#' summarise(n = n_rsi(amox), +#' p = resistance(amox)) #' #' #' # 2. Get the amoxicillin/clavulanic acid resistance @@ -322,6 +322,6 @@ #' filter(bactid == guess_bactid("E. coli"), #' first_isolates == TRUE) %>% #' group_by(year = format(date, "%Y")) %>% -#' summarise(n = n(), -#' amoxclav_resistance = rsi(amcl, minimum = 20)) +#' summarise(n = n_rsi(amcl), +#' p = resistance(amcl, minimum = 20)) "septic_patients" diff --git a/R/first_isolate.R b/R/first_isolate.R index fd97bcf4..ca07f1d4 100755 --- a/R/first_isolate.R +++ b/R/first_isolate.R @@ -41,7 +41,7 @@ #' @details \strong{WHY THIS IS SO IMPORTANT} \cr #' To conduct an analysis of antimicrobial resistance, you should only include the first isolate of every patient per episode \href{https://www.ncbi.nlm.nih.gov/pubmed/17304462}{[1]}. If you would not do this, you could easily get an overestimate or underestimate of the resistance of an antibiotic. Imagine that a patient was admitted with an MRSA and that it was found in 5 different blood cultures the following week. The resistance percentage of oxacillin of all \emph{S. aureus} isolates would be overestimated, because you included this MRSA more than once. It would be \href{https://en.wikipedia.org/wiki/Selection_bias}{selection bias}. #' @section Key antibiotics: -#' There are two ways to determine whether isolates can be included as first \emph{weighted} isolates: \cr +#' There are two ways to determine whether isolates can be included as first \emph{weighted} isolates which will give generally the same results: \cr #' #' \strong{1. Using} \code{type = "keyantibiotics"} \strong{and parameter} \code{ignore_I} \cr #' Any difference from S to R (or vice versa) will (re)select an isolate as a first weighted isolate. With \code{ignore_I = FALSE}, also differences from I to S|R (or vice versa) will lead to this. This is a reliable method and 30-35 times faster than method 2. \cr @@ -65,6 +65,24 @@ #' col_patient_id = "patient_id", #' col_bactid = "bactid") #' +#' # Now let's see if first isolates matter: +#' A <- my_patients %>% +#' group_by(hospital_id) %>% +#' summarise(count = n_rsi(gent), # gentamicin +#' resistance = resistance(gent)) +#' +#' B <- my_patients %>% +#' filter(first_isolate == TRUE) %>% +#' group_by(hospital_id) %>% +#' summarise(count = n_rsi(gent), # gentamicin +#' resistance = resistance(gent)) +#' +#' # Have a look at A and B. B is more reliable because every isolate is +#' # counted once. Gentamicin resitance in hospital D seems to be 5% +#' # higher than originally thought. +#' +#' ## OTHER EXAMPLES: +#' #' \dontrun{ #' #' # set key antibiotics to a new variable @@ -153,7 +171,7 @@ first_isolate <- function(tbl, if (!is.na(col_bactid)) { if (!tbl %>% pull(col_bactid) %>% is.bactid()) { - tbl[, col_bactid] <- tbl %>% pull(col_bactid) %>% as.bactid() + warning("Improve integrity of the `", col_bactid, "` column by transforming it with 'as.bactid'.") } tbl <- tbl %>% left_join_microorganisms(by = col_bactid) col_genus <- "genus" @@ -179,7 +197,6 @@ first_isolate <- function(tbl, filter_specimen <- '' } - weighted.notice <- '' # filter on specimen group and keyantibiotics when they are filled in if (!is.na(filter_specimen) & filter_specimen != '') { check_columns_existance(col_specimen, tbl) @@ -317,7 +334,9 @@ first_isolate <- function(tbl, (date_lab - lag(date_lab)) + lag(days_diff), 0)) + weighted.notice <- '' if (col_keyantibiotics != '') { + weighted.notice <- 'weighted ' if (info == TRUE) { if (type == 'keyantibiotics') { cat('[Criteria] Inclusion based on key antibiotics, ') diff --git a/R/freq.R b/R/freq.R index 17cfab5a..6c37a6fe 100755 --- a/R/freq.R +++ b/R/freq.R @@ -415,7 +415,6 @@ frequency_tbl <- function(x, class(df) <- c('frequency_tbl', class(df)) attr(df, 'package') <- 'AMR' - attr(df, 'package.version') <- packageDescription('AMR')$Version if (markdown == TRUE) { tbl_format <- 'markdown' @@ -567,7 +566,6 @@ print.frequency_tbl <- function(x, nmax = getOption("max.print.freq", default = #' @export as.data.frame.frequency_tbl <- function(x, ...) { attr(x, 'package') <- NULL - attr(x, 'package.version') <- NULL attr(x, 'opt') <- NULL as.data.frame.data.frame(x, ...) } @@ -578,7 +576,6 @@ as.data.frame.frequency_tbl <- function(x, ...) { #' @importFrom dplyr as_tibble as_tibble.frequency_tbl <- function(x, validate = TRUE, ..., rownames = NA) { attr(x, 'package') <- NULL - attr(x, 'package.version') <- NULL attr(x, 'opt') <- NULL as_tibble(x = as.data.frame(x), validate = validate, ..., rownames = rownames) } diff --git a/R/join_microorganisms.R b/R/join_microorganisms.R index 3cdaa276..dd19c16b 100755 --- a/R/join_microorganisms.R +++ b/R/join_microorganisms.R @@ -26,8 +26,8 @@ #' df2 <- left_join_microorganisms(df, "bacteria_id") #' colnames(df2) inner_join_microorganisms <- function(x, by = 'bactid', suffix = c("2", ""), ...) { - if (!any(class(x) %in% c("bactid", "data.frame", "matrix"))) { - x <- data.frame(bactid = as.bactid(x), stringsAsFactors = FALSE) + if (!any(class(x) %in% c("data.frame", "matrix"))) { + x <- data.frame(bactid = as.character(x), stringsAsFactors = FALSE) } # no name set to `by` parameter if (is.null(names(by))) { @@ -48,8 +48,8 @@ inner_join_microorganisms <- function(x, by = 'bactid', suffix = c("2", ""), ... #' @rdname join #' @export left_join_microorganisms <- function(x, by = 'bactid', suffix = c("2", ""), ...) { - if (!any(class(x) %in% c("bactid", "data.frame", "matrix"))) { - x <- data.frame(bactid = as.bactid(x), stringsAsFactors = FALSE) + if (!any(class(x) %in% c("data.frame", "matrix"))) { + x <- data.frame(bactid = as.character(x), stringsAsFactors = FALSE) } # no name set to `by` parameter if (is.null(names(by))) { @@ -70,8 +70,8 @@ left_join_microorganisms <- function(x, by = 'bactid', suffix = c("2", ""), ...) #' @rdname join #' @export right_join_microorganisms <- function(x, by = 'bactid', suffix = c("2", ""), ...) { - if (!any(class(x) %in% c("bactid", "data.frame", "matrix"))) { - x <- data.frame(bactid = as.bactid(x), stringsAsFactors = FALSE) + if (!any(class(x) %in% c("data.frame", "matrix"))) { + x <- data.frame(bactid = as.character(x), stringsAsFactors = FALSE) } # no name set to `by` parameter if (is.null(names(by))) { @@ -92,8 +92,8 @@ right_join_microorganisms <- function(x, by = 'bactid', suffix = c("2", ""), ... #' @rdname join #' @export full_join_microorganisms <- function(x, by = 'bactid', suffix = c("2", ""), ...) { - if (!any(class(x) %in% c("bactid", "data.frame", "matrix"))) { - x <- data.frame(bactid = as.bactid(x), stringsAsFactors = FALSE) + if (!any(class(x) %in% c("data.frame", "matrix"))) { + x <- data.frame(bactid = as.character(x), stringsAsFactors = FALSE) } # no name set to `by` parameter if (is.null(names(by))) { @@ -114,8 +114,8 @@ full_join_microorganisms <- function(x, by = 'bactid', suffix = c("2", ""), ...) #' @rdname join #' @export semi_join_microorganisms <- function(x, by = 'bactid', ...) { - if (!any(class(x) %in% c("bactid", "data.frame", "matrix"))) { - x <- data.frame(bactid = as.bactid(x), stringsAsFactors = FALSE) + if (!any(class(x) %in% c("data.frame", "matrix"))) { + x <- data.frame(bactid = as.character(x), stringsAsFactors = FALSE) } # no name set to `by` parameter if (is.null(names(by))) { @@ -132,8 +132,8 @@ semi_join_microorganisms <- function(x, by = 'bactid', ...) { #' @rdname join #' @export anti_join_microorganisms <- function(x, by = 'bactid', ...) { - if (!any(class(x) %in% c("bactid", "data.frame", "matrix"))) { - x <- data.frame(bactid = as.bactid(x), stringsAsFactors = FALSE) + if (!any(class(x) %in% c("data.frame", "matrix"))) { + x <- data.frame(bactid = as.character(x), stringsAsFactors = FALSE) } # no name set to `by` parameter if (is.null(names(by))) { diff --git a/R/key_antibiotics.R b/R/key_antibiotics.R index 3b391fb1..6e04a260 100644 --- a/R/key_antibiotics.R +++ b/R/key_antibiotics.R @@ -42,19 +42,26 @@ #' @importFrom dplyr %>% mutate if_else #' @seealso \code{\link{first_isolate}} #' @examples -#' \dontrun{ +#' # septic_patients is a dataset available in the AMR package +#' ?septic_patients +#' my_patients <- septic_patients +#' +#' library(dplyr) #' # set key antibiotics to a new variable -#' tbl$keyab <- key_antibiotics(tbl) +#' my_patients <- my_patients %>% +#' mutate(keyab = key_antibiotics(.)) %>% +#' mutate( +#' # now calculate first isolates +#' first_regular = first_isolate(., "date", "patient_id", "bactid"), +#' # and first WEIGHTED isolates +#' first_weighted = first_isolate(., "date", "patient_id", "bactid", +#' col_keyantibiotics = "keyab") +#' ) #' -#' # add regular first isolates -#' tbl$first_isolate <- -#' first_isolate(tbl) +#' # Check the difference, in this data set it results in 7% more isolates: +#' sum(my_patients$first_regular, na.rm = TRUE) +#' sum(my_patients$first_weighted, na.rm = TRUE) #' -#' # add first WEIGHTED isolates using key antibiotics -#' tbl$first_isolate_weighed <- -#' first_isolate(tbl, -#' col_keyantibiotics = 'keyab') -#' } #' #' # output of the `key_antibiotics` function could be like this: #' strainA <- "SSSRR.S.R..S" @@ -169,87 +176,80 @@ key_antibiotics_equal <- function(x, points_threshold = 2, info = FALSE) { # x is active row, y is lag + type <- type[1] if (length(x) != length(y)) { stop('Length of `x` and `y` must be equal.') } + # only show progress bar on points or when at least 5000 isolates + info_needed <- info == TRUE & (type == "points" | length(x) > 5000) + result <- logical(length(x)) - if (type == "keyantibiotics") { - if (ignore_I == TRUE) { - # evaluation using regular expression will treat '.' as any character - # so I is actually ignored then - x <- gsub('I', '.', x, ignore.case = TRUE) - y <- gsub('I', '.', y, ignore.case = TRUE) + if (info_needed == TRUE) { + p <- dplyr::progress_estimated(length(x)) + } + + for (i in 1:length(x)) { + + if (info_needed == TRUE) { + p$tick()$print() } - for (i in 1:length(x)) { + if (is.na(x[i])) { + x[i] <- '' + } + if (is.na(y[i])) { + y[i] <- '' + } + + if (x[i] == y[i]) { + + result[i] <- TRUE + + } else if (nchar(x[i]) != nchar(y[i])) { + + result[i] <- FALSE + + } else { + x_split <- strsplit(x[i], "")[[1]] y_split <- strsplit(y[i], "")[[1]] - y_split[x_split == "."] <- "." - x_split[y_split == "."] <- "." - x_checkfor <- paste(x_split, collapse = "") - y_checkfor <- paste(y_split, collapse = "") - result[i] <- nchar(x[i]) == nchar(y[i]) & - (x_checkfor %like% y_checkfor | - y_checkfor %like% x_checkfor) - } - return(result) - } else { + if (type == 'keyantibiotics') { - if (type != 'points') { - stop('`', type, '` is not a valid value for type, must be "points" or "keyantibiotics". See ?first_isolate.') - } + if (ignore_I == TRUE) { + x_split[x_split == "I"] <- "." + y_split[y_split == "I"] <- "." + } - if (info == TRUE) { - p <- dplyr::progress_estimated(length(x)) - } + y_split[x_split == "."] <- "." + x_split[y_split == "."] <- "." - for (i in 1:length(x)) { - - if (info == TRUE) { - p$tick()$print() - } - - if (is.na(x[i])) { - x[i] <- '' - } - if (is.na(y[i])) { - y[i] <- '' - } - - if (nchar(x[i]) != nchar(y[i])) { - - result[i] <- FALSE - - } else if (x[i] == '' & y[i] == '') { - - result[i] <- TRUE - - } else { - - x2 <- strsplit(x[i], "")[[1]] - y2 <- strsplit(y[i], "")[[1]] + result[i] <- all(x_split == y_split) + } else if (type == 'points') { # count points for every single character: # - no change is 0 points # - I <-> S|R is 0.5 point # - S|R <-> R|S is 1 point # use the levels of as.rsi (S = 1, I = 2, R = 3) - suppressWarnings(x2 <- x2 %>% as.rsi() %>% as.double()) - suppressWarnings(y2 <- y2 %>% as.rsi() %>% as.double()) + suppressWarnings(x_split <- x_split %>% as.rsi() %>% as.double()) + suppressWarnings(y_split <- y_split %>% as.rsi() %>% as.double()) - points <- (x2 - y2) %>% abs() %>% sum(na.rm = TRUE) - result[i] <- ((points / 2) >= points_threshold) + points <- (x_split - y_split) %>% abs() %>% sum(na.rm = TRUE) / 2 + result[i] <- points >= points_threshold + + } else { + stop('`', type, '` is not a valid value for type, must be "points" or "keyantibiotics". See ?first_isolate.') } } - if (info == TRUE) { - cat('\n') - } - result } + if (info_needed == TRUE) { + cat('\n') + } + result } diff --git a/README.md b/README.md index 1e9c5920..ec3d16bf 100755 --- a/README.md +++ b/README.md @@ -26,9 +26,13 @@ This R package contains functions to make **microbiological, epidemiological dat With `AMR` you can: * Calculate the resistance (and even co-resistance) of microbial isolates with the `resistance` and `susceptibility` functions, that can also be used with the `dplyr` package (e.g. in conjunction with `summarise`) -* Predict antimicrobial resistance for the nextcoming years with the `rsi_predict` function +* Predict antimicrobial resistance for the nextcoming years with the `resistance_predict` function * Apply [EUCAST rules to isolates](http://www.eucast.org/expert_rules_and_intrinsic_resistance/) with the `EUCAST_rules` function * Identify first isolates of every patient [using guidelines from the CLSI](https://clsi.org/standards/products/microbiology/documents/m39/) (Clinical and Laboratory Standards Institute) with the `first_isolate` function + * You can also identify first *weighted* isolates of every patient, an adjusted version of the CLSI guideline. This takes into account key antibiotics of every strain and compares them. The following 12 antibiotics will be used as key antibiotics at default: + * Universal: amoxicillin, amoxicillin/clavlanic acid, cefuroxime, piperacillin/tazobactam, ciprofloxacin, trimethoprim/sulfamethoxazole + * Specific for Gram-positives: vancomycin, teicoplanin, tetracycline, erythromycin, oxacillin, rifampicin + * Specific for Gram-negatives: gentamicin, tobramycin, colistin, cefotaxime, ceftazidime, meropenem * Get antimicrobial ATC properties from the WHO Collaborating Centre for Drug Statistics Methodology ([WHOCC](https://www.whocc.no/atc_ddd_methodology/who_collaborating_centre/)), to be able to: * Translate antibiotic codes (like *AMOX*), official names (like *amoxicillin*) and even trade names (like *Amoxil* or *Trimox*) to an [ATC code](https://www.whocc.no/atc_ddd_index/?code=J01CA04&showdescription=no) (like *J01CA04*) and vice versa with the `abname` function * Get the latest antibiotic properties like hierarchic groups and [defined daily dose](https://en.wikipedia.org/wiki/Defined_daily_dose) (DDD) with units and administration form from the WHOCC website with the `atc_property` function @@ -219,35 +223,33 @@ mydata %>% freq(myvariable) Factors sort on item by default: ```r septic_patients %>% freq(hospital_id) -# Frequency table of `hospital_id` +# Frequency table of `hospital_id` # Class: factor # Length: 2000 (of which NA: 0 = 0.0%) -# Unique: 5 +# Unique: 4 # # Item Count Percent Cum. Count Cum. Percent (Factor Level) # --- ----- ------ -------- ----------- ------------- --------------- -# 1 A 233 11.7% 233 11.7% 1 -# 2 B 583 29.1% 816 40.8% 2 -# 3 C 221 11.1% 1037 51.8% 3 -# 4 D 650 32.5% 1687 84.4% 4 -# 5 E 313 15.7% 2000 100.0% 5 +# 1 A 319 16.0% 319 16.0% 1 +# 2 B 661 33.1% 980 49.0% 2 +# 3 C 256 12.8% 1236 61.8% 3 +# 4 D 764 38.2% 2000 100.0% 4 ``` This can be changed with the `sort.count` parameter: ```r septic_patients %>% freq(hospital_id, sort.count = TRUE) -# Frequency table of `hospital_id` +# Frequency table of `hospital_id` # Class: factor # Length: 2000 (of which NA: 0 = 0.0%) -# Unique: 5 +# Unique: 4 # # Item Count Percent Cum. Count Cum. Percent (Factor Level) # --- ----- ------ -------- ----------- ------------- --------------- -# 1 D 650 32.5% 650 32.5% 4 -# 2 B 583 29.1% 1233 61.7% 2 -# 3 E 313 15.7% 1546 77.3% 5 -# 4 A 233 11.7% 1779 88.9% 1 -# 5 C 221 11.1% 2000 100.0% 3 +# 1 D 764 38.2% 764 38.2% 4 +# 2 B 661 33.1% 1425 71.2% 2 +# 3 A 319 16.0% 1744 87.2% 1 +# 4 C 256 12.8% 2000 100.0% 3 ``` All other types, like numbers, characters and dates, sort on count by default: @@ -256,56 +258,56 @@ septic_patients %>% freq(date) # Frequency table of `date` # Class: Date # Length: 2000 (of which NA: 0 = 0.0%) -# Unique: 1662 +# Unique: 1151 # -# Oldest: 2 January 2001 -# Newest: 18 October 2017 (+6133) -# Median: 6 December 2009 (~53%) +# Oldest: 2 January 2002 +# Newest: 28 December 2017 (+5839) +# Median: 7 Augustus 2009 (~48%) # # Item Count Percent Cum. Count Cum. Percent # --- ----------- ------ -------- ----------- ------------- -# 1 2008-12-24 5 0.2% 5 0.2% -# 2 2010-12-10 4 0.2% 9 0.4% -# 3 2011-03-03 4 0.2% 13 0.6% -# 4 2013-06-24 4 0.2% 17 0.8% -# 5 2017-09-01 4 0.2% 21 1.1% -# 6 2002-09-02 3 0.2% 24 1.2% -# 7 2003-10-14 3 0.2% 27 1.4% -# 8 2004-06-25 3 0.2% 30 1.5% -# 9 2004-06-27 3 0.2% 33 1.7% -# 10 2004-10-29 3 0.2% 36 1.8% -# 11 2005-09-27 3 0.2% 39 2.0% -# 12 2006-08-01 3 0.2% 42 2.1% -# 13 2006-10-10 3 0.2% 45 2.2% -# 14 2007-11-16 3 0.2% 48 2.4% -# 15 2008-03-09 3 0.2% 51 2.5% -# [ reached getOption("max.print.freq") -- omitted 1647 entries, n = 1949 (97.5%) ] +# 1 2016-05-21 10 0.5% 10 0.5% +# 2 2004-11-15 8 0.4% 18 0.9% +# 3 2013-07-29 8 0.4% 26 1.3% +# 4 2017-06-12 8 0.4% 34 1.7% +# 5 2015-11-19 7 0.4% 41 2.1% +# 6 2005-12-22 6 0.3% 47 2.4% +# 7 2015-10-12 6 0.3% 53 2.6% +# 8 2002-05-16 5 0.2% 58 2.9% +# 9 2004-02-02 5 0.2% 63 3.1% +# 10 2004-02-18 5 0.2% 68 3.4% +# 11 2005-08-16 5 0.2% 73 3.6% +# 12 2005-09-01 5 0.2% 78 3.9% +# 13 2006-06-29 5 0.2% 83 4.2% +# 14 2007-08-10 5 0.2% 88 4.4% +# 15 2008-08-29 5 0.2% 93 4.7% +# [ reached getOption("max.print.freq") -- omitted 1136 entries, n = 1907 (95.3%) ] ``` For numeric values, some extra descriptive statistics will be calculated: ```r freq(runif(n = 10, min = 1, max = 5)) -# Frequency table +# Frequency table # Class: numeric # Length: 10 (of which NA: 0 = 0.0%) # Unique: 10 -# -# Mean: 2.9 -# Std. dev.: 1.3 (CV: 0.43, MAD: 1.5) -# Five-Num: 1.5 | 1.7 | 2.6 | 4.0 | 4.7 (IQR: 2.3, CQV: 0.4) +# +# Mean: 3.4 +# Std. dev.: 1.3 (CV: 0.38, MAD: 1.3) +# Five-Num: 1.6 | 2.0 | 3.9 | 4.7 | 4.8 (IQR: 2.7, CQV: 0.4) # Outliers: 0 # -# Item Count Percent Cum. Count Cum. Percent -# --------- ------ -------- ----------- ------------- -# 1.132033 1 10.0% 1 10.0% -# 2.226903 1 10.0% 2 20.0% -# 2.280779 1 10.0% 3 30.0% -# 2.640898 1 10.0% 4 40.0% -# 2.913462 1 10.0% 5 50.0% -# 3.364201 1 10.0% 6 60.0% -# 3.771975 1 10.0% 7 70.0% -# 3.802861 1 10.0% 8 80.0% -# 3.803547 1 10.0% 9 90.0% -# 3.985691 1 10.0% 10 100.0% +# Item Count Percent Cum. Count Cum. Percent +# --- --------- ------ -------- ----------- ------------- +# 1 1.568997 1 10.0% 1 10.0% +# 2 1.993575 1 10.0% 2 20.0% +# 3 2.022348 1 10.0% 3 30.0% +# 4 2.236038 1 10.0% 4 40.0% +# 5 3.579828 1 10.0% 5 50.0% +# 6 4.178081 1 10.0% 6 60.0% +# 7 4.394818 1 10.0% 7 70.0% +# 8 4.689871 1 10.0% 8 80.0% +# 9 4.698626 1 10.0% 9 90.0% +# 10 4.751488 1 10.0% 10 100.0% # # Warning message: # All observations are unique. @@ -320,7 +322,7 @@ Datasets to work with antibiotics and bacteria properties. ```r # Dataset with 2000 random blood culture isolates from anonymised # septic patients between 2001 and 2017 in 5 Dutch hospitals -septic_patients # A tibble: 2,000 x 47 +septic_patients # A tibble: 2,000 x 49 # Dataset with ATC antibiotics codes, official names, trade names # and DDD's (oral and parenteral) diff --git a/data/septic_patients.rda b/data/septic_patients.rda index fc482274..aa113de2 100755 Binary files a/data/septic_patients.rda and b/data/septic_patients.rda differ diff --git a/man/as.mic.Rd b/man/as.mic.Rd index 4cf77e34..371ccccf 100755 --- a/man/as.mic.Rd +++ b/man/as.mic.Rd @@ -15,19 +15,22 @@ is.mic(x) \item{na.rm}{a logical indicating whether missing values should be removed} } \value{ -Ordered factor with new class \code{mic} and new attributes \code{package} and \code{package.version} +Ordered factor with new class \code{mic} and new attribute \code{package} } \description{ -This transforms a vector to a new class\code{mic}, which is an ordered factor with valid MIC values as levels. Invalid MIC values will be translated as \code{NA} with a warning. +This transforms a vector to a new class \code{mic}, which is an ordered factor with valid MIC values as levels. Invalid MIC values will be translated as \code{NA} with a warning. } \examples{ mic_data <- as.mic(c(">=32", "1.0", "1", "1.00", 8, "<=0.128", "8", "16", "16")) is.mic(mic_data) # this can also coerce combined MIC/RSI values: -as.mic("<=0.002; R") # will return <=0.002 +as.mic("<=0.002; S") # will return <=0.002 plot(mic_data) barplot(mic_data) } +\seealso{ +\code{\link{as.rsi}} +} \keyword{mic} diff --git a/man/as.rsi.Rd b/man/as.rsi.Rd index 61aab991..24352463 100755 --- a/man/as.rsi.Rd +++ b/man/as.rsi.Rd @@ -13,7 +13,7 @@ is.rsi(x) \item{x}{vector} } \value{ -Ordered factor with new class \code{rsi} and new attributes \code{package} and \code{package.version} +Ordered factor with new class \code{rsi} and new attribute \code{package} } \description{ This transforms a vector to a new class \code{rsi}, which is an ordered factor with levels \code{S < I < R}. Invalid antimicrobial interpretations will be translated as \code{NA} with a warning. @@ -24,9 +24,12 @@ rsi_data <- as.rsi(c(rep("S", 474), rep("I", 36), rep("R", 370), "A", "B", "C")) is.rsi(rsi_data) # this can also coerce combined MIC/RSI values: -as.rsi("<= 0.002; R") # will return R +as.rsi("<= 0.002; S") # will return S plot(rsi_data) # for percentages barplot(rsi_data) # for frequencies } +\seealso{ +\code{\link{as.mic}} +} \keyword{rsi} diff --git a/man/first_isolate.Rd b/man/first_isolate.Rd index 86b582cb..2929f161 100755 --- a/man/first_isolate.Rd +++ b/man/first_isolate.Rd @@ -65,7 +65,7 @@ Determine first (weighted) isolates of all microorganisms of every patient per e } \section{Key antibiotics}{ - There are two ways to determine whether isolates can be included as first \emph{weighted} isolates: \cr + There are two ways to determine whether isolates can be included as first \emph{weighted} isolates which will give generally the same results: \cr \strong{1. Using} \code{type = "keyantibiotics"} \strong{and parameter} \code{ignore_I} \cr Any difference from S to R (or vice versa) will (re)select an isolate as a first weighted isolate. With \code{ignore_I = FALSE}, also differences from I to S|R (or vice versa) will lead to this. This is a reliable method and 30-35 times faster than method 2. \cr @@ -85,6 +85,24 @@ my_patients$first_isolate <- my_patients \%>\% col_patient_id = "patient_id", col_bactid = "bactid") +# Now let's see if first isolates matter: +A <- my_patients \%>\% + group_by(hospital_id) \%>\% + summarise(count = n_rsi(gent), # gentamicin + resistance = resistance(gent)) + +B <- my_patients \%>\% + filter(first_isolate == TRUE) \%>\% + group_by(hospital_id) \%>\% + summarise(count = n_rsi(gent), # gentamicin + resistance = resistance(gent)) + +# Have a look at A and B. B is more reliable because every isolate is +# counted once. Gentamicin resitance in hospital D seems to be 5\% +# higher than originally thought. + +## OTHER EXAMPLES: + \dontrun{ # set key antibiotics to a new variable diff --git a/man/key_antibiotics.Rd b/man/key_antibiotics.Rd index 8d97b7e7..41bf8371 100755 --- a/man/key_antibiotics.Rd +++ b/man/key_antibiotics.Rd @@ -56,7 +56,7 @@ The function \code{key_antibiotics} returns a character vector with 12 antibioti } \section{Key antibiotics}{ - There are two ways to determine whether isolates can be included as first \emph{weighted} isolates: \cr + There are two ways to determine whether isolates can be included as first \emph{weighted} isolates which will give generally the same results: \cr \strong{1. Using} \code{type = "keyantibiotics"} \strong{and parameter} \code{ignore_I} \cr Any difference from S to R (or vice versa) will (re)select an isolate as a first weighted isolate. With \code{ignore_I = FALSE}, also differences from I to S|R (or vice versa) will lead to this. This is a reliable method and 30-35 times faster than method 2. \cr @@ -66,19 +66,26 @@ The function \code{key_antibiotics} returns a character vector with 12 antibioti } \examples{ -\dontrun{ +# septic_patients is a dataset available in the AMR package +?septic_patients +my_patients <- septic_patients + +library(dplyr) # set key antibiotics to a new variable -tbl$keyab <- key_antibiotics(tbl) +my_patients <- my_patients \%>\% + mutate(keyab = key_antibiotics(.)) \%>\% + mutate( + # now calculate first isolates + first_regular = first_isolate(., "date", "patient_id", "bactid"), + # and first WEIGHTED isolates + first_weighted = first_isolate(., "date", "patient_id", "bactid", + col_keyantibiotics = "keyab") + ) -# add regular first isolates -tbl$first_isolate <- - first_isolate(tbl) +# Check the difference, in this data set it results in 7\% more isolates: +sum(my_patients$first_regular, na.rm = TRUE) +sum(my_patients$first_weighted, na.rm = TRUE) -# add first WEIGHTED isolates using key antibiotics -tbl$first_isolate_weighed <- - first_isolate(tbl, - col_keyantibiotics = 'keyab') -} # output of the `key_antibiotics` function could be like this: strainA <- "SSSRR.S.R..S" diff --git a/man/microorganisms.Rd b/man/microorganisms.Rd index 248e68db..2dd3a2d6 100755 --- a/man/microorganisms.Rd +++ b/man/microorganisms.Rd @@ -19,9 +19,6 @@ \item{\code{type_nl}}{Type of microorganism in Dutch, like \code{"Bacterie"} and \code{"Schimmel/gist"}} \item{\code{gramstain_nl}}{Gram of microorganism in Dutch, like \code{"Negatieve staven"}} }} -\source{ -MOLIS (LIS of Certe) - \url{https://www.certe.nl} -} \usage{ microorganisms } diff --git a/man/microorganisms.umcg.Rd b/man/microorganisms.umcg.Rd index 18c98600..51645a14 100755 --- a/man/microorganisms.umcg.Rd +++ b/man/microorganisms.umcg.Rd @@ -9,9 +9,6 @@ \item{\code{mocode}}{Code of microorganism according to UMCG MMB} \item{\code{bactid}}{Code of microorganism in \code{\link{microorganisms}}} }} -\source{ -MOLIS (LIS of Certe) - \url{https://www.certe.nl} \cr \cr GLIMS (LIS of UMCG) - \url{https://www.umcg.nl} -} \usage{ microorganisms.umcg } diff --git a/man/septic_patients.Rd b/man/septic_patients.Rd index 14956c3b..a8be9cdd 100755 --- a/man/septic_patients.Rd +++ b/man/septic_patients.Rd @@ -4,7 +4,7 @@ \name{septic_patients} \alias{septic_patients} \title{Dataset with 2000 blood culture isolates of septic patients} -\format{A data.frame with 2000 observations and 47 variables: +\format{A data.frame with 2000 observations and 49 variables: \describe{ \item{\code{date}}{date of receipt at the laboratory} \item{\code{hospital_id}}{ID of the hospital} @@ -15,11 +15,8 @@ \item{\code{sex}}{sex of the patient} \item{\code{patient_id}}{ID of the patient, first 10 characters of an SHA hash containing irretrievable information} \item{\code{bactid}}{ID of microorganism, see \code{\link{microorganisms}}} - \item{\code{peni:mupi}}{38 different antibiotics with class \code{rsi} (see \code{\link{as.rsi}}); these column names occur in \code{\link{antibiotics}} and can be translated with \code{\link{abname}}} + \item{\code{peni:rifa}}{40 different antibiotics with class \code{rsi} (see \code{\link{as.rsi}}); these column names occur in \code{\link{antibiotics}} data set and can be translated with \code{\link{abname}}} }} -\source{ -MOLIS (LIS of Certe) - \url{https://www.certe.nl} -} \usage{ septic_patients } @@ -45,15 +42,15 @@ my_data <- my_data \%>\% # ANALYSIS # # -------- # -# 1. Get the amoxicillin resistance percentages -# of E. coli, divided by hospital: +# 1. Get the amoxicillin resistance percentages (p) +# and numbers (n) of E. coli, divided by hospital: my_data \%>\% - filter(bactid == "ESCCOL", + filter(bactid == guess_bactid("E. coli"), first_isolates == TRUE) \%>\% group_by(hospital_id) \%>\% - summarise(n = n(), - amoxicillin_resistance = rsi(amox)) + summarise(n = n_rsi(amox), + p = resistance(amox)) # 2. Get the amoxicillin/clavulanic acid resistance @@ -63,7 +60,7 @@ my_data \%>\% filter(bactid == guess_bactid("E. coli"), first_isolates == TRUE) \%>\% group_by(year = format(date, "\%Y")) \%>\% - summarise(n = n(), - amoxclav_resistance = rsi(amcl, minimum = 20)) + summarise(n = n_rsi(amcl), + p = resistance(amcl, minimum = 20)) } \keyword{datasets} diff --git a/tests/testthat/test-bactid.R b/tests/testthat/test-bactid.R index 1fc8253f..17bd3c38 100644 --- a/tests/testthat/test-bactid.R +++ b/tests/testthat/test-bactid.R @@ -37,8 +37,8 @@ test_that("as.bactid works", { select(genus) %>% as.bactid() %>% as.character(), - c("STC", "STC", "NEI", "STA", "STA", - "NEI", "ENT", "ENT", "ESC", "KLE")) + c("ESC", "ESC", "STA", "STA", "STA", + "STA", "STA", "STA", "STA", "STA")) # select with two columns expect_identical( diff --git a/tests/testthat/test-first_isolate.R b/tests/testthat/test-first_isolate.R index c809bbfc..8f37f4f7 100755 --- a/tests/testthat/test-first_isolate.R +++ b/tests/testthat/test-first_isolate.R @@ -8,9 +8,9 @@ test_that("first isolates work", { col_date = "date", col_patient_id = "patient_id", col_bactid = "bactid", - info = FALSE), + info = TRUE), na.rm = TRUE), - 1959) + 1326) # septic_patients contains 1962 out of 2000 first *weighted* isolates expect_equal( @@ -24,8 +24,8 @@ test_that("first isolates work", { type = "keyantibiotics", info = TRUE), na.rm = TRUE)), - 1962) - # and 1997 when using points + 1421) + # and 1961 when using points expect_equal( suppressWarnings( sum( @@ -37,7 +37,7 @@ test_that("first isolates work", { type = "points", info = TRUE), na.rm = TRUE)), - 1997) + 1425) # septic_patients contains 1732 out of 2000 first non-ICU isolates expect_equal( @@ -50,7 +50,7 @@ test_that("first isolates work", { info = TRUE, icu_exclude = TRUE), na.rm = TRUE), - 1732) + 1171) # set 1500 random observations to be of specimen type 'Urine' random_rows <- sample(x = 1:2000, size = 1500, replace = FALSE) @@ -59,7 +59,7 @@ test_that("first isolates work", { first_isolate(tbl = mutate(septic_patients, specimen = if_else(row_number() %in% random_rows, "Urine", - "Unknown")), + "Other")), col_date = "date", col_patient_id = "patient_id", col_bactid = "bactid", @@ -74,7 +74,7 @@ test_that("first isolates work", { first_isolate(tbl = mutate(septic_patients, specimen = if_else(row_number() %in% random_rows, "Urine", - "Unknown")), + "Other")), col_date = "date", col_patient_id = "patient_id", col_bactid = "bactid", diff --git a/tests/testthat/test-freq.R b/tests/testthat/test-freq.R index be3de4c3..c81fcb14 100755 --- a/tests/testthat/test-freq.R +++ b/tests/testthat/test-freq.R @@ -4,8 +4,8 @@ test_that("frequency table works", { expect_equal(nrow(freq(c(1, 1, 2, 2, 3, 3, 4, 4, 5, 5))), 5) expect_equal(nrow(frequency_tbl(c(1, 1, 2, 2, 3, 3, 4, 4, 5, 5))), 5) - # date column of septic_patients should contain 1662 unique dates - expect_equal(nrow(freq(septic_patients$date)), 1662) + # date column of septic_patients should contain 1151 unique dates + expect_equal(nrow(freq(septic_patients$date)), 1151) expect_equal(nrow(freq(septic_patients$date)), length(unique(septic_patients$date))) diff --git a/tests/testthat/test-kurtosis.R b/tests/testthat/test-kurtosis.R index a970e55a..8d5b8f77 100644 --- a/tests/testthat/test-kurtosis.R +++ b/tests/testthat/test-kurtosis.R @@ -2,12 +2,12 @@ context("kurtosis.R") test_that("kurtosis works", { expect_equal(kurtosis(septic_patients$age), - 6.423118, + 3.57781, tolerance = 0.00001) expect_equal(unname(kurtosis(data.frame(septic_patients$age))), - 6.423118, + 3.57781, tolerance = 0.00001) expect_equal(kurtosis(matrix(septic_patients$age)), - 6.423118, + 3.57781, tolerance = 0.00001) }) diff --git a/tests/testthat/test-mdro.R b/tests/testthat/test-mdro.R index 9433be36..f0982e3b 100755 --- a/tests/testthat/test-mdro.R +++ b/tests/testthat/test-mdro.R @@ -13,7 +13,8 @@ test_that("MDRO works", { expect_equal(outcome %>% class(), c('ordered', 'factor')) # septic_patients should have these finding using Dutch guidelines - expect_equal(outcome %>% freq() %>% pull(count), c(3, 21)) + expect_equal(outcome %>% freq() %>% pull(count), + c(2, 14)) # 2 unconfirmed, 14 positive expect_equal(BRMO(septic_patients, info = FALSE), MDRO(septic_patients, "nl", info = FALSE)) diff --git a/tests/testthat/test-resistance.R b/tests/testthat/test-resistance.R index e93a7208..59a30dd9 100755 --- a/tests/testthat/test-resistance.R +++ b/tests/testthat/test-resistance.R @@ -1,19 +1,19 @@ context("resistance.R") test_that("resistance works", { - # amox resistance in `septic_patients` should be around 57.56% - expect_equal(resistance(septic_patients$amox, include_I = TRUE), 0.5756, tolerance = 0.0001) - expect_equal(susceptibility(septic_patients$amox, include_I = FALSE), 1 - 0.5756, tolerance = 0.0001) + # amox resistance in `septic_patients` should be around 66.33% + expect_equal(resistance(septic_patients$amox, include_I = TRUE), 0.6633, tolerance = 0.0001) + expect_equal(susceptibility(septic_patients$amox, include_I = FALSE), 1 - 0.6633, tolerance = 0.0001) # pita+genta susceptibility around 98.09% expect_equal(susceptibility(septic_patients$pita, septic_patients$gent), - 0.9809, + 0.9535, tolerance = 0.0001) expect_equal(suppressWarnings(rsi(septic_patients$pita, septic_patients$gent, interpretation = "S")), - 0.9809, + 0.9535, tolerance = 0.0001) # count of cases @@ -26,7 +26,7 @@ test_that("resistance works", { combination_p = susceptibility(cipr, gent, as_percent = TRUE), combination_n = n_rsi(cipr, gent)) %>% pull(combination_n), - c(138, 474, 170, 464, 183)) + c(202, 482, 201, 499)) expect_warning(resistance(as.character(septic_patients$amcl))) expect_warning(susceptibility(as.character(septic_patients$amcl))) @@ -36,26 +36,26 @@ test_that("resistance works", { }) test_that("old rsi works", { - # amox resistance in `septic_patients` should be around 53.86% - expect_equal(rsi(septic_patients$amox), 0.5756, tolerance = 0.0001) - expect_equal(rsi(septic_patients$amox), 0.5756, tolerance = 0.0001) + # amox resistance in `septic_patients` should be around 66.33% + expect_equal(rsi(septic_patients$amox), 0.6633, tolerance = 0.0001) + expect_equal(rsi(septic_patients$amox, interpretation = "S"), 1 - 0.6633, tolerance = 0.0001) expect_equal(rsi_df(septic_patients, ab = "amox", info = TRUE), - 0.5756, + 0.6633, tolerance = 0.0001) # pita+genta susceptibility around 98.09% expect_equal(rsi(septic_patients$pita, septic_patients$gent, interpretation = "S", info = TRUE), - 0.9809, + 0.9535, tolerance = 0.0001) expect_equal(rsi_df(septic_patients, ab = c("pita", "gent"), interpretation = "S", info = TRUE), - 0.9809, + 0.9535, tolerance = 0.0001) # more than 2 not allowed expect_error(rsi_df(septic_patients, @@ -76,7 +76,7 @@ test_that("old rsi works", { as_percent = TRUE, warning = FALSE), combination_n = n_rsi(cipr, gent)) %>% pull(combination_n), - c(138, 474, 170, 464, 183)) + c(202, 482, 201, 499)) }) test_that("prediction of rsi works", { @@ -86,8 +86,8 @@ test_that("prediction of rsi works", { col_date = "date", info = TRUE) %>% pull("probR") - # amox resistance will decrease using dataset `septic_patients` - expect_true(amox_R[2] > amox_R[20]) + # amox resistance will increase according to data set `septic_patients` + expect_true(amox_R[3] < amox_R[20]) expect_output(rsi_predict(tbl = filter(septic_patients, bactid == "ESCCOL"), model = "binomial", diff --git a/tests/testthat/test-skewness.R b/tests/testthat/test-skewness.R index cd7beff8..96b0ef6e 100644 --- a/tests/testthat/test-skewness.R +++ b/tests/testthat/test-skewness.R @@ -2,12 +2,12 @@ context("skewness.R") test_that("skewness works", { expect_equal(skewness(septic_patients$age), - -1.637164, + -0.90624, tolerance = 0.00001) expect_equal(unname(skewness(data.frame(septic_patients$age))), - -1.637164, + -0.90624, tolerance = 0.00001) expect_equal(skewness(matrix(septic_patients$age)), - -1.637164, + -0.90624, tolerance = 0.00001) })