diff --git a/DESCRIPTION b/DESCRIPTION index b70bf118..109c4b93 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR -Version: 0.3.0 -Date: 2018-08-14 +Version: 0.3.0.9001 +Date: 2018-08-21 Title: Antimicrobial Resistance Analysis Authors@R: c( person( diff --git a/NAMESPACE b/NAMESPACE index 7ca07bdf..749b2bd9 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -43,6 +43,12 @@ export(atc_groups) export(atc_property) export(clipboard_export) export(clipboard_import) +export(count_I) +export(count_IR) +export(count_R) +export(count_S) +export(count_SI) +export(count_df) export(facet_rsi) export(first_isolate) export(freq) @@ -58,6 +64,7 @@ export(interpretive_reading) export(is.bactid) export(is.mic) export(is.rsi) +export(is.rsi.eligible) export(key_antibiotics) export(key_antibiotics_equal) export(kurtosis) @@ -137,6 +144,7 @@ importFrom(dplyr,progress_estimated) importFrom(dplyr,pull) importFrom(dplyr,row_number) importFrom(dplyr,select) +importFrom(dplyr,select_if) importFrom(dplyr,slice) importFrom(dplyr,summarise) importFrom(dplyr,summarise_if) diff --git a/NEWS.md b/NEWS.md index 0254c2a6..8f91e574 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,15 @@ -# 0.3.0 +# 0.3.0.90xx (latest development version) + +#### New +* Functions `count_R`, `count_IR`, `count_I`, `count_SI` and `count_S` to selectively count resistant or susceptibile isolates +* Function `is.rsi.eligible` to check for columns that have valid antimicrobial results, but do not have the `rsi` class yet. Transform the columns of your raw data with: `data %>% mutate_at(is.rsi.eligible, as.rsi)` + +#### Changed +* Added parameters `minimum` and `as_percent` to `portion_df` +* Edited `ggplot_rsi` and `geom_rsi` so they can cope with `count_df`. The new `fun` parameter has value `portion_df` at default, but can be set to `count_df`. +* Fix for `ggplot_rsi` when the `ggplot2` was not loaded + +# 0.3.0 (latest stable version) **Published on CRAN: 2018-08-14** #### New diff --git a/R/classes.R b/R/classes.R index 0962e240..301e91df 100755 --- a/R/classes.R +++ b/R/classes.R @@ -21,6 +21,7 @@ #' This transforms a vector to a new class \code{rsi}, which is an ordered factor with levels \code{S < I < R}. Invalid antimicrobial interpretations will be translated as \code{NA} with a warning. #' @rdname as.rsi #' @param x vector +#' @details The function \code{is.rsi.eligible} returns \code{TRUE} when a columns contains only valid antimicrobial interpretations (S and/or I and/or R), and \code{FALSE} otherwise. #' @return Ordered factor with new class \code{rsi} and new attribute \code{package} #' @keywords rsi #' @export @@ -37,6 +38,12 @@ #' plot(rsi_data) # for percentages #' barplot(rsi_data) # for frequencies #' freq(rsi_data) # frequency table with informative header +#' +#' # fastest way to transform all columns with already valid AB results to class `rsi`: +#' library(dplyr) +#' septic_patients %>% +#' mutate_if(is.rsi.eligible, +#' as.rsi) as.rsi <- function(x) { if (is.rsi(x)) { x @@ -88,6 +95,18 @@ is.rsi <- function(x) { class(x) %>% identical(c('rsi', 'ordered', 'factor')) } +#' @rdname as.rsi +#' @export +#' @importFrom dplyr %>% +is.rsi.eligible <- function(x) { + distinct_val <- x %>% unique() %>% sort() %>% as.character() + distinct_val <- distinct_val[!is.na(distinct_val) & trimws(distinct_val) != ""] + distinct_val_rsi <- as.character(suppressWarnings(as.rsi(distinct_val))) + + length(distinct_val) > 0 & + identical(distinct_val, distinct_val_rsi) +} + #' @exportMethod print.rsi #' @export #' @importFrom dplyr %>% diff --git a/R/count.R b/R/count.R new file mode 100644 index 00000000..d78cadf8 --- /dev/null +++ b/R/count.R @@ -0,0 +1,199 @@ +# ==================================================================== # +# TITLE # +# Antimicrobial Resistance (AMR) Analysis # +# # +# AUTHORS # +# Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl) # +# # +# LICENCE # +# This program is free software; you can redistribute it and/or modify # +# it under the terms of the GNU General Public License version 2.0, # +# as published by the Free Software Foundation. # +# # +# This program is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# ==================================================================== # + +#' Count isolates +#' +#' @description These functions can be used to count resistant/susceptible microbial isolates. All functions can be used in \code{dplyr}s \code{\link[dplyr]{summarise}} and support grouped variables, see \emph{Examples}. +#' +#' \code{count_R} and \code{count_IR} can be used to count resistant isolates, \code{count_S} and \code{count_SI} can be used to count susceptible isolates.\cr +#' @inheritParams portion +#' @details \strong{Remember that you should filter your table to let it contain only first isolates!} Use \code{\link{first_isolate}} to determine them in your data set. +#' +#' These functions are meant to count isolates. Use the \code{\link{portion}_*} functions to calculate microbial resistance. +#' +#' \code{count_df} takes any variable from \code{data} that has an \code{"rsi"} class (created with \code{\link{as.rsi}}) and counts the amounts of R, I and S. The resulting \emph{tidy data} (see Source) \code{data.frame} will have three rows (S/I/R) and a column for each variable with class \code{"rsi"}. +#' @source Wickham H. \strong{Tidy Data.} The Journal of Statistical Software, vol. 59, 2014. \url{http://vita.had.co.nz/papers/tidy-data.html} +#' @seealso \code{\link{portion}_*} to calculate microbial resistance and susceptibility.\cr +#' \code{\link{n_rsi}} to count all cases where antimicrobial results are available. +#' @keywords resistance susceptibility rsi antibiotics isolate isolates +#' @return Integer +#' @rdname count +#' @name count +#' @export +#' @examples +#' # septic_patients is a data set available in the AMR package. It is true, genuine data. +#' ?septic_patients +#' +#' # Count resistant isolates +#' count_R(septic_patients$amox) +#' count_IR(septic_patients$amox) +#' +#' # Or susceptibile isolates +#' count_S(septic_patients$amox) +#' count_SI(septic_patients$amox) +#' +#' # Since n_rsi counts available isolates, you can +#' # calculate back to count e.g. non-susceptible isolates. +#' # This results in the same: +#' count_IR(septic_patients$amox) +#' portion_IR(septic_patients$amox) * n_rsi(septic_patients$amox) +#' +#' library(dplyr) +#' septic_patients %>% +#' group_by(hospital_id) %>% +#' summarise(R = count_R(cipr), +#' I = count_I(cipr), +#' S = count_S(cipr), +#' n = n_rsi(cipr), # the actual total; sum of all three +#' total = n()) # NOT the amount of tested isolates! +#' +#' # Count co-resistance between amoxicillin/clav acid and gentamicin, +#' # so we can see that combination therapy does a lot more than mono therapy. +#' # Please mind that `portion_S` calculates percentages right away instead. +#' count_S(septic_patients$amcl) # S = 1056 (67.3%) +#' n_rsi(septic_patients$amcl) # n = 1570 +#' +#' count_S(septic_patients$gent) # S = 1363 (74.0%) +#' n_rsi(septic_patients$gent) # n = 1842 +#' +#' with(septic_patients, +#' count_S(amcl, gent)) # S = 1385 (92.1%) +#' with(septic_patients, # n = 1504 +#' n_rsi(amcl, gent)) +#' +#' # Get portions S/I/R immediately of all rsi columns +#' septic_patients %>% +#' select(amox, cipr) %>% +#' count_df(translate = FALSE) +#' +#' # It also supports grouping variables +#' septic_patients %>% +#' select(hospital_id, amox, cipr) %>% +#' group_by(hospital_id) %>% +#' count_df(translate = FALSE) +#' +count_R <- function(ab1, + ab2 = NULL) { + rsi_calc(type = "R", + ab1 = ab1, + ab2 = ab2, + include_I = FALSE, + minimum = 0, + as_percent = FALSE, + only_count = TRUE) +} + +#' @rdname count +#' @export +count_IR <- function(ab1, + ab2 = NULL) { + rsi_calc(type = "R", + ab1 = ab1, + ab2 = ab2, + include_I = TRUE, + minimum = 0, + as_percent = FALSE, + only_count = TRUE) +} + +#' @rdname count +#' @export +count_I <- function(ab1) { + rsi_calc(type = "I", + ab1 = ab1, + ab2 = NULL, + include_I = FALSE, + minimum = 0, + as_percent = FALSE, + only_count = TRUE) +} + +#' @rdname count +#' @export +count_SI <- function(ab1, + ab2 = NULL) { + rsi_calc(type = "S", + ab1 = ab1, + ab2 = ab2, + include_I = TRUE, + minimum = 0, + as_percent = FALSE, + only_count = TRUE) +} + +#' @rdname count +#' @export +count_S <- function(ab1, + ab2 = NULL) { + rsi_calc(type = "S", + ab1 = ab1, + ab2 = ab2, + include_I = FALSE, + minimum = 0, + as_percent = FALSE, + only_count = TRUE) +} + +#' @rdname count +#' @importFrom dplyr %>% select_if bind_rows summarise_if mutate group_vars select everything +#' @export +count_df <- function(data, + translate_ab = getOption("get_antibiotic_names", "official")) { + + if (data %>% select_if(is.rsi) %>% ncol() == 0) { + stop("No columns with class 'rsi' found. See ?as.rsi.") + } + + if (as.character(translate_ab) == "TRUE") { + translate_ab <- "official" + } + options(get_antibiotic_names = translate_ab) + + resS <- summarise_if(.tbl = data, + .predicate = is.rsi, + .funs = count_S) %>% + mutate(Interpretation = "S") %>% + select(Interpretation, everything()) + + resI <- summarise_if(.tbl = data, + .predicate = is.rsi, + .funs = count_I) %>% + mutate(Interpretation = "I") %>% + select(Interpretation, everything()) + + resR <- summarise_if(.tbl = data, + .predicate = is.rsi, + .funs = count_R) %>% + mutate(Interpretation = "R") %>% + select(Interpretation, everything()) + + data.groups <- group_vars(data) + + res <- bind_rows(resS, resI, resR) %>% + mutate(Interpretation = factor(Interpretation, levels = c("R", "I", "S"), ordered = TRUE)) %>% + tidyr::gather(Antibiotic, Count, -Interpretation, -data.groups) + + if (!translate_ab == FALSE) { + if (!tolower(translate_ab) %in% tolower(colnames(AMR::antibiotics))) { + stop("Parameter `translate_ab` does not occur in the `antibiotics` data set.", call. = FALSE) + } + res <- res %>% mutate(Antibiotic = abname(Antibiotic, from = "guess", to = translate_ab)) + } + + res +} diff --git a/R/ggplot_rsi.R b/R/ggplot_rsi.R index e2fd020b..fea5588b 100644 --- a/R/ggplot_rsi.R +++ b/R/ggplot_rsi.R @@ -20,16 +20,17 @@ #' #' Use these functions to create bar plots for antimicrobial resistance analysis. All functions rely on internal \code{\link[ggplot2]{ggplot}} functions. #' @param data a \code{data.frame} with column(s) of class \code{"rsi"} (see \code{\link{as.rsi}}) -#' @param position position adjustment of bars, either \code{"stack"} (default) or \code{"dodge"} +#' @param position position adjustment of bars, either \code{"stack"} (default when \code{fun} is \code{\link{portion_df}}) or \code{"dodge"} (default when \code{fun} is \code{\link{count_df}}) #' @param x variable to show on x axis, either \code{"Antibiotic"} (default) or \code{"Interpretation"} or a grouping variable #' @param fill variable to categorise using the plots legend, either \code{"Antibiotic"} (default) or \code{"Interpretation"} or a grouping variable #' @param facet variable to split plots by, either \code{"Interpretation"} (default) or \code{"Antibiotic"} or a grouping variable #' @param translate_ab a column name of the \code{\link{antibiotics}} data set to translate the antibiotic abbreviations into, using \code{\link{abname}}. Default behaviour is to translate to official names according to the WHO. Use \code{translate_ab = FALSE} to disable translation. +#' @param fun function to transform \code{data}, either \code{\link{portion_df}} (default) or \code{\link{count_df}} #' @param ... other parameters passed on to \code{\link[ggplot2]{facet_wrap}} #' @details At default, the names of antibiotics will be shown on the plots using \code{\link{abname}}. This can be set with the option \code{get_antibiotic_names} (a logical value), so change it e.g. to \code{FALSE} with \code{options(get_antibiotic_names = FALSE)}. #' #' \strong{The functions}\cr -#' \code{geom_rsi} will take any variable from the data that has an \code{rsi} class (created with \code{\link{as.rsi}}) using \code{\link{portion_df}} and will plot bars with the percentage R, I and S. The default behaviour is to have the bars stacked and to have the different antibiotics on the x axis. +#' \code{geom_rsi} will take any variable from the data that has an \code{rsi} class (created with \code{\link{as.rsi}}) using \code{fun} (\code{\link{portion_df}} at default, could also be \code{\link{count_df}}) and will plot bars with the percentage R, I and S. The default behaviour is to have the bars stacked and to have the different antibiotics on the x axis. #' #' \code{facet_rsi} creates 2d plots (at default based on S/I/R) using \code{\link[ggplot2]{facet_wrap}}. #' @@ -63,6 +64,11 @@ #' septic_patients %>% #' select(amox, nitr, fosf, trim, cipr) %>% #' ggplot_rsi() +#' +#' # get counts instead of percentages: +#' septic_patients %>% +#' select(amox, nitr, fosf, trim, cipr) %>% +#' ggplot_rsi(fun = count_df) #' \donttest{ #' # it also supports groups (don't forget to use the group on `x` or `facet`): #' septic_patients %>% @@ -102,26 +108,35 @@ #' x = "Microorganisms") #' } ggplot_rsi <- function(data, - position = "stack", + position = NULL, x = "Antibiotic", fill = "Interpretation", facet = NULL, translate_ab = "official", + fun = portion_df, ...) { if (!"ggplot2" %in% rownames(installed.packages())) { stop('this function requires the ggplot2 package.', call. = FALSE) } + fun_name <- deparse(substitute(fun)) + if (!fun_name %in% c("portion_df", "count_df")) { + stop("`fun` must be portion_df or count_df") + } + p <- ggplot2::ggplot(data = data) + - geom_rsi(position = position, x = x, fill = fill, translate_ab = translate_ab) + - scale_y_percent() + + geom_rsi(position = position, x = x, fill = fill, translate_ab = translate_ab, fun = fun) + theme_rsi() if (fill == "Interpretation") { # set RSI colours p <- p + scale_rsi_colours() } + if (fun_name == "portion_df") { + # portions, so use y scale with percentage + p <- p + scale_y_percent() + } if (!is.null(facet)) { p <- p + facet_rsi(facet = facet, ...) @@ -132,10 +147,27 @@ ggplot_rsi <- function(data, #' @rdname ggplot_rsi #' @export -geom_rsi <- function(position = "stack", +geom_rsi <- function(position = NULL, x = c("Antibiotic", "Interpretation"), fill = "Interpretation", - translate_ab = "official") { + translate_ab = "official", + fun = portion_df) { + + fun_name <- deparse(substitute(fun)) + if (!fun_name %in% c("portion_df", "count_df", "fun")) { + stop("`fun` must be portion_df or count_df") + } + if (identical(fun, count_df)) { + y <- "Count" + if (missing(position) | is.null(position)) { + position <- "dodge" + } + } else { + y <- "Percentage" + if (missing(position) | is.null(position)) { + position <- "stack" + } + } x <- x[1] if (x %in% tolower(c('ab', 'antibiotic', 'abx', 'antibiotics'))) { @@ -147,8 +179,8 @@ geom_rsi <- function(position = "stack", options(get_antibiotic_names = translate_ab) ggplot2::layer(geom = "bar", stat = "identity", position = position, - mapping = ggplot2::aes_string(x = x, y = "Percentage", fill = fill), - data = AMR::portion_df, params = list()) + mapping = ggplot2::aes_string(x = x, y = y, fill = fill), + data = fun, params = list()) } @@ -163,14 +195,13 @@ facet_rsi <- function(facet = c("Interpretation", "Antibiotic"), ...) { facet <- "Antibiotic" } - ggplot2::facet_wrap(facets = facet, scales = "free", ...) + ggplot2::facet_wrap(facets = facet, scales = "free_x", ...) } #' @rdname ggplot_rsi #' @export scale_y_percent <- function() { - ggplot2::scale_y_continuous(name = "Percentage", - breaks = seq(0, 1, 0.1), + ggplot2::scale_y_continuous(breaks = seq(0, 1, 0.1), limits = c(0, 1), labels = percent(seq(0, 1, 0.1))) } @@ -184,8 +215,8 @@ scale_rsi_colours <- function() { #' @rdname ggplot_rsi #' @export theme_rsi <- function() { - theme_minimal() + - theme(panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.grid.major.y = element_line(colour = "grey75")) + ggplot2::theme_minimal() + + ggplot2::theme(panel.grid.major.x = ggplot2::element_blank(), + panel.grid.minor = ggplot2::element_blank(), + panel.grid.major.y = ggplot2::element_line(colour = "grey75")) } diff --git a/R/globals.R b/R/globals.R index 32afe883..7c765670 100755 --- a/R/globals.R +++ b/R/globals.R @@ -32,6 +32,7 @@ globalVariables(c('abname', 'C_chisq_sim', 'cnt', 'count', + 'Count', 'counts', 'cum_count', 'cum_percent', diff --git a/R/misc.R b/R/misc.R index e8ff46a4..f5097fa3 100755 --- a/R/misc.R +++ b/R/misc.R @@ -138,21 +138,18 @@ tbl_parse_guess <- function(tbl, asciify = FALSE)) } if (any(tbl %>% pull(i) %>% class() %in% c('factor', 'character'))) { - # get values - distinct_val <- tbl %>% pull(i) %>% unique() %>% sort() if (remove_ASCII_escape_char == TRUE) { # remove ASCII escape character: https://en.wikipedia.org/wiki/Escape_character#ASCII_escape_character tbl[, i] <- tbl %>% pull(i) %>% gsub('\033', ' ', ., fixed = TRUE) } - # look for RSI, shouldn't all be "" and must be valid antibiotic interpretations - if (!all(distinct_val[!is.na(distinct_val)] == '') - & all(distinct_val[!is.na(distinct_val)] %in% c('', 'I', 'I;I', 'R', 'R;R', 'S', 'S;S'))) { - tbl[, i] <- tbl %>% pull(i) %>% as.rsi() + if (tbl %>% pull(i) %>% is.rsi.eligible()) { + # look for RSI + tbl[, i] <- as.rsi(tbl[, i]) } } - # convert to MIC class + # convert to MIC class when ends on `_mic` if (colnames(tbl)[i] %like% '_mic$') { - tbl[, i] <- tbl %>% pull(i) %>% as.mic() + tbl[, i] <- as.mic(tbl[, i]) } } tbl diff --git a/R/portion.R b/R/portion.R index 367b8f60..c4636b6e 100755 --- a/R/portion.R +++ b/R/portion.R @@ -24,11 +24,13 @@ #' @param ab1 vector of antibiotic interpretations, they will be transformed internally with \code{\link{as.rsi}} if needed #' @param ab2 like \code{ab}, a vector of antibiotic interpretations. Use this to calculate (the lack of) co-resistance: the probability where one of two drugs have a resistant or susceptible result. See Examples. #' @param minimum minimal amount of available isolates. Any number lower than \code{minimum} will return \code{NA}. The default number of \code{30} isolates is advised by the CLSI as best practice, see Source. -#' @param as_percent logical to indicate whether the output must be returned as percent (text), will else be a double -#' @param data a code{data.frame} containing columns with class \code{rsi} (see \code{\link{as.rsi}}) +#' @param as_percent logical to indicate whether the output must be returned as a hundred fold with \% sign (a character). A value of \code{0.123456} will then be returned as \code{"12.3\%"}. +#' @param data a \code{data.frame} containing columns with class \code{rsi} (see \code{\link{as.rsi}}) #' @param translate_ab a column name of the \code{\link{antibiotics}} data set to translate the antibiotic abbreviations to, using \code{\link{abname}}. This can be set with \code{\link{getOption}("get_antibiotic_names")}. #' @details \strong{Remember that you should filter your table to let it contain only first isolates!} Use \code{\link{first_isolate}} to determine them in your data set. #' +#' These functions are not meant to count isolates, but to calculate the portion of resistance/susceptibility. If a column has been transformed with \code{\link{as.rsi}}, just use e.g. \code{isolates[isolates == "R"]} to get the resistant ones. You could then calculate the \code{\link{length}} of it. +#' #' \code{portion_df} takes any variable from \code{data} that has an \code{"rsi"} class (created with \code{\link{as.rsi}}) and calculates the portions R, I and S. The resulting \emph{tidy data} (see Source) \code{data.frame} will have three rows (S/I/R) and a column for each variable with class \code{"rsi"}. #' #' The old \code{\link{rsi}} function is still available for backwards compatibility but is deprecated. @@ -47,7 +49,8 @@ #' @source \strong{M39 Analysis and Presentation of Cumulative Antimicrobial Susceptibility Test Data, 4th Edition}, 2014, \emph{Clinical and Laboratory Standards Institute (CLSI)}. \url{https://clsi.org/standards/products/microbiology/documents/m39/}. #' #' Wickham H. \strong{Tidy Data.} The Journal of Statistical Software, vol. 59, 2014. \url{http://vita.had.co.nz/papers/tidy-data.html} -#' @seealso \code{\link{n_rsi}} to count cases with antimicrobial results. +#' @seealso \code{\link[AMR]{count}_*} to count resistant and susceptibile isolates.\cr +#' \code{\link{n_rsi}} to count all cases where antimicrobial results are available. #' @keywords resistance susceptibility rsi_df rsi antibiotics isolate isolates #' @return Double or, when \code{as_percent = TRUE}, a character. #' @rdname portion @@ -135,7 +138,8 @@ portion_R <- function(ab1, ab2 = ab2, include_I = FALSE, minimum = minimum, - as_percent = as_percent) + as_percent = as_percent, + only_count = FALSE) } #' @rdname portion @@ -149,7 +153,8 @@ portion_IR <- function(ab1, ab2 = ab2, include_I = TRUE, minimum = minimum, - as_percent = as_percent) + as_percent = as_percent, + only_count = FALSE) } #' @rdname portion @@ -162,7 +167,8 @@ portion_I <- function(ab1, ab2 = NULL, include_I = FALSE, minimum = minimum, - as_percent = as_percent) + as_percent = as_percent, + only_count = FALSE) } #' @rdname portion @@ -176,7 +182,8 @@ portion_SI <- function(ab1, ab2 = ab2, include_I = TRUE, minimum = minimum, - as_percent = as_percent) + as_percent = as_percent, + only_count = FALSE) } #' @rdname portion @@ -190,13 +197,21 @@ portion_S <- function(ab1, ab2 = ab2, include_I = FALSE, minimum = minimum, - as_percent = as_percent) + as_percent = as_percent, + only_count = FALSE) } #' @rdname portion -#' @importFrom dplyr bind_rows summarise_if mutate group_vars select everything +#' @importFrom dplyr %>% select_if bind_rows summarise_if mutate group_vars select everything #' @export -portion_df <- function(data, translate_ab = getOption("get_antibiotic_names", "official")) { +portion_df <- function(data, + translate_ab = getOption("get_antibiotic_names", "official"), + minimum = 30, + as_percent = FALSE) { + + if (data %>% select_if(is.rsi) %>% ncol() == 0) { + stop("No columns with class 'rsi' found. See ?as.rsi.") + } if (as.character(translate_ab) == "TRUE") { translate_ab <- "official" @@ -205,19 +220,25 @@ portion_df <- function(data, translate_ab = getOption("get_antibiotic_names", "o resS <- summarise_if(.tbl = data, .predicate = is.rsi, - .funs = portion_S) %>% + .funs = portion_S, + minimum = minimum, + as_percent = as_percent) %>% mutate(Interpretation = "S") %>% select(Interpretation, everything()) resI <- summarise_if(.tbl = data, .predicate = is.rsi, - .funs = portion_I) %>% + .funs = portion_I, + minimum = minimum, + as_percent = as_percent) %>% mutate(Interpretation = "I") %>% select(Interpretation, everything()) resR <- summarise_if(.tbl = data, .predicate = is.rsi, - .funs = portion_R) %>% + .funs = portion_R, + minimum = minimum, + as_percent = as_percent) %>% mutate(Interpretation = "R") %>% select(Interpretation, everything()) @@ -242,7 +263,8 @@ rsi_calc <- function(type, ab2, include_I, minimum, - as_percent) { + as_percent, + only_count) { if (NCOL(ab1) > 1) { stop('`ab1` must be a vector of antimicrobial interpretations', call. = FALSE) @@ -284,11 +306,6 @@ rsi_calc <- function(type, warning("Increase speed by transforming to class `rsi` on beforehand: df %>% mutate_at(vars(col10:col20), as.rsi)") } - total <- length(x) - sum(is.na(x)) - if (total < minimum) { - return(NA) - } - if (type == "S") { found <- sum(as.integer(x) <= 1 + include_I, na.rm = TRUE) } else if (type == "I") { @@ -299,6 +316,15 @@ rsi_calc <- function(type, stop("invalid type") } + if (only_count == TRUE) { + return(found) + } + + total <- length(x) - sum(is.na(x)) + if (total < minimum) { + return(NA) + } + if (as_percent == TRUE) { percent(found / total, force_zero = TRUE) } else { diff --git a/README.md b/README.md index 078e2f2c..8ce33d2f 100755 --- a/README.md +++ b/README.md @@ -67,7 +67,7 @@ With the `MDRO` function (abbreviation of Multi Drug Resistant Organisms), you c **Read all changes and new functions in [NEWS.md](NEWS.md).** ## How to get it? -This package [is published on CRAN](http://cran.r-project.org/package=AMR), the official R network. +All versions of this package [are published on CRAN](http://cran.r-project.org/package=AMR), the official R network with a peer-reviewed submission process. ### Install from CRAN [![CRAN_Badge](https://www.r-pkg.org/badges/version/AMR)](http://cran.r-project.org/package=AMR) [![CRAN_Downloads](https://cranlogs.r-pkg.org/badges/grand-total/AMR)](http://cran.r-project.org/package=AMR) @@ -87,10 +87,16 @@ This package [is published on CRAN](http://cran.r-project.org/package=AMR), the [![Last_Commit](https://img.shields.io/github/last-commit/msberends/AMR.svg)](https://github.com/msberends/AMR/commits/master) [![Code_Coverage](https://codecov.io/gh/msberends/AMR/branch/master/graph/badge.svg)](https://codecov.io/gh/msberends/AMR) -```r install.packages("devtools") +```r +install.packages("devtools") devtools::install_github("msberends/AMR") ``` +### Install from Zenodo +[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.1305355.svg)](https://doi.org/10.5281/zenodo.1305355) + +This package was also published on Zenodo: https://doi.org/10.5281/zenodo.1305355 + ## How to use it? ```r # Call it with: diff --git a/man/as.rsi.Rd b/man/as.rsi.Rd index 0c6f4d76..bb3549ff 100755 --- a/man/as.rsi.Rd +++ b/man/as.rsi.Rd @@ -3,11 +3,14 @@ \name{as.rsi} \alias{as.rsi} \alias{is.rsi} +\alias{is.rsi.eligible} \title{Class 'rsi'} \usage{ as.rsi(x) is.rsi(x) + +is.rsi.eligible(x) } \arguments{ \item{x}{vector} @@ -18,6 +21,9 @@ Ordered factor with new class \code{rsi} and new attribute \code{package} \description{ This transforms a vector to a new class \code{rsi}, which is an ordered factor with levels \code{S < I < R}. Invalid antimicrobial interpretations will be translated as \code{NA} with a warning. } +\details{ +The function \code{is.rsi.eligible} returns \code{TRUE} when a columns contains only valid antimicrobial interpretations (S and/or I and/or R), and \code{FALSE} otherwise. +} \examples{ rsi_data <- as.rsi(c(rep("S", 474), rep("I", 36), rep("R", 370))) rsi_data <- as.rsi(c(rep("S", 474), rep("I", 36), rep("R", 370), "A", "B", "C")) @@ -29,6 +35,12 @@ as.rsi("<= 0.002; S") # will return S plot(rsi_data) # for percentages barplot(rsi_data) # for frequencies freq(rsi_data) # frequency table with informative header + +# fastest way to transform all columns with already valid AB results to class `rsi`: +library(dplyr) +septic_patients \%>\% + mutate_if(is.rsi.eligible, + as.rsi) } \seealso{ \code{\link{as.mic}} diff --git a/man/count.Rd b/man/count.Rd new file mode 100644 index 00000000..68b9c12d --- /dev/null +++ b/man/count.Rd @@ -0,0 +1,115 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/count.R +\name{count} +\alias{count} +\alias{count_R} +\alias{count_IR} +\alias{count_I} +\alias{count_SI} +\alias{count_S} +\alias{count_df} +\title{Count isolates} +\source{ +Wickham H. \strong{Tidy Data.} The Journal of Statistical Software, vol. 59, 2014. \url{http://vita.had.co.nz/papers/tidy-data.html} +} +\usage{ +count_R(ab1, ab2 = NULL) + +count_IR(ab1, ab2 = NULL) + +count_I(ab1) + +count_SI(ab1, ab2 = NULL) + +count_S(ab1, ab2 = NULL) + +count_df(data, translate_ab = getOption("get_antibiotic_names", + "official")) +} +\arguments{ +\item{ab1}{vector of antibiotic interpretations, they will be transformed internally with \code{\link{as.rsi}} if needed} + +\item{ab2}{like \code{ab}, a vector of antibiotic interpretations. Use this to calculate (the lack of) co-resistance: the probability where one of two drugs have a resistant or susceptible result. See Examples.} + +\item{data}{a \code{data.frame} containing columns with class \code{rsi} (see \code{\link{as.rsi}})} + +\item{translate_ab}{a column name of the \code{\link{antibiotics}} data set to translate the antibiotic abbreviations to, using \code{\link{abname}}. This can be set with \code{\link{getOption}("get_antibiotic_names")}.} +} +\value{ +Integer +} +\description{ +These functions can be used to count resistant/susceptible microbial isolates. All functions can be used in \code{dplyr}s \code{\link[dplyr]{summarise}} and support grouped variables, see \emph{Examples}. + +\code{count_R} and \code{count_IR} can be used to count resistant isolates, \code{count_S} and \code{count_SI} can be used to count susceptible isolates.\cr +} +\details{ +\strong{Remember that you should filter your table to let it contain only first isolates!} Use \code{\link{first_isolate}} to determine them in your data set. + +These functions are meant to count isolates. Use the \code{\link{portion}_*} functions to calculate microbial resistance. + +\code{count_df} takes any variable from \code{data} that has an \code{"rsi"} class (created with \code{\link{as.rsi}}) and counts the amounts of R, I and S. The resulting \emph{tidy data} (see Source) \code{data.frame} will have three rows (S/I/R) and a column for each variable with class \code{"rsi"}. +} +\examples{ +# septic_patients is a data set available in the AMR package. It is true, genuine data. +?septic_patients + +# Count resistant isolates +count_R(septic_patients$amox) +count_IR(septic_patients$amox) + +# Or susceptibile isolates +count_S(septic_patients$amox) +count_SI(septic_patients$amox) + +# Since n_rsi counts available isolates, you can +# calculate back to count e.g. non-susceptible isolates. +# This results in the same: +count_IR(septic_patients$amox) +portion_IR(septic_patients$amox) * n_rsi(septic_patients$amox) + +library(dplyr) +septic_patients \%>\% + group_by(hospital_id) \%>\% + summarise(R = count_R(cipr), + I = count_I(cipr), + S = count_S(cipr), + n = n_rsi(cipr), # the actual total; sum of all three + total = n()) # NOT the amount of tested isolates! + +# Count co-resistance between amoxicillin/clav acid and gentamicin, +# so we can see that combination therapy does a lot more than mono therapy. +# Please mind that `portion_S` calculates percentages right away instead. +count_S(septic_patients$amcl) # S = 1056 (67.3\%) +n_rsi(septic_patients$amcl) # n = 1570 + +count_S(septic_patients$gent) # S = 1363 (74.0\%) +n_rsi(septic_patients$gent) # n = 1842 + +with(septic_patients, + count_S(amcl, gent)) # S = 1385 (92.1\%) +with(septic_patients, # n = 1504 + n_rsi(amcl, gent)) + +# Get portions S/I/R immediately of all rsi columns +septic_patients \%>\% + select(amox, cipr) \%>\% + count_df(translate = FALSE) + +# It also supports grouping variables +septic_patients \%>\% + select(hospital_id, amox, cipr) \%>\% + group_by(hospital_id) \%>\% + count_df(translate = FALSE) + +} +\seealso{ +\code{\link{portion}_*} to calculate microbial resistance and susceptibility.\cr +\code{\link{n_rsi}} to count all cases where antimicrobial results are available. +} +\keyword{antibiotics} +\keyword{isolate} +\keyword{isolates} +\keyword{resistance} +\keyword{rsi} +\keyword{susceptibility} diff --git a/man/ggplot_rsi.Rd b/man/ggplot_rsi.Rd index e8a59727..af6d23be 100644 --- a/man/ggplot_rsi.Rd +++ b/man/ggplot_rsi.Rd @@ -9,12 +9,13 @@ \alias{theme_rsi} \title{AMR bar plots with \code{ggplot}} \usage{ -ggplot_rsi(data, position = "stack", x = "Antibiotic", +ggplot_rsi(data, position = NULL, x = "Antibiotic", fill = "Interpretation", facet = NULL, translate_ab = "official", - ...) + fun = portion_df, ...) -geom_rsi(position = "stack", x = c("Antibiotic", "Interpretation"), - fill = "Interpretation", translate_ab = "official") +geom_rsi(position = NULL, x = c("Antibiotic", "Interpretation"), + fill = "Interpretation", translate_ab = "official", + fun = portion_df) facet_rsi(facet = c("Interpretation", "Antibiotic"), ...) @@ -27,7 +28,7 @@ theme_rsi() \arguments{ \item{data}{a \code{data.frame} with column(s) of class \code{"rsi"} (see \code{\link{as.rsi}})} -\item{position}{position adjustment of bars, either \code{"stack"} (default) or \code{"dodge"}} +\item{position}{position adjustment of bars, either \code{"stack"} (default when \code{fun} is \code{\link{portion_df}}) or \code{"dodge"} (default when \code{fun} is \code{\link{count_df}})} \item{x}{variable to show on x axis, either \code{"Antibiotic"} (default) or \code{"Interpretation"} or a grouping variable} @@ -37,6 +38,8 @@ theme_rsi() \item{translate_ab}{a column name of the \code{\link{antibiotics}} data set to translate the antibiotic abbreviations into, using \code{\link{abname}}. Default behaviour is to translate to official names according to the WHO. Use \code{translate_ab = FALSE} to disable translation.} +\item{fun}{function to transform \code{data}, either \code{\link{portion_df}} (default) or \code{\link{count_df}}} + \item{...}{other parameters passed on to \code{\link[ggplot2]{facet_wrap}}} } \description{ @@ -46,7 +49,7 @@ Use these functions to create bar plots for antimicrobial resistance analysis. A At default, the names of antibiotics will be shown on the plots using \code{\link{abname}}. This can be set with the option \code{get_antibiotic_names} (a logical value), so change it e.g. to \code{FALSE} with \code{options(get_antibiotic_names = FALSE)}. \strong{The functions}\cr -\code{geom_rsi} will take any variable from the data that has an \code{rsi} class (created with \code{\link{as.rsi}}) using \code{\link{portion_df}} and will plot bars with the percentage R, I and S. The default behaviour is to have the bars stacked and to have the different antibiotics on the x axis. +\code{geom_rsi} will take any variable from the data that has an \code{rsi} class (created with \code{\link{as.rsi}}) using \code{fun} (\code{\link{portion_df}} at default, could also be \code{\link{count_df}}) and will plot bars with the percentage R, I and S. The default behaviour is to have the bars stacked and to have the different antibiotics on the x axis. \code{facet_rsi} creates 2d plots (at default based on S/I/R) using \code{\link[ggplot2]{facet_wrap}}. @@ -79,6 +82,11 @@ ggplot(df) + septic_patients \%>\% select(amox, nitr, fosf, trim, cipr) \%>\% ggplot_rsi() + +# get counts instead of percentages: +septic_patients \%>\% + select(amox, nitr, fosf, trim, cipr) \%>\% + ggplot_rsi(fun = count_df) \donttest{ # it also supports groups (don't forget to use the group on `x` or `facet`): septic_patients \%>\% diff --git a/man/portion.Rd b/man/portion.Rd index 9a7c26bf..80477540 100644 --- a/man/portion.Rd +++ b/man/portion.Rd @@ -26,7 +26,7 @@ portion_SI(ab1, ab2 = NULL, minimum = 30, as_percent = FALSE) portion_S(ab1, ab2 = NULL, minimum = 30, as_percent = FALSE) portion_df(data, translate_ab = getOption("get_antibiotic_names", - "official")) + "official"), minimum = 30, as_percent = FALSE) } \arguments{ \item{ab1}{vector of antibiotic interpretations, they will be transformed internally with \code{\link{as.rsi}} if needed} @@ -35,9 +35,9 @@ portion_df(data, translate_ab = getOption("get_antibiotic_names", \item{minimum}{minimal amount of available isolates. Any number lower than \code{minimum} will return \code{NA}. The default number of \code{30} isolates is advised by the CLSI as best practice, see Source.} -\item{as_percent}{logical to indicate whether the output must be returned as percent (text), will else be a double} +\item{as_percent}{logical to indicate whether the output must be returned as a hundred fold with \% sign (a character). A value of \code{0.123456} will then be returned as \code{"12.3\%"}.} -\item{data}{a code{data.frame} containing columns with class \code{rsi} (see \code{\link{as.rsi}})} +\item{data}{a \code{data.frame} containing columns with class \code{rsi} (see \code{\link{as.rsi}})} \item{translate_ab}{a column name of the \code{\link{antibiotics}} data set to translate the antibiotic abbreviations to, using \code{\link{abname}}. This can be set with \code{\link{getOption}("get_antibiotic_names")}.} } @@ -52,6 +52,8 @@ These functions can be used to calculate the (co-)resistance of microbial isolat \details{ \strong{Remember that you should filter your table to let it contain only first isolates!} Use \code{\link{first_isolate}} to determine them in your data set. +These functions are not meant to count isolates, but to calculate the portion of resistance/susceptibility. If a column has been transformed with \code{\link{as.rsi}}, just use e.g. \code{isolates[isolates == "R"]} to get the resistant ones. You could then calculate the \code{\link{length}} of it. + \code{portion_df} takes any variable from \code{data} that has an \code{"rsi"} class (created with \code{\link{as.rsi}}) and calculates the portions R, I and S. The resulting \emph{tidy data} (see Source) \code{data.frame} will have three rows (S/I/R) and a column for each variable with class \code{"rsi"}. The old \code{\link{rsi}} function is still available for backwards compatibility but is deprecated. @@ -143,7 +145,8 @@ my_table \%>\% } } \seealso{ -\code{\link{n_rsi}} to count cases with antimicrobial results. +\code{\link[AMR]{count}_*} to count resistant and susceptibile isolates.\cr +\code{\link{n_rsi}} to count all cases where antimicrobial results are available. } \keyword{antibiotics} \keyword{isolate} diff --git a/man/rsi.Rd b/man/rsi.Rd index 7643844e..41de4bb1 100644 --- a/man/rsi.Rd +++ b/man/rsi.Rd @@ -16,7 +16,7 @@ rsi(ab1, ab2 = NULL, interpretation = "IR", minimum = 30, \item{minimum}{minimal amount of available isolates. Any number lower than \code{minimum} will return \code{NA}. The default number of \code{30} isolates is advised by the CLSI as best practice, see Source.} -\item{as_percent}{logical to indicate whether the output must be returned as percent (text), will else be a double} +\item{as_percent}{logical to indicate whether the output must be returned as a hundred fold with \% sign (a character). A value of \code{0.123456} will then be returned as \code{"12.3\%"}.} \item{...}{deprecated parameters to support usage on older versions} } diff --git a/tests/testthat/test-ggplot_rsi.R b/tests/testthat/test-ggplot_rsi.R index b4bc35d7..8ec10750 100644 --- a/tests/testthat/test-ggplot_rsi.R +++ b/tests/testthat/test-ggplot_rsi.R @@ -29,4 +29,13 @@ test_that("ggplot_rsi works", { summarise_all(portion_IR) %>% as.double() ) + expect_equal( + (septic_patients %>% select(amcl, cipr) %>% ggplot_rsi(x = "Antibiotic", + facet = "Interpretation", + fun = count_df))$data %>% + summarise_all(count_IR) %>% as.double(), + septic_patients %>% select(amcl, cipr) %>% + summarise_all(count_IR) %>% as.double() + ) + })