diff --git a/DESCRIPTION b/DESCRIPTION index 4e33e1ee..72867c1a 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR -Version: 0.3.0.9007 -Date: 2018-09-08 +Version: 0.3.0.9008 +Date: 2018-09-16 Title: Antimicrobial Resistance Analysis Authors@R: c( person( diff --git a/NAMESPACE b/NAMESPACE index 5eb6c006..ec2413be 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -87,6 +87,7 @@ export(is.rsi.eligible) export(key_antibiotics) export(key_antibiotics_equal) export(kurtosis) +export(labels_rsi_count) export(left_join_microorganisms) export(like) export(mo_aerobic) diff --git a/NEWS.md b/NEWS.md index 6341d900..d2107421 100755 --- a/NEWS.md +++ b/NEWS.md @@ -25,6 +25,7 @@ * Column names inputs of `EUCAST_rules`, `first_isolate` and `key_antibiotics` * Column names of datasets `microorganisms` and `septic_patients` * All old syntaxes will still work with this version, but will throw warnings +* Function `labels_rsi_count` to print datalabels on an RSI `ggplot2` model * Functions `as.atc` and `is.atc` to transform/look up antibiotic ATC codes as defined by the WHO. The existing function `guess_atc` is now an alias of `as.atc`. * Aliases for existing function `mo_property`: `mo_family`, `mo_genus`, `mo_species`, `mo_subspecies`, `mo_fullname`, `mo_shortname`, `mo_aerobic`, `mo_type` and `mo_gramstain`. They also come with support for German, Dutch, French, Italian, Spanish and Portuguese, and it defaults to the systems locale: ```r @@ -70,6 +71,7 @@ ``` -* Edited `ggplot_rsi` and `geom_rsi` so they can cope with `count_df`. The new `fun` parameter has value `portion_df` at default, but can be set to `count_df`. +* Edited `ggplot_rsi` and `geom_rsi` so they can cope with `count_df`. The new `fun` parameter has value `count_df` at default, but can be set to `portion_df`. 
* Fix for `ggplot_rsi` when the `ggplot2` package was not loaded +* Added datalabels function `labels_rsi_count` to `ggplot_rsi` * Added possibility to set any parameter to `geom_rsi` (and `ggplot_rsi`) so you can set your own preferences * Fix for joins, where predefined suffices would not be honoured * Added parameter `quote` to the `freq` function diff --git a/R/ab_property.R b/R/ab_property.R index 19c056fd..b8befa3e 100644 --- a/R/ab_property.R +++ b/R/ab_property.R @@ -36,7 +36,7 @@ #' ab_umcg("amcl") # AMCL ab_property <- function(x, property = 'official') { property <- property[1] - if (!property %in% colnames(antibiotics)) { + if (!property %in% colnames(AMR::antibiotics)) { stop("invalid property: ", property, " - use a column name of the `antibiotics` data set") } if (!is.atc(x)) { diff --git a/R/count.R b/R/count.R index 50c39b3d..10594fd0 100644 --- a/R/count.R +++ b/R/count.R @@ -146,6 +146,10 @@ count_S <- function(...) { count_df <- function(data, translate_ab = getOption("get_antibiotic_names", "official")) { + if (!"data.frame" %in% class(data)) { + stop("`count_df` must be called on a data.frame") + } + if (data %>% select_if(is.rsi) %>% ncol() == 0) { stop("No columns with class 'rsi' found. See ?as.rsi.") } @@ -177,7 +181,7 @@ count_df <- function(data, res <- bind_rows(resS, resI, resR) %>% mutate(Interpretation = factor(Interpretation, levels = c("R", "I", "S"), ordered = TRUE)) %>% - tidyr::gather(Antibiotic, Count, -Interpretation, -data.groups) + tidyr::gather(Antibiotic, Value, -Interpretation, -data.groups) if (!translate_ab == FALSE) { if (!tolower(translate_ab) %in% tolower(colnames(AMR::antibiotics))) { diff --git a/R/ggplot_rsi.R b/R/ggplot_rsi.R index 335a4099..7f3f520b 100644 --- a/R/ggplot_rsi.R +++ b/R/ggplot_rsi.R @@ -20,13 +20,16 @@ #' #' Use these functions to create bar plots for antimicrobial resistance analysis. All functions rely on internal \code{\link[ggplot2]{ggplot}} functions. 
#' @param data a \code{data.frame} with column(s) of class \code{"rsi"} (see \code{\link{as.rsi}}) -#' @param position position adjustment of bars, either \code{"fill"}, \code{"stack"} (default when \code{fun} is \code{\link{portion_df}}) or \code{"dodge"} (default when \code{fun} is \code{\link{count_df}}) +#' @param position position adjustment of bars, either \code{"fill"} (default when \code{fun} is \code{\link{count_df}}), \code{"stack"} (default when \code{fun} is \code{\link{portion_df}}) or \code{"dodge"} #' @param x variable to show on x axis, either \code{"Antibiotic"} (default) or \code{"Interpretation"} or a grouping variable #' @param fill variable to categorise using the plots legend, either \code{"Antibiotic"} (default) or \code{"Interpretation"} or a grouping variable #' @param facet variable to split plots by, either \code{"Interpretation"} (default) or \code{"Antibiotic"} or a grouping variable #' @param translate_ab a column name of the \code{\link{antibiotics}} data set to translate the antibiotic abbreviations into, using \code{\link{abname}}. Default behaviour is to translate to official names according to the WHO. Use \code{translate_ab = FALSE} to disable translation. -#' @param fun function to transform \code{data}, either \code{\link{portion_df}} (default) or \code{\link{count_df}} +#' @param fun function to transform \code{data}, either \code{\link{count_df}} (default) or \code{\link{portion_df}} #' @param nrow (when using \code{facet}) number of rows +#' @param datalabels show datalabels using \code{labels_rsi_count}, will at default only be shown when \code{fun = count_df} +#' @param datalabels.size size of the datalabels +#' @param datalabels.colour colour of the datalabels #' @param ... other parameters passed on to \code{geom_rsi} #' @details At default, the names of antibiotics will be shown on the plots using \code{\link{abname}}. This can be set with the option \code{get_antibiotic_names} (a logical value), so change it e.g. 
to \code{FALSE} with \code{options(get_antibiotic_names = FALSE)}. #' @@ -35,12 +38,14 @@ #' #' \code{facet_rsi} creates 2d plots (at default based on S/I/R) using \code{\link[ggplot2]{facet_wrap}}. #' -#' \code{scale_y_percent} transforms the y axis to a 0 to 100\% range. +#' \code{scale_y_percent} transforms the y axis to a 0 to 100\% range using \code{\link[ggplot2]{scale_y_continuous}}. #' -#' \code{scale_rsi_colours} sets colours to the bars: green for S, yellow for I and red for R. +#' \code{scale_rsi_colours} sets colours to the bars: green for S, yellow for I and red for R, using \code{\link[ggplot2]{scale_fill_brewer}}. #' #' \code{theme_rsi} is a \code{\link[ggplot2]{theme}} with minimal distraction. #' +#' \code{labels_rsi_count} prints datalabels on the bars with percentage and number of isolates using \code{\link[ggplot2]{geom_text}}. +#' #' \code{ggplot_rsi} is a wrapper around all above functions that uses data as first input. This makes it possible to use this function after a pipe (\code{\%>\%}). See Examples. #' @rdname ggplot_rsi #' @export @@ -58,6 +63,7 @@ #' geom_rsi() + #' scale_y_percent() + #' scale_rsi_colours() + +#' labels_rsi_count() + #' theme_rsi() #' #' # or better yet, simplify this using the wrapper function - a single command: @@ -131,8 +137,11 @@ ggplot_rsi <- function(data, # params = list(), facet = NULL, translate_ab = "official", - fun = portion_df, + fun = count_df, nrow = NULL, + datalabels = TRUE, + datalabels.size = 3, + datalabels.colour = "grey15", ...) 
{ if (!"ggplot2" %in% rownames(installed.packages())) { @@ -174,11 +183,22 @@ ggplot_rsi <- function(data, # set RSI colours p <- p + scale_rsi_colours() } - if (fun_name == "portion_df") { + if (is.null(position)) { + position <- "fill" + } + if (fun_name == "portion_df" + | (fun_name == "count_df" & position == "fill")) { # portions, so use y scale with percentage p <- p + scale_y_percent() } + if (fun_name == "count_df" & datalabels == TRUE) { + p <- p + labels_rsi_count(position = position, + x = x, + datalabels.size = datalabels.size, + datalabels.colour = datalabels.colour) + } + if (!is.null(facet)) { p <- p + facet_rsi(facet = facet, nrow = nrow) } @@ -192,20 +212,19 @@ geom_rsi <- function(position = NULL, x = c("Antibiotic", "Interpretation"), fill = "Interpretation", translate_ab = "official", - fun = portion_df, + fun = count_df, ...) { fun_name <- deparse(substitute(fun)) if (!fun_name %in% c("portion_df", "count_df", "fun")) { stop("`fun` must be portion_df or count_df") } + y <- "Value" if (identical(fun, count_df)) { - y <- "Count" if (missing(position) | is.null(position)) { - position <- "dodge" + position <- "fill" } } else { - y <- "Percentage" if (missing(position) | is.null(position)) { position <- "stack" } @@ -264,7 +283,6 @@ facet_rsi <- function(facet = c("Interpretation", "Antibiotic"), nrow = NULL) { #' @export scale_y_percent <- function() { ggplot2::scale_y_continuous(breaks = seq(0, 1, 0.1), - limits = c(0, 1), labels = percent(seq(0, 1, 0.1))) } @@ -282,3 +300,35 @@ theme_rsi <- function() { panel.grid.minor = ggplot2::element_blank(), panel.grid.major.y = ggplot2::element_line(colour = "grey75")) } + +#' @rdname ggplot_rsi +#' @export +labels_rsi_count <- function(position = NULL, + x = "Antibiotic", + datalabels.size = 3, + datalabels.colour = "grey15") { + if (is.null(position)) { + position <- "fill" + } + if (position == "fill") { + position <- ggplot2::position_fill(vjust = 0.5) + } + ggplot2::geom_text(mapping = 
ggplot2::aes_string(label = "lbl", + x = x, + y = "Value"), + position = position, + data = getlbls, + inherit.aes = FALSE, + size = datalabels.size, + colour = datalabels.colour) +} + +#' @importFrom dplyr %>% group_by mutate +getlbls <- function(data) { + data %>% + count_df() %>% + group_by(Antibiotic) %>% + mutate(lbl = paste0(percent(Value / sum(Value, na.rm = TRUE), force_zero = TRUE), + " (n=", Value, ")")) %>% + mutate(lbl = ifelse(lbl == "0.0% (n=0)", "", lbl)) +} diff --git a/R/globals.R b/R/globals.R index da66cc39..80ae961a 100755 --- a/R/globals.R +++ b/R/globals.R @@ -22,7 +22,6 @@ globalVariables(c(".", "antibiotics", "cnt", "count", - "Count", "cum_count", "cum_percent", "date_lab", @@ -37,6 +36,7 @@ globalVariables(c(".", "key_ab", "key_ab_lag", "key_ab_other", + "lbl", "median", "mic", "microorganisms", @@ -46,7 +46,6 @@ globalVariables(c(".", "other_pat_or_mo", "Pasted", "patient_id", - "Percentage", "prevalence", "R", "real_first_isolate", @@ -54,4 +53,5 @@ globalVariables(c(".", "septic_patients", "species", "value", + "Value", "y")) diff --git a/R/mo_property.R b/R/mo_property.R index c712d69f..ef77e293 100644 --- a/R/mo_property.R +++ b/R/mo_property.R @@ -187,7 +187,7 @@ mo_aerobic <- function(x) { #' @export mo_property <- function(x, property = 'fullname', Becker = FALSE, Lancefield = FALSE, language = NULL) { property <- tolower(property[1]) - if (!property %in% colnames(microorganisms)) { + if (!property %in% colnames(AMR::microorganisms)) { stop("invalid property: ", property, " - use a column name of the `microorganisms` data set") } result1 <- as.mo(x = x, Becker = Becker, Lancefield = Lancefield) # this will give a warning if x cannot be coerced diff --git a/R/portion.R b/R/portion.R index f8ff7d8e..7bcc229e 100755 --- a/R/portion.R +++ b/R/portion.R @@ -203,6 +203,10 @@ portion_df <- function(data, minimum = 30, as_percent = FALSE) { + if (!"data.frame" %in% class(data)) { + stop("`portion_df` must be called on a data.frame") + } + 
if (data %>% select_if(is.rsi) %>% ncol() == 0) { stop("No columns with class 'rsi' found. See ?as.rsi.") } @@ -240,7 +244,7 @@ portion_df <- function(data, res <- bind_rows(resS, resI, resR) %>% mutate(Interpretation = factor(Interpretation, levels = c("R", "I", "S"), ordered = TRUE)) %>% - tidyr::gather(Antibiotic, Percentage, -Interpretation, -data.groups) + tidyr::gather(Antibiotic, Value, -Interpretation, -data.groups) if (!translate_ab == FALSE) { if (!tolower(translate_ab) %in% tolower(colnames(AMR::antibiotics))) { diff --git a/README.md b/README.md index e6cb07c7..3bd4f304 100755 --- a/README.md +++ b/README.md @@ -161,7 +161,8 @@ Adjust it with any parameter you know from the `ggplot2` package: ```r septic_patients %>% select(amox, nitr, fosf, trim, cipr) %>% - ggplot_rsi(width = 0.5, colour = "black", size = 1, linetype = 2, alpha = 0.25) + ggplot_rsi(datalabels = FALSE, + width = 0.5, colour = "black", size = 1, linetype = 2, alpha = 0.25) ``` ![example_3_rsi](man/figures/rsi_example3.png) @@ -174,7 +175,8 @@ septic_patients %>% group_by(hospital_id) %>% ggplot_rsi(x = "hospital_id", facet = "Antibiotic", - nrow = 1) + + nrow = 1, + datalabels = FALSE) + labs(title = "AMR of Anti-UTI Drugs Per Hospital", x = "Hospital") ``` diff --git a/man/figures/rsi_example2.png b/man/figures/rsi_example2.png index 87f4d8b5..399a8b48 100644 Binary files a/man/figures/rsi_example2.png and b/man/figures/rsi_example2.png differ diff --git a/man/figures/rsi_example3.png b/man/figures/rsi_example3.png index b2136dfa..2ec9c87c 100644 Binary files a/man/figures/rsi_example3.png and b/man/figures/rsi_example3.png differ diff --git a/man/figures/rsi_example4.png b/man/figures/rsi_example4.png index 739b1240..fe28be02 100644 Binary files a/man/figures/rsi_example4.png and b/man/figures/rsi_example4.png differ diff --git a/man/ggplot_rsi.Rd b/man/ggplot_rsi.Rd index a14afcf0..537abf78 100644 --- a/man/ggplot_rsi.Rd +++ b/man/ggplot_rsi.Rd @@ -7,15 +7,17 @@ 
\alias{scale_y_percent} \alias{scale_rsi_colours} \alias{theme_rsi} +\alias{labels_rsi_count} \title{AMR bar plots with \code{ggplot}} \usage{ ggplot_rsi(data, position = NULL, x = "Antibiotic", fill = "Interpretation", facet = NULL, translate_ab = "official", - fun = portion_df, nrow = NULL, ...) + fun = count_df, nrow = NULL, datalabels = TRUE, + datalabels.size = 3, datalabels.colour = "grey15", ...) geom_rsi(position = NULL, x = c("Antibiotic", "Interpretation"), - fill = "Interpretation", translate_ab = "official", - fun = portion_df, ...) + fill = "Interpretation", translate_ab = "official", fun = count_df, + ...) facet_rsi(facet = c("Interpretation", "Antibiotic"), nrow = NULL) @@ -24,11 +26,14 @@ scale_y_percent() scale_rsi_colours() theme_rsi() + +labels_rsi_count(position = NULL, x = "Antibiotic", + datalabels.size = 3, datalabels.colour = "grey15") } \arguments{ \item{data}{a \code{data.frame} with column(s) of class \code{"rsi"} (see \code{\link{as.rsi}})} -\item{position}{position adjustment of bars, either \code{"fill"}, \code{"stack"} (default when \code{fun} is \code{\link{portion_df}}) or \code{"dodge"} (default when \code{fun} is \code{\link{count_df}})} +\item{position}{position adjustment of bars, either \code{"fill"} (default when \code{fun} is \code{\link{count_df}}), \code{"stack"} (default when \code{fun} is \code{\link{portion_df}}) or \code{"dodge"}} \item{x}{variable to show on x axis, either \code{"Antibiotic"} (default) or \code{"Interpretation"} or a grouping variable} @@ -38,10 +43,16 @@ theme_rsi() \item{translate_ab}{a column name of the \code{\link{antibiotics}} data set to translate the antibiotic abbreviations into, using \code{\link{abname}}. Default behaviour is to translate to official names according to the WHO. 
Use \code{translate_ab = FALSE} to disable translation.} -\item{fun}{function to transform \code{data}, either \code{\link{portion_df}} (default) or \code{\link{count_df}}} +\item{fun}{function to transform \code{data}, either \code{\link{count_df}} (default) or \code{\link{portion_df}}} \item{nrow}{(when using \code{facet}) number of rows} +\item{datalabels}{show datalabels using \code{labels_rsi_count}, will at default only be shown when \code{fun = count_df}} + +\item{datalabels.size}{size of the datalabels} + +\item{datalabels.colour}{colour of the datalabels} + \item{...}{other parameters passed on to \code{geom_rsi}} } \description{ @@ -55,12 +66,14 @@ At default, the names of antibiotics will be shown on the plots using \code{\lin \code{facet_rsi} creates 2d plots (at default based on S/I/R) using \code{\link[ggplot2]{facet_wrap}}. -\code{scale_y_percent} transforms the y axis to a 0 to 100\% range. +\code{scale_y_percent} transforms the y axis to a 0 to 100\% range using \code{\link[ggplot2]{scale_y_continuous}}. -\code{scale_rsi_colours} sets colours to the bars: green for S, yellow for I and red for R. +\code{scale_rsi_colours} sets colours to the bars: green for S, yellow for I and red for R, using \code{\link[ggplot2]{scale_fill_brewer}}. \code{theme_rsi} is a \code{\link[ggplot2]{theme}} with minimal distraction. +\code{labels_rsi_count} prints datalabels on the bars with percentage and number of isolates using \code{\link[ggplot2]{geom_text}}. + \code{ggplot_rsi} is a wrapper around all above functions that uses data as first input. This makes it possible to use this function after a pipe (\code{\%>\%}). See Examples. 
} \examples{ @@ -77,6 +90,7 @@ ggplot(df) + geom_rsi() + scale_y_percent() + scale_rsi_colours() + + labels_rsi_count() + theme_rsi() # or better yet, simplify this using the wrapper function - a single command: diff --git a/tests/testthat/test-count.R b/tests/testthat/test-count.R index 92d39945..2d685cbb 100644 --- a/tests/testthat/test-count.R +++ b/tests/testthat/test-count.R @@ -41,4 +41,6 @@ test_that("counts work", { expect_error(count_S("test", minimum = "test")) expect_error(count_S("test", as_percent = "test")) + expect_error(count_df(c("A", "B", "C"))) + }) diff --git a/tests/testthat/test-portion.R b/tests/testthat/test-portion.R index b39481bd..8e80cceb 100755 --- a/tests/testthat/test-portion.R +++ b/tests/testthat/test-portion.R @@ -112,7 +112,7 @@ test_that("old rsi works", { # portion_df expect_equal( - septic_patients %>% select(amox) %>% portion_df() %>% pull(Percentage), + septic_patients %>% select(amox) %>% portion_df() %>% pull(Value), c(septic_patients$amox %>% portion_S(), septic_patients$amox %>% portion_I(), septic_patients$amox %>% portion_R()) @@ -165,4 +165,6 @@ test_that("prediction of rsi works", { col_ab = "mero", col_date = "date", info = TRUE)) + + expect_error(portion_df(c("A", "B", "C"))) })