ggplot_rsi improvements

This commit is contained in:
dr. M.S. (Matthijs) Berends 2018-08-13 16:42:37 +02:00
parent dba06c3295
commit 59ce1eb1b1
8 changed files with 140 additions and 67 deletions

View File

@ -2,7 +2,7 @@
#### New
* **BREAKING**: `rsi_df` was removed in favour of new functions `portion_R`, `portion_IR`, `portion_I`, `portion_SI` and `portion_S` to selectively calculate resistance or susceptibility. These functions are 20 to 30 times faster than the old `rsi` function. The old function still works, but is deprecated.
* New function `portion_df` to get all portions of S, I and R of a data set with antibiotic columns
* New function `portion_df` to get all portions of S, I and R of a data set with antibiotic columns, with support for grouped variables
* **BREAKING**: the methodology for determining first weighted isolates was changed. The antibiotics that are compared between isolates (called *key antibiotics*) to include more first isolates (afterwards called first *weighted* isolates) are now as follows:
* Universal: amoxicillin, amoxicillin/clavulanic acid, cefuroxime, piperacillin/tazobactam, ciprofloxacin, trimethoprim/sulfamethoxazole
* Gram-positive: vancomycin, teicoplanin, tetracycline, erythromycin, oxacillin, rifampicin
@ -11,6 +11,7 @@
* New functions `geom_rsi`, `facet_rsi`, `scale_y_percent`, `scale_rsi_colours` and `theme_rsi`
* New wrapper function `ggplot_rsi` to apply all above functions on a data set:
* `septic_patients %>% select(tobr, gent) %>% ggplot_rsi` will show portions of S, I and R immediately in a pretty plot
* Support for grouped variables, see `?ggplot_rsi`
* Determining bacterial ID:
* New functions `as.bactid` and `is.bactid` to transform/ look up microbial ID's.
* The existing function `guess_bactid` is now an alias of `as.bactid`

View File

@ -21,9 +21,11 @@
#' Use these functions to create bar plots for antimicrobial resistance analysis. All functions rely on internal \code{\link[ggplot2]{ggplot}} functions.
#' @param data a \code{data.frame} with column(s) of class \code{"rsi"} (see \code{\link{as.rsi}})
#' @param position position adjustment of bars, either \code{"stack"} (default) or \code{"dodge"}
#' @param x variable to show on x axis, either \code{"Antibiotic"} (default) or \code{"Interpretation"}
#' @param fill variable to categorise using the plots legend
#' @param facet variable to split plots by, either \code{"Interpretation"} (default) or \code{"Antibiotic"}
#' @param x variable to show on x axis, either \code{"Antibiotic"} (default) or \code{"Interpretation"} or a grouping variable
#' @param fill variable to categorise using the plot's legend, either \code{"Interpretation"} (default) or \code{"Antibiotic"} or a grouping variable
#' @param facet variable to split plots by, either \code{"Interpretation"} (default) or \code{"Antibiotic"} or a grouping variable
#' @param translate_ab a column name of the \code{\link{antibiotics}} data set to translate the antibiotic abbreviations into, using \code{\link{abname}}. Default behaviour is to translate to official names according to the WHO. Use \code{translate_ab = FALSE} to disable translation.
#' @param ... other parameters passed on to \code{\link[ggplot2]{facet_wrap}}
#' @details At default, the names of antibiotics will be shown on the plots using \code{\link{abname}}. This can be set with the option \code{get_antibiotic_names} (a logical value), so change it e.g. to \code{FALSE} with \code{options(get_antibiotic_names = FALSE)}.
#'
#' \strong{The functions}\cr
@ -31,7 +33,7 @@
#'
#' \code{facet_rsi} creates 2d plots (at default based on S/I/R) using \code{\link[ggplot2]{facet_wrap}}.
#'
#' \code{scale_y_percent} transforms the y axis to a 0 to 100% range.
#' \code{scale_y_percent} transforms the y axis to a 0 to 100\% range.
#'
#' \code{scale_rsi_colours} sets colours to the bars: green for S, yellow for I and red for R.
#'
@ -48,11 +50,11 @@
#' ggplot(septic_patients %>% select(amox, nitr, fosf, trim, cipr)) +
#' geom_rsi()
#'
#' # prettify it using some additional functions
#' # prettify the plot using some additional functions:
#' df <- septic_patients[, c("amox", "nitr", "fosf", "trim", "cipr")]
#' ggplot(df) +
#' geom_rsi(x = "Interpretation") +
#' facet_rsi(facet = "Antibiotic") +
#' geom_rsi() +
#' facet_rsi() +
#' scale_y_percent() +
#' scale_rsi_colours() +
#' theme_rsi()
@ -61,30 +63,58 @@
#' septic_patients %>%
#' select(amox, nitr, fosf, trim, cipr) %>%
#' ggplot_rsi()
#'
#' \donttest{
#' # it also supports groups (don't forget to use the group on `x` or `facet`):
#' septic_patients %>%
#' select(amox, nitr, fosf, trim, cipr) %>%
#' ggplot_rsi(x = "Interpretation", facet = "Antibiotic")
#'
#' # it also supports groups (don't forget to use facet on the group):
#' septic_patients %>%
#' select(hospital_id, amox, cipr) %>%
#' select(hospital_id, amox, nitr, fosf, trim, cipr) %>%
#' group_by(hospital_id) %>%
#' ggplot_rsi() +
#' facet_wrap("hospital_id", nrow = 1) +
#' labs(title = "AMR of Amoxicillin And Ciprofloxacine Per Hospital")
#' ggplot_rsi(x = "hospital_id",
#' facet = "Antibiotic",
#' nrow = 1) +
#' labs(title = "AMR of Anti-UTI Drugs Per Hospital",
#' x = "Hospital")
#'
#' # genuine analysis: check 2 most prevalent microorganisms
#' septic_patients %>%
#' # create new bacterial ID's, with all CoNS under the same group (Becker et al.)
#' mutate(bactid = as.bactid(bactid, Becker = TRUE)) %>%
#' # filter on top 2 bacterial ID's
#' filter(bactid %in% top_freq(freq(.$bactid), 2)) %>%
#' # determine first isolates
#' mutate(first_isolate = first_isolate(.,
#' col_date = "date",
#' col_patient_id = "patient_id",
#' col_bactid = "bactid")) %>%
#' # filter on first isolates
#' filter(first_isolate == TRUE) %>%
#' # join the `microorganisms` data set
#' left_join_microorganisms() %>%
#' # select full name and some antibiotics
#' select(mo = fullname,
#' cfur, gent, cipr) %>%
#' # group by MO
#' group_by(mo) %>%
#' # plot the thing, putting MOs on the facet
#' ggplot_rsi(x = "Antibiotic",
#' facet = "mo") +
#' labs(title = "AMR of Top Two Microorganisms In Blood Culture Isolates",
#' subtitle = "Only First Isolates, CoNS grouped according to Becker et al.",
#' x = "Microorganisms")
#' }
ggplot_rsi <- function(data,
position = "stack",
x = "Antibiotic",
fill = "Interpretation",
facet = NULL) {
facet = NULL,
translate_ab = "official",
...) {
if (!"ggplot2" %in% rownames(installed.packages())) {
stop('this function requires the ggplot2 package.', call. = FALSE)
}
p <- ggplot2::ggplot(data = data) +
geom_rsi(position = position, x = x, fill = fill) +
geom_rsi(position = position, x = x, fill = fill, translate_ab = translate_ab) +
scale_y_percent() +
theme_rsi()
@ -94,7 +124,7 @@ ggplot_rsi <- function(data,
}
if (!is.null(facet)) {
p <- p + facet_rsi(facet = facet)
p <- p + facet_rsi(facet = facet, ...)
}
p
@ -102,13 +132,20 @@ ggplot_rsi <- function(data,
#' @rdname ggplot_rsi
#' @export
geom_rsi <- function(position = "stack", x = c("Antibiotic", "Interpretation"), fill = "Interpretation") {
geom_rsi <- function(position = "stack",
x = c("Antibiotic", "Interpretation"),
fill = "Interpretation",
translate_ab = "official") {
x <- x[1]
if (!x %in% c("Antibiotic", "Interpretation")) {
stop("`x` must be 'Antibiotic' or 'Interpretation'")
if (x %in% tolower(c('ab', 'antibiotic', 'abx', 'antibiotics'))) {
x <- "Antibiotic"
} else if (x %in% tolower(c('SIR', 'RSI', 'interpretation', 'interpretations', 'result'))) {
x <- "Interpretation"
}
options(get_antibiotic_names = translate_ab)
ggplot2::layer(geom = "bar", stat = "identity", position = position,
mapping = ggplot2::aes_string(x = x, y = "Percentage", fill = fill),
data = AMR::portion_df, params = list())
@ -117,12 +154,16 @@ geom_rsi <- function(position = "stack", x = c("Antibiotic", "Interpretation"),
#' @rdname ggplot_rsi
#' @export
facet_rsi <- function(facet = c("Interpretation", "Antibiotic")) {
facet_rsi <- function(facet = c("Interpretation", "Antibiotic"), ...) {
facet <- facet[1]
if (!facet %in% c("Antibiotic", "Interpretation")) {
stop("`facet` must be 'Antibiotic' or 'Interpretation'")
if (facet %in% tolower(c('SIR', 'RSI', 'interpretation', 'interpretations', 'result'))) {
facet <- "Interpretation"
} else if (facet %in% tolower(c('ab', 'antibiotic', 'abx', 'antibiotics'))) {
facet <- "Antibiotic"
}
ggplot2::facet_wrap(facets = facet, scales = "free")
ggplot2::facet_wrap(facets = facet, scales = "free", ...)
}
#' @rdname ggplot_rsi

View File

@ -26,7 +26,7 @@
#' @param minimum minimal amount of available isolates. Any number lower than \code{minimum} will return \code{NA}. The default number of \code{30} isolates is advised by the CLSI as best practice, see Source.
#' @param as_percent logical to indicate whether the output must be returned as a percentage (text); otherwise a double is returned
#' @param data a \code{data.frame} containing columns with class \code{rsi} (see \code{\link{as.rsi}})
#' @param translate a logical value to indicate whether antibiotic abbreviations should be translated with \code{\link{abname}}
#' @param translate_ab a column name of the \code{\link{antibiotics}} data set to translate the antibiotic abbreviations to, using \code{\link{abname}}. This can be set with \code{\link{getOption}("get_antibiotic_names")}.
#' @details \strong{Remember that you should filter your table to let it contain only first isolates!} Use \code{\link{first_isolate}} to determine them in your data set.
#'
#' \code{portion_df} takes any variable from \code{data} that has an \code{"rsi"} class (created with \code{\link{as.rsi}}) and calculates the portions R, I and S. The resulting \emph{tidy data} (see Source) \code{data.frame} will have three rows (S/I/R) and a column for each variable with class \code{"rsi"}.
@ -196,7 +196,12 @@ portion_S <- function(ab1,
#' @rdname portion
#' @importFrom dplyr bind_rows summarise_if mutate group_vars select everything
#' @export
portion_df <- function(data, translate = getOption("get_antibiotic_names", TRUE)) {
portion_df <- function(data, translate_ab = getOption("get_antibiotic_names", "official")) {
if (as.character(translate_ab) == "TRUE") {
translate_ab <- "official"
}
options(get_antibiotic_names = translate_ab)
resS <- summarise_if(.tbl = data,
.predicate = is.rsi,
@ -221,9 +226,14 @@ portion_df <- function(data, translate = getOption("get_antibiotic_names", TRUE)
res <- bind_rows(resS, resI, resR) %>%
mutate(Interpretation = factor(Interpretation, levels = c("R", "I", "S"), ordered = TRUE)) %>%
tidyr::gather(Antibiotic, Percentage, -Interpretation, -data.groups)
if (translate == TRUE) {
res <- res %>% mutate(Antibiotic = abname(Antibiotic, from = "guess", to = "official"))
if (!translate_ab == FALSE) {
if (!tolower(translate_ab) %in% tolower(colnames(AMR::antibiotics))) {
stop("Parameter `translate_ab` does not occur in the `antibiotics` data set.", call. = FALSE)
}
res <- res %>% mutate(Antibiotic = abname(Antibiotic, from = "guess", to = translate_ab))
}
res
}

View File

@ -42,6 +42,7 @@ This R package was intended to make microbial epidemiology easier. Most function
With `AMR` you can:
* Calculate the resistance (and even co-resistance) of microbial isolates with the `portion_R`, `portion_IR`, `portion_I`, `portion_SI` and `portion_S` functions, that can also be used with the `dplyr` package (e.g. in conjunction with `summarise`)
* Plot AMR results with `geom_rsi`, a function made for the `ggplot2` package
* Predict antimicrobial resistance for the upcoming years with the `resistance_predict` function
* Apply [EUCAST rules to isolates](http://www.eucast.org/expert_rules_and_intrinsic_resistance/) with the `EUCAST_rules` function
* Identify first isolates of every patient [using guidelines from the CLSI](https://clsi.org/standards/products/microbiology/documents/m39/) (Clinical and Laboratory Standards Institute) with the `first_isolate` function

View File

@ -10,12 +10,13 @@
\title{AMR bar plots with \code{ggplot}}
\usage{
ggplot_rsi(data, position = "stack", x = "Antibiotic",
fill = "Interpretation", facet = NULL)
fill = "Interpretation", facet = NULL, translate_ab = "official",
...)
geom_rsi(position = "stack", x = c("Antibiotic", "Interpretation"),
fill = "Interpretation")
fill = "Interpretation", translate_ab = "official")
facet_rsi(facet = c("Interpretation", "Antibiotic"))
facet_rsi(facet = c("Interpretation", "Antibiotic"), ...)
scale_y_percent()
@ -28,11 +29,15 @@ theme_rsi()
\item{position}{position adjustment of bars, either \code{"stack"} (default) or \code{"dodge"}}
\item{x}{variable to show on x axis, either \code{"Antibiotic"} (default) or \code{"Interpretation"}}
\item{x}{variable to show on x axis, either \code{"Antibiotic"} (default) or \code{"Interpretation"} or a grouping variable}
\item{fill}{variable to categorise using the plots legend}
\item{fill}{variable to categorise using the plot's legend, either \code{"Interpretation"} (default) or \code{"Antibiotic"} or a grouping variable}
\item{facet}{variable to split plots by, either \code{"Interpretation"} (default) or \code{"Antibiotic"}}
\item{facet}{variable to split plots by, either \code{"Interpretation"} (default) or \code{"Antibiotic"} or a grouping variable}
\item{translate_ab}{a column name of the \code{\link{antibiotics}} data set to translate the antibiotic abbreviations into, using \code{\link{abname}}. Default behaviour is to translate to official names according to the WHO. Use \code{translate_ab = FALSE} to disable translation.}
\item{...}{other parameters passed on to \code{\link[ggplot2]{facet_wrap}}}
}
\description{
Use these functions to create bar plots for antimicrobial resistance analysis. All functions rely on internal \code{\link[ggplot2]{ggplot}} functions.
@ -45,7 +50,7 @@ At default, the names of antibiotics will be shown on the plots using \code{\lin
\code{facet_rsi} creates 2d plots (at default based on S/I/R) using \code{\link[ggplot2]{facet_wrap}}.
\code{scale_y_percent} transforms the y axis to a 0 to 100% range.
\code{scale_y_percent} transforms the y axis to a 0 to 100\% range.
\code{scale_rsi_colours} sets colours to the bars: green for S, yellow for I and red for R.
@ -61,11 +66,11 @@ library(ggplot2)
ggplot(septic_patients \%>\% select(amox, nitr, fosf, trim, cipr)) +
geom_rsi()
# prettify it using some additional functions
# prettify the plot using some additional functions:
df <- septic_patients[, c("amox", "nitr", "fosf", "trim", "cipr")]
ggplot(df) +
geom_rsi(x = "Interpretation") +
facet_rsi(facet = "Antibiotic") +
geom_rsi() +
facet_rsi() +
scale_y_percent() +
scale_rsi_colours() +
theme_rsi()
@ -74,16 +79,42 @@ ggplot(df) +
septic_patients \%>\%
select(amox, nitr, fosf, trim, cipr) \%>\%
ggplot_rsi()
\donttest{
# it also supports groups (don't forget to use the group on `x` or `facet`):
septic_patients \%>\%
select(amox, nitr, fosf, trim, cipr) \%>\%
ggplot_rsi(x = "Interpretation", facet = "Antibiotic")
# it also supports groups (don't forget to use facet on the group):
septic_patients \%>\%
select(hospital_id, amox, cipr) \%>\%
select(hospital_id, amox, nitr, fosf, trim, cipr) \%>\%
group_by(hospital_id) \%>\%
ggplot_rsi() +
facet_wrap("hospital_id", nrow = 1) +
labs(title = "AMR of Amoxicillin And Ciprofloxacine Per Hospital")
ggplot_rsi(x = "hospital_id",
facet = "Antibiotic",
nrow = 1) +
labs(title = "AMR of Anti-UTI Drugs Per Hospital",
x = "Hospital")
# genuine analysis: check 2 most prevalent microorganisms
septic_patients \%>\%
# create new bacterial ID's, with all CoNS under the same group (Becker et al.)
mutate(bactid = as.bactid(bactid, Becker = TRUE)) \%>\%
# filter on top 2 bacterial ID's
filter(bactid \%in\% top_freq(freq(.$bactid), 2)) \%>\%
# determine first isolates
mutate(first_isolate = first_isolate(.,
col_date = "date",
col_patient_id = "patient_id",
col_bactid = "bactid")) \%>\%
# filter on first isolates
filter(first_isolate == TRUE) \%>\%
# join the `microorganisms` data set
left_join_microorganisms() \%>\%
# select full name and some antibiotics
select(mo = fullname,
cfur, gent, cipr) \%>\%
# group by MO
group_by(mo) \%>\%
# plot the thing, putting MOs on the facet
ggplot_rsi(x = "Antibiotic",
facet = "mo") +
labs(title = "AMR of Top Two Microorganisms In Blood Culture Isolates",
subtitle = "Only First Isolates, CoNS grouped according to Becker et al.",
x = "Microorganisms")
}
}

View File

@ -25,7 +25,8 @@ portion_SI(ab1, ab2 = NULL, minimum = 30, as_percent = FALSE)
portion_S(ab1, ab2 = NULL, minimum = 30, as_percent = FALSE)
portion_df(data, translate = getOption("get_antibiotic_names", TRUE))
portion_df(data, translate_ab = getOption("get_antibiotic_names",
"official"))
}
\arguments{
\item{ab1}{vector of antibiotic interpretations, they will be transformed internally with \code{\link{as.rsi}} if needed}
@ -38,7 +39,7 @@ portion_df(data, translate = getOption("get_antibiotic_names", TRUE))
\item{data}{a \code{data.frame} containing columns with class \code{rsi} (see \code{\link{as.rsi}})}
\item{translate}{a logical value to indicate whether antibiotic abbreviations should be translated with \code{\link{abname}}}
\item{translate_ab}{a column name of the \code{\link{antibiotics}} data set to translate the antibiotic abbreviations to, using \code{\link{abname}}. This can be set with \code{\link{getOption}("get_antibiotic_names")}.}
}
\value{
Double or, when \code{as_percent = TRUE}, a character.

View File

@ -29,16 +29,4 @@ test_that("ggplot_rsi works", {
summarise_all(portion_IR) %>% as.double()
)
expect_error(geom_rsi(x = "test"))
expect_error(facet_rsi(facet = "test"))
# support for groups
print(
septic_patients %>%
select(hospital_id, amox, cipr) %>%
group_by(hospital_id) %>%
ggplot_rsi() +
facet_grid("hospital_id")
)
})

View File

@ -111,7 +111,7 @@ test_that("old rsi works", {
# portion_df
expect_equal(
septic_patients %>% select(amox) %>% portion_df(TRUE) %>% pull(Percentage),
septic_patients %>% select(amox) %>% portion_df() %>% pull(Percentage),
c(septic_patients$amox %>% portion_S(),
septic_patients$amox %>% portion_I(),
septic_patients$amox %>% portion_R())