diff --git a/.Rbuildignore b/.Rbuildignore index 3c2145c5..e63696ad 100755 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -22,7 +22,6 @@ ^data-raw$ ^\.lintr$ ^tests/testthat/_snaps$ -^vignettes/AMR.Rmd$ ^vignettes/benchmarks.Rmd$ ^vignettes/EUCAST.Rmd$ ^vignettes/PCA.Rmd$ diff --git a/DESCRIPTION b/DESCRIPTION index eb573d2a..05f2dc3a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: AMR -Version: 1.5.0.9025 +Version: 1.5.0.9026 Date: 2021-02-25 Title: Antimicrobial Resistance Data Analysis Authors@R: c( diff --git a/NAMESPACE b/NAMESPACE index a120bd83..60de0112 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -243,6 +243,7 @@ export(theme_rsi) importFrom(graphics,arrows) importFrom(graphics,axis) importFrom(graphics,barplot) +importFrom(graphics,legend) importFrom(graphics,mtext) importFrom(graphics,plot) importFrom(graphics,points) diff --git a/NEWS.md b/NEWS.md index 4eb68a0e..7aaf7e1f 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# AMR 1.5.0.9025 +# AMR 1.5.0.9026 ## Last updated: 25 February 2021 ### New @@ -63,6 +63,7 @@ * The `like()` function (and its fast alias `%like%`) now always use Perl compatibility, improving speed for many functions in this package (e.g., `as.mo()` is now up to 4 times faster) * *Staphylococcus cornubiensis* is now correctly categorised as coagulase-positive * `random_disk()` and `random_mic()` now have an expanded range in their randomisation +* Support for GISA (glycopeptide-intermediate *S. aureus*), so e.g. `mo_genus("GISA")` will return `"Staphylococcus"` ### Other * Big documentation updates diff --git a/R/ggplot_rsi.R b/R/ggplot_rsi.R index 7206f3ea..5de41893 100755 --- a/R/ggplot_rsi.R +++ b/R/ggplot_rsi.R @@ -36,7 +36,7 @@ #' @param facet variable to split plots by, either `"interpretation"` (default) or `"antibiotic"` or a grouping variable #' @inheritParams proportion #' @param nrow (when using `facet`) number of rows -#' @param colours a named vector with colours for the bars. 
The names must be one or more of: S, SI, I, IR, R or be `FALSE` for standard [ggplot2][ggplot2::ggplot()] colours. The default colours are colour-blind friendly. +#' @param colours a named vector with colours for the bars. The names must be one or more of: S, SI, I, IR, R or be `FALSE` for standard [ggplot2][ggplot2::ggplot()] colours. The default colours are colour-blind friendly, while maintaining the convention that e.g. 'susceptible' should be green and 'resistant' should be red. #' @param aesthetics aesthetics to apply the colours to, defaults to "fill" but can also be "colour" or "both" #' @param datalabels show datalabels using [labels_rsi_count()] #' @param datalabels.size size of the datalabels @@ -119,11 +119,6 @@ #' CIP) %>% #' ggplot_rsi(x = "age_group") #' -#' # for colourblind mode, use divergent colours from the viridis package: -#' example_isolates %>% -#' select(AMX, NIT, FOS, TMP, CIP) %>% -#' ggplot_rsi() + -#' scale_fill_viridis_d() #' # a shorter version which also adjusts data label colours: #' example_isolates %>% #' select(AMX, NIT, FOS, TMP, CIP) %>% @@ -155,11 +150,11 @@ ggplot_rsi <- function(data, minimum = 30, language = get_locale(), nrow = NULL, - colours = c(S = "#61a8ff", - SI = "#61a8ff", - I = "#61f7ff", - IR = "#ff6961", - R = "#ff6961"), + colours = c(S = "#3CAEA3", + SI = "#3CAEA3", + I = "#F6D55C", + IR = "#ED553B", + R = "#ED553B"), datalabels = TRUE, datalabels.size = 2.5, datalabels.colour = "grey15", @@ -309,17 +304,19 @@ geom_rsi <- function(position = NULL, x <- "interpretation" } - ggplot2::layer(geom = "bar", stat = "identity", position = position, - mapping = ggplot2::aes_string(x = x, y = y, fill = fill), - params = list(...), data = function(x) { - rsi_df(data = x, - translate_ab = translate_ab, - language = language, - minimum = minimum, - combine_SI = combine_SI, - combine_IR = combine_IR) - }) - + ggplot2::geom_col( + data = function(x) { + rsi_df(data = x, + translate_ab = translate_ab, + language = language, + 
minimum = minimum, + combine_SI = combine_SI, + combine_IR = combine_IR) + }, + mapping = ggplot2::aes_string(x = x, y = y, fill = fill), + position = position, + ... + ) } #' @rdname ggplot_rsi diff --git a/R/globals.R b/R/globals.R index 557a7a69..5e1b96c0 100755 --- a/R/globals.R +++ b/R/globals.R @@ -33,7 +33,10 @@ globalVariables(c(".rowid", "atc_group1", "atc_group2", "code", + "cols", + "count", "data", + "disk", "dosage", "dose", "dose_times", @@ -52,6 +55,7 @@ globalVariables(c(".rowid", "language", "lookup", "method", + "mic ", "microorganism", "microorganisms", "microorganisms.codes", @@ -67,8 +71,8 @@ globalVariables(c(".rowid", "reference.rule", "reference.rule_group", "reference.version", - "rsi_translation", "rowid", + "rsi_translation", "rule_group", "rule_name", "se_max", diff --git a/R/mo.R b/R/mo.R index f1c1756b..e060a1c5 100755 --- a/R/mo.R +++ b/R/mo.R @@ -711,8 +711,8 @@ exec_as.mo <- function(x, } # translate known trivial abbreviations to genus + species ---- - if (toupper(x_backup_without_spp[i]) %in% c("MRSA", "MSSA", "VISA", "VRSA", "BORSA") - | x_backup_without_spp[i] %like_case% "(^| )(mrsa|mssa|visa|vrsa|borsa|la-?mrsa|ca-?mrsa)( |$)") { + if (toupper(x_backup_without_spp[i]) %in% c("MRSA", "MSSA", "VISA", "VRSA", "BORSA", "GISA") + | x_backup_without_spp[i] %like_case% "(^| )(mrsa|mssa|visa|vrsa|borsa|gisa|la-?mrsa|ca-?mrsa)( |$)") { x[i] <- lookup(fullname == "Staphylococcus aureus", uncertainty = -1) next } diff --git a/R/plot.R b/R/plot.R index 0d23358b..39b4fe4d 100644 --- a/R/plot.R +++ b/R/plot.R @@ -28,7 +28,7 @@ #' Functions to plot classes `rsi`, `mic` and `disk`, with support for base R and `ggplot2`. #' @inheritSection lifecycle Stable Lifecycle #' @inheritSection AMR Read more on Our Website! 
-#' @param x MIC values created with [as.mic()] or disk diffusion values created with [as.disk()] +#' @param x,data MIC values created with [as.mic()] or disk diffusion values created with [as.disk()] #' @param mapping aesthetic mappings to use for [`ggplot()`][ggplot2::ggplot()] #' @param main,title title of the plot #' @param xlab,ylab axis title @@ -37,7 +37,10 @@ #' @param guideline interpretation guideline to use, defaults to the latest included EUCAST guideline, see *Details* #' @param colours_RSI colours to use for filling in the bars, must be a vector of three values (in the order R, S and I). The default colours are colour-blind friendly. #' @param expand logical to indicate whether the range on the x axis should be expanded between the lowest and highest value. For MIC values, intermediate values will be factors of 2 starting from the highest MIC value. For disk diameters, the whole diameter range will be filled. -#' @details For interpreting MIC values as well as disk diffusion diameters, supported guidelines to be used as input for the `guideline` argument are: `r vector_and(AMR::rsi_translation$guideline, quotes = TRUE, reverse = TRUE)`. +#' @details +#' The interpretation of "I" will be named "Increased exposure" for all EUCAST guidelines since 2019, and will be named "Intermediate" in all other cases. +#' +#' For interpreting MIC values as well as disk diffusion diameters, supported guidelines to be used as input for the `guideline` argument are: `r vector_and(AMR::rsi_translation$guideline, quotes = TRUE, reverse = TRUE)`. #' #' Simply using `"CLSI"` or `"EUCAST"` as input will automatically select the latest version of that guideline. 
#' @name plot @@ -62,7 +65,7 @@ NULL #' @method plot mic -#' @importFrom graphics barplot axis mtext +#' @importFrom graphics barplot axis mtext legend #' @export #' @rdname plot plot.mic <- function(x, @@ -89,13 +92,13 @@ plot.mic <- function(x, x <- plot_prepare_table(x, expand = expand) - cols_sub <- plot_colours_and_sub(x = x, - mo = mo, - ab = ab, - guideline = guideline, - colours_RSI = colours_RSI, - fn = as.mic, - ...) + cols_sub <- plot_colours_subtitle_guideline(x = x, + mo = mo, + ab = ab, + guideline = guideline, + colours_RSI = colours_RSI, + fn = as.mic, + ...) barplot(x, col = cols_sub$cols, @@ -117,7 +120,7 @@ plot.mic <- function(x, legend_col <- colours_RSI[2] } if (colours_RSI[3] %in% cols_sub$cols) { - legend_txt <- c(legend_txt, "Incr. exposure") + legend_txt <- c(legend_txt, plot_name_of_I(cols_sub$guideline)) legend_col <- c(legend_col, colours_RSI[3]) } if (colours_RSI[1] %in% cols_sub$cols) { @@ -194,21 +197,21 @@ ggplot.mic <- function(data, title <- gsub(" +", " ", paste0(title, collapse = " ")) x <- plot_prepare_table(data, expand = expand) - cols_sub <- plot_colours_and_sub(x = x, - mo = mo, - ab = ab, - guideline = guideline, - colours_RSI = colours_RSI, - fn = as.mic, - ...) + cols_sub <- plot_colours_subtitle_guideline(x = x, + mo = mo, + ab = ab, + guideline = guideline, + colours_RSI = colours_RSI, + fn = as.mic, + ...) df <- as.data.frame(x, stringsAsFactors = TRUE) colnames(df) <- c("mic", "count") df$cols <- cols_sub$cols df$cols[df$cols == colours_RSI[1]] <- "Resistant" df$cols[df$cols == colours_RSI[2]] <- "Susceptible" - df$cols[df$cols == colours_RSI[3]] <- "Incr. exposure" + df$cols[df$cols == colours_RSI[3]] <- plot_name_of_I(cols_sub$guideline) df$cols <- factor(df$cols, - levels = c("Susceptible", "Incr. 
exposure", "Resistant"), + levels = c("Susceptible", plot_name_of_I(cols_sub$guideline), "Resistant"), ordered = TRUE) if (!is.null(mapping)) { p <- ggplot2::ggplot(df, mapping = mapping) @@ -218,10 +221,11 @@ ggplot.mic <- function(data, if (any(colours_RSI %in% cols_sub$cols)) { p <- p + - ggplot2::geom_col(aes(x = mic, y = count, fill = cols)) + + ggplot2::geom_col(ggplot2::aes(x = mic, y = count, fill = cols)) + ggplot2::scale_fill_manual(values = c("Resistant" = colours_RSI[1], "Susceptible" = colours_RSI[2], - "Incr. exposure" = colours_RSI[3]),, + "Incr. exposure" = colours_RSI[3], + "Intermediate" = colours_RSI[3]), name = NULL) } else { p <- p + @@ -235,7 +239,7 @@ ggplot.mic <- function(data, #' @method plot disk #' @export -#' @importFrom graphics barplot axis mtext +#' @importFrom graphics barplot axis mtext legend #' @rdname plot plot.disk <- function(x, main = paste("Disk zones values of", deparse(substitute(x))), @@ -261,13 +265,13 @@ plot.disk <- function(x, x <- plot_prepare_table(x, expand = expand) - cols_sub <- plot_colours_and_sub(x = x, - mo = mo, - ab = ab, - guideline = guideline, - colours_RSI = colours_RSI, - fn = as.disk, - ...) + cols_sub <- plot_colours_subtitle_guideline(x = x, + mo = mo, + ab = ab, + guideline = guideline, + colours_RSI = colours_RSI, + fn = as.disk, + ...) barplot(x, col = cols_sub$cols, @@ -289,7 +293,7 @@ plot.disk <- function(x, legend_col <- colours_RSI[1] } if (colours_RSI[3] %in% cols_sub$cols) { - legend_txt <- c(legend_txt, "Incr. exposure") + legend_txt <- c(legend_txt, plot_name_of_I(cols_sub$guideline)) legend_col <- c(legend_col, colours_RSI[3]) } if (colours_RSI[2] %in% cols_sub$cols) { @@ -367,21 +371,21 @@ ggplot.disk <- function(data, title <- gsub(" +", " ", paste0(title, collapse = " ")) x <- plot_prepare_table(data, expand = expand) - cols_sub <- plot_colours_and_sub(x = x, - mo = mo, - ab = ab, - guideline = guideline, - colours_RSI = colours_RSI, - fn = as.disk, - ...) 
+ cols_sub <- plot_colours_subtitle_guideline(x = x, + mo = mo, + ab = ab, + guideline = guideline, + colours_RSI = colours_RSI, + fn = as.disk, + ...) df <- as.data.frame(x, stringsAsFactors = TRUE) colnames(df) <- c("disk", "count") df$cols <- cols_sub$cols df$cols[df$cols == colours_RSI[1]] <- "Resistant" df$cols[df$cols == colours_RSI[2]] <- "Susceptible" - df$cols[df$cols == colours_RSI[3]] <- "Incr. exposure" + df$cols[df$cols == colours_RSI[3]] <- plot_name_of_I(cols_sub$guideline) df$cols <- factor(df$cols, - levels = c("Resistant", "Incr. exposure", "Susceptible"), + levels = c("Resistant", plot_name_of_I(cols_sub$guideline), "Susceptible"), ordered = TRUE) if (!is.null(mapping)) { p <- ggplot2::ggplot(df, mapping = mapping) @@ -394,7 +398,8 @@ ggplot.disk <- function(data, ggplot2::geom_col(aes(x = disk, y = count, fill = cols)) + ggplot2::scale_fill_manual(values = c("Resistant" = colours_RSI[1], "Susceptible" = colours_RSI[2], - "Incr. exposure" = colours_RSI[3]), + "Incr. exposure" = colours_RSI[3], + "Intermediate" = colours_RSI[3]), name = NULL) } else { p <- p + @@ -402,7 +407,7 @@ ggplot.disk <- function(data, } p + - ggplot2::labs(title = title, x = xlab, y = ylab, sub = cols_sub$sub) + ggplot2::labs(title = title, x = xlab, y = ylab, subtitle = cols_sub$sub) } plot_prepare_table <- function(x, expand) { @@ -413,7 +418,9 @@ plot_prepare_table <- function(x, expand) { while (min(extra_range) / 2 > min(as.double(x))) { extra_range <- c(min(extra_range) / 2, extra_range) } - extra_range <- setNames(rep(0, length(extra_range)), extra_range) + nms <- extra_range + extra_range <- rep(0, length(extra_range)) + names(extra_range) <- nms x <- table(droplevels(x, as.mic = FALSE)) extra_range <- extra_range[!names(extra_range) %in% names(x)] x <- as.table(c(x, extra_range)) @@ -437,12 +444,22 @@ plot_prepare_table <- function(x, expand) { as.table(x) } -plot_colours_and_sub <- function(x, mo, ab, guideline, colours_RSI, fn, ...) 
{ +plot_name_of_I <- function(guideline) { + if (!guideline %like% "CLSI" && as.double(gsub("[^0-9]+", "", guideline)) >= 2019) { + # interpretation since 2019 + "Incr. exposure" + } else { + # interpretation until 2019 + "Intermediate" + } +} + +plot_colours_subtitle_guideline <- function(x, mo, ab, guideline, colours_RSI, fn, ...) { + guideline <- get_guideline(guideline, AMR::rsi_translation) if (!is.null(mo) && !is.null(ab)) { # interpret and give colour based on MIC values mo <- as.mo(mo) ab <- as.ab(ab) - guideline <- get_guideline(guideline, AMR::rsi_translation) rsi <- suppressWarnings(suppressMessages(as.rsi(fn(names(x)), mo = mo, ab = ab, guideline = guideline, ...))) cols <- character(length = length(rsi)) cols[is.na(rsi)] <- "#BEBEBE" @@ -454,16 +471,16 @@ plot_colours_and_sub <- function(x, mo, ab, guideline, colours_RSI, fn, ...) { if (all(cols == "#BEBEBE")) { message_("No ", guideline, " interpretations found for ", ab_name(ab, language = NULL, tolower = TRUE), " in ", moname) - guideline <- "" + guideline_txt <- "" } else { - guideline <- paste0("(following ", guideline, ")") + guideline_txt <- paste0("(following ", guideline, ")") } - sub <- bquote(.(abname)~"in"~italic(.(moname))~.(guideline)) + sub <- bquote(.(abname)~"in"~italic(.(moname))~.(guideline_txt)) } else { cols <- "#BEBEBE" sub <- NULL } - list(cols = cols, sub = sub) + list(cols = cols, sub = sub, guideline = guideline) } diff --git a/data-raw/AMR_latest.tar.gz b/data-raw/AMR_latest.tar.gz index e22f2bc1..4dd2b077 100644 Binary files a/data-raw/AMR_latest.tar.gz and b/data-raw/AMR_latest.tar.gz differ diff --git a/docs/404.html b/docs/404.html index 5b6d4cbb..e711daef 100644 --- a/docs/404.html +++ b/docs/404.html @@ -81,7 +81,7 @@
diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index 2380f47f..f67dc4fb 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -81,7 +81,7 @@ diff --git a/docs/articles/AMR.html b/docs/articles/AMR.html index 3b22530a..36c71043 100644 --- a/docs/articles/AMR.html +++ b/docs/articles/AMR.html @@ -39,7 +39,7 @@ @@ -193,7 +193,7 @@vignettes/AMR.Rmd
AMR.Rmd
Note: values on this page will change with every website update since they are based on randomly created values and the page was written in R Markdown. However, the methodology remains unchanged. This page was generated on 09 February 2021.
+Note: values on this page will change with every website update since they are based on randomly created values and the page was written in R Markdown. However, the methodology remains unchanged. This page was generated on 25 February 2021.
For completeness, we can also add the hospital where the patient was admitted and we need to define valid antimicrobial results for our randomisation:
- -Using the sample()
function, we can randomly select items from all objects we defined earlier. To let our fake data reflect reality a bit, we will also approximately define the probabilities of bacteria and the antibiotic results with the prob
parameter.
+Using the
+sample()
function, we can randomly select items from all objects we defined earlier. To let our fake data reflect reality a bit, we will also approximately define the probabilities of bacteria and the antibiotic results, using therandom_rsi()
function.+ AMX = random_rsi(sample_size, prob_RSI = c(0.35, 0.60, 0.05)), + AMC = random_rsi(sample_size, prob_RSI = c(0.15, 0.75, 0.10)), + CIP = random_rsi(sample_size, prob_RSI = c(0.20, 0.80, 0.00)), + GEN = random_rsi(sample_size, prob_RSI = c(0.08, 0.92, 0.00)))sample_size <- 20000 data <- data.frame(date = sample(dates, size = sample_size, replace = TRUE), patient_id = sample(patients, size = sample_size, replace = TRUE), - hospital = sample(hospitals, size = sample_size, replace = TRUE, + hospital = sample(c("Hospital A", + "Hospital B", + "Hospital C", + "Hospital D"), + size = sample_size, replace = TRUE, prob = c(0.30, 0.35, 0.15, 0.20)), bacteria = sample(bacteria, size = sample_size, replace = TRUE, prob = c(0.50, 0.25, 0.15, 0.10)), - AMX = sample(ab_interpretations, size = sample_size, replace = TRUE, - prob = c(0.60, 0.05, 0.35)), - AMC = sample(ab_interpretations, size = sample_size, replace = TRUE, - prob = c(0.75, 0.10, 0.15)), - CIP = sample(ab_interpretations, size = sample_size, replace = TRUE, - prob = c(0.80, 0.00, 0.20)), - GEN = sample(ab_interpretations, size = sample_size, replace = TRUE, - prob = c(0.92, 0.00, 0.08)))
Using the left_join()
function from the dplyr
package, we can ‘map’ the gender to the patient ID using the patients_table
object we created earlier:
+data <- data %>% left_join(patients_table)
The resulting data set contains 20,000 blood culture isolates. With the
-head()
function we can preview the first 6 rows of this data set:+head(data)
@@ -352,31 +344,53 @@ - 2014-07-31 -F7 -Hospital B +2014-07-28 +B5 +Hospital A Escherichia coli -R +S S S S M - +2017-11-01 -W10 +2017-09-25 +X1 Hospital B -Escherichia coli +Klebsiella pneumoniae +R +I S S +F ++ +2017-04-18 +V9 +Hospital D +Klebsiella pneumoniae +I +S +S +S +F ++ 2016-06-06 +Z2 +Hospital A +Staphylococcus aureus +R +S S S F - 2013-06-17 -T1 -Hospital D +2016-01-08 +V6 +Hospital C Klebsiella pneumoniae S S @@ -385,35 +399,13 @@F - -2012-12-13 -Q2 -Hospital C -Streptococcus pneumoniae -R -R -S -S -F -- -2010-01-01 -O10 +2015-03-14 +Y2 Hospital B Staphylococcus aureus -R S S S -F -- @@ -427,7 +419,7 @@ Cleaning the data2010-07-24 -V5 -Hospital A -Staphylococcus aureus -S -S -R S F We also created a package dedicated to data cleaning and checking, called the
cleaner
package. Its freq()
function can be used to create frequency tables.For example, for the
-gender
variable:+data %>% freq(gender)
Frequency table
Class: character
@@ -449,16 +441,16 @@ Longest: 11 M -10,335 -51.68% -10,335 -51.68% +10,502 +52.51% +10,502 +52.51% @@ -466,23 +458,23 @@ Longest: 1 2 F -9,665 -48.33% +9,498 +47.49% 20,000 100.00% So, we can draw at least two conclusions immediately. From a data scientists perspective, the data looks clean: only values
M
andF
. From a researchers perspective: there are slightly more men. Nothing we didn’t already know.The data is already quite clean, but we still need to transform some variables. The
-bacteria
column now consists of text, and we want to add more variables based on microbial IDs later on. So, we will transform this column to valid IDs. Themutate()
function of thedplyr
package makes this really easy:+We also want to transform the antibiotics, because in real life data we don’t know if they are really clean. The
-as.rsi()
function ensures reliability and reproducibility in these kind of variables. Themutate_at()
will run theas.rsi()
function on defined variables:+Finally, we will apply EUCAST rules on our antimicrobial results. In Europe, most medical microbiological laboratories already apply these rules. Our package features their latest insights on intrinsic resistance and exceptional phenotypes. Moreover, the
eucast_rules()
function can also apply additional rules, like forcingampicillin = R whenamoxicillin/clavulanic acid = R.Because the amoxicillin (column
-AMX
) and amoxicillin/clavulanic acid (columnAMC
) in our data were generated randomly, some rows will undoubtedly contain AMX = S and AMC = R, which is technically impossible. Theeucast_rules()
fixes this:+data <- eucast_rules(data, col_mo = "bacteria", rules = "all")
Adding new variables
Now that we have the microbial ID, we can add some taxonomic properties:
-+data <- data %>% mutate(gramstain = mo_gramstain(bacteria), genus = mo_genus(bacteria), @@ -497,25 +489,25 @@ Longest: 1
(…) When preparing a cumulative antibiogram to guide clinical decisions about empirical antimicrobial therapy of initial infections, only the first isolate of a given species per patient, per analysis period (eg, one year) should be included, irrespective of body site, antimicrobial susceptibility profile, or other phenotypical characteristics (eg, biotype). The first isolate is easily identified, and cumulative antimicrobial susceptibility test data prepared using the first isolate are generally comparable to cumulative antimicrobial susceptibility test data calculated by other methods, providing duplicate isolates are excluded.
M39-A4 Analysis and Presentation of Cumulative Antimicrobial Susceptibility Test Data, 4th Edition. CLSI, 2014. Chapter 6.4This
-AMR
package includes this methodology with thefirst_isolate()
function. It adopts the episode of a year (can be changed by user) and it starts counting days after every selected isolate. This new variable can easily be added to our data:+data <- data %>% mutate(first = first_isolate()) # NOTE: Using column 'bacteria' as input for `col_mo`. # NOTE: Using column 'date' as input for `col_date`. # NOTE: Using column 'patient_id' as input for `col_patient_id`.
So only 28.3% is suitable for resistance analysis! We can now filter on it with the
-filter()
function, also from thedplyr
package:+data_1st <- data %>% filter(first == TRUE)
For future use, the above two syntaxes can be shortened with the
-filter_first_isolate()
function:+data_1st <- data %>% filter_first_isolate()
First weighted isolates
-We made a slight twist to the CLSI algorithm, to take into account the antimicrobial susceptibility profile. Have a look at all E. coli isolates of patient N1, sorted on date:
+We made a slight twist to the CLSI algorithm, to take into account the antimicrobial susceptibility profile. Have a look at all E. coli isolates of patient B7, sorted on date:
-
isolate @@ -531,10 +523,10 @@ Longest: 11 -2010-02-03 -N1 +2010-03-16 +B7 B_ESCHR_COLI -I +R S S S @@ -542,21 +534,21 @@ Longest: 12 -2010-04-02 -N1 +2010-05-03 +B7 B_ESCHR_COLI S S -S +R S FALSE 3 -2010-06-23 -N1 +2010-05-16 +B7 B_ESCHR_COLI -S +R S S S @@ -564,41 +556,41 @@ Longest: 14 -2010-06-25 -N1 +2010-07-25 +B7 B_ESCHR_COLI -R -R +S +S S S FALSE - 5 -2010-10-04 -N1 +2010-07-26 +B7 B_ESCHR_COLI -S -S -S -S -FALSE -- +6 -2010-10-11 -N1 -B_ESCHR_COLI -S -S +R +R S R FALSE + 6 +2010-10-14 +B7 +B_ESCHR_COLI +S +S +S +S +FALSE +7 -2010-10-29 -N1 +2010-12-24 +B7 B_ESCHR_COLI R S @@ -608,42 +600,42 @@ Longest: 18 -2010-11-11 -N1 +2011-02-27 +B7 B_ESCHR_COLI S S -R +S S FALSE 9 -2010-12-06 -N1 +2011-09-02 +B7 B_ESCHR_COLI -R +S +S S R -S -FALSE +TRUE 10 -2010-12-13 -N1 +2011-11-08 +B7 B_ESCHR_COLI -R -R +I +I S S FALSE Only 1 isolates are marked as ‘first’ according to CLSI guideline. But when reviewing the antibiogram, it is obvious that some isolates are absolutely different strains and should be included too. This is why we weigh isolates, based on their antibiogram. The
+key_antibiotics()
function adds a vector with 18 key antibiotics: 6 broad spectrum ones, 6 small spectrum for Gram negatives and 6 small spectrum for Gram positives. These can be defined by the user.Only 2 isolates are marked as ‘first’ according to CLSI guideline. But when reviewing the antibiogram, it is obvious that some isolates are absolutely different strains and should be included too. This is why we weigh isolates, based on their antibiogram. The
key_antibiotics()
function adds a vector with 18 key antibiotics: 6 broad spectrum ones, 6 small spectrum for Gram negatives and 6 small spectrum for Gram positives. These can be defined by the user.If a column exists with a name like ‘key(…)ab’ the
-first_isolate()
function will automatically use it and determine the first weighted isolates. Mind the NOTEs in below output:+data <- data %>% mutate(keyab = key_antibiotics()) %>% mutate(first_weighted = first_isolate()) @@ -665,10 +657,10 @@ Longest: 1
1 -2010-02-03 -N1 +2010-03-16 +B7 B_ESCHR_COLI -I +R S S S @@ -677,35 +669,35 @@ Longest: 12 -2010-04-02 -N1 +2010-05-03 +B7 B_ESCHR_COLI S S -S +R S FALSE -FALSE +TRUE 3 -2010-06-23 -N1 +2010-05-16 +B7 B_ESCHR_COLI -S +R S S S FALSE -FALSE +TRUE 4 -2010-06-25 -N1 +2010-07-25 +B7 B_ESCHR_COLI -R -R +S +S S S FALSE @@ -713,32 +705,32 @@ Longest: 1- 5 -2010-10-04 -N1 +2010-07-26 +B7 B_ESCHR_COLI -S -S -S -S -FALSE -TRUE -- +6 -2010-10-11 -N1 -B_ESCHR_COLI -S -S +R +R S R FALSE TRUE + 6 +2010-10-14 +B7 +B_ESCHR_COLI +S +S +S +S +FALSE +TRUE +7 -2010-10-29 -N1 +2010-12-24 +B7 B_ESCHR_COLI R S @@ -749,35 +741,35 @@ Longest: 18 -2010-11-11 -N1 +2011-02-27 +B7 B_ESCHR_COLI S S -R +S S FALSE TRUE 9 -2010-12-06 -N1 +2011-09-02 +B7 B_ESCHR_COLI -R +S +S S R -S -FALSE +TRUE TRUE - 10 -2010-12-13 -N1 +2011-11-08 +B7 B_ESCHR_COLI -R -R +I +I S S FALSE @@ -785,18 +777,18 @@ Longest: 1Instead of 1, now 8 isolates are flagged. In total, 78.2% of all isolates are marked ‘first weighted’ - 50.0% more than when using the CLSI guideline. In real life, this novel algorithm will yield 5-10% more isolates than the classic CLSI guideline.
+Instead of 2, now 10 isolates are flagged. In total, 79.1% of all isolates are marked ‘first weighted’ - 50.8% more than when using the CLSI guideline. In real life, this novel algorithm will yield 5-10% more isolates than the classic CLSI guideline.
As with
-filter_first_isolate()
, there’s a shortcut for this new algorithm too:+-data_1st <- data %>% filter_first_weighted_isolate()
So we end up with 15,648 isolates for analysis.
+So we end up with 15,821 isolates for analysis.
We can remove unneeded columns:
-+Now our data looks like:
-+head(data_1st)
@@ -833,46 +825,78 @@ Longest: 1 - +4 -2012-12-13 -Q2 -Hospital C -B_STRPT_PNMN +1 +2014-07-28 +B5 +Hospital A +B_ESCHR_COLI +S +S +S +S +M +Gram-negative +Escherichia +coli +TRUE ++ +2 +2017-09-25 +X1 +Hospital B +B_KLBSL_PNMN R +I +S +S +F +Gram-negative +Klebsiella +pneumoniae +TRUE ++ 3 +2017-04-18 +V9 +Hospital D +B_KLBSL_PNMN R S -R +S +S F -Gram-positive -Streptococcus +Gram-negative +Klebsiella pneumoniae TRUE 5 -2010-01-01 -O10 -Hospital B -B_STPHY_AURS +2016-01-08 +V6 +Hospital C +B_KLBSL_PNMN R S S S F -Gram-positive -Staphylococcus -aureus +Gram-negative +Klebsiella +pneumoniae TRUE 6 -2010-07-24 -V5 -Hospital A +2015-03-14 +Y2 +Hospital B B_STPHY_AURS S S -R +S S F Gram-positive @@ -882,47 +906,15 @@ Longest: 1- 7 -2014-07-13 -G3 -Hospital B -B_ESCHR_COLI -R -S -R -R -M -Gram-negative -Escherichia -coli -TRUE -- -8 -2013-07-06 -K1 -Hospital D -B_ESCHR_COLI -S -S -R -S -M -Gram-negative -Escherichia -coli -TRUE -- @@ -1003,7 +995,7 @@ Longest: 249 -2011-07-06 -X4 -Hospital C +2014-12-23 +G8 +Hospital A B_STPHY_AURS -R -R S S -F +S +S +M Gram-positive Staphylococcus aureus @@ -942,15 +934,15 @@ Longest: 1 Dispersion of speciesTo just get an idea how the species are distributed, create a frequency table with our
freq()
function. We created thegenus
andspecies
column earlier based on the microbial ID. Withpaste()
, we can concatenate them together.The
-freq()
function can be used like the base R language was intended:+Or can be used like the
-dplyr
way, which is easier to read:+data_1st %>% freq(genus, species)
Frequency table
Class: character
-Length: 15,648
-Available: 15,648 (100%, NA: 0 = 0%)
+Length: 15,821
+Available: 15,821 (100%, NA: 0 = 0%)
Unique: 4Shortest: 16
@@ -967,33 +959,33 @@ Longest: 24
Longest: 241 Escherichia coli -7,846 -50.14% -7,846 -50.14% +7,994 +50.53% +7,994 +50.53% 2 Staphylococcus aureus -3,871 -24.74% -11,717 -74.88% +3,923 +24.80% +11,917 +75.32% 3 Streptococcus pneumoniae -2,331 -14.90% -14,048 -89.78% +2,321 +14.67% +14,238 +89.99% 4 Klebsiella pneumoniae -1,600 -10.22% -15,648 +1,583 +10.01% +15,821 100.00% Overview of different bug/drug combinations
If you want to get a quick glance of the number of isolates in different bug/drug combinations, you can use the
-bug_drug_combinations()
function:+@@ -1020,55 +1012,55 @@ Longest: 24data_1st %>% bug_drug_combinations() %>% head() # show first 6 rows
E. coli AMX -3764 -263 -3819 -7846 +3815 +239 +3940 +7994 E. coli AMC -6286 -255 -1305 -7846 +6289 +304 +1401 +7994 E. coli CIP -5931 +6086 0 -1915 -7846 +1908 +7994 E. coli GEN -7043 +7158 0 -803 -7846 +836 +7994 K. pneumoniae AMX 0 0 -1600 -1600 +1583 +1583 K. pneumoniae AMC -1270 -64 -266 -1600 +1224 +57 +302 +1583 Using Tidyverse selections, you can also select columns based on the antibiotic class they are in:
-+@@ -1086,34 +1078,34 @@ Longest: 24data_1st %>% select(bacteria, fluoroquinolones()) %>% bug_drug_combinations()
E. coli CIP -5931 +6086 0 -1915 -7846 +1908 +7994 K. pneumoniae CIP -1214 +1215 0 -386 -1600 +368 +1583 S. aureus CIP -2965 +3062 0 -906 -3871 +861 +3923 S. pneumoniae CIP -1801 +1765 0 -530 -2331 +556 +2321
The functions resistance()
and susceptibility()
can be used to calculate antimicrobial resistance or susceptibility. For more specific analyses, the functions proportion_S()
, proportion_SI()
, proportion_I()
, proportion_IR()
and proportion_R()
can be used to determine the proportion of a specific antimicrobial outcome.
As per the EUCAST guideline of 2019, we calculate resistance as the proportion of R (proportion_R()
, equal to resistance()
) and susceptibility as the proportion of S and I (proportion_SI()
, equal to susceptibility()
). These functions can be used on their own:
++# [1] 0.5370078data_1st %>% resistance(AMX) -# [1] 0.5357873
Or can be used in conjunction with group_by()
and summarise()
, both from the dplyr
package:
+@@ -1141,24 +1133,24 @@ Longest: 24data_1st %>% group_by(hospital) %>% summarise(amoxicillin = resistance(AMX))
Hospital A -0.5376208 +0.5410321 Hospital B -0.5299270 +0.5381272 Hospital C -0.5439754 +0.5306981 Hospital D -0.5373087 +0.5338134 Of course it would be very convenient to know the number of isolates responsible for the percentages. For that purpose the
-n_rsi()
can be used, which works exactly liken_distinct()
from thedplyr
package. It counts all isolates available for every group (i.e. values S, I or R):+@@ -256,8 +256,8 @@ minimum = 30, language = get_locale(), nrow = NULL, - colours = c(S = "#61a8ff", SI = "#61a8ff", I = "#61f7ff", IR = "#ff6961", R = - "#ff6961"), + colours = c(S = "#3CAEA3", SI = "#3CAEA3", I = "#F6D55C", IR = "#ED553B", R = + "#ED553B"), datalabels = TRUE, datalabels.size = 2.5, datalabels.colour = "grey15", @@ -362,7 +362,7 @@data_1st %>% group_by(hospital) %>% summarise(amoxicillin = resistance(AMX), @@ -1173,28 +1165,28 @@ Longest: 24
Hospital A -0.5376208 -4758 +0.5410321 +4728 Hospital B -0.5299270 -5480 +0.5381272 +5521 Hospital C -0.5439754 -2274 +0.5306981 +2378 Hospital D -0.5373087 -3136 +0.5338134 +3194 These functions can also be used to get the proportion of multiple antibiotics, to calculate empiric susceptibility of combination therapies very easily:
-@@ -1913,13 +1915,13 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/+data_1st %>% group_by(genus) %>% summarise(amoxiclav = susceptibility(AMC), @@ -1211,32 +1203,32 @@ Longest: 24
Escherichia -0.8336732 -0.8976549 -0.9850879 +0.8247436 +0.8954216 +0.9836127 Klebsiella -0.8337500 -0.9050000 -0.9812500 +0.8092230 +0.9001895 +0.9823121 Staphylococcus -0.8235598 -0.9227590 -0.9842418 +0.8179964 +0.9166454 +0.9836860 Streptococcus -0.5456885 +0.5489013 0.0000000 -0.5456885 +0.5489013 To make a transition to the next part, let’s see how this difference could be plotted:
-+data_1st %>% group_by(genus) %>% summarise("1. Amoxi/clav" = susceptibility(AMC), @@ -1255,7 +1247,7 @@ Longest: 24
Plots
To show results in plots, most R users would nowadays use the
-ggplot2
package. This package lets you create plots in layers. You can read more about it on their website. A quick example would look like these syntaxes:+ freq(gender)+ggplot(data = a_data_set, mapping = aes(x = year, y = value)) + @@ -1269,13 +1261,13 @@ Longest: 24 ggplot(a_data_set) + geom_bar(aes(year))
The
-AMR
package contains functions to extend thisggplot2
package, for examplegeom_rsi()
. It automatically transforms data withcount_df()
orproportion_df()
and show results in stacked bars. Its simplest and shortest example:+Omit the
translate_ab = FALSE
to have the antibiotic codes (AMX, AMC, CIP, GEN) translated to official WHO names (amoxicillin, amoxicillin/clavulanic acid, ciprofloxacin, gentamicin).If we group on e.g. the
-genus
column and add some additional functions from our package, we can create this:-+# group the data on `genus` ggplot(data_1st %>% group_by(genus)) + # create bars with genus on x axis @@ -1284,7 +1276,7 @@ Longest: 24 geom_rsi(x = "genus") + # split plots on antibiotic facet_rsi(facet = "antibiotic") + - # set colours to the R/SI interpretations + # set colours to the R/SI interpretations (colour-blind friendly) scale_rsi_colours() + # show percentages on y axis scale_y_percent(breaks = 0:4 * 25) + @@ -1298,7 +1290,7 @@ Longest: 24 theme(axis.text.y = element_text(face = "italic"))
To simplify this, we also created the
-ggplot_rsi()
function, which combines almost all above functions:++data_1st %>% group_by(genus) %>% ggplot_rsi(x = "genus", @@ -1307,13 +1299,79 @@ Longest: 24 datalabels = FALSE) + coord_flip()
++Plotting MIC and disk diffusion values
+The AMR package also extends the
+plot()
andggplot()
functions for plotting minimum inhibitory concentrations (MIC, created withas.mic()
) and disk diffusion diameters (created withas.disk()
).With the
+random_mic()
andrandom_disk()
functions, we can generate sampled values for the new data types (S3 classes)<mic>
and<disk>
:++mic_values <- random_mic(size = 100) +mic_values +# Class <mic> +# [1] 128 64 0.5 8 64 0.125 16 64 8 2 +# [11] 0.0625 8 1 4 0.25 0.5 0.5 32 1 1 +# [21] 0.25 8 8 0.0625 16 16 2 64 0.125 8 +# [31] 1 0.125 4 0.125 32 16 0.25 1 0.5 128 +# [41] 32 128 64 1 0.125 32 0.125 0.125 8 0.125 +# [51] 64 8 8 0.0625 0.25 0.5 2 2 128 0.25 +# [61] 4 1 1 0.25 64 0.5 32 0.25 16 2 +# [71] 16 8 32 4 0.25 16 128 32 128 4 +# [81] 64 1 0.25 0.5 1 1 4 8 16 16 +# [91] 32 0.5 1 0.125 2 128 0.5 1 4 64
+ ++# base R: +plot(mic_values)
+ ++# ggplot2: +ggplot(mic_values)
But we could also be more specific, by generating MICs that are likely to be found in E. coli for ciprofloxacin:
+++# this will generate MICs that are likely to be found in E. coli for ciprofloxacin: +mic_values_specific <- random_mic(size = 100, mo = "E. coli", ab = "cipro")
For the
+plot()
andggplot()
functions, we can define the microorganism and an antimicrobial agent the same way. This will add the interpretation of those values according to a chosen guideline (defaults to the latest EUCAST guideline). Default colours are colour-blind friendly, while maintaining the convention that e.g. ‘susceptible’ should be green and ‘resistant’ should be red:
++ ++# base R: +plot(mic_values_specific, mo = "E. coli", ab = "cipro")
+ ++# ggplot2: +ggplot(mic_values_specific, mo = "E. coli", ab = "cipro")
For disk diffusion values, there is not much of a difference in plotting:
+++# this will generate disks that are likely to be found in E. coli for ciprofloxacin: +disk_values_specific <- random_disk(size = 100, mo = "E. coli", ab = "cipro") +# NOTE: Translation to one microorganism was guessed with uncertainty. Use +# `mo_uncertainties()` to review it. +disk_values_specific +# Class <disk> +# [1] 20 29 20 26 27 22 18 21 29 23 18 19 29 26 27 24 20 20 23 20 18 25 31 31 29 +# [26] 28 27 31 18 21 18 26 30 20 21 27 24 27 19 24 17 26 24 31 30 28 19 30 19 21 +# [51] 17 19 22 19 23 23 19 20 28 31 31 30 20 17 21 31 29 29 27 21 29 27 26 18 21 +# [76] 29 21 26 29 22 24 18 22 29 30 17 19 23 26 31 17 18 18 28 30 25 22 21 27 17
+ ++# base R: +plot(disk_values_specific, mo = "E. coli", ab = "cipro")
And when using the
+ggplot2
package, but now choosing the latest implemented CLSI guideline (notice that the EUCAST-specific term “Incr. exposure” has changed to “Intermediate”):+ ++# and ggplot2, but now choosing the latest implemented CLSI guideline: +ggplot(disk_values_specific, + mo = "E. coli", + ab = "cipro", + guideline = "CLSI")
@@ -236,9 +236,9 @@ Source:Independence test
The next example uses the
example_isolates
data set. This is a data set included with this package and contains 2,000 microbial isolates with their full antibiograms. It reflects reality and can be used to practice AMR data analysis.We will compare the resistance to fosfomycin (column
-FOS
) in hospital A and D. The input for thefisher.test()
can be retrieved with a transformation like this:diff --git a/docs/news/index.html b/docs/news/index.html index ee54c82c..eaa4cff7 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -81,7 +81,7 @@+# use package 'tidyr' to pivot data: library(tidyr) @@ -1332,7 +1390,7 @@ Longest: 24 # [1,] 25 77 # [2,] 24 33
We can apply the test now with:
-diff --git a/docs/extra.css b/docs/extra.css index 0ebc198f..a0e2a435 100644 --- a/docs/extra.css +++ b/docs/extra.css @@ -187,30 +187,30 @@ div[id^=last-updated] h2 { } /* tables, make them look like scientific ones */ -table { +.table { font-size: 90%; } -table * { +.table * { vertical-align: middle !important; } -table td { +.table td { padding: 4px !important; } -thead { +.table thead { border-top: 2px solid black; border-bottom: 2px solid black; } -thead ~ tbody { +.table thead ~ tbody { /* only when it has a header */ border-bottom: 2px solid black; } -thead th { +.table thead th { text-align: inherit; } -table a:not(.btn), .table a:not(.btn) { +.table a:not(.btn) { text-decoration: inherit; } -table a:not(.btn):hover, .table a:not(.btn):hover { +.table a:not(.btn):hover { text-decoration: underline; } diff --git a/docs/index.html b/docs/index.html index b1bbcd3e..43bfb2c4 100644 --- a/docs/index.html +++ b/docs/index.html @@ -43,7 +43,7 @@+diff --git a/docs/authors.html b/docs/authors.html index 5cb562e5..3e35632f 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -81,7 +81,7 @@# do Fisher's Exact Test fisher.test(check_FOS) # diff --git a/docs/articles/AMR_files/figure-html/plot 1-1.png b/docs/articles/AMR_files/figure-html/plot 1-1.png index fac59d74..29ba2535 100644 Binary files a/docs/articles/AMR_files/figure-html/plot 1-1.png and b/docs/articles/AMR_files/figure-html/plot 1-1.png differ diff --git a/docs/articles/AMR_files/figure-html/plot 3-1.png b/docs/articles/AMR_files/figure-html/plot 3-1.png index 1078fd51..ab08612c 100644 Binary files a/docs/articles/AMR_files/figure-html/plot 3-1.png and b/docs/articles/AMR_files/figure-html/plot 3-1.png differ diff --git a/docs/articles/AMR_files/figure-html/plot 4-1.png b/docs/articles/AMR_files/figure-html/plot 4-1.png index c096fea4..76f329c0 100644 Binary files a/docs/articles/AMR_files/figure-html/plot 4-1.png and b/docs/articles/AMR_files/figure-html/plot 4-1.png differ diff --git 
a/docs/articles/AMR_files/figure-html/plot 5-1.png b/docs/articles/AMR_files/figure-html/plot 5-1.png index 8134a60a..83c570d7 100644 Binary files a/docs/articles/AMR_files/figure-html/plot 5-1.png and b/docs/articles/AMR_files/figure-html/plot 5-1.png differ diff --git a/docs/articles/AMR_files/figure-html/unnamed-chunk-13-1.png b/docs/articles/AMR_files/figure-html/unnamed-chunk-13-1.png new file mode 100644 index 00000000..7f506b40 Binary files /dev/null and b/docs/articles/AMR_files/figure-html/unnamed-chunk-13-1.png differ diff --git a/docs/articles/AMR_files/figure-html/unnamed-chunk-13-2.png b/docs/articles/AMR_files/figure-html/unnamed-chunk-13-2.png new file mode 100644 index 00000000..61f95564 Binary files /dev/null and b/docs/articles/AMR_files/figure-html/unnamed-chunk-13-2.png differ diff --git a/docs/articles/AMR_files/figure-html/unnamed-chunk-15-1.png b/docs/articles/AMR_files/figure-html/unnamed-chunk-15-1.png new file mode 100644 index 00000000..a7c32bcd Binary files /dev/null and b/docs/articles/AMR_files/figure-html/unnamed-chunk-15-1.png differ diff --git a/docs/articles/AMR_files/figure-html/unnamed-chunk-15-2.png b/docs/articles/AMR_files/figure-html/unnamed-chunk-15-2.png new file mode 100644 index 00000000..7c46433a Binary files /dev/null and b/docs/articles/AMR_files/figure-html/unnamed-chunk-15-2.png differ diff --git a/docs/articles/AMR_files/figure-html/unnamed-chunk-17-1.png b/docs/articles/AMR_files/figure-html/unnamed-chunk-17-1.png new file mode 100644 index 00000000..1d0c5d57 Binary files /dev/null and b/docs/articles/AMR_files/figure-html/unnamed-chunk-17-1.png differ diff --git a/docs/articles/AMR_files/figure-html/unnamed-chunk-18-1.png b/docs/articles/AMR_files/figure-html/unnamed-chunk-18-1.png new file mode 100644 index 00000000..6cd717bc Binary files /dev/null and b/docs/articles/AMR_files/figure-html/unnamed-chunk-18-1.png differ diff --git a/docs/articles/index.html b/docs/articles/index.html index f038a464..af4e1b72 
100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -81,7 +81,7 @@
NEWS.md
-+ freq(mo_genus(mo))-AMR 1.5.0.9025 Unreleased +
+@@ -1265,7 +1267,7 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/+AMR 1.5.0.9026 Unreleased
@@ -333,6 +333,8 @@ Staphylococcus cornubiensis is now correctly categorised as coagulase-positive
+ random_disk()
andrandom_mic()
now have an expanded range in their randomisationSupport for GISA (glycopeptide-intermediate S. aureus), so e.g. mo_genus("GISA")
will return"Staphylococcus"
+@@ -659,7 +661,7 @@Making this package independent of especially the tidyverse (e.g. packages
dplyr
andtidyr
) tremendously increases sustainability on the long term, since tidyverse functions change quite often. Good for users, but hard for package maintainers. Most of our functions are replaced with versions that only rely on base R, which keeps this package fully functional for many years to come, without requiring a lot of maintenance to keep up with other packages anymore. Another upside is that this package can now be used with all versions of R since R-3.0.0 (April 2013). Our package is being used in settings where the resources are very limited. Fewer dependencies on newer software is helpful for such settings. Negative effects of this change are:
-
- Function
+freq()
that was borrowed from thecleaner
package was removed. Usecleaner::freq()
, or runlibrary("cleaner")
before you usefreq()
.- Function
freq()
that was borrowed from thecleaner
package was removed. Usecleaner::freq()
, or runlibrary("cleaner")
before you usefreq()
.Printing values of classmo
orrsi
in a tibble will no longer be in colour and printingrsi
in a tibble will show the class<ord>
, not<rsi>
anymore. This is purely a visual effect.All functions from themo_*
family (likemo_name()
andmo_gramstain()
) are noticeably slower when running on hundreds of thousands of rows.- For developers: classes
@@ -996,7 +998,7 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/ #> invalid microorganism code, NA generatedmo
andab
now both also inherit classcharacter
, to support any data transformation. This change invalidates code that checks for class length == 1.This is important, because a value like
-"testvalue"
could never be understood by e.g.mo_name()
, although the class would suggest a valid microbial code.+ Function
freq()
has moved to a new package,clean
(CRAN link), since creating frequency tables actually does not fit the scope of this package. Thefreq()
function still works, since it is re-exported from theclean
package (which will be installed automatically upon updating thisAMR
package).Function
freq()
has moved to a new package,clean
(CRAN link), since creating frequency tables actually does not fit the scope of this package. Thefreq()
function still works, since it is re-exported from theclean
package (which will be installed automatically upon updating thisAMR
package).Renamed data set
septic_patients
toexample_isolates
The age()
function gained a new argumentexact
to determine ages with decimalsRemoved deprecated functions -guess_mo()
,guess_atc()
,EUCAST_rules()
,interpretive_reading()
,rsi()
Frequency tables ( freq()
): +Frequency tables ( @@ -1290,7 +1292,7 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/freq()
):
speed improvement for microbial IDs
- @@ -1275,12 +1277,12 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/
fixed factor level names for R Markdown
Added ceftazidim intrinsic resistance to Streptococci Changed default settings for -age_groups()
, to let groups of fives and tens end with 100+ instead of 120+Fix for freq()
for when all values areNA
+Fix for freq()
for when all values areNA
Fix for first_isolate()
for when dates are missingImproved speed of -guess_ab_col()
@@ -1531,7 +1533,7 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/Frequency tables ( freq()
function): +Frequency tables ( freq()
function):
Support for tidyverse quasiquotation! Now you can create frequency tables of function outcomes:
@@ -1541,15 +1543,15 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/ # OLD WAY septic_patients %>% mutate(genus = mo_genus(mo)) %>% - freq(genus) + freq(genus) # NEW WAY septic_patients %>% - freq(mo_genus(mo)) + freq(mo_genus(mo)) # Even supports grouping variables: septic_patients %>% group_by(gender) %>% - freq(mo_genus(mo))Header info is now available as a list, with the
header
function@@ -1632,7 +1634,7 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/ The argument
header
is now set toTRUE
at default, even for markdownUsing
portion_*
functions now throws a warning when total available isolate is below argumentminimum
Functions
as.mo
,as.rsi
,as.mic
,as.atc
andfreq
will not set package name as attribute anymore- Frequency tables -
+freq()
:Frequency tables -
freq()
:
Support for grouping variables, test with:
@@ -1640,14 +1642,14 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/septic_patients %>% group_by(hospital_id) %>% - freq(gender)
Support for (un)selecting columns:
@@ -1665,7 +1667,7 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/ Check for
hms::is.hms
Removed diacritics from all authors (columns
microorganisms$ref
andmicroorganisms.old$ref
) to comply with CRAN policy to only allow ASCII charactersFix for
mo_property
not working properly- Fix for
eucast_rules
where some Streptococci would become ceftazidime R in EUCAST rule 4.5+ Support for named vectors of class
mo
, useful fortop_freq()
Support for named vectors of class
mo
, useful fortop_freq()
ggplot_rsi
andscale_y_percent
havebreaks
argumentAI improvements for
@@ -1833,13 +1835,13 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/ +freq(my_matrix)as.mo
:For lists, subsetting is possible:
+my_list %>% freq(age) +my_list %>% freq(gender)my_list = list(age = septic_patients$age, gender = septic_patients$gender) -my_list %>% freq(age) -my_list %>% freq(gender)
- A vignette to explain its usage
- Support for
-rsi
(antimicrobial resistance) to use as input- Support for
table
to use as input:freq(table(x, y))
+- Support for
table
to use as input:freq(table(x, y))
- Support for existing functions
hist
andplot
to use a frequency table as input:hist(freq(df$age))
- Support for
-as.vector
,as.data.frame
,as_tibble
andformat
- Support for quasiquotation:
freq(mydata, mycolumn)
is the same asmydata %>% freq(mycolumn)
+- Support for quasiquotation:
freq(mydata, mycolumn)
is the same asmydata %>% freq(mycolumn)
- Function
top_freq
function to return the top/below n items as vector- Header of frequency tables now also show Mean Absolute Deviaton (MAD) and Interquartile Range (IQR)
diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index 77c9a852..6e8c2411 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -12,7 +12,7 @@ articles: datasets: datasets.html resistance_predict: resistance_predict.html welcome_to_AMR: welcome_to_AMR.html -last_built: 2021-02-25T09:21Z +last_built: 2021-02-25T11:30Z urls: reference: https://msberends.github.io/AMR//reference article: https://msberends.github.io/AMR//articles diff --git a/docs/reference/ggplot_rsi.html b/docs/reference/ggplot_rsi.html index 88a8daa4..22a4efab 100644 --- a/docs/reference/ggplot_rsi.html +++ b/docs/reference/ggplot_rsi.html @@ -82,7 +82,7 @@colours -+ a named vector with colours for the bars. The names must be one or more of: S, SI, I, IR, R or be
FALSE
for standard ggplot2 colours. The default colours are colour-blind friendly.a named vector with colours for the bars. The names must be one or more of: S, SI, I, IR, R or be
FALSE
for standard ggplot2 colours. The default colours are colour-blind friendly, while maintaining the convention that e.g. 'susceptible' should be green and 'resistant' should be red.datalabels @@ -484,11 +484,6 @@ The lifecycle of this function is maturing< CIP) %>% ggplot_rsi(x = "age_group") -# for colourblind mode, use divergent colours from the viridis package: -example_isolates %>% - select(AMX, NIT, FOS, TMP, CIP) %>% - ggplot_rsi() + - scale_fill_viridis_d() # a shorter version which also adjusts data label colours: example_isolates %>% select(AMX, NIT, FOS, TMP, CIP) %>% diff --git a/docs/reference/index.html b/docs/reference/index.html index 973a9272..1009a227 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -81,7 +81,7 @@ diff --git a/docs/reference/plot.html b/docs/reference/plot.html index b7a1516b..f5a0f234 100644 --- a/docs/reference/plot.html +++ b/docs/reference/plot.html @@ -82,7 +82,7 @@ @@ -313,7 +313,7 @@
- x +x, data MIC values created with
as.mic()
or disk diffusion values created withas.disk()
@@ -359,7 +359,8 @@ The
ggplot
functions return aggplot
model that is extendible with anyggplot2
function.Details
-For interpreting MIC values as well as disk diffusion diameters, supported guidelines to be used as input for the
+guideline
argument are: "EUCAST 2021", "EUCAST 2020", "EUCAST 2019", "EUCAST 2018", "EUCAST 2017", "EUCAST 2016", "EUCAST 2015", "EUCAST 2014", "EUCAST 2013", "EUCAST 2012", "EUCAST 2011", "CLSI 2019", "CLSI 2018", "CLSI 2017", "CLSI 2016", "CLSI 2015", "CLSI 2014", "CLSI 2013", "CLSI 2012", "CLSI 2011" and "CLSI 2010".The interpretation of "I" will be named "Increased exposure" for all EUCAST guidelines since 2019, and will be named "Intermediate" in all other cases.
+For interpreting MIC values as well as disk diffusion diameters, supported guidelines to be used as input for the
guideline
argument are: "EUCAST 2021", "EUCAST 2020", "EUCAST 2019", "EUCAST 2018", "EUCAST 2017", "EUCAST 2016", "EUCAST 2015", "EUCAST 2014", "EUCAST 2013", "EUCAST 2012", "EUCAST 2011", "CLSI 2019", "CLSI 2018", "CLSI 2017", "CLSI 2016", "CLSI 2015", "CLSI 2014", "CLSI 2013", "CLSI 2012", "CLSI 2011" and "CLSI 2010".Simply using
"CLSI"
or"EUCAST"
as input will automatically select the latest version of that guideline.Stable Lifecycle
diff --git a/docs/survey.html b/docs/survey.html index 5ec1ebb2..87e4af3a 100644 --- a/docs/survey.html +++ b/docs/survey.html @@ -81,7 +81,7 @@ diff --git a/man/ggplot_rsi.Rd b/man/ggplot_rsi.Rd index 4eb27701..1cc70659 100644 --- a/man/ggplot_rsi.Rd +++ b/man/ggplot_rsi.Rd @@ -24,8 +24,8 @@ ggplot_rsi( minimum = 30, language = get_locale(), nrow = NULL, - colours = c(S = "#61a8ff", SI = "#61a8ff", I = "#61f7ff", IR = "#ff6961", R = - "#ff6961"), + colours = c(S = "#3CAEA3", SI = "#3CAEA3", I = "#F6D55C", IR = "#ED553B", R = + "#ED553B"), datalabels = TRUE, datalabels.size = 2.5, datalabels.colour = "grey15", @@ -100,7 +100,7 @@ labels_rsi_count( \item{nrow}{(when using \code{facet}) number of rows} -\item{colours}{a named vector with colours for the bars. The names must be one or more of: S, SI, I, IR, R or be \code{FALSE} for standard \link[ggplot2:ggplot]{ggplot2} colours. The default colours are colour-blind friendly.} +\item{colours}{a named vector with colours for the bars. The names must be one or more of: S, SI, I, IR, R or be \code{FALSE} for standard \link[ggplot2:ggplot]{ggplot2} colours. The default colours are colour-blind friendly, while maintaining the convention that e.g. 
'susceptible' should be green and 'resistant' should be red.} \item{datalabels}{show datalabels using \code{\link[=labels_rsi_count]{labels_rsi_count()}}} @@ -208,11 +208,6 @@ example_isolates \%>\% CIP) \%>\% ggplot_rsi(x = "age_group") -# for colourblind mode, use divergent colours from the viridis package: -example_isolates \%>\% - select(AMX, NIT, FOS, TMP, CIP) \%>\% - ggplot_rsi() + - scale_fill_viridis_d() # a shorter version which also adjusts data label colours: example_isolates \%>\% select(AMX, NIT, FOS, TMP, CIP) \%>\% diff --git a/man/plot.Rd b/man/plot.Rd index 11eedbdf..d8b95d10 100644 --- a/man/plot.Rd +++ b/man/plot.Rd @@ -72,7 +72,7 @@ ) } \arguments{ -\item{x}{MIC values created with \code{\link[=as.mic]{as.mic()}} or disk diffusion values created with \code{\link[=as.disk]{as.disk()}}} +\item{x, data}{MIC values created with \code{\link[=as.mic]{as.mic()}} or disk diffusion values created with \code{\link[=as.disk]{as.disk()}}} \item{main, title}{title of the plot} @@ -99,6 +99,8 @@ The \code{ggplot} functions return a \code{\link[ggplot2:ggplot]{ggplot}} model Functions to plot classes \code{rsi}, \code{mic} and \code{disk}, with support for base R and \code{ggplot2}. } \details{ +The interpretation of "I" will be named "Increased exposure" for all EUCAST guidelines since 2019, and will be named "Intermediate" in all other cases. + For interpreting MIC values as well as disk diffusion diameters, supported guidelines to be used as input for the \code{guideline} argument are: "EUCAST 2021", "EUCAST 2020", "EUCAST 2019", "EUCAST 2018", "EUCAST 2017", "EUCAST 2016", "EUCAST 2015", "EUCAST 2014", "EUCAST 2013", "EUCAST 2012", "EUCAST 2011", "CLSI 2019", "CLSI 2018", "CLSI 2017", "CLSI 2016", "CLSI 2015", "CLSI 2014", "CLSI 2013", "CLSI 2012", "CLSI 2011" and "CLSI 2010". Simply using \code{"CLSI"} or \code{"EUCAST"} as input will automatically select the latest version of that guideline. 
diff --git a/pkgdown/extra.css b/pkgdown/extra.css index 0ebc198f..a0e2a435 100644 --- a/pkgdown/extra.css +++ b/pkgdown/extra.css @@ -187,30 +187,30 @@ div[id^=last-updated] h2 { } /* tables, make them look like scientific ones */ -table { +.table { font-size: 90%; } -table * { +.table * { vertical-align: middle !important; } -table td { +.table td { padding: 4px !important; } -thead { +.table thead { border-top: 2px solid black; border-bottom: 2px solid black; } -thead ~ tbody { +.table thead ~ tbody { /* only when it has a header */ border-bottom: 2px solid black; } -thead th { +.table thead th { text-align: inherit; } -table a:not(.btn), .table a:not(.btn) { +.table a:not(.btn) { text-decoration: inherit; } -table a:not(.btn):hover, .table a:not(.btn):hover { +.table a:not(.btn):hover { text-decoration: underline; } diff --git a/vignettes/AMR.Rmd b/vignettes/AMR.Rmd index c95cca87..a9b03f2c 100755 --- a/vignettes/AMR.Rmd +++ b/vignettes/AMR.Rmd @@ -111,34 +111,26 @@ bacteria <- c("Escherichia coli", "Staphylococcus aureus", "Streptococcus pneumoniae", "Klebsiella pneumoniae") ``` -## Other variables -For completeness, we can also add the hospital where the patients was admitted and we need to define valid antibmicrobial results for our randomisation: - -```{r create other} -hospitals <- c("Hospital A", "Hospital B", "Hospital C", "Hospital D") -ab_interpretations <- c("S", "I", "R") -``` - ## Put everything together -Using the `sample()` function, we can randomly select items from all objects we defined earlier. To let our fake data reflect reality a bit, we will also approximately define the probabilities of bacteria and the antibiotic results with the `prob` parameter. +Using the `sample()` function, we can randomly select items from all objects we defined earlier. To let our fake data reflect reality a bit, we will also approximately define the probabilities of bacteria and the antibiotic results, using the `random_rsi()` function. 
```{r merge data} sample_size <- 20000 data <- data.frame(date = sample(dates, size = sample_size, replace = TRUE), patient_id = sample(patients, size = sample_size, replace = TRUE), - hospital = sample(hospitals, size = sample_size, replace = TRUE, + hospital = sample(c("Hospital A", + "Hospital B", + "Hospital C", + "Hospital D"), + size = sample_size, replace = TRUE, prob = c(0.30, 0.35, 0.15, 0.20)), bacteria = sample(bacteria, size = sample_size, replace = TRUE, prob = c(0.50, 0.25, 0.15, 0.10)), - AMX = sample(ab_interpretations, size = sample_size, replace = TRUE, - prob = c(0.60, 0.05, 0.35)), - AMC = sample(ab_interpretations, size = sample_size, replace = TRUE, - prob = c(0.75, 0.10, 0.15)), - CIP = sample(ab_interpretations, size = sample_size, replace = TRUE, - prob = c(0.80, 0.00, 0.20)), - GEN = sample(ab_interpretations, size = sample_size, replace = TRUE, - prob = c(0.92, 0.00, 0.08))) + AMX = random_rsi(sample_size, prob_RSI = c(0.35, 0.60, 0.05)), + AMC = random_rsi(sample_size, prob_RSI = c(0.15, 0.75, 0.10)), + CIP = random_rsi(sample_size, prob_RSI = c(0.20, 0.80, 0.00)), + GEN = random_rsi(sample_size, prob_RSI = c(0.08, 0.92, 0.00))) ``` Using the `left_join()` function from the `dplyr` package, we can 'map' the gender to the patient ID using the `patients_table` object we created earlier: @@ -443,6 +435,7 @@ data_1st %>% ``` ## Plots + To show results in plots, most R users would nowadays use the `ggplot2` package. This package lets you create plots in layers. You can read more about it [on their website](https://ggplot2.tidyverse.org/). 
A quick example would look like these syntaxes: ```{r plot 2, eval = FALSE} @@ -480,7 +473,7 @@ ggplot(data_1st %>% group_by(genus)) + geom_rsi(x = "genus") + # split plots on antibiotic facet_rsi(facet = "antibiotic") + - # set colours to the R/SI interpretations + # set colours to the R/SI interpretations (colour-blind friendly) scale_rsi_colours() + # show percentages on y axis scale_y_percent(breaks = 0:4 * 25) + @@ -506,6 +499,65 @@ data_1st %>% coord_flip() ``` +### Plotting MIC and disk diffusion values + +The AMR package also extends the `plot()` and `ggplot()` functions for plotting minimum inhibitory concentrations (MIC, created with `as.mic()`) and disk diffusion diameters (created with `as.disk()`). + +With the `random_mic()` and `random_disk()` functions, we can generate sampled values for the new data types (S3 classes) `` and ` `: + +```{r, results='markup'} +mic_values <- random_mic(size = 100) +mic_values +``` + +```{r} +# base R: +plot(mic_values) +# ggplot2: +ggplot(mic_values) +``` + +But we could also be more specific, by generating MICs that are likely to be found in *E. coli* for ciprofloxacin: + +```{r, results = 'markup', message = FALSE, warning = FALSE} +# this will generate MICs that are likely to be found in E. coli for ciprofloxacin: +mic_values_specific <- random_mic(size = 100, mo = "E. coli", ab = "cipro") +``` + +For the `plot()` and `ggplot()` function, we can define the microorganism and an antimicrobial agent the same way. This will add the interpretation of those values according to a chosen guidelines (defaults to the latest EUCAST guideline). + +Default colours are colour-blind friendly, while maintaining the convention that e.g. 'susceptible' should be green and 'resistant' should be red: + +```{r, message = FALSE, warning = FALSE} +# base R: +plot(mic_values_specific, mo = "E. coli", ab = "cipro") +# ggplot2: +ggplot(mic_values_specific, mo = "E. 
coli", ab = "cipro") +``` + +For disk diffusion values, there is not much of a difference in plotting: + +```{r, results = 'markup'} +# this will generate disks that are likely to be found in E. coli for ciprofloxacin: +disk_values_specific <- random_disk(size = 100, mo = "E. coli", ab = "cipro") +disk_values_specific +``` + +```{r, message = FALSE, warning = FALSE} +# base R: +plot(disk_values_specific, mo = "E. coli", ab = "cipro") +``` + +And when using the `ggplot2` package, but now choosing the latest implemented CLSI guideline (notice that the EUCAST-specific term "Incr. exposure" has changed to "Intermediate"): + +```{r, message = FALSE, warning = FALSE} +# and ggplot2, but now choosing an old CLSI guideline: +ggplot(disk_values_specific, + mo = "E. coli", + ab = "cipro", + guideline = "CLSI") +``` + ## Independence test The next example uses the `example_isolates` data set. This is a data set included with this package and contains 2,000 microbial isolates with their full antibiograms. It reflects reality and can be used to practice AMR data analysis.