diff --git a/.Rbuildignore b/.Rbuildignore index 3c2145c5..e63696ad 100755 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -22,7 +22,6 @@ ^data-raw$ ^\.lintr$ ^tests/testthat/_snaps$ -^vignettes/AMR.Rmd$ ^vignettes/benchmarks.Rmd$ ^vignettes/EUCAST.Rmd$ ^vignettes/PCA.Rmd$ diff --git a/DESCRIPTION b/DESCRIPTION index eb573d2a..05f2dc3a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: AMR -Version: 1.5.0.9025 +Version: 1.5.0.9026 Date: 2021-02-25 Title: Antimicrobial Resistance Data Analysis Authors@R: c( diff --git a/NAMESPACE b/NAMESPACE index a120bd83..60de0112 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -243,6 +243,7 @@ export(theme_rsi) importFrom(graphics,arrows) importFrom(graphics,axis) importFrom(graphics,barplot) +importFrom(graphics,legend) importFrom(graphics,mtext) importFrom(graphics,plot) importFrom(graphics,points) diff --git a/NEWS.md b/NEWS.md index 4eb68a0e..7aaf7e1f 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# AMR 1.5.0.9025 +# AMR 1.5.0.9026 ## Last updated: 25 February 2021 ### New @@ -63,6 +63,7 @@ * The `like()` function (and its fast alias `%like%`) now always use Perl compatibility, improving speed for many functions in this package (e.g., `as.mo()` is now up to 4 times faster) * *Staphylococcus cornubiensis* is now correctly categorised as coagulase-positive * `random_disk()` and `random_mic()` now have an expanded range in their randomisation +* Support for GISA (glycopeptide-intermediate *S. aureus*), so e.g. `mo_genus("GISA")` will return `"Staphylococcus"` ### Other * Big documentation updates diff --git a/R/ggplot_rsi.R b/R/ggplot_rsi.R index 7206f3ea..5de41893 100755 --- a/R/ggplot_rsi.R +++ b/R/ggplot_rsi.R @@ -36,7 +36,7 @@ #' @param facet variable to split plots by, either `"interpretation"` (default) or `"antibiotic"` or a grouping variable #' @inheritParams proportion #' @param nrow (when using `facet`) number of rows -#' @param colours a named vector with colours for the bars. 
The names must be one or more of: S, SI, I, IR, R or be `FALSE` for standard [ggplot2][ggplot2::ggplot()] colours. The default colours are colour-blind friendly. +#' @param colours a named vector with colours for the bars. The names must be one or more of: S, SI, I, IR, R or be `FALSE` for standard [ggplot2][ggplot2::ggplot()] colours. The default colours are colour-blind friendly, while maintaining the convention that e.g. 'susceptible' should be green and 'resistant' should be red. #' @param aesthetics aesthetics to apply the colours to, defaults to "fill" but can also be "colour" or "both" #' @param datalabels show datalabels using [labels_rsi_count()] #' @param datalabels.size size of the datalabels @@ -119,11 +119,6 @@ #' CIP) %>% #' ggplot_rsi(x = "age_group") #' -#' # for colourblind mode, use divergent colours from the viridis package: -#' example_isolates %>% -#' select(AMX, NIT, FOS, TMP, CIP) %>% -#' ggplot_rsi() + -#' scale_fill_viridis_d() #' # a shorter version which also adjusts data label colours: #' example_isolates %>% #' select(AMX, NIT, FOS, TMP, CIP) %>% @@ -155,11 +150,11 @@ ggplot_rsi <- function(data, minimum = 30, language = get_locale(), nrow = NULL, - colours = c(S = "#61a8ff", - SI = "#61a8ff", - I = "#61f7ff", - IR = "#ff6961", - R = "#ff6961"), + colours = c(S = "#3CAEA3", + SI = "#3CAEA3", + I = "#F6D55C", + IR = "#ED553B", + R = "#ED553B"), datalabels = TRUE, datalabels.size = 2.5, datalabels.colour = "grey15", @@ -309,17 +304,19 @@ geom_rsi <- function(position = NULL, x <- "interpretation" } - ggplot2::layer(geom = "bar", stat = "identity", position = position, - mapping = ggplot2::aes_string(x = x, y = y, fill = fill), - params = list(...), data = function(x) { - rsi_df(data = x, - translate_ab = translate_ab, - language = language, - minimum = minimum, - combine_SI = combine_SI, - combine_IR = combine_IR) - }) - + ggplot2::geom_col( + data = function(x) { + rsi_df(data = x, + translate_ab = translate_ab, + language = language, + 
minimum = minimum, + combine_SI = combine_SI, + combine_IR = combine_IR) + }, + mapping = ggplot2::aes_string(x = x, y = y, fill = fill), + position = position, + ... + ) } #' @rdname ggplot_rsi diff --git a/R/globals.R b/R/globals.R index 557a7a69..5e1b96c0 100755 --- a/R/globals.R +++ b/R/globals.R @@ -33,7 +33,10 @@ globalVariables(c(".rowid", "atc_group1", "atc_group2", "code", + "cols", + "count", "data", + "disk", "dosage", "dose", "dose_times", @@ -52,6 +55,7 @@ globalVariables(c(".rowid", "language", "lookup", "method", + "mic ", "microorganism", "microorganisms", "microorganisms.codes", @@ -67,8 +71,8 @@ globalVariables(c(".rowid", "reference.rule", "reference.rule_group", "reference.version", - "rsi_translation", "rowid", + "rsi_translation", "rule_group", "rule_name", "se_max", diff --git a/R/mo.R b/R/mo.R index f1c1756b..e060a1c5 100755 --- a/R/mo.R +++ b/R/mo.R @@ -711,8 +711,8 @@ exec_as.mo <- function(x, } # translate known trivial abbreviations to genus + species ---- - if (toupper(x_backup_without_spp[i]) %in% c("MRSA", "MSSA", "VISA", "VRSA", "BORSA") - | x_backup_without_spp[i] %like_case% "(^| )(mrsa|mssa|visa|vrsa|borsa|la-?mrsa|ca-?mrsa)( |$)") { + if (toupper(x_backup_without_spp[i]) %in% c("MRSA", "MSSA", "VISA", "VRSA", "BORSA", "GISA") + | x_backup_without_spp[i] %like_case% "(^| )(mrsa|mssa|visa|vrsa|borsa|gisa|la-?mrsa|ca-?mrsa)( |$)") { x[i] <- lookup(fullname == "Staphylococcus aureus", uncertainty = -1) next } diff --git a/R/plot.R b/R/plot.R index 0d23358b..39b4fe4d 100644 --- a/R/plot.R +++ b/R/plot.R @@ -28,7 +28,7 @@ #' Functions to plot classes `rsi`, `mic` and `disk`, with support for base R and `ggplot2`. #' @inheritSection lifecycle Stable Lifecycle #' @inheritSection AMR Read more on Our Website! 
-#' @param x MIC values created with [as.mic()] or disk diffusion values created with [as.disk()] +#' @param x,data MIC values created with [as.mic()] or disk diffusion values created with [as.disk()] #' @param mapping aesthetic mappings to use for [`ggplot()`][ggplot2::ggplot()] #' @param main,title title of the plot #' @param xlab,ylab axis title @@ -37,7 +37,10 @@ #' @param guideline interpretation guideline to use, defaults to the latest included EUCAST guideline, see *Details* #' @param colours_RSI colours to use for filling in the bars, must be a vector of three values (in the order R, S and I). The default colours are colour-blind friendly. #' @param expand logical to indicate whether the range on the x axis should be expanded between the lowest and highest value. For MIC values, intermediate values will be factors of 2 starting from the highest MIC value. For disk diameters, the whole diameter range will be filled. -#' @details For interpreting MIC values as well as disk diffusion diameters, supported guidelines to be used as input for the `guideline` argument are: `r vector_and(AMR::rsi_translation$guideline, quotes = TRUE, reverse = TRUE)`. +#' @details +#' The interpretation of "I" will be named "Increased exposure" for all EUCAST guidelines since 2019, and will be named "Intermediate" in all other cases. +#' +#' For interpreting MIC values as well as disk diffusion diameters, supported guidelines to be used as input for the `guideline` argument are: `r vector_and(AMR::rsi_translation$guideline, quotes = TRUE, reverse = TRUE)`. #' #' Simply using `"CLSI"` or `"EUCAST"` as input will automatically select the latest version of that guideline. 
#' @name plot @@ -62,7 +65,7 @@ NULL #' @method plot mic -#' @importFrom graphics barplot axis mtext +#' @importFrom graphics barplot axis mtext legend #' @export #' @rdname plot plot.mic <- function(x, @@ -89,13 +92,13 @@ plot.mic <- function(x, x <- plot_prepare_table(x, expand = expand) - cols_sub <- plot_colours_and_sub(x = x, - mo = mo, - ab = ab, - guideline = guideline, - colours_RSI = colours_RSI, - fn = as.mic, - ...) + cols_sub <- plot_colours_subtitle_guideline(x = x, + mo = mo, + ab = ab, + guideline = guideline, + colours_RSI = colours_RSI, + fn = as.mic, + ...) barplot(x, col = cols_sub$cols, @@ -117,7 +120,7 @@ plot.mic <- function(x, legend_col <- colours_RSI[2] } if (colours_RSI[3] %in% cols_sub$cols) { - legend_txt <- c(legend_txt, "Incr. exposure") + legend_txt <- c(legend_txt, plot_name_of_I(cols_sub$guideline)) legend_col <- c(legend_col, colours_RSI[3]) } if (colours_RSI[1] %in% cols_sub$cols) { @@ -194,21 +197,21 @@ ggplot.mic <- function(data, title <- gsub(" +", " ", paste0(title, collapse = " ")) x <- plot_prepare_table(data, expand = expand) - cols_sub <- plot_colours_and_sub(x = x, - mo = mo, - ab = ab, - guideline = guideline, - colours_RSI = colours_RSI, - fn = as.mic, - ...) + cols_sub <- plot_colours_subtitle_guideline(x = x, + mo = mo, + ab = ab, + guideline = guideline, + colours_RSI = colours_RSI, + fn = as.mic, + ...) df <- as.data.frame(x, stringsAsFactors = TRUE) colnames(df) <- c("mic", "count") df$cols <- cols_sub$cols df$cols[df$cols == colours_RSI[1]] <- "Resistant" df$cols[df$cols == colours_RSI[2]] <- "Susceptible" - df$cols[df$cols == colours_RSI[3]] <- "Incr. exposure" + df$cols[df$cols == colours_RSI[3]] <- plot_name_of_I(cols_sub$guideline) df$cols <- factor(df$cols, - levels = c("Susceptible", "Incr. 
exposure", "Resistant"), + levels = c("Susceptible", plot_name_of_I(cols_sub$guideline), "Resistant"), ordered = TRUE) if (!is.null(mapping)) { p <- ggplot2::ggplot(df, mapping = mapping) @@ -218,10 +221,11 @@ ggplot.mic <- function(data, if (any(colours_RSI %in% cols_sub$cols)) { p <- p + - ggplot2::geom_col(aes(x = mic, y = count, fill = cols)) + + ggplot2::geom_col(ggplot2::aes(x = mic, y = count, fill = cols)) + ggplot2::scale_fill_manual(values = c("Resistant" = colours_RSI[1], "Susceptible" = colours_RSI[2], - "Incr. exposure" = colours_RSI[3]),, + "Incr. exposure" = colours_RSI[3], + "Intermediate" = colours_RSI[3]), name = NULL) } else { p <- p + @@ -235,7 +239,7 @@ ggplot.mic <- function(data, #' @method plot disk #' @export -#' @importFrom graphics barplot axis mtext +#' @importFrom graphics barplot axis mtext legend #' @rdname plot plot.disk <- function(x, main = paste("Disk zones values of", deparse(substitute(x))), @@ -261,13 +265,13 @@ plot.disk <- function(x, x <- plot_prepare_table(x, expand = expand) - cols_sub <- plot_colours_and_sub(x = x, - mo = mo, - ab = ab, - guideline = guideline, - colours_RSI = colours_RSI, - fn = as.disk, - ...) + cols_sub <- plot_colours_subtitle_guideline(x = x, + mo = mo, + ab = ab, + guideline = guideline, + colours_RSI = colours_RSI, + fn = as.disk, + ...) barplot(x, col = cols_sub$cols, @@ -289,7 +293,7 @@ plot.disk <- function(x, legend_col <- colours_RSI[1] } if (colours_RSI[3] %in% cols_sub$cols) { - legend_txt <- c(legend_txt, "Incr. exposure") + legend_txt <- c(legend_txt, plot_name_of_I(cols_sub$guideline)) legend_col <- c(legend_col, colours_RSI[3]) } if (colours_RSI[2] %in% cols_sub$cols) { @@ -367,21 +371,21 @@ ggplot.disk <- function(data, title <- gsub(" +", " ", paste0(title, collapse = " ")) x <- plot_prepare_table(data, expand = expand) - cols_sub <- plot_colours_and_sub(x = x, - mo = mo, - ab = ab, - guideline = guideline, - colours_RSI = colours_RSI, - fn = as.disk, - ...) 
+ cols_sub <- plot_colours_subtitle_guideline(x = x, + mo = mo, + ab = ab, + guideline = guideline, + colours_RSI = colours_RSI, + fn = as.disk, + ...) df <- as.data.frame(x, stringsAsFactors = TRUE) colnames(df) <- c("disk", "count") df$cols <- cols_sub$cols df$cols[df$cols == colours_RSI[1]] <- "Resistant" df$cols[df$cols == colours_RSI[2]] <- "Susceptible" - df$cols[df$cols == colours_RSI[3]] <- "Incr. exposure" + df$cols[df$cols == colours_RSI[3]] <- plot_name_of_I(cols_sub$guideline) df$cols <- factor(df$cols, - levels = c("Resistant", "Incr. exposure", "Susceptible"), + levels = c("Resistant", plot_name_of_I(cols_sub$guideline), "Susceptible"), ordered = TRUE) if (!is.null(mapping)) { p <- ggplot2::ggplot(df, mapping = mapping) @@ -394,7 +398,8 @@ ggplot.disk <- function(data, ggplot2::geom_col(aes(x = disk, y = count, fill = cols)) + ggplot2::scale_fill_manual(values = c("Resistant" = colours_RSI[1], "Susceptible" = colours_RSI[2], - "Incr. exposure" = colours_RSI[3]), + "Incr. exposure" = colours_RSI[3], + "Intermediate" = colours_RSI[3]), name = NULL) } else { p <- p + @@ -402,7 +407,7 @@ ggplot.disk <- function(data, } p + - ggplot2::labs(title = title, x = xlab, y = ylab, sub = cols_sub$sub) + ggplot2::labs(title = title, x = xlab, y = ylab, subtitle = cols_sub$sub) } plot_prepare_table <- function(x, expand) { @@ -413,7 +418,9 @@ plot_prepare_table <- function(x, expand) { while (min(extra_range) / 2 > min(as.double(x))) { extra_range <- c(min(extra_range) / 2, extra_range) } - extra_range <- setNames(rep(0, length(extra_range)), extra_range) + nms <- extra_range + extra_range <- rep(0, length(extra_range)) + names(extra_range) <- nms x <- table(droplevels(x, as.mic = FALSE)) extra_range <- extra_range[!names(extra_range) %in% names(x)] x <- as.table(c(x, extra_range)) @@ -437,12 +444,22 @@ plot_prepare_table <- function(x, expand) { as.table(x) } -plot_colours_and_sub <- function(x, mo, ab, guideline, colours_RSI, fn, ...) 
{ +plot_name_of_I <- function(guideline) { + if (!guideline %like% "CLSI" && as.double(gsub("[^0-9]+", "", guideline)) >= 2019) { + # interpretation since 2019 + "Incr. exposure" + } else { + # interpretation until 2019 + "Intermediate" + } +} + +plot_colours_subtitle_guideline <- function(x, mo, ab, guideline, colours_RSI, fn, ...) { + guideline <- get_guideline(guideline, AMR::rsi_translation) if (!is.null(mo) && !is.null(ab)) { # interpret and give colour based on MIC values mo <- as.mo(mo) ab <- as.ab(ab) - guideline <- get_guideline(guideline, AMR::rsi_translation) rsi <- suppressWarnings(suppressMessages(as.rsi(fn(names(x)), mo = mo, ab = ab, guideline = guideline, ...))) cols <- character(length = length(rsi)) cols[is.na(rsi)] <- "#BEBEBE" @@ -454,16 +471,16 @@ plot_colours_and_sub <- function(x, mo, ab, guideline, colours_RSI, fn, ...) { if (all(cols == "#BEBEBE")) { message_("No ", guideline, " interpretations found for ", ab_name(ab, language = NULL, tolower = TRUE), " in ", moname) - guideline <- "" + guideline_txt <- "" } else { - guideline <- paste0("(following ", guideline, ")") + guideline_txt <- paste0("(following ", guideline, ")") } - sub <- bquote(.(abname)~"in"~italic(.(moname))~.(guideline)) + sub <- bquote(.(abname)~"in"~italic(.(moname))~.(guideline_txt)) } else { cols <- "#BEBEBE" sub <- NULL } - list(cols = cols, sub = sub) + list(cols = cols, sub = sub, guideline = guideline) } diff --git a/data-raw/AMR_latest.tar.gz b/data-raw/AMR_latest.tar.gz index e22f2bc1..4dd2b077 100644 Binary files a/data-raw/AMR_latest.tar.gz and b/data-raw/AMR_latest.tar.gz differ diff --git a/docs/404.html b/docs/404.html index 5b6d4cbb..e711daef 100644 --- a/docs/404.html +++ b/docs/404.html @@ -81,7 +81,7 @@ AMR (for R) - 1.5.0.9025 + 1.5.0.9026 diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index 2380f47f..f67dc4fb 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -81,7 +81,7 @@ AMR (for R) - 1.5.0.9025 + 1.5.0.9026 diff --git 
a/docs/articles/AMR.html b/docs/articles/AMR.html index 3b22530a..36c71043 100644 --- a/docs/articles/AMR.html +++ b/docs/articles/AMR.html @@ -39,7 +39,7 @@ AMR (for R) - 1.5.0.9018 + 1.5.0.9026 @@ -193,7 +193,7 @@

How to conduct AMR data analysis

Matthijs S. Berends

-

09 February 2021

+

25 February 2021

Source: vignettes/AMR.Rmd @@ -202,7 +202,7 @@ -

Note: values on this page will change with every website update since they are based on randomly created values and the page was written in R Markdown. However, the methodology remains unchanged. This page was generated on 09 February 2021.

+

Note: values on this page will change with every website update since they are based on randomly created values and the page was written in R Markdown. However, the methodology remains unchanged. This page was generated on 25 February 2021.

Introduction

@@ -233,21 +233,21 @@ -2021-02-09 +2021-02-25 abcd Escherichia coli S S -2021-02-09 +2021-02-25 abcd Escherichia coli S R -2021-02-09 +2021-02-25 efgh Escherichia coli R @@ -304,39 +304,31 @@ "Streptococcus pneumoniae", "Klebsiella pneumoniae")
-
-

-Other variables

-

For completeness, we can also add the hospital where the patients was admitted and we need to define valid antibmicrobial results for our randomisation:

-
-hospitals <- c("Hospital A", "Hospital B", "Hospital C", "Hospital D")
-ab_interpretations <- c("S", "I", "R")
-

Put everything together

-

Using the sample() function, we can randomly select items from all objects we defined earlier. To let our fake data reflect reality a bit, we will also approximately define the probabilities of bacteria and the antibiotic results with the prob parameter.

-
+

Using the sample() function, we can randomly select items from all objects we defined earlier. To let our fake data reflect reality a bit, we will also approximately define the probabilities of bacteria and the antibiotic results, using the random_rsi() function.

+
 sample_size <- 20000
 data <- data.frame(date = sample(dates, size = sample_size, replace = TRUE),
                    patient_id = sample(patients, size = sample_size, replace = TRUE),
-                   hospital = sample(hospitals, size = sample_size, replace = TRUE,
+                   hospital = sample(c("Hospital A",
+                                       "Hospital B",
+                                       "Hospital C",
+                                       "Hospital D"),
+                                     size = sample_size, replace = TRUE,
                                      prob = c(0.30, 0.35, 0.15, 0.20)),
                    bacteria = sample(bacteria, size = sample_size, replace = TRUE,
                                      prob = c(0.50, 0.25, 0.15, 0.10)),
-                   AMX = sample(ab_interpretations, size = sample_size, replace = TRUE,
-                                 prob = c(0.60, 0.05, 0.35)),
-                   AMC = sample(ab_interpretations, size = sample_size, replace = TRUE,
-                                 prob = c(0.75, 0.10, 0.15)),
-                   CIP = sample(ab_interpretations, size = sample_size, replace = TRUE,
-                                 prob = c(0.80, 0.00, 0.20)),
-                   GEN = sample(ab_interpretations, size = sample_size, replace = TRUE,
-                                 prob = c(0.92, 0.00, 0.08)))
+ AMX = random_rsi(sample_size, prob_RSI = c(0.35, 0.60, 0.05)), + AMC = random_rsi(sample_size, prob_RSI = c(0.15, 0.75, 0.10)), + CIP = random_rsi(sample_size, prob_RSI = c(0.20, 0.80, 0.00)), + GEN = random_rsi(sample_size, prob_RSI = c(0.08, 0.92, 0.00)))

Using the left_join() function from the dplyr package, we can ‘map’ the gender to the patient ID using the patients_table object we created earlier:

-
+
 data <- data %>% left_join(patients_table)

The resulting data set contains 20,000 blood culture isolates. With the head() function we can preview the first 6 rows of this data set:

-
+
 head(data)
@@ -352,31 +344,53 @@ - - - + + + - + - - + + - + + + + + + + + + + + + + + + + + + + + + + + - - - + + + @@ -385,35 +399,13 @@ - - - - - - - - - - - - - + + - - - - - - - - - - - @@ -427,7 +419,7 @@ Cleaning the data

We also created a package dedicated to data cleaning and checking, called the cleaner package. Its freq() function can be used to create frequency tables.

For example, for the gender variable:

-
+
 data %>% freq(gender)

Frequency table

Class: character
@@ -449,16 +441,16 @@ Longest: 1

- - - - + + + + - - + + @@ -466,23 +458,23 @@ Longest: 1

2014-07-31F7Hospital B2014-07-28B5Hospital A Escherichia coliRS S S S M
2017-11-01W102017-09-25X1 Hospital BEscherichia coliKlebsiella pneumoniaeRI S SF
2017-04-18V9Hospital DKlebsiella pneumoniaeISSSF
2016-06-06Z2Hospital AStaphylococcus aureusRS S S F
2013-06-17T1Hospital D2016-01-08V6Hospital C Klebsiella pneumoniae S SF
2012-12-13Q2Hospital CStreptococcus pneumoniaeRRSSF
2010-01-01O102015-03-14Y2 Hospital B Staphylococcus aureusR S S SF
2010-07-24V5Hospital AStaphylococcus aureusSSR S F
1 M10,33551.68%10,33551.68%10,50252.51%10,50252.51%
2 F9,66548.33%9,49847.49% 20,000 100.00%

So, we can draw at least two conclusions immediately. From a data scientist’s perspective, the data looks clean: only values M and F. From a researcher’s perspective: there are slightly more men. Nothing we didn’t already know.

The data is already quite clean, but we still need to transform some variables. The bacteria column now consists of text, and we want to add more variables based on microbial IDs later on. So, we will transform this column to valid IDs. The mutate() function of the dplyr package makes this really easy:

-
+
 data <- data %>%
   mutate(bacteria = as.mo(bacteria))

We also want to transform the antibiotics, because in real life data we don’t know if they are really clean. The as.rsi() function ensures reliability and reproducibility in these kinds of variables. The mutate_at() function will run the as.rsi() function on defined variables:

-
+
 data <- data %>%
   mutate_at(vars(AMX:GEN), as.rsi)

Finally, we will apply EUCAST rules on our antimicrobial results. In Europe, most medical microbiological laboratories already apply these rules. Our package features their latest insights on intrinsic resistance and exceptional phenotypes. Moreover, the eucast_rules() function can also apply additional rules, like forcing ampicillin = R when amoxicillin/clavulanic acid = R.

Because the amoxicillin (column AMX) and amoxicillin/clavulanic acid (column AMC) in our data were generated randomly, some rows will undoubtedly contain AMX = S and AMC = R, which is technically impossible. The eucast_rules() fixes this:

-
+
 data <- eucast_rules(data, col_mo = "bacteria", rules = "all")

Adding new variables

Now that we have the microbial ID, we can add some taxonomic properties:

-
+
 data <- data %>% 
   mutate(gramstain = mo_gramstain(bacteria),
          genus = mo_genus(bacteria),
@@ -497,25 +489,25 @@ Longest: 1

(…) When preparing a cumulative antibiogram to guide clinical decisions about empirical antimicrobial therapy of initial infections, only the first isolate of a given species per patient, per analysis period (eg, one year) should be included, irrespective of body site, antimicrobial susceptibility profile, or other phenotypical characteristics (eg, biotype). The first isolate is easily identified, and cumulative antimicrobial susceptibility test data prepared using the first isolate are generally comparable to cumulative antimicrobial susceptibility test data calculated by other methods, providing duplicate isolates are excluded.
M39-A4 Analysis and Presentation of Cumulative Antimicrobial Susceptibility Test Data, 4th Edition. CLSI, 2014. Chapter 6.4

This AMR package includes this methodology with the first_isolate() function. It adopts the episode of a year (can be changed by user) and it starts counting days after every selected isolate. This new variable can easily be added to our data:

-
+
 data <- data %>% 
   mutate(first = first_isolate())
 # NOTE: Using column 'bacteria' as input for `col_mo`.
 # NOTE: Using column 'date' as input for `col_date`.
 # NOTE: Using column 'patient_id' as input for `col_patient_id`.

So only 28.3% is suitable for resistance analysis! We can now filter on it with the filter() function, also from the dplyr package:

-
+
 data_1st <- data %>% 
   filter(first == TRUE)

For future use, the above two syntaxes can be shortened with the filter_first_isolate() function:

-
+
 data_1st <- data %>% 
   filter_first_isolate()

First weighted isolates

-

We made a slight twist to the CLSI algorithm, to take into account the antimicrobial susceptibility profile. Have a look at all E. coli isolates of patient N1, sorted on date:

+

We made a slight twist to the CLSI algorithm, to take into account the antimicrobial susceptibility profile. Have a look at all E. coli isolates of patient B7, sorted on date:

@@ -531,10 +523,10 @@ Longest: 1

- - + + - + @@ -542,21 +534,21 @@ Longest: 1

- - + + - + - - + + - + @@ -564,41 +556,41 @@ Longest: 1

- - + + - - + + - - + + - - - - - - - - - - - - - + + + + + + + + + + + + + - - + + @@ -608,42 +600,42 @@ Longest: 1

- - + + - + - - + + - + + - - + - - + + - - + +
isolate
12010-02-03N12010-03-16B7 B_ESCHR_COLIIR S S S
22010-04-02N12010-05-03B7 B_ESCHR_COLI S SSR S FALSE
32010-06-23N12010-05-16B7 B_ESCHR_COLISR S S S
42010-06-25N12010-07-25B7 B_ESCHR_COLIRRSS S S FALSE
52010-10-04N12010-07-26B7 B_ESCHR_COLISSSSFALSE
62010-10-11N1B_ESCHR_COLISSRR S R FALSE
62010-10-14B7B_ESCHR_COLISSSSFALSE
72010-10-29N12010-12-24B7 B_ESCHR_COLI R S
82010-11-11N12011-02-27B7 B_ESCHR_COLI S SRS S FALSE
92010-12-06N12011-09-02B7 B_ESCHR_COLIRSS S RSFALSETRUE
102010-12-13N12011-11-08B7 B_ESCHR_COLIRRII S S FALSE
-

Only 1 isolates are marked as ‘first’ according to CLSI guideline. But when reviewing the antibiogram, it is obvious that some isolates are absolutely different strains and should be included too. This is why we weigh isolates, based on their antibiogram. The key_antibiotics() function adds a vector with 18 key antibiotics: 6 broad spectrum ones, 6 small spectrum for Gram negatives and 6 small spectrum for Gram positives. These can be defined by the user.

+

Only 2 isolates are marked as ‘first’ according to CLSI guideline. But when reviewing the antibiogram, it is obvious that some isolates are absolutely different strains and should be included too. This is why we weigh isolates, based on their antibiogram. The key_antibiotics() function adds a vector with 18 key antibiotics: 6 broad spectrum ones, 6 small spectrum for Gram negatives and 6 small spectrum for Gram positives. These can be defined by the user.

If a column exists with a name like ‘key(…)ab’ the first_isolate() function will automatically use it and determine the first weighted isolates. Mind the NOTEs in the output below:

-
+
 data <- data %>% 
   mutate(keyab = key_antibiotics()) %>% 
   mutate(first_weighted = first_isolate())
@@ -665,10 +657,10 @@ Longest: 1

1 -2010-02-03 -N1 +2010-03-16 +B7 B_ESCHR_COLI -I +R S S S @@ -677,35 +669,35 @@ Longest: 1

2 -2010-04-02 -N1 +2010-05-03 +B7 B_ESCHR_COLI S S -S +R S FALSE -FALSE +TRUE 3 -2010-06-23 -N1 +2010-05-16 +B7 B_ESCHR_COLI -S +R S S S FALSE -FALSE +TRUE 4 -2010-06-25 -N1 +2010-07-25 +B7 B_ESCHR_COLI -R -R +S +S S S FALSE @@ -713,32 +705,32 @@ Longest: 1

5 -2010-10-04 -N1 +2010-07-26 +B7 B_ESCHR_COLI -S -S -S -S -FALSE -TRUE - - -6 -2010-10-11 -N1 -B_ESCHR_COLI -S -S +R +R S R FALSE TRUE + +6 +2010-10-14 +B7 +B_ESCHR_COLI +S +S +S +S +FALSE +TRUE + 7 -2010-10-29 -N1 +2010-12-24 +B7 B_ESCHR_COLI R S @@ -749,35 +741,35 @@ Longest: 1

8 -2010-11-11 -N1 +2011-02-27 +B7 B_ESCHR_COLI S S -R +S S FALSE TRUE 9 -2010-12-06 -N1 +2011-09-02 +B7 B_ESCHR_COLI -R +S +S S R -S -FALSE +TRUE TRUE 10 -2010-12-13 -N1 +2011-11-08 +B7 B_ESCHR_COLI -R -R +I +I S S FALSE @@ -785,18 +777,18 @@ Longest: 1

-

Instead of 1, now 8 isolates are flagged. In total, 78.2% of all isolates are marked ‘first weighted’ - 50.0% more than when using the CLSI guideline. In real life, this novel algorithm will yield 5-10% more isolates than the classic CLSI guideline.

+

Instead of 2, now 10 isolates are flagged. In total, 79.1% of all isolates are marked ‘first weighted’ - 50.8% more than when using the CLSI guideline. In real life, this novel algorithm will yield 5-10% more isolates than the classic CLSI guideline.

As with filter_first_isolate(), there’s a shortcut for this new algorithm too:

-
+
 data_1st <- data %>% 
   filter_first_weighted_isolate()
-

So we end up with 15,648 isolates for analysis.

+

So we end up with 15,821 isolates for analysis.

We can remove unneeded columns:

-
+
 data_1st <- data_1st %>% 
   select(-c(first, keyab))

Now our data looks like:

-
+
 head(data_1st)
@@ -833,46 +825,78 @@ Longest: 1

- - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + + - - + + - - - - + + + + - - - + + + - - - + + + - + @@ -882,47 +906,15 @@ Longest: 1

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + - - - + + + @@ -942,15 +934,15 @@ Longest: 1

Dispersion of species

To just get an idea how the species are distributed, create a frequency table with our freq() function. We created the genus and species column earlier based on the microbial ID. With paste(), we can concatenate them together.

The freq() function can be used like the base R language was intended:

-
+
 freq(paste(data_1st$genus, data_1st$species))

Or it can be used the dplyr way, which is easier to read:

-
+
 data_1st %>% freq(genus, species)

Frequency table

Class: character
-Length: 15,648
-Available: 15,648 (100%, NA: 0 = 0%)
+Length: 15,821
+Available: 15,821 (100%, NA: 0 = 0%)
Unique: 4

Shortest: 16
Longest: 24

@@ -967,33 +959,33 @@ Longest: 24

- - - - + + + + - - - - + + + + - - - - + + + + - - - + + + @@ -1003,7 +995,7 @@ Longest: 24

Overview of different bug/drug combinations

If you want to get a quick glance of the number of isolates in different bug/drug combinations, you can use the bug_drug_combinations() function:

-
+
 data_1st %>% 
   bug_drug_combinations() %>% 
   head() # show first 6 rows
@@ -1020,55 +1012,55 @@ Longest: 24

- - - - + + + + - - - - + + + + - + - - + + - + - - + + - - + + - - - - + + + +
42012-12-13Q2Hospital CB_STRPT_PNMN12014-07-28B5Hospital AB_ESCHR_COLISSSSMGram-negativeEscherichiacoliTRUE
22017-09-25X1Hospital BB_KLBSL_PNMN RISSFGram-negativeKlebsiellapneumoniaeTRUE
32017-04-18V9Hospital DB_KLBSL_PNMN R SRSS FGram-positiveStreptococcusGram-negativeKlebsiella pneumoniae TRUE
52010-01-01O10Hospital BB_STPHY_AURS2016-01-08V6Hospital CB_KLBSL_PNMN R S S S FGram-positiveStaphylococcusaureusGram-negativeKlebsiellapneumoniae TRUE
62010-07-24V5Hospital A2015-03-14Y2Hospital B B_STPHY_AURS S SRS S F Gram-positive
72014-07-13G3Hospital BB_ESCHR_COLIRSRRMGram-negativeEscherichiacoliTRUE
82013-07-06K1Hospital DB_ESCHR_COLISSRSMGram-negativeEscherichiacoliTRUE
92011-07-06X4Hospital C2014-12-23G8Hospital A B_STPHY_AURSRR S SFSSM Gram-positive Staphylococcus aureus
1 Escherichia coli7,84650.14%7,84650.14%7,99450.53%7,99450.53%
2 Staphylococcus aureus3,87124.74%11,71774.88%3,92324.80%11,91775.32%
3 Streptococcus pneumoniae2,33114.90%14,04889.78%2,32114.67%14,23889.99%
4 Klebsiella pneumoniae1,60010.22%15,6481,58310.01%15,821 100.00%
E. coli AMX376426338197846381523939407994
E. coli AMC628625513057846628930414017994
E. coli CIP59316086 01915784619087994
E. coli GEN70437158 080378468367994
K. pneumoniae AMX 0 01600160015831583
K. pneumoniae AMC12706426616001224573021583

Using Tidyverse selections, you can also select columns based on the antibiotic class they are in:

-
+
 data_1st %>% 
   select(bacteria, fluoroquinolones()) %>% 
   bug_drug_combinations()
@@ -1086,34 +1078,34 @@ Longest: 24

E. coli CIP -5931 +6086 0 -1915 -7846 +1908 +7994 K. pneumoniae CIP -1214 +1215 0 -386 -1600 +368 +1583 S. aureus CIP -2965 +3062 0 -906 -3871 +861 +3923 S. pneumoniae CIP -1801 +1765 0 -530 -2331 +556 +2321 @@ -1124,11 +1116,11 @@ Longest: 24

Resistance percentages

The functions resistance() and susceptibility() can be used to calculate antimicrobial resistance or susceptibility. For more specific analyses, the functions proportion_S(), proportion_SI(), proportion_I(), proportion_IR() and proportion_R() can be used to determine the proportion of a specific antimicrobial outcome.

As per the EUCAST guideline of 2019, we calculate resistance as the proportion of R (proportion_R(), equal to resistance()) and susceptibility as the proportion of S and I (proportion_SI(), equal to susceptibility()). These functions can be used on their own:

-
+
 data_1st %>% resistance(AMX)
-# [1] 0.5357873
+# [1] 0.5370078

Or can be used in conjunction with group_by() and summarise(), both from the dplyr package:

-
+
 data_1st %>% 
   group_by(hospital) %>% 
   summarise(amoxicillin = resistance(AMX))
@@ -1141,24 +1133,24 @@ Longest: 24

Hospital A -0.5376208 +0.5410321 Hospital B -0.5299270 +0.5381272 Hospital C -0.5439754 +0.5306981 Hospital D -0.5373087 +0.5338134

Of course it would be very convenient to know the number of isolates responsible for the percentages. For that purpose the n_rsi() can be used, which works exactly like n_distinct() from the dplyr package. It counts all isolates available for every group (i.e. values S, I or R):

-
+
 data_1st %>% 
   group_by(hospital) %>% 
   summarise(amoxicillin = resistance(AMX),
@@ -1173,28 +1165,28 @@ Longest: 24

Hospital A -0.5376208 -4758 +0.5410321 +4728 Hospital B -0.5299270 -5480 +0.5381272 +5521 Hospital C -0.5439754 -2274 +0.5306981 +2378 Hospital D -0.5373087 -3136 +0.5338134 +3194

These functions can also be used to get the proportion of multiple antibiotics, to calculate empiric susceptibility of combination therapies very easily:

-
+
 data_1st %>% 
   group_by(genus) %>% 
   summarise(amoxiclav = susceptibility(AMC),
@@ -1211,32 +1203,32 @@ Longest: 24

Escherichia -0.8336732 -0.8976549 -0.9850879 +0.8247436 +0.8954216 +0.9836127 Klebsiella -0.8337500 -0.9050000 -0.9812500 +0.8092230 +0.9001895 +0.9823121 Staphylococcus -0.8235598 -0.9227590 -0.9842418 +0.8179964 +0.9166454 +0.9836860 Streptococcus -0.5456885 +0.5489013 0.0000000 -0.5456885 +0.5489013

To make a transition to the next part, let’s see how this difference could be plotted:

-
+
 data_1st %>% 
   group_by(genus) %>% 
   summarise("1. Amoxi/clav" = susceptibility(AMC),
@@ -1255,7 +1247,7 @@ Longest: 24

Plots

To show results in plots, most R users would nowadays use the ggplot2 package. This package lets you create plots in layers. You can read more about it on their website. A quick example would look like these syntaxes:

-
+
 ggplot(data = a_data_set,
        mapping = aes(x = year,
                      y = value)) +
@@ -1269,13 +1261,13 @@ Longest: 24

ggplot(a_data_set) + geom_bar(aes(year))

The AMR package contains functions to extend this ggplot2 package, for example geom_rsi(). It automatically transforms data with count_df() or proportion_df() and shows results in stacked bars. Its simplest and shortest example:

-
+
 ggplot(data_1st) +
   geom_rsi(translate_ab = FALSE)

Omit the translate_ab = FALSE to have the antibiotic codes (AMX, AMC, CIP, GEN) translated to official WHO names (amoxicillin, amoxicillin/clavulanic acid, ciprofloxacin, gentamicin).

If we group on e.g. the genus column and add some additional functions from our package, we can create this:

-
+
 # group the data on `genus`
 ggplot(data_1st %>% group_by(genus)) + 
   # create bars with genus on x axis
@@ -1284,7 +1276,7 @@ Longest: 24

geom_rsi(x = "genus") + # split plots on antibiotic facet_rsi(facet = "antibiotic") + - # set colours to the R/SI interpretations + # set colours to the R/SI interpretations (colour-blind friendly) scale_rsi_colours() + # show percentages on y axis scale_y_percent(breaks = 0:4 * 25) + @@ -1298,7 +1290,7 @@ Longest: 24

theme(axis.text.y = element_text(face = "italic"))

To simplify this, we also created the ggplot_rsi() function, which combines almost all above functions:

-
+
 data_1st %>% 
   group_by(genus) %>%
   ggplot_rsi(x = "genus",
@@ -1307,13 +1299,79 @@ Longest: 24

datalabels = FALSE) + coord_flip()

+
+

+Plotting MIC and disk diffusion values

+

The AMR package also extends the plot() and ggplot() functions for plotting minimum inhibitory concentrations (MIC, created with as.mic()) and disk diffusion diameters (created with as.disk()).

+

With the random_mic() and random_disk() functions, we can generate sampled values for the new data types (S3 classes) <mic> and <disk>:

+
+mic_values <- random_mic(size = 100)
+mic_values
+# Class <mic>
+#   [1] 128    64     0.5    8      64     0.125  16     64     8      2     
+#  [11] 0.0625 8      1      4      0.25   0.5    0.5    32     1      1     
+#  [21] 0.25   8      8      0.0625 16     16     2      64     0.125  8     
+#  [31] 1      0.125  4      0.125  32     16     0.25   1      0.5    128   
+#  [41] 32     128    64     1      0.125  32     0.125  0.125  8      0.125 
+#  [51] 64     8      8      0.0625 0.25   0.5    2      2      128    0.25  
+#  [61] 4      1      1      0.25   64     0.5    32     0.25   16     2     
+#  [71] 16     8      32     4      0.25   16     128    32     128    4     
+#  [81] 64     1      0.25   0.5    1      1      4      8      16     16    
+#  [91] 32     0.5    1      0.125  2      128    0.5    1      4      64
+
+# base R:
+plot(mic_values)
+

+
+# ggplot2:
+ggplot(mic_values)
+

+

But we could also be more specific, by generating MICs that are likely to be found in E. coli for ciprofloxacin:

+
+# this will generate MICs that are likely to be found in E. coli for ciprofloxacin:
+mic_values_specific <- random_mic(size = 100, mo = "E. coli", ab = "cipro")
+

For the plot() and ggplot() functions, we can define the microorganism and an antimicrobial agent the same way. This will add the interpretation of those values according to a chosen guideline (defaults to the latest EUCAST guideline).

+

Default colours are colour-blind friendly, while maintaining the convention that e.g. ‘susceptible’ should be green and ‘resistant’ should be red:

+
+# base R:
+plot(mic_values_specific, mo = "E. coli", ab = "cipro")
+

+
+# ggplot2:
+ggplot(mic_values_specific, mo = "E. coli", ab = "cipro")
+

+

For disk diffusion values, there is not much of a difference in plotting:

+
+# this will generate disks that are likely to be found in E. coli for ciprofloxacin:
+disk_values_specific <- random_disk(size = 100, mo = "E. coli", ab = "cipro")
+# NOTE: Translation to one microorganism was guessed with uncertainty. Use
+#       `mo_uncertainties()` to review it.
+disk_values_specific
+# Class <disk>
+#   [1] 20 29 20 26 27 22 18 21 29 23 18 19 29 26 27 24 20 20 23 20 18 25 31 31 29
+#  [26] 28 27 31 18 21 18 26 30 20 21 27 24 27 19 24 17 26 24 31 30 28 19 30 19 21
+#  [51] 17 19 22 19 23 23 19 20 28 31 31 30 20 17 21 31 29 29 27 21 29 27 26 18 21
+#  [76] 29 21 26 29 22 24 18 22 29 30 17 19 23 26 31 17 18 18 28 30 25 22 21 27 17
+
+# base R:
+plot(disk_values_specific, mo = "E. coli", ab = "cipro")
+

+

And when using the ggplot2 package, but now choosing the latest implemented CLSI guideline (notice that the EUCAST-specific term “Incr. exposure” has changed to “Intermediate”):

+
+# and ggplot2, but now choosing the latest implemented CLSI guideline:
+ggplot(disk_values_specific,
+       mo = "E. coli",
+       ab = "cipro",
+       guideline = "CLSI")
+

+

Independence test

The next example uses the example_isolates data set. This is a data set included with this package and contains 2,000 microbial isolates with their full antibiograms. It reflects reality and can be used to practice AMR data analysis.

We will compare the resistance to fosfomycin (column FOS) in hospital A and D. The input for the fisher.test() can be retrieved with a transformation like this:

-
+
 # use package 'tidyr' to pivot data:
 library(tidyr)
 
@@ -1332,7 +1390,7 @@ Longest: 24

# [1,] 25 77 # [2,] 24 33

We can apply the test now with:

-
+
 # do Fisher's Exact Test
 fisher.test(check_FOS)                            
 # 
diff --git a/docs/articles/AMR_files/figure-html/plot 1-1.png b/docs/articles/AMR_files/figure-html/plot 1-1.png
index fac59d74..29ba2535 100644
Binary files a/docs/articles/AMR_files/figure-html/plot 1-1.png and b/docs/articles/AMR_files/figure-html/plot 1-1.png differ
diff --git a/docs/articles/AMR_files/figure-html/plot 3-1.png b/docs/articles/AMR_files/figure-html/plot 3-1.png
index 1078fd51..ab08612c 100644
Binary files a/docs/articles/AMR_files/figure-html/plot 3-1.png and b/docs/articles/AMR_files/figure-html/plot 3-1.png differ
diff --git a/docs/articles/AMR_files/figure-html/plot 4-1.png b/docs/articles/AMR_files/figure-html/plot 4-1.png
index c096fea4..76f329c0 100644
Binary files a/docs/articles/AMR_files/figure-html/plot 4-1.png and b/docs/articles/AMR_files/figure-html/plot 4-1.png differ
diff --git a/docs/articles/AMR_files/figure-html/plot 5-1.png b/docs/articles/AMR_files/figure-html/plot 5-1.png
index 8134a60a..83c570d7 100644
Binary files a/docs/articles/AMR_files/figure-html/plot 5-1.png and b/docs/articles/AMR_files/figure-html/plot 5-1.png differ
diff --git a/docs/articles/AMR_files/figure-html/unnamed-chunk-13-1.png b/docs/articles/AMR_files/figure-html/unnamed-chunk-13-1.png
new file mode 100644
index 00000000..7f506b40
Binary files /dev/null and b/docs/articles/AMR_files/figure-html/unnamed-chunk-13-1.png differ
diff --git a/docs/articles/AMR_files/figure-html/unnamed-chunk-13-2.png b/docs/articles/AMR_files/figure-html/unnamed-chunk-13-2.png
new file mode 100644
index 00000000..61f95564
Binary files /dev/null and b/docs/articles/AMR_files/figure-html/unnamed-chunk-13-2.png differ
diff --git a/docs/articles/AMR_files/figure-html/unnamed-chunk-15-1.png b/docs/articles/AMR_files/figure-html/unnamed-chunk-15-1.png
new file mode 100644
index 00000000..a7c32bcd
Binary files /dev/null and b/docs/articles/AMR_files/figure-html/unnamed-chunk-15-1.png differ
diff --git a/docs/articles/AMR_files/figure-html/unnamed-chunk-15-2.png b/docs/articles/AMR_files/figure-html/unnamed-chunk-15-2.png
new file mode 100644
index 00000000..7c46433a
Binary files /dev/null and b/docs/articles/AMR_files/figure-html/unnamed-chunk-15-2.png differ
diff --git a/docs/articles/AMR_files/figure-html/unnamed-chunk-17-1.png b/docs/articles/AMR_files/figure-html/unnamed-chunk-17-1.png
new file mode 100644
index 00000000..1d0c5d57
Binary files /dev/null and b/docs/articles/AMR_files/figure-html/unnamed-chunk-17-1.png differ
diff --git a/docs/articles/AMR_files/figure-html/unnamed-chunk-18-1.png b/docs/articles/AMR_files/figure-html/unnamed-chunk-18-1.png
new file mode 100644
index 00000000..6cd717bc
Binary files /dev/null and b/docs/articles/AMR_files/figure-html/unnamed-chunk-18-1.png differ
diff --git a/docs/articles/index.html b/docs/articles/index.html
index f038a464..af4e1b72 100644
--- a/docs/articles/index.html
+++ b/docs/articles/index.html
@@ -81,7 +81,7 @@
       
       
         AMR (for R)
-        1.5.0.9025
+        1.5.0.9026
       
     
diff --git a/docs/authors.html b/docs/authors.html index 5cb562e5..3e35632f 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -81,7 +81,7 @@ AMR (for R) - 1.5.0.9025 + 1.5.0.9026
diff --git a/docs/extra.css b/docs/extra.css index 0ebc198f..a0e2a435 100644 --- a/docs/extra.css +++ b/docs/extra.css @@ -187,30 +187,30 @@ div[id^=last-updated] h2 { } /* tables, make them look like scientific ones */ -table { +.table { font-size: 90%; } -table * { +.table * { vertical-align: middle !important; } -table td { +.table td { padding: 4px !important; } -thead { +.table thead { border-top: 2px solid black; border-bottom: 2px solid black; } -thead ~ tbody { +.table thead ~ tbody { /* only when it has a header */ border-bottom: 2px solid black; } -thead th { +.table thead th { text-align: inherit; } -table a:not(.btn), .table a:not(.btn) { +.table a:not(.btn) { text-decoration: inherit; } -table a:not(.btn):hover, .table a:not(.btn):hover { +.table a:not(.btn):hover { text-decoration: underline; } diff --git a/docs/index.html b/docs/index.html index b1bbcd3e..43bfb2c4 100644 --- a/docs/index.html +++ b/docs/index.html @@ -43,7 +43,7 @@ AMR (for R) - 1.5.0.9025 + 1.5.0.9026
diff --git a/docs/news/index.html b/docs/news/index.html index ee54c82c..eaa4cff7 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -81,7 +81,7 @@ AMR (for R) - 1.5.0.9025 + 1.5.0.9026
@@ -236,9 +236,9 @@ Source: NEWS.md
-
-

-AMR 1.5.0.9025 Unreleased +
+

+AMR 1.5.0.9026 Unreleased

@@ -333,6 +333,8 @@ Staphylococcus cornubiensis is now correctly categorised as coagulase-positive
  • random_disk() and random_mic() now have an expanded range in their randomisation
  • +
  • Support for GISA (glycopeptide-intermediate S. aureus), so e.g. mo_genus("GISA") will return "Staphylococcus" +
  • @@ -659,7 +661,7 @@

    Making this package independent of especially the tidyverse (e.g. packages dplyr and tidyr) tremendously increases sustainability in the long term, since tidyverse functions change quite often. Good for users, but hard for package maintainers. Most of our functions are replaced with versions that only rely on base R, which keeps this package fully functional for many years to come, without requiring a lot of maintenance to keep up with other packages anymore. Another upside is that this package can now be used with all versions of R since R-3.0.0 (April 2013). Our package is being used in settings where the resources are very limited. Fewer dependencies on newer software is helpful for such settings.

    Negative effects of this change are:

      -
    • Function freq() that was borrowed from the cleaner package was removed. Use cleaner::freq(), or run library("cleaner") before you use freq().
    • +
    • Function freq() that was borrowed from the cleaner package was removed. Use cleaner::freq(), or run library("cleaner") before you use freq().
    • Printing values of class mo or rsi in a tibble will no longer be in colour and printing rsi in a tibble will show the class <ord>, not <rsi> anymore. This is purely a visual effect.
    • All functions from the mo_* family (like mo_name() and mo_gramstain()) are noticeably slower when running on hundreds of thousands of rows.
    • For developers: classes mo and ab now both also inherit class character, to support any data transformation. This change invalidates code that checks for class length == 1.
    • @@ -996,7 +998,7 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/ #> invalid microorganism code, NA generated

    This is important, because a value like "testvalue" could never be understood by e.g. mo_name(), although the class would suggest a valid microbial code.

    -
  • Function freq() has moved to a new package, clean (CRAN link), since creating frequency tables actually does not fit the scope of this package. The freq() function still works, since it is re-exported from the clean package (which will be installed automatically upon updating this AMR package).

  • +
  • Function freq() has moved to a new package, clean (CRAN link), since creating frequency tables actually does not fit the scope of this package. The freq() function still works, since it is re-exported from the clean package (which will be installed automatically upon updating this AMR package).

  • Renamed data set septic_patients to example_isolates

  • @@ -1265,7 +1267,7 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/
  • The age() function gained a new argument exact to determine ages with decimals
  • Removed deprecated functions guess_mo(), guess_atc(), EUCAST_rules(), interpretive_reading(), rsi()
  • -
  • Frequency tables (freq()): +
  • Frequency tables (freq()):
    • speed improvement for microbial IDs

    • fixed factor level names for R Markdown

    • @@ -1275,12 +1277,12 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/
       
       septic_patients %>% 
      -  freq(age) %>% 
      +  freq(age) %>% 
         boxplot()
       # grouped boxplots:
       septic_patients %>% 
         group_by(hospital_id) %>% 
      -  freq(age) %>%
      +  freq(age) %>%
         boxplot()
    @@ -1290,7 +1292,7 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/
  • Added ceftazidime intrinsic resistance to Streptococci
  • Changed default settings for age_groups(), to let groups of fives and tens end with 100+ instead of 120+
  • -
  • Fix for freq() for when all values are NA +
  • Fix for freq() for when all values are NA
  • Fix for first_isolate() for when dates are missing
  • Improved speed of guess_ab_col() @@ -1531,7 +1533,7 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/
  • -
  • Frequency tables (freq() function): +
  • Frequency tables (freq() function):
    • Support for tidyverse quasiquotation! Now you can create frequency tables of function outcomes:

      @@ -1541,15 +1543,15 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/ # OLD WAY septic_patients %>% mutate(genus = mo_genus(mo)) %>% - freq(genus) + freq(genus) # NEW WAY septic_patients %>% - freq(mo_genus(mo)) + freq(mo_genus(mo)) # Even supports grouping variables: septic_patients %>% group_by(gender) %>% - freq(mo_genus(mo))
  • + freq(mo_genus(mo))
  • Header info is now available as a list, with the header function

  • The argument header is now set to TRUE at default, even for markdown

  • @@ -1632,7 +1634,7 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/
  • Using portion_* functions now throws a warning when total available isolate is below argument minimum

  • Functions as.mo, as.rsi, as.mic, as.atc and freq will not set package name as attribute anymore

  • -

    Frequency tables - freq():

    +

    Frequency tables - freq():

    • Support for grouping variables, test with:

      @@ -1640,14 +1642,14 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/ septic_patients %>% group_by(hospital_id) %>% - freq(gender)
  • + freq(gender)
  • Support for (un)selecting columns:

     
     septic_patients %>% 
    -  freq(hospital_id) %>% 
    +  freq(hospital_id) %>% 
       select(-count, -cum_count) # only get item, percent, cum_percent
  • Check for hms::is.hms

  • @@ -1665,7 +1667,7 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/
  • Removed diacritics from all authors (columns microorganisms$ref and microorganisms.old$ref) to comply with CRAN policy to only allow ASCII characters

  • Fix for mo_property not working properly

  • Fix for eucast_rules where some Streptococci would become ceftazidime R in EUCAST rule 4.5

  • -
  • Support for named vectors of class mo, useful for top_freq()

  • +
  • Support for named vectors of class mo, useful for top_freq()

  • ggplot_rsi and scale_y_percent have breaks argument

  • AI improvements for as.mo:

    @@ -1833,13 +1835,13 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/
     
     my_matrix = with(septic_patients, matrix(c(age, gender), ncol = 2))
    -freq(my_matrix)
    +freq(my_matrix)
  • For lists, subsetting is possible:

     
     my_list = list(age = septic_patients$age, gender = septic_patients$gender)
    -my_list %>% freq(age)
    -my_list %>% freq(gender)
    +my_list %>% freq(age) +my_list %>% freq(gender)
    @@ -1913,13 +1915,13 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/
    • A vignette to explain its usage
    • Support for rsi (antimicrobial resistance) to use as input
    • -
    • Support for table to use as input: freq(table(x, y)) +
    • Support for table to use as input: freq(table(x, y))
    • Support for existing functions hist and plot to use a frequency table as input: hist(freq(df$age))
    • Support for as.vector, as.data.frame, as_tibble and format
    • -
    • Support for quasiquotation: freq(mydata, mycolumn) is the same as mydata %>% freq(mycolumn) +
    • Support for quasiquotation: freq(mydata, mycolumn) is the same as mydata %>% freq(mycolumn)
    • Function top_freq to return the top/below n items as vector
    • Header of frequency tables now also shows Mean Absolute Deviation (MAD) and Interquartile Range (IQR)
    • diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index 77c9a852..6e8c2411 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -12,7 +12,7 @@ articles: datasets: datasets.html resistance_predict: resistance_predict.html welcome_to_AMR: welcome_to_AMR.html -last_built: 2021-02-25T09:21Z +last_built: 2021-02-25T11:30Z urls: reference: https://msberends.github.io/AMR//reference article: https://msberends.github.io/AMR//articles diff --git a/docs/reference/ggplot_rsi.html b/docs/reference/ggplot_rsi.html index 88a8daa4..22a4efab 100644 --- a/docs/reference/ggplot_rsi.html +++ b/docs/reference/ggplot_rsi.html @@ -82,7 +82,7 @@ AMR (for R) - 1.5.0.9025 + 1.5.0.9026
    @@ -256,8 +256,8 @@ minimum = 30, language = get_locale(), nrow = NULL, - colours = c(S = "#61a8ff", SI = "#61a8ff", I = "#61f7ff", IR = "#ff6961", R = - "#ff6961"), + colours = c(S = "#3CAEA3", SI = "#3CAEA3", I = "#F6D55C", IR = "#ED553B", R = + "#ED553B"), datalabels = TRUE, datalabels.size = 2.5, datalabels.colour = "grey15", @@ -362,7 +362,7 @@ colours -

    a named vector with colours for the bars. The names must be one or more of: S, SI, I, IR, R or be FALSE for standard ggplot2 colours. The default colours are colour-blind friendly.

    +

    a named vector with colours for the bars. The names must be one or more of: S, SI, I, IR, R or be FALSE for standard ggplot2 colours. The default colours are colour-blind friendly, while maintaining the convention that e.g. 'susceptible' should be green and 'resistant' should be red.

    datalabels @@ -484,11 +484,6 @@ The lifecycle of this function is maturing< CIP) %>% ggplot_rsi(x = "age_group") -# for colourblind mode, use divergent colours from the viridis package: -example_isolates %>% - select(AMX, NIT, FOS, TMP, CIP) %>% - ggplot_rsi() + - scale_fill_viridis_d() # a shorter version which also adjusts data label colours: example_isolates %>% select(AMX, NIT, FOS, TMP, CIP) %>% diff --git a/docs/reference/index.html b/docs/reference/index.html index 973a9272..1009a227 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -81,7 +81,7 @@ AMR (for R) - 1.5.0.9025 + 1.5.0.9026
    diff --git a/docs/reference/plot.html b/docs/reference/plot.html index b7a1516b..f5a0f234 100644 --- a/docs/reference/plot.html +++ b/docs/reference/plot.html @@ -82,7 +82,7 @@ AMR (for R) - 1.5.0.9025 + 1.5.0.9026
    @@ -313,7 +313,7 @@ - + @@ -359,7 +359,8 @@

    The ggplot functions return a ggplot model that is extendible with any ggplot2 function.

    Details

    -

    For interpreting MIC values as well as disk diffusion diameters, supported guidelines to be used as input for the guideline argument are: "EUCAST 2021", "EUCAST 2020", "EUCAST 2019", "EUCAST 2018", "EUCAST 2017", "EUCAST 2016", "EUCAST 2015", "EUCAST 2014", "EUCAST 2013", "EUCAST 2012", "EUCAST 2011", "CLSI 2019", "CLSI 2018", "CLSI 2017", "CLSI 2016", "CLSI 2015", "CLSI 2014", "CLSI 2013", "CLSI 2012", "CLSI 2011" and "CLSI 2010".

    +

    The interpretation of "I" will be named "Increased exposure" for all EUCAST guidelines since 2019, and will be named "Intermediate" in all other cases.

    +

    For interpreting MIC values as well as disk diffusion diameters, supported guidelines to be used as input for the guideline argument are: "EUCAST 2021", "EUCAST 2020", "EUCAST 2019", "EUCAST 2018", "EUCAST 2017", "EUCAST 2016", "EUCAST 2015", "EUCAST 2014", "EUCAST 2013", "EUCAST 2012", "EUCAST 2011", "CLSI 2019", "CLSI 2018", "CLSI 2017", "CLSI 2016", "CLSI 2015", "CLSI 2014", "CLSI 2013", "CLSI 2012", "CLSI 2011" and "CLSI 2010".

    Simply using "CLSI" or "EUCAST" as input will automatically select the latest version of that guideline.

    Stable Lifecycle

    diff --git a/docs/survey.html b/docs/survey.html index 5ec1ebb2..87e4af3a 100644 --- a/docs/survey.html +++ b/docs/survey.html @@ -81,7 +81,7 @@ AMR (for R) - 1.5.0.9025 + 1.5.0.9026 diff --git a/man/ggplot_rsi.Rd b/man/ggplot_rsi.Rd index 4eb27701..1cc70659 100644 --- a/man/ggplot_rsi.Rd +++ b/man/ggplot_rsi.Rd @@ -24,8 +24,8 @@ ggplot_rsi( minimum = 30, language = get_locale(), nrow = NULL, - colours = c(S = "#61a8ff", SI = "#61a8ff", I = "#61f7ff", IR = "#ff6961", R = - "#ff6961"), + colours = c(S = "#3CAEA3", SI = "#3CAEA3", I = "#F6D55C", IR = "#ED553B", R = + "#ED553B"), datalabels = TRUE, datalabels.size = 2.5, datalabels.colour = "grey15", @@ -100,7 +100,7 @@ labels_rsi_count( \item{nrow}{(when using \code{facet}) number of rows} -\item{colours}{a named vector with colours for the bars. The names must be one or more of: S, SI, I, IR, R or be \code{FALSE} for standard \link[ggplot2:ggplot]{ggplot2} colours. The default colours are colour-blind friendly.} +\item{colours}{a named vector with colours for the bars. The names must be one or more of: S, SI, I, IR, R or be \code{FALSE} for standard \link[ggplot2:ggplot]{ggplot2} colours. The default colours are colour-blind friendly, while maintaining the convention that e.g. 
'susceptible' should be green and 'resistant' should be red.} \item{datalabels}{show datalabels using \code{\link[=labels_rsi_count]{labels_rsi_count()}}} @@ -208,11 +208,6 @@ example_isolates \%>\% CIP) \%>\% ggplot_rsi(x = "age_group") -# for colourblind mode, use divergent colours from the viridis package: -example_isolates \%>\% - select(AMX, NIT, FOS, TMP, CIP) \%>\% - ggplot_rsi() + - scale_fill_viridis_d() # a shorter version which also adjusts data label colours: example_isolates \%>\% select(AMX, NIT, FOS, TMP, CIP) \%>\% diff --git a/man/plot.Rd b/man/plot.Rd index 11eedbdf..d8b95d10 100644 --- a/man/plot.Rd +++ b/man/plot.Rd @@ -72,7 +72,7 @@ ) } \arguments{ -\item{x}{MIC values created with \code{\link[=as.mic]{as.mic()}} or disk diffusion values created with \code{\link[=as.disk]{as.disk()}}} +\item{x, data}{MIC values created with \code{\link[=as.mic]{as.mic()}} or disk diffusion values created with \code{\link[=as.disk]{as.disk()}}} \item{main, title}{title of the plot} @@ -99,6 +99,8 @@ The \code{ggplot} functions return a \code{\link[ggplot2:ggplot]{ggplot}} model Functions to plot classes \code{rsi}, \code{mic} and \code{disk}, with support for base R and \code{ggplot2}. } \details{ +The interpretation of "I" will be named "Increased exposure" for all EUCAST guidelines since 2019, and will be named "Intermediate" in all other cases. + For interpreting MIC values as well as disk diffusion diameters, supported guidelines to be used as input for the \code{guideline} argument are: "EUCAST 2021", "EUCAST 2020", "EUCAST 2019", "EUCAST 2018", "EUCAST 2017", "EUCAST 2016", "EUCAST 2015", "EUCAST 2014", "EUCAST 2013", "EUCAST 2012", "EUCAST 2011", "CLSI 2019", "CLSI 2018", "CLSI 2017", "CLSI 2016", "CLSI 2015", "CLSI 2014", "CLSI 2013", "CLSI 2012", "CLSI 2011" and "CLSI 2010". Simply using \code{"CLSI"} or \code{"EUCAST"} as input will automatically select the latest version of that guideline. 
diff --git a/pkgdown/extra.css b/pkgdown/extra.css index 0ebc198f..a0e2a435 100644 --- a/pkgdown/extra.css +++ b/pkgdown/extra.css @@ -187,30 +187,30 @@ div[id^=last-updated] h2 { } /* tables, make them look like scientific ones */ -table { +.table { font-size: 90%; } -table * { +.table * { vertical-align: middle !important; } -table td { +.table td { padding: 4px !important; } -thead { +.table thead { border-top: 2px solid black; border-bottom: 2px solid black; } -thead ~ tbody { +.table thead ~ tbody { /* only when it has a header */ border-bottom: 2px solid black; } -thead th { +.table thead th { text-align: inherit; } -table a:not(.btn), .table a:not(.btn) { +.table a:not(.btn) { text-decoration: inherit; } -table a:not(.btn):hover, .table a:not(.btn):hover { +.table a:not(.btn):hover { text-decoration: underline; } diff --git a/vignettes/AMR.Rmd b/vignettes/AMR.Rmd index c95cca87..a9b03f2c 100755 --- a/vignettes/AMR.Rmd +++ b/vignettes/AMR.Rmd @@ -111,34 +111,26 @@ bacteria <- c("Escherichia coli", "Staphylococcus aureus", "Streptococcus pneumoniae", "Klebsiella pneumoniae") ``` -## Other variables -For completeness, we can also add the hospital where the patients was admitted and we need to define valid antibmicrobial results for our randomisation: - -```{r create other} -hospitals <- c("Hospital A", "Hospital B", "Hospital C", "Hospital D") -ab_interpretations <- c("S", "I", "R") -``` - ## Put everything together -Using the `sample()` function, we can randomly select items from all objects we defined earlier. To let our fake data reflect reality a bit, we will also approximately define the probabilities of bacteria and the antibiotic results with the `prob` parameter. +Using the `sample()` function, we can randomly select items from all objects we defined earlier. To let our fake data reflect reality a bit, we will also approximately define the probabilities of bacteria and the antibiotic results, using the `random_rsi()` function. 
```{r merge data} sample_size <- 20000 data <- data.frame(date = sample(dates, size = sample_size, replace = TRUE), patient_id = sample(patients, size = sample_size, replace = TRUE), - hospital = sample(hospitals, size = sample_size, replace = TRUE, + hospital = sample(c("Hospital A", + "Hospital B", + "Hospital C", + "Hospital D"), + size = sample_size, replace = TRUE, prob = c(0.30, 0.35, 0.15, 0.20)), bacteria = sample(bacteria, size = sample_size, replace = TRUE, prob = c(0.50, 0.25, 0.15, 0.10)), - AMX = sample(ab_interpretations, size = sample_size, replace = TRUE, - prob = c(0.60, 0.05, 0.35)), - AMC = sample(ab_interpretations, size = sample_size, replace = TRUE, - prob = c(0.75, 0.10, 0.15)), - CIP = sample(ab_interpretations, size = sample_size, replace = TRUE, - prob = c(0.80, 0.00, 0.20)), - GEN = sample(ab_interpretations, size = sample_size, replace = TRUE, - prob = c(0.92, 0.00, 0.08))) + AMX = random_rsi(sample_size, prob_RSI = c(0.35, 0.60, 0.05)), + AMC = random_rsi(sample_size, prob_RSI = c(0.15, 0.75, 0.10)), + CIP = random_rsi(sample_size, prob_RSI = c(0.20, 0.80, 0.00)), + GEN = random_rsi(sample_size, prob_RSI = c(0.08, 0.92, 0.00))) ``` Using the `left_join()` function from the `dplyr` package, we can 'map' the gender to the patient ID using the `patients_table` object we created earlier: @@ -443,6 +435,7 @@ data_1st %>% ``` ## Plots + To show results in plots, most R users would nowadays use the `ggplot2` package. This package lets you create plots in layers. You can read more about it [on their website](https://ggplot2.tidyverse.org/). 
A quick example would look like these syntaxes: ```{r plot 2, eval = FALSE} @@ -480,7 +473,7 @@ ggplot(data_1st %>% group_by(genus)) + geom_rsi(x = "genus") + # split plots on antibiotic facet_rsi(facet = "antibiotic") + - # set colours to the R/SI interpretations + # set colours to the R/SI interpretations (colour-blind friendly) scale_rsi_colours() + # show percentages on y axis scale_y_percent(breaks = 0:4 * 25) + @@ -506,6 +499,65 @@ data_1st %>% coord_flip() ``` +### Plotting MIC and disk diffusion values + +The AMR package also extends the `plot()` and `ggplot()` functions for plotting minimum inhibitory concentrations (MIC, created with `as.mic()`) and disk diffusion diameters (created with `as.disk()`). + +With the `random_mic()` and `random_disk()` functions, we can generate sampled values for the new data types (S3 classes) `` and ``: + +```{r, results='markup'} +mic_values <- random_mic(size = 100) +mic_values +``` + +```{r} +# base R: +plot(mic_values) +# ggplot2: +ggplot(mic_values) +``` + +But we could also be more specific, by generating MICs that are likely to be found in *E. coli* for ciprofloxacin: + +```{r, results = 'markup', message = FALSE, warning = FALSE} +# this will generate MICs that are likely to be found in E. coli for ciprofloxacin: +mic_values_specific <- random_mic(size = 100, mo = "E. coli", ab = "cipro") +``` + +For the `plot()` and `ggplot()` function, we can define the microorganism and an antimicrobial agent the same way. This will add the interpretation of those values according to a chosen guidelines (defaults to the latest EUCAST guideline). + +Default colours are colour-blind friendly, while maintaining the convention that e.g. 'susceptible' should be green and 'resistant' should be red: + +```{r, message = FALSE, warning = FALSE} +# base R: +plot(mic_values_specific, mo = "E. coli", ab = "cipro") +# ggplot2: +ggplot(mic_values_specific, mo = "E. 
coli", ab = "cipro") +``` + +For disk diffusion values, there is not much of a difference in plotting: + +```{r, results = 'markup'} +# this will generate disks that are likely to be found in E. coli for ciprofloxacin: +disk_values_specific <- random_disk(size = 100, mo = "E. coli", ab = "cipro") +disk_values_specific +``` + +```{r, message = FALSE, warning = FALSE} +# base R: +plot(disk_values_specific, mo = "E. coli", ab = "cipro") +``` + +And when using the `ggplot2` package, but now choosing the latest implemented CLSI guideline (notice that the EUCAST-specific term "Incr. exposure" has changed to "Intermediate"): + +```{r, message = FALSE, warning = FALSE} +# and ggplot2, but now choosing an old CLSI guideline: +ggplot(disk_values_specific, + mo = "E. coli", + ab = "cipro", + guideline = "CLSI") +``` + ## Independence test The next example uses the `example_isolates` data set. This is a data set included with this package and contains 2,000 microbial isolates with their full antibiograms. It reflects reality and can be used to practice AMR data analysis.
    xx, data

    MIC values created with as.mic() or disk diffusion values created with as.disk()