diff --git a/DESCRIPTION b/DESCRIPTION index d98e45ab..dcb6027d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR Version: 0.5.0.9017 -Date: 2019-02-10 +Date: 2019-02-11 Title: Antimicrobial Resistance Analysis Authors@R: c( person( diff --git a/NAMESPACE b/NAMESPACE index 79cac6fa..4a7ba715 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -253,6 +253,7 @@ importFrom(graphics,axis) importFrom(graphics,barplot) importFrom(graphics,hist) importFrom(graphics,plot) +importFrom(graphics,points) importFrom(graphics,text) importFrom(hms,is.hms) importFrom(knitr,kable) diff --git a/R/resistance_predict.R b/R/resistance_predict.R index 2380b533..55e60d92 100755 --- a/R/resistance_predict.R +++ b/R/resistance_predict.R @@ -35,6 +35,7 @@ #' @param preserve_measurements a logical to indicate whether predictions of years that are actually available in the data should be overwritten by the original data. The standard errors of those years will be \code{NA}. #' @param info a logical to indicate whether textual analysis should be printed with the name and \code{\link{summary}} of the statistical model. #' @param main title of the plot +#' @param ribbon a logical to indicate whether a ribbon should be shown (default) or error bars #' @details Valid options for the statistical model are: #' \itemize{ #' \item{\code{"binomial"} or \code{"binom"} or \code{"logit"}: a generalised linear regression model with binomial distribution} @@ -51,6 +52,7 @@ #' \item{\code{observed}, the original observed resistant percentages} #' \item{\code{estimated}, the estimated resistant percentages, calculated by the model} #' } +#' Furthermore, the model itself is available as an attribute: \code{attributes(x)$model}, see Examples. 
#' @seealso The \code{\link{portion}} function to calculate resistance, \cr \code{\link{lm}} \code{\link{glm}} #' @rdname resistance_predict #' @export @@ -71,7 +73,12 @@ #' plot(x) #' #' -#' # create nice plots with ggplot yourself +#' # get the model from the object +#' mymodel <- attributes(x)$model +#' summary(mymodel) +#' +#' +#' # create nice plots with ggplot2 yourself #' if (!require(ggplot2)) { #' #' data <- septic_patients %>% @@ -295,8 +302,8 @@ rsi_predict <- resistance_predict #' @exportMethod plot.mic #' @export -#' @importFrom dplyr %>% group_by summarise -#' @importFrom graphics plot axis arrows +#' @importFrom dplyr filter +#' @importFrom graphics plot axis arrows points #' @rdname resistance_predict plot.resistance_predict <- function(x, main = paste("Resistance prediction of", attributes(x)$ab), ...) { if (attributes(x)$I_as_R == TRUE) { @@ -316,18 +323,30 @@ plot.resistance_predict <- function(x, main = paste("Resistance prediction of", ", model: ", attributes(x)$model_title, ")"), cex.sub = 0.75) + axis(side = 2, at = seq(0, 1, 0.1), labels = paste0(0:10 * 10, "%")) - # arrows hack: https://stackoverflow.com/a/22037078/4575331 + # hack for error bars: https://stackoverflow.com/a/22037078/4575331 arrows(x0 = x$year, y0 = x$se_min, x1 = x$year, - y1 = x$se_max, length = 0.05, angle = 90, code = 3) + y1 = x$se_max, + length = 0.05, angle = 90, code = 3, lwd = 1.5) + + # overlay grey points for prediction + points(x = filter(x, is.na(observations))$year, + y = filter(x, is.na(observations))$value, + pch = 19, + col = "grey40") } #' @rdname resistance_predict +#' @importFrom dplyr filter #' @export -ggplot_rsi_predict <- function(x, main = paste("Resistance prediction of", attributes(x)$ab), ...) { +ggplot_rsi_predict <- function(x, + main = paste("Resistance prediction of", attributes(x)$ab), + ribbon = TRUE, + ...) 
{ if (!"resistance_predict" %in% class(x)) { stop("`x` must be a resistance prediction model created with resistance_predict().") @@ -338,15 +357,26 @@ ggplot_rsi_predict <- function(x, main = paste("Resistance prediction of", attri } else { ylab <- "%R" } - suppressWarnings( - ggplot2::ggplot(x, ggplot2::aes(x = year, y = value)) + - ggplot2::geom_point(size = 2) + - ggplot2::geom_errorbar(ggplot2::aes(ymin = se_min, ymax = se_max), na.rm = TRUE, width = 0.5) + - scale_y_percent(limits = c(0, 1)) + - ggplot2::labs(title = main, - y = paste0("Percentage (", ylab, ")"), - x = "Year", - caption = paste0("(n = ", sum(x$observations, na.rm = TRUE), - ", model: ", attributes(x)$model_title, ")")) - ) + + p <- ggplot2::ggplot(x, ggplot2::aes(x = year, y = value)) + + ggplot2::geom_point(data = filter(x, !is.na(observations)), + size = 2) + + scale_y_percent(limits = c(0, 1)) + + ggplot2::labs(title = main, + y = paste0("Percentage (", ylab, ")"), + x = "Year", + caption = paste0("(n = ", sum(x$observations, na.rm = TRUE), + ", model: ", attributes(x)$model_title, ")")) + + if (ribbon == TRUE) { + p <- p + ggplot2::geom_ribbon(ggplot2::aes(ymin = se_min, ymax = se_max), alpha = 0.25) + } else { + p <- p + ggplot2::geom_errorbar(ggplot2::aes(ymin = se_min, ymax = se_max), na.rm = TRUE, width = 0.5) + } + p <- p + + # overlay grey points for prediction + ggplot2::geom_point(data = filter(x, is.na(observations)), + size = 2, + colour = "grey40") + p } diff --git a/README.md b/README.md index c90a692b..8c69270c 100755 --- a/README.md +++ b/README.md @@ -23,11 +23,11 @@ Bhanu N.M. Sinha - - - - + + + + + ## How to get this package All stable versions of this package [are published on CRAN](https://CRAN.R-project.org/package=AMR), the official R network with a peer-reviewed submission process. 
diff --git a/_pkgdown.yml b/_pkgdown.yml index 01e534e4..1f73ca9e 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -37,7 +37,7 @@ navbar: href: 'articles/AMR.html' - text: 'Predict antimicrobial resistance' icon: 'fa-dice' - href: 'articles/Predict.html' + href: 'articles/resistance_predict.html' - text: 'Work with WHONET data' icon: 'fa-globe-americas' href: 'articles/WHONET.html' @@ -46,10 +46,12 @@ navbar: href: 'articles/EUCAST.html' - text: 'Get properties of a microorganism' icon: 'fa-bug' - href: 'articles/mo_property.html' + # href: 'articles/mo_property.html' + href: 'reference/mo_property.html' - text: 'Get properties of an antibiotic' icon: 'fa-capsules' - href: 'articles/ab_property.html' + # href: 'articles/atc_property.html' + href: 'reference/atc_property.html' - text: 'Create frequency tables' icon: 'fa-sort-amount-down' href: 'articles/freq.html' diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index d7f04512..b66d2788 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -108,7 +108,7 @@
  • - + Predict antimicrobial resistance @@ -129,14 +129,14 @@
  • - + Get properties of a microorganism
  • - + Get properties of an antibiotic diff --git a/docs/articles/AMR.html b/docs/articles/AMR.html index 66955ce3..0fcd39d7 100644 --- a/docs/articles/AMR.html +++ b/docs/articles/AMR.html @@ -70,7 +70,7 @@
  • - + Predict antimicrobial resistance @@ -91,14 +91,14 @@
  • - + Get properties of a microorganism
  • - + Get properties of an antibiotic @@ -185,7 +185,7 @@

    How to conduct AMR analysis

    Matthijs S. Berends

    -

    09 February 2019

    +

    11 February 2019

    @@ -194,7 +194,7 @@ -

    Note: values on this page will change with every website update since they are based on randomly created values and the page was written in RMarkdown. However, the methodology remains unchanged. This page was generated on 09 February 2019.

    +

    Note: values on this page will change with every website update since they are based on randomly created values and the page was written in RMarkdown. However, the methodology remains unchanged. This page was generated on 11 February 2019.

    Introduction

    @@ -210,21 +210,21 @@ -2019-02-09 +2019-02-11 abcd Escherichia coli S S -2019-02-09 +2019-02-11 abcd Escherichia coli S R -2019-02-09 +2019-02-11 efgh Escherichia coli R @@ -237,12 +237,12 @@ Needed R packages

    As with many uses in R, we need some additional packages for AMR analysis. Our package works closely together with the tidyverse packages dplyr and ggplot2 by Dr Hadley Wickham. The tidyverse tremendously improves the way we conduct data science - it allows for a very natural way of writing syntaxes and creating beautiful plots in R.

    Our AMR package depends on these packages and even extends their use and functions.

    - +
    @@ -254,51 +254,58 @@

    Patients

    To start with patients, we need a unique list of patients.

    -
    patients <- unlist(lapply(LETTERS, paste0, 1:10))
    +
    patients <- unlist(lapply(LETTERS, paste0, 1:10))

    The LETTERS object is available in R - it’s a vector with 26 characters: A to Z. The patients object we just created is now a vector of length 260, with values (patient IDs) varying from A1 to Z10. Now we also set the gender of our patients, by putting the ID and the gender in a table:

    -
    patients_table <- data.frame(patient_id = patients,
    -                             gender = c(rep("M", 135),
    -                                        rep("F", 125)))
    +
    patients_table <- data.frame(patient_id = patients,
    +                             gender = c(rep("M", 135),
    +                                        rep("F", 125)))

    The first 135 patient IDs are now male, the other 125 are female.

    Dates

    Let’s pretend that our data consists of blood cultures isolates from 1 January 2010 until 1 January 2018.

    -
    dates <- seq(as.Date("2010-01-01"), as.Date("2018-01-01"), by = "day")
    +
    dates <- seq(as.Date("2010-01-01"), as.Date("2018-01-01"), by = "day")

    This dates object now contains all days in our date range.

    Microorganisms

    For this tutorial, we will use four different microorganisms: Escherichia coli, Staphylococcus aureus, Streptococcus pneumoniae, and Klebsiella pneumoniae:

    -
    bacteria <- c("Escherichia coli", "Staphylococcus aureus",
    -              "Streptococcus pneumoniae", "Klebsiella pneumoniae")
    +
    bacteria <- c("Escherichia coli", "Staphylococcus aureus",
    +              "Streptococcus pneumoniae", "Klebsiella pneumoniae")

    Other variables

    For completeness, we can also add the hospital where the patient was admitted, and we need to define valid antimicrobial results for our randomisation:

    -
    hospitals <- c("Hospital A", "Hospital B", "Hospital C", "Hospital D")
    -ab_interpretations <- c("S", "I", "R")
    +
    hospitals <- c("Hospital A", "Hospital B", "Hospital C", "Hospital D")
    +ab_interpretations <- c("S", "I", "R")

    Put everything together

    -

    Using the sample() function, we can randomly select items from all objects we defined earlier. To let our fake data reflect reality a bit, we will also approximately define the probabilities of bacteria and the antibiotic results with the prob parameter.

    -
    data <- data.frame(date = sample(dates, 5000, replace = TRUE),
    -                   patient_id = sample(patients, 5000, replace = TRUE),
    -                   hospital = sample(hospitals, 5000, replace = TRUE, prob = c(0.30, 0.35, 0.15, 0.20)),
    -                   bacteria = sample(bacteria, 5000, replace = TRUE, prob = c(0.50, 0.25, 0.15, 0.10)),
    -                   amox = sample(ab_interpretations, 5000, replace = TRUE, prob = c(0.60, 0.05, 0.35)),
    -                   amcl = sample(ab_interpretations, 5000, replace = TRUE, prob = c(0.75, 0.10, 0.15)),
    -                   cipr = sample(ab_interpretations, 5000, replace = TRUE, prob = c(0.80, 0.00, 0.20)),
    -                   gent = sample(ab_interpretations, 5000, replace = TRUE, prob = c(0.92, 0.00, 0.08))
    -                   )
    -

    Using the left_join() function from the dplyr package, we can ‘map’ the gender to the patient ID using the patients_table object we created earlier:

    - +

    Using the sample() function, we can randomly select items from all objects we defined earlier. To let our fake data reflect reality a bit, we will also approximately define the probabilities of bacteria and the antibiotic results with the prob parameter.

    +
    sample_size <- 20000
    +data <- data.frame(date = sample(dates, size = sample_size, replace = TRUE),
    +                   patient_id = sample(patients, size = sample_size, replace = TRUE),
    +                   hospital = sample(hospitals, size = sample_size, replace = TRUE,
    +                                     prob = c(0.30, 0.35, 0.15, 0.20)),
    +                   bacteria = sample(bacteria, size = sample_size, replace = TRUE,
    +                                     prob = c(0.50, 0.25, 0.15, 0.10)),
    +                   amox = sample(ab_interpretations, size = sample_size, replace = TRUE,
    +                                 prob = c(0.60, 0.05, 0.35)),
    +                   amcl = sample(ab_interpretations, size = sample_size, replace = TRUE,
    +                                 prob = c(0.75, 0.10, 0.15)),
    +                   cipr = sample(ab_interpretations, size = sample_size, replace = TRUE,
    +                                 prob = c(0.80, 0.00, 0.20)),
    +                   gent = sample(ab_interpretations, size = sample_size, replace = TRUE,
    +                                 prob = c(0.92, 0.00, 0.08))
    +                   )
    +

    Using the left_join() function from the dplyr package, we can ‘map’ the gender to the patient ID using the patients_table object we created earlier:

    +

    The resulting data set contains 20,000 blood culture isolates. With the head() function we can preview the first 6 values of this data set:

    -
    head(data)
    +
    head(data)
    @@ -313,41 +320,63 @@ - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - + + @@ -356,28 +385,6 @@ - - - - - - - - - - - - - - - - - - - - - -
    date
    2012-10-05F32013-12-28M2Hospital BStreptococcus pneumoniaeRSSSM
    2017-03-17Z5Hospital BEscherichia coliRSSSF
    2011-05-13G5Hospital BStreptococcus pneumoniaeSSSSM
    2011-03-06X3Hospital DEscherichia coliSSSSF
    2016-11-01C9 Hospital D Escherichia coli R SSSM
    2012-11-18X7Hospital AEscherichia coliSSSSF
    2012-04-27K2Hospital BEscherichia coliRS R S M
    2014-09-03M62010-12-16E2 Hospital A Streptococcus pneumoniae SS M
    2015-09-13R4Hospital AEscherichia coliSSSSF
    2011-09-19D6Hospital BStreptococcus pneumoniaeSISSM

    Now, let’s start the cleaning and the analysis!

    @@ -387,78 +394,78 @@

    Cleaning the data

    Use the frequency table function freq() to look specifically for unique values in any variable. For example, for the gender variable:

    -
    data %>% freq(gender) # this would be the same: freq(data$gender)
    -
    # Frequency table of `gender` from a data.frame (5,000 x 9) 
    -# Class:   factor (numeric)
    -# Levels:  F, M
    -# Length:  5,000 (of which NA: 0 = 0.00%)
    -# Unique:  2
    -# 
    -#      Item    Count   Percent   Cum. Count   Cum. Percent
    -# ---  -----  ------  --------  -----------  -------------
    -# 1    M       2,565     51.3%        2,565          51.3%
    -# 2    F       2,435     48.7%        5,000         100.0%
    +
    data %>% freq(gender) # this would be the same: freq(data$gender)
    +
    #> Frequency table of `gender` from a data.frame (20,000 x 9) 
    +#> Class:   factor (numeric)
    +#> Levels:  F, M
    +#> Length:  20,000 (of which NA: 0 = 0.00%)
    +#> Unique:  2
    +#> 
    +#>      Item     Count   Percent   Cum. Count   Cum. Percent
    +#> ---  -----  -------  --------  -----------  -------------
    +#> 1    M       10,390     52.0%       10,390          52.0%
    +#> 2    F        9,610     48.1%       20,000         100.0%

    So, we can draw at least two conclusions immediately. From a data scientist perspective, the data looks clean: only values M and F. From a researcher perspective: there are slightly more men. Nothing we didn’t already know.

    -

    The data is already quite clean, but we still need to transform some variables. The bacteria column now consists of text, and we want to add more variables based on microbial IDs later on. So, we will transform this column to valid IDs. The mutate() function of the dplyr package makes this really easy:

    -
    data <- data %>%
    -  mutate(bacteria = as.mo(bacteria))
    -

    We also want to transform the antibiotics, because in real life data we don’t know if they are really clean. The as.rsi() function ensures reliability and reproducibility in these kind of variables. The mutate_at() will run the as.rsi() function on defined variables:

    -
    data <- data %>%
    -  mutate_at(vars(amox:gent), as.rsi)
    +

    The data is already quite clean, but we still need to transform some variables. The bacteria column now consists of text, and we want to add more variables based on microbial IDs later on. So, we will transform this column to valid IDs. The mutate() function of the dplyr package makes this really easy:

    +
    data <- data %>%
    +  mutate(bacteria = as.mo(bacteria))
    +

    We also want to transform the antibiotics, because in real life data we don’t know if they are really clean. The as.rsi() function ensures reliability and reproducibility in these kinds of variables. The mutate_at() function will run the as.rsi() function on defined variables:

    +
    data <- data %>%
    +  mutate_at(vars(amox:gent), as.rsi)

    Finally, we will apply EUCAST rules on our antimicrobial results. In Europe, most medical microbiological laboratories already apply these rules. Our package features their latest insights on intrinsic resistance and exceptional phenotypes. Moreover, the eucast_rules() function can also apply additional rules, like forcing ampicillin = R when amoxicillin/clavulanic acid = R.

    Because the amoxicillin (column amox) and amoxicillin/clavulanic acid (column amcl) in our data were generated randomly, some rows will undoubtedly contain amox = S and amcl = R, which is technically impossible. The eucast_rules() function fixes this:

    -
    data <- eucast_rules(data, col_mo = "bacteria")
    -# 
    -# Rules by the European Committee on Antimicrobial Susceptibility Testing (EUCAST)
    -# 
    -# EUCAST Clinical Breakpoints (v9.0, 2019)
    -# Enterobacteriales (Order) (no changes)
    -# Staphylococcus (no changes)
    -# Enterococcus (no changes)
    -# Streptococcus groups A, B, C, G (no changes)
    -# Streptococcus pneumoniae (no changes)
    -# Viridans group streptococci (no changes)
    -# Haemophilus influenzae (no changes)
    -# Moraxella catarrhalis (no changes)
    -# Anaerobic Gram positives (no changes)
    -# Anaerobic Gram negatives (no changes)
    -# Pasteurella multocida (no changes)
    -# Campylobacter jejuni and C. coli (no changes)
    -# Aerococcus sanguinicola and A. urinae (no changes)
    -# Kingella kingae (no changes)
    -# 
    -# EUCAST Expert Rules, Intrinsic Resistance and Exceptional Phenotypes (v3.1, 2016)
    -# Table 1:  Intrinsic resistance in Enterobacteriaceae (306 changes)
    -# Table 2:  Intrinsic resistance in non-fermentative Gram-negative bacteria (no changes)
    -# Table 3:  Intrinsic resistance in other Gram-negative bacteria (no changes)
    -# Table 4:  Intrinsic resistance in Gram-positive bacteria (681 changes)
    -# Table 8:  Interpretive rules for B-lactam agents and Gram-positive cocci (no changes)
    -# Table 9:  Interpretive rules for B-lactam agents and Gram-negative rods (no changes)
    -# Table 10: Interpretive rules for B-lactam agents and other Gram-negative bacteria (no changes)
    -# Table 11: Interpretive rules for macrolides, lincosamides, and streptogramins (no changes)
    -# Table 12: Interpretive rules for aminoglycosides (no changes)
    -# Table 13: Interpretive rules for quinolones (no changes)
    -# 
    -# Other rules
    -# Non-EUCAST: ampicillin = R where amoxicillin/clav acid = R (no changes)
    -# Non-EUCAST: piperacillin = R where piperacillin/tazobactam = R (no changes)
    -# Non-EUCAST: trimethoprim = R where trimethoprim/sulfa = R (no changes)
    -# Non-EUCAST: amoxicillin/clav acid = S where ampicillin = S (no changes)
    -# Non-EUCAST: piperacillin/tazobactam = S where piperacillin = S (no changes)
    -# Non-EUCAST: trimethoprim/sulfa = S where trimethoprim = S (no changes)
    -# 
    -# => EUCAST rules affected 1,796 out of 5,000 rows
    -#    -> added 0 test results
    -#    -> changed 987 test results (0 to S; 0 to I; 987 to R)
    +
    data <- eucast_rules(data, col_mo = "bacteria")
    +#> 
    +#> Rules by the European Committee on Antimicrobial Susceptibility Testing (EUCAST)
    +#> 
    +#> EUCAST Clinical Breakpoints (v9.0, 2019)
    +#> Enterobacteriales (Order) (no changes)
    +#> Staphylococcus (no changes)
    +#> Enterococcus (no changes)
    +#> Streptococcus groups A, B, C, G (no changes)
    +#> Streptococcus pneumoniae (no changes)
    +#> Viridans group streptococci (no changes)
    +#> Haemophilus influenzae (no changes)
    +#> Moraxella catarrhalis (no changes)
    +#> Anaerobic Gram positives (no changes)
    +#> Anaerobic Gram negatives (no changes)
    +#> Pasteurella multocida (no changes)
    +#> Campylobacter jejuni and C. coli (no changes)
    +#> Aerococcus sanguinicola and A. urinae (no changes)
    +#> Kingella kingae (no changes)
    +#> 
    +#> EUCAST Expert Rules, Intrinsic Resistance and Exceptional Phenotypes (v3.1, 2016)
    +#> Table 1:  Intrinsic resistance in Enterobacteriaceae (1288 changes)
    +#> Table 2:  Intrinsic resistance in non-fermentative Gram-negative bacteria (no changes)
    +#> Table 3:  Intrinsic resistance in other Gram-negative bacteria (no changes)
    +#> Table 4:  Intrinsic resistance in Gram-positive bacteria (2832 changes)
    +#> Table 8:  Interpretive rules for B-lactam agents and Gram-positive cocci (no changes)
    +#> Table 9:  Interpretive rules for B-lactam agents and Gram-negative rods (no changes)
    +#> Table 10: Interpretive rules for B-lactam agents and other Gram-negative bacteria (no changes)
    +#> Table 11: Interpretive rules for macrolides, lincosamides, and streptogramins (no changes)
    +#> Table 12: Interpretive rules for aminoglycosides (no changes)
    +#> Table 13: Interpretive rules for quinolones (no changes)
    +#> 
    +#> Other rules
    +#> Non-EUCAST: ampicillin = R where amoxicillin/clav acid = R (no changes)
    +#> Non-EUCAST: piperacillin = R where piperacillin/tazobactam = R (no changes)
    +#> Non-EUCAST: trimethoprim = R where trimethoprim/sulfa = R (no changes)
    +#> Non-EUCAST: amoxicillin/clav acid = S where ampicillin = S (no changes)
    +#> Non-EUCAST: piperacillin/tazobactam = S where piperacillin = S (no changes)
    +#> Non-EUCAST: trimethoprim/sulfa = S where trimethoprim = S (no changes)
    +#> 
    +#> => EUCAST rules affected 7,405 out of 20,000 rows
    +#>    -> added 0 test results
    +#>    -> changed 4,120 test results (0 to S; 0 to I; 4,120 to R)

    Adding new variables

    Now that we have the microbial ID, we can add some taxonomic properties:

    -
    data <- data %>% 
    -  mutate(gramstain = mo_gramstain(bacteria),
    -         genus = mo_genus(bacteria),
    -         species = mo_species(bacteria))
    +
    data <- data %>% 
    +  mutate(gramstain = mo_gramstain(bacteria),
    +         genus = mo_genus(bacteria),
    +         species = mo_species(bacteria))

    First isolates

    @@ -469,18 +476,18 @@

    (…) When preparing a cumulative antibiogram to guide clinical decisions about empirical antimicrobial therapy of initial infections, only the first isolate of a given species per patient, per analysis period (eg, one year) should be included, irrespective of body site, antimicrobial susceptibility profile, or other phenotypical characteristics (eg, biotype). The first isolate is easily identified, and cumulative antimicrobial susceptibility test data prepared using the first isolate are generally comparable to cumulative antimicrobial susceptibility test data calculated by other methods, providing duplicate isolates are excluded.
    M39-A4 Analysis and Presentation of Cumulative Antimicrobial Susceptibility Test Data, 4th Edition. CLSI, 2014. Chapter 6.4

    This AMR package includes this methodology with the first_isolate() function. It adopts the episode of a year (can be changed by user) and it starts counting days after every selected isolate. This new variable can easily be added to our data:

    - -

    So only 59% is suitable for resistance analysis! We can now filter on it with the filter() function, also from the dplyr package:

    - + +

    So only 28.2% is suitable for resistance analysis! We can now filter on it with the filter() function, also from the dplyr package:

    +

    For future use, the above two syntaxes can be shortened with the filter_first_isolate() function:

    - +

    @@ -501,43 +508,43 @@ 1 -2010-05-12 -B4 +2010-01-02 +F5 B_ESCHR_COL R -I +S S S TRUE 2 -2010-05-12 -B4 +2010-04-18 +F5 B_ESCHR_COL +S +I R S -R -R FALSE 3 -2010-07-18 -B4 +2010-05-05 +F5 B_ESCHR_COL R S -R +S S FALSE 4 -2011-01-18 -B4 +2010-11-03 +F5 B_ESCHR_COL -R +S S S S @@ -545,32 +552,32 @@ 5 -2011-11-20 -B4 +2010-12-16 +F5 B_ESCHR_COL S +S R S +FALSE + + +6 +2011-02-23 +F5 +B_ESCHR_COL +S +S +S S TRUE - -6 -2011-12-03 -B4 -B_ESCHR_COL -S -S -S -S -FALSE - 7 -2012-09-09 -B4 +2011-04-26 +F5 B_ESCHR_COL -R +S S S S @@ -578,19 +585,19 @@ 8 -2013-01-24 -B4 +2011-08-01 +F5 B_ESCHR_COL -S +R I S S -TRUE +FALSE 9 -2013-04-25 -B4 +2011-09-09 +F5 B_ESCHR_COL S S @@ -600,29 +607,29 @@ 10 -2014-02-01 -B4 +2011-09-11 +F5 B_ESCHR_COL S -S R S -TRUE +S +FALSE -

    Only 4 isolates are marked as ‘first’ according to CLSI guideline. But when reviewing the antibiogram, it is obvious that some isolates are absolutely different strains and should be included too. This is why we weigh isolates, based on their antibiogram. The key_antibiotics() function adds a vector with 18 key antibiotics: 6 broad spectrum ones, 6 small spectrum for Gram negatives and 6 small spectrum for Gram positives. These can be defined by the user.

    +

    Only 2 isolates are marked as ‘first’ according to the CLSI guideline. But when reviewing the antibiogram, it is obvious that some isolates are absolutely different strains and should be included too. This is why we weigh isolates, based on their antibiogram. The key_antibiotics() function adds a vector with 18 key antibiotics: 6 broad spectrum ones, 6 narrow spectrum for Gram negatives and 6 narrow spectrum for Gram positives. These can be defined by the user.

    If a column exists with a name like ‘key(…)ab’ the first_isolate() function will automatically use it and determine the first weighted isolates. Mind the NOTEs in the output below:

    - + @@ -639,11 +646,11 @@ - - + + - + @@ -651,34 +658,34 @@ - - + + + + - - - - + + - + - - + + - + @@ -687,88 +694,88 @@ - - + + + - - + - - + + - + - - + + - + - + - - + + - + - + - - + + - + - - + + - - + +
    isolate
    12010-05-12B42010-01-02F5 B_ESCHR_COL RIS S S TRUE
    22010-05-12B42010-04-18F5 B_ESCHR_COLSI R SRR FALSE TRUE
    32010-07-18B42010-05-05F5 B_ESCHR_COL R SRS S FALSE TRUE
    42011-01-18B42010-11-03F5 B_ESCHR_COLRS S S S
    52011-11-20B42010-12-16F5 B_ESCHR_COL SS R SSTRUEFALSE TRUE
    62011-12-03B42011-02-23F5 B_ESCHR_COL S S S SFALSETRUE TRUE
    72012-09-09B42011-04-26F5 B_ESCHR_COLRS S S S FALSETRUEFALSE
    82013-01-24B42011-08-01F5 B_ESCHR_COLSR I S STRUEFALSE TRUE
    92013-04-25B42011-09-09F5 B_ESCHR_COL S S S S FALSEFALSETRUE
    102014-02-01B42011-09-11F5 B_ESCHR_COL SS R STRUESFALSE TRUE
    -

    Instead of 4, now 9 isolates are flagged. In total, 88.1% of all isolates are marked ‘first weighted’ - 29.1% more than when using the CLSI guideline. In real life, this novel algorithm will yield 5-10% more isolates than the classic CLSI guideline.

    +

    Instead of 2, now 9 isolates are flagged. In total, 78.9% of all isolates are marked ‘first weighted’ - 50.7% more than when using the CLSI guideline. In real life, this novel algorithm will yield 5-10% more isolates than the classic CLSI guideline.

    As with filter_first_isolate(), there’s a shortcut for this new algorithm too:

    - -

    So we end up with 4,405 isolates for analysis.

    + +

    So we end up with 15,789 isolates for analysis.

    We can remove unneeded columns:

    - +

    Now our data looks like:

    -
    head(data_1st)
    +
    head(data_1st)
    @@ -788,13 +795,29 @@ - - - - - + + + + + + + + + + + + + + + + + + + + + @@ -803,27 +826,11 @@ - - - - - - - - - - - - - - - - - - - - + + + + @@ -837,15 +844,15 @@ - - - + + + + + - - - + @@ -853,12 +860,12 @@ - - - + + + - + @@ -869,15 +876,15 @@ - - + + - + @@ -897,12 +904,12 @@ Dispersion of species

    To just get an idea how the species are distributed, create a frequency table with our freq() function. We created the genus and species column earlier based on the microbial ID. With paste(), we can concatenate them together.

    The freq() function can be used like the base R language was intended:

    -
    freq(paste(data_1st$genus, data_1st$species))
    +
    freq(paste(data_1st$genus, data_1st$species))

    Or it can be used the dplyr way, which is easier to read:

    -
    data_1st %>% freq(genus, species)
    -

    Frequency table of genus and species from a data.frame (4,405 x 13)
    +

    data_1st %>% freq(genus, species)
    +

    Frequency table of genus and species from a data.frame (15,789 x 13)
    Columns: 2
    -Length: 4,405 (of which NA: 0 = 0.00%)
    +Length: 15,789 (of which NA: 0 = 0.00%)
    Unique: 4

    Shortest: 16
    Longest: 24

    @@ -919,33 +926,33 @@ Longest: 24

    - - - - + + + + - - - + + + - - - - + + + + - - - + + + @@ -955,12 +962,12 @@ Longest: 24

    Resistance percentages

    The functions portion_R, portion_RI, portion_I, portion_IS and portion_S can be used to determine the portion of a specific antimicrobial outcome. They can be used on their own:

    - -

    Or can be used in conjuction with group_by() and summarise(), both from the dplyr package:

    -
    data_1st %>% 
    -  group_by(hospital) %>% 
    -  summarise(amoxicillin = portion_IR(amox))
    + +

    Or they can be used in conjunction with group_by() and summarise(), both from the dplyr package:

    +
    data_1st %>% 
    +  group_by(hospital) %>% 
    +  summarise(amoxicillin = portion_IR(amox))
    22012-11-18X7Hospital AB_ESCHR_COL12013-12-28M2Hospital BB_STRPTC_PNER S SRMGram positiveStreptococcuspneumoniaeTRUE
    22017-03-17Z5Hospital BB_ESCHR_COLRS S S Fcoli TRUE
    32012-04-27K2Hospital BB_ESCHR_COLRSRSMGram negativeEscherichiacoliTRUE
    42014-09-03M6Hospital A32011-05-13G5Hospital B B_STRPTC_PNE S S
    52015-09-13R4Hospital A2016-11-01C9Hospital D B_ESCHR_COLR SR SSSFM Gram negative Escherichia coli
    62011-09-19D6Hospital B2010-12-16E2Hospital A B_STRPTC_PNE SIS S R M
    72013-01-13Q32013-06-03G1 Hospital A B_STRPTC_PNE R S S RFM Gram positive Streptococcus pneumoniae
    1 Escherichia coli2,16049.0%2,16049.0%7,78749.3%7,78749.3%
    2 Staphylococcus aureus1,10925.2%3,2693,92224.8%11,709 74.2%
    3 Streptococcus pneumoniae69015.7%3,95989.9%2,56716.3%14,27690.4%
    4 Klebsiella pneumoniae44610.1%4,4051,5139.6%15,789 100.0%
    @@ -969,27 +976,27 @@ Longest: 24

    - + - + - + - +
    hospital
    Hospital A0.47113160.4824505
    Hospital B0.49107140.4689469
    Hospital C0.45977010.4689858
    Hospital D0.44886980.4772799
    -

    Of course it would be very convenient to know the number of isolates responsible for the percentages. For that purpose the n_rsi() can be used, which works exactly like n_distinct() from the dplyr package. It counts all isolates available for every group (i.e. values S, I or R):

    -
    data_1st %>% 
    -  group_by(hospital) %>% 
    -  summarise(amoxicillin = portion_IR(amox),
    -            available = n_rsi(amox))
    +

    Of course it would be very convenient to know the number of isolates responsible for the percentages. For that purpose the n_rsi() function can be used, which works exactly like n_distinct() from the dplyr package. It counts all isolates available for every group (i.e. values S, I or R):

    +
    data_1st %>% 
    +  group_by(hospital) %>% 
    +  summarise(amoxicillin = portion_IR(amox),
    +            available = n_rsi(amox))
    @@ -999,32 +1006,32 @@ Longest: 24

    - - + + - - + + - - + + - - + +
    hospital
    Hospital A0.471131612990.48245054701
    Hospital B0.491071415680.46894695555
    Hospital C0.45977016090.46898582386
    Hospital D0.44886989290.47727993147

    These functions can also be used to get the portion of multiple antibiotics, to calculate co-resistance very easily:

    -
    data_1st %>% 
    -  group_by(genus) %>% 
    -  summarise(amoxicillin = portion_S(amcl),
    -            gentamicin = portion_S(gent),
    -            "amox + gent" = portion_S(amcl, gent))
    +
    data_1st %>% 
    +  group_by(genus) %>% 
    +  summarise(amoxicillin = portion_S(amcl),
    +            gentamicin = portion_S(gent),
    +            "amox + gent" = portion_S(amcl, gent))
    @@ -1035,94 +1042,94 @@ Longest: 24

    - - - + + + - - - + + + - - - + + + - + - +
    genus
    Escherichia0.74074070.90000000.97638890.73031980.90561190.9745730
    Klebsiella0.75336320.91479820.98430490.73430270.89557170.9643093
    Staphylococcus0.73038770.93056810.98196570.73049460.91968380.9821520
    Streptococcus0.71739130.7308142 0.00000000.71739130.7308142

    To make a transition to the next part, let’s see how this difference could be plotted:

    -
    data_1st %>% 
    -  group_by(genus) %>% 
    -  summarise("1. Amoxicillin" = portion_S(amcl),
    -            "2. Gentamicin" = portion_S(gent),
    -            "3. Amox + gent" = portion_S(amcl, gent)) %>% 
    -  tidyr::gather("Antibiotic", "S", -genus) %>%
    -  ggplot(aes(x = genus,
    -             y = S,
    -             fill = Antibiotic)) +
    -  geom_col(position = "dodge2")
    +
    data_1st %>% 
    +  group_by(genus) %>% 
    +  summarise("1. Amoxicillin" = portion_S(amcl),
    +            "2. Gentamicin" = portion_S(gent),
    +            "3. Amox + gent" = portion_S(amcl, gent)) %>% 
    +  tidyr::gather("Antibiotic", "S", -genus) %>%
    +  ggplot(aes(x = genus,
    +             y = S,
    +             fill = Antibiotic)) +
    +  geom_col(position = "dodge2")

    Plots

    To show results in plots, most R users would nowadays use the ggplot2 package. This package lets you create plots in layers. You can read more about it on their website. A quick example would look like these syntaxes:

    -
    ggplot(data = a_data_set,
    -       mapping = aes(x = year,
    -                     y = value)) +
    -  geom_col() +
    -  labs(title = "A title",
    -       subtitle = "A subtitle",
    -       x = "My X axis",
    -       y = "My Y axis")
    -
    -ggplot(a_data_set,
    -       aes(year, value) +
    -  geom_bar()
    +
    ggplot(data = a_data_set,
    +       mapping = aes(x = year,
    +                     y = value)) +
    +  geom_col() +
    +  labs(title = "A title",
    +       subtitle = "A subtitle",
    +       x = "My X axis",
    +       y = "My Y axis")
    +
    +ggplot(a_data_set,
    +       aes(year, value) +
    +  geom_bar()

    The AMR package contains functions to extend this ggplot2 package, for example geom_rsi(). It automatically transforms data with count_df() or portion_df() and shows results in stacked bars. Its simplest and shortest example:

    -
    ggplot(data_1st) +
    -  geom_rsi(translate_ab = FALSE)
    +
    ggplot(data_1st) +
    +  geom_rsi(translate_ab = FALSE)

    Omit the translate_ab = FALSE to have the antibiotic codes (amox, amcl, cipr, gent) translated to official WHO names (amoxicillin, amoxicillin and betalactamase inhibitor, ciprofloxacin, gentamicin).

    If we group on e.g. the genus column and add some additional functions from our package, we can create this:

    - +

    To simplify this, we also created the ggplot_rsi() function, which combines almost all above functions:

    - +

    @@ -1150,26 +1157,26 @@ Longest: 24

    We can transform the data and apply the test in only a couple of lines:

    -
    septic_patients %>%
    -  filter(hospital_id %in% c("A", "D")) %>% # filter on only hospitals A and D
    -  select(hospital_id, fosf) %>%            # select the hospitals and fosfomycin
    -  group_by(hospital_id) %>%                # group on the hospitals
    -  count_df(combine_IR = TRUE) %>%          # count all isolates per group (hospital_id)
    -  tidyr::spread(hospital_id, Value) %>%    # transform output so A and D are columns
    -  select(A, D) %>%                         # and select these only
    -  as.matrix() %>%                          # transform to good old matrix for fisher.test()
    -  fisher.test()                            # do Fisher's Exact Test
    -# 
    -#   Fisher's Exact Test for Count Data
    -# 
    -# data:  .
    -# p-value = 0.03104
    -# alternative hypothesis: true odds ratio is not equal to 1
    -# 95 percent confidence interval:
    -#  1.054283 4.735995
    -# sample estimates:
    -# odds ratio 
    -#   2.228006
    +
    septic_patients %>%
    +  filter(hospital_id %in% c("A", "D")) %>% # filter on only hospitals A and D
    +  select(hospital_id, fosf) %>%            # select the hospitals and fosfomycin
    +  group_by(hospital_id) %>%                # group on the hospitals
    +  count_df(combine_IR = TRUE) %>%          # count all isolates per group (hospital_id)
    +  tidyr::spread(hospital_id, Value) %>%    # transform output so A and D are columns
    +  select(A, D) %>%                         # and select these only
    +  as.matrix() %>%                          # transform to good old matrix for fisher.test()
    +  fisher.test()                            # do Fisher's Exact Test
    +#> 
    +#>  Fisher's Exact Test for Count Data
    +#> 
    +#> data:  .
    +#> p-value = 0.03104
    +#> alternative hypothesis: true odds ratio is not equal to 1
    +#> 95 percent confidence interval:
    +#>  1.054283 4.735995
    +#> sample estimates:
    +#> odds ratio 
    +#>   2.228006

    As can be seen, the p value is 0.03, which means that the fosfomycin resistance found in hospitals A and D differs significantly (at the 0.05 level).

    diff --git a/docs/articles/AMR_files/figure-html/plot 1-1.png b/docs/articles/AMR_files/figure-html/plot 1-1.png index ba111e0e..eef839e2 100644 Binary files a/docs/articles/AMR_files/figure-html/plot 1-1.png and b/docs/articles/AMR_files/figure-html/plot 1-1.png differ diff --git a/docs/articles/AMR_files/figure-html/plot 3-1.png b/docs/articles/AMR_files/figure-html/plot 3-1.png index 4302fee6..989e41bf 100644 Binary files a/docs/articles/AMR_files/figure-html/plot 3-1.png and b/docs/articles/AMR_files/figure-html/plot 3-1.png differ diff --git a/docs/articles/AMR_files/figure-html/plot 4-1.png b/docs/articles/AMR_files/figure-html/plot 4-1.png index f6efe027..4bda66c7 100644 Binary files a/docs/articles/AMR_files/figure-html/plot 4-1.png and b/docs/articles/AMR_files/figure-html/plot 4-1.png differ diff --git a/docs/articles/AMR_files/figure-html/plot 5-1.png b/docs/articles/AMR_files/figure-html/plot 5-1.png index 61634b97..995be60a 100644 Binary files a/docs/articles/AMR_files/figure-html/plot 5-1.png and b/docs/articles/AMR_files/figure-html/plot 5-1.png differ diff --git a/docs/articles/EUCAST.html b/docs/articles/EUCAST.html index 7cede9f4..2187de6e 100644 --- a/docs/articles/EUCAST.html +++ b/docs/articles/EUCAST.html @@ -70,7 +70,7 @@
  • - + Predict antimicrobial resistance @@ -91,14 +91,14 @@
  • - + Get properties of a microorganism
  • - + Get properties of an antibiotic @@ -185,7 +185,7 @@

    How to apply EUCAST rules

    Matthijs S. Berends

    -

    09 February 2019

    +

    11 February 2019

    diff --git a/docs/articles/atc_property.html b/docs/articles/atc_property.html new file mode 100644 index 00000000..80e89e8f --- /dev/null +++ b/docs/articles/atc_property.html @@ -0,0 +1,233 @@ + + + + + + + +How to get properties of an antibiotic • AMR (for R) + + + + + + + + + + + + + + + + + + + +
    +
    + + + +
    +
    + + + + +

    (will be available soon)

    +
    + + + +
    + + + +
    + + + + + diff --git a/docs/articles/index.html b/docs/articles/index.html index fcbf71f0..88a1f82d 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -108,7 +108,7 @@
  • - + Predict antimicrobial resistance @@ -129,14 +129,14 @@
  • - + Get properties of a microorganism
  • - + Get properties of an antibiotic @@ -232,12 +232,12 @@
  • How to conduct AMR analysis
  • How to apply EUCAST rules
  • How to use the *G*-test
  • -
  • How to predict antimicrobial resistance
  • How to work with WHONET data
  • -
  • How to get properties of an antibiotic
  • +
  • How to get properties of an antibiotic
  • Benchmarks
  • How to create frequency tables
  • How to get properties of a microorganism
  • +
  • How to predict antimicrobial resistance
  • diff --git a/docs/articles/mo_property.html b/docs/articles/mo_property.html index c9423a29..b6d725b1 100644 --- a/docs/articles/mo_property.html +++ b/docs/articles/mo_property.html @@ -70,7 +70,7 @@
  • - + Predict antimicrobial resistance @@ -91,14 +91,14 @@
  • - + Get properties of a microorganism
  • - + Get properties of an antibiotic @@ -185,7 +185,7 @@

    How to get properties of a microorganism

    Matthijs S. Berends

    -

    09 February 2019

    +

    11 February 2019

    diff --git a/docs/articles/resistance_predict.html b/docs/articles/resistance_predict.html new file mode 100644 index 00000000..07ec214e --- /dev/null +++ b/docs/articles/resistance_predict.html @@ -0,0 +1,406 @@ + + + + + + + +How to predict antimicrobial resistance • AMR (for R) + + + + + + + + + + + + + + + + + + + +
    +
    + + + +
    +
    + + + + +
    +

    +Needed R packages

    +

    As with many uses in R, we need some additional packages for AMR analysis. Our package works closely together with the tidyverse packages dplyr and ggplot2 by Dr Hadley Wickham. The tidyverse tremendously improves the way we conduct data science - it allows for a very natural way of writing syntaxes and creating beautiful plots in R.

    +

    Our AMR package depends on these packages and even extends their use and functions.

    + +
    +
    +

    +Prediction analysis

    +

    Our package contains a function resistance_predict(), which takes the same input as functions for other AMR analysis. Based on a date column, it calculates cases per year and uses a regression model to predict antimicrobial resistance.

    +

    It is basically as easy as:

    + +

    The function will look for a date column itself if col_date is not set.

    +

    When running any of these commands, a summary of the regression model will be printed unless using resistance_predict(..., info = FALSE).

    +
    #> NOTE: Using column `date` as input for `col_date`.
    +#> 
    +#> Logistic regression model (logit) with binomial distribution
    +#> ------------------------------------------------------------
    +#> 
    +#> Call:
    +#> glm(formula = df_matrix ~ year, family = binomial)
    +#> 
    +#> Deviance Residuals: 
    +#>     Min       1Q   Median       3Q      Max  
    +#> -2.9224  -1.3120   0.0170   0.7586   3.1932  
    +#> 
    +#> Coefficients:
    +#>               Estimate Std. Error z value Pr(>|z|)    
    +#> (Intercept) -222.92857   45.93922  -4.853 1.22e-06 ***
    +#> year           0.10994    0.02284   4.814 1.48e-06 ***
    +#> ---
    +#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
    +#> 
    +#> (Dispersion parameter for binomial family taken to be 1)
    +#> 
    +#>     Null deviance: 59.794  on 14  degrees of freedom
    +#> Residual deviance: 35.191  on 13  degrees of freedom
    +#> AIC: 93.464
    +#> 
    +#> Number of Fisher Scoring iterations: 4
    +

    This text is only a printed summary - the actual result (output) of the function is a data.frame containing for each year: the number of observations, the actual observed resistance, the estimated resistance and the standard error below and above the estimation:

    +
    predict_pita
    +#>    year      value    se_min    se_max observations   observed  estimated
    +#> 1  2003 0.06250000        NA        NA           32 0.06250000 0.06177594
    +#> 2  2004 0.08536585        NA        NA           82 0.08536585 0.06846343
    +#> 3  2005 0.10000000        NA        NA           60 0.10000000 0.07581637
    +#> 4  2006 0.05084746        NA        NA           59 0.05084746 0.08388789
    +#> 5  2007 0.12121212        NA        NA           66 0.12121212 0.09273250
    +#> 6  2008 0.04166667        NA        NA           72 0.04166667 0.10240539
    +#> 7  2009 0.01639344        NA        NA           61 0.01639344 0.11296163
    +#> 8  2010 0.09433962        NA        NA           53 0.09433962 0.12445516
    +#> 9  2011 0.18279570        NA        NA           93 0.18279570 0.13693759
    +#> 10 2012 0.30769231        NA        NA           65 0.30769231 0.15045682
    +#> 11 2013 0.08620690        NA        NA           58 0.08620690 0.16505550
    +#> 12 2014 0.15254237        NA        NA           59 0.15254237 0.18076926
    +#> 13 2015 0.27272727        NA        NA           55 0.27272727 0.19762493
    +#> 14 2016 0.25000000        NA        NA           84 0.25000000 0.21563859
    +#> 15 2017 0.16279070        NA        NA           86 0.16279070 0.23481370
    +#> 16 2018 0.25513926 0.2228376 0.2874409           NA         NA 0.25513926
    +#> 17 2019 0.27658825 0.2386811 0.3144954           NA         NA 0.27658825
    +#> 18 2020 0.29911630 0.2551715 0.3430611           NA         NA 0.29911630
    +#> 19 2021 0.32266085 0.2723340 0.3729877           NA         NA 0.32266085
    +#> 20 2022 0.34714076 0.2901847 0.4040968           NA         NA 0.34714076
    +#> 21 2023 0.37245666 0.3087318 0.4361815           NA         NA 0.37245666
    +#> 22 2024 0.39849187 0.3279750 0.4690088           NA         NA 0.39849187
    +#> 23 2025 0.42511415 0.3479042 0.5023241           NA         NA 0.42511415
    +#> 24 2026 0.45217796 0.3684992 0.5358568           NA         NA 0.45217796
    +#> 25 2027 0.47952757 0.3897276 0.5693275           NA         NA 0.47952757
    +#> 26 2028 0.50700045 0.4115444 0.6024565           NA         NA 0.50700045
    +#> 27 2029 0.53443111 0.4338908 0.6349714           NA         NA 0.53443111
    +

    The function plot is available in base R, and can be extended by other packages to make the output depend on the type of input. We extended its function to cope with resistance predictions:

    +
    plot(predict_pita)
    +

    +

    This is the fastest way to plot the result. It automatically adds the right axes, error bars, titles, number of available observations and type of model.

    +

    We also support the ggplot2 package with our custom function ggplot_rsi_predict() to create more appealing plots:

    +
    ggplot_rsi_predict(predict_pita)
    +

    + +

    +
    +

    +Choosing the right model

    +

    Resistance is not easily predicted; if we look at vancomycin resistance in Gram positives, the spread (i.e. standard error) is enormous:

    +
    septic_patients %>%
    +  filter(mo_gramstain(mo) == "Gram positive") %>%
    +  resistance_predict(col_ab = "vanc", year_min = 2010, info = FALSE) %>% 
    +  ggplot_rsi_predict()
    +#> NOTE: Using column `date` as input for `col_date`.
    +

    +

    Vancomycin resistance could be 100% in ten years, but might also stay around 0%.

    +

    You can define the model with the model parameter. The default model is a generalised linear regression model using a binomial distribution, assuming that a period of zero resistance was followed by a period of increasing resistance leading slowly to more and more resistance.

    +

    Valid values are:

    + +++++ + + + + + + + + + + + + + + + + + + + + + + +
    Input valuesFunction used by RType of model
    +"binomial" or "binom" or "logit" +glm(..., family = binomial)Generalised linear model with binomial distribution
    +"loglin" or "poisson" +glm(..., family = poisson)Generalised linear model with poisson distribution
    +"lin" or "linear" +lm()Linear model
    +

    For the vancomycin resistance in Gram positive bacteria, a linear model might be more appropriate since no (left half of a) binomial distribution is to be expected based on the observed years:

    +
    septic_patients %>%
    +  filter(mo_gramstain(mo) == "Gram positive") %>%
    +  resistance_predict(col_ab = "vanc", year_min = 2010, info = FALSE, model = "linear") %>% 
    +  ggplot_rsi_predict()
    +#> NOTE: Using column `date` as input for `col_date`.
    +

    +

    This seems more likely, doesn’t it?

    +

    The model itself is also available from the object, as an attribute:

    + +
    +
    +
    + + + +
    + + + +
    + + + + + diff --git a/docs/articles/resistance_predict_files/figure-html/unnamed-chunk-4-1.png b/docs/articles/resistance_predict_files/figure-html/unnamed-chunk-4-1.png new file mode 100644 index 00000000..544c7be2 Binary files /dev/null and b/docs/articles/resistance_predict_files/figure-html/unnamed-chunk-4-1.png differ diff --git a/docs/articles/resistance_predict_files/figure-html/unnamed-chunk-5-1.png b/docs/articles/resistance_predict_files/figure-html/unnamed-chunk-5-1.png new file mode 100644 index 00000000..4e99fb8b Binary files /dev/null and b/docs/articles/resistance_predict_files/figure-html/unnamed-chunk-5-1.png differ diff --git a/docs/articles/resistance_predict_files/figure-html/unnamed-chunk-5-2.png b/docs/articles/resistance_predict_files/figure-html/unnamed-chunk-5-2.png new file mode 100644 index 00000000..6090dfa7 Binary files /dev/null and b/docs/articles/resistance_predict_files/figure-html/unnamed-chunk-5-2.png differ diff --git a/docs/articles/resistance_predict_files/figure-html/unnamed-chunk-6-1.png b/docs/articles/resistance_predict_files/figure-html/unnamed-chunk-6-1.png new file mode 100644 index 00000000..f968ce8b Binary files /dev/null and b/docs/articles/resistance_predict_files/figure-html/unnamed-chunk-6-1.png differ diff --git a/docs/articles/resistance_predict_files/figure-html/unnamed-chunk-7-1.png b/docs/articles/resistance_predict_files/figure-html/unnamed-chunk-7-1.png new file mode 100644 index 00000000..4db59da9 Binary files /dev/null and b/docs/articles/resistance_predict_files/figure-html/unnamed-chunk-7-1.png differ diff --git a/docs/authors.html b/docs/authors.html index 51adde52..a2906631 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -108,7 +108,7 @@
  • - + Predict antimicrobial resistance @@ -129,14 +129,14 @@
  • - + Get properties of a microorganism
  • - + Get properties of an antibiotic diff --git a/docs/extra.css b/docs/extra.css index 7847e8ee..9d2cdc72 100644 --- a/docs/extra.css +++ b/docs/extra.css @@ -23,12 +23,12 @@ /* R for Data Science (r4ds) */ #r4ds a { - display: inline; + display: inline-flex; + align-items: center; + text-align: right; } -#r4ds * { - text-align: center; - display: block; - margin: 0 auto; +#r4ds img { + margin-left: 10px; } /* class for footer */ diff --git a/docs/extra.js b/docs/extra.js index dc4be477..10c05985 100644 --- a/docs/extra.js +++ b/docs/extra.js @@ -34,15 +34,19 @@ $( document ).ready(function() { window.location.replace(url_new); } - $('#sidebar').prepend( - '
    ' + - ' ' + - ' ' + - ' ' + - '

    Learn R reading this great book!

    ' + - '

    Or read it free online: r4ds.co.nz.

    ' + - '
    ' + - '
    '); + // PR for 'R for Data Science' on How To pages + if ($(".template-article").length > 0) { + $('#sidebar').prepend( + '
    ' + + ' Learn R reading this great book: R for Data Science.' + + '
    ' + + '
    ' + + ' Click to read it online - it was published for free.' + + ' ' + + '
    ' + + '
    ' + + '
    '); + } $('footer').html( '
    ' + diff --git a/docs/index.html b/docs/index.html index e7fbbee4..31493b09 100644 --- a/docs/index.html +++ b/docs/index.html @@ -72,7 +72,7 @@
  • - + Predict antimicrobial resistance @@ -93,14 +93,14 @@
  • - + Get properties of a microorganism
  • - + Get properties of an antibiotic @@ -195,7 +195,7 @@

    This package can be used for:

    Furthermore, the model itself is available as an attribute: attributes(x)$model, see Examples.

    + +

    Details

    +

    Valid options for the statistical model are:

    Read more on our website!

    @@ -348,36 +359,41 @@ On our website https://msberends.gitla library(dplyr) x <- septic_patients %>% filter_first_isolate() %>% - filter(mo_genus(mo) == "Staphylococcus") %>% + filter(mo_genus(mo) == "Staphylococcus") %>% resistance_predict("peni") plot(x) -# create nice plots with ggplot yourself +# get the model from the object +mymodel <- attributes(x)$model +summary(mymodel) + + +# create nice plots with ggplot2 yourself if (!require(ggplot2)) { data <- septic_patients %>% - filter(mo == as.mo("E. coli")) %>% + filter(mo == as.mo("E. coli")) %>% resistance_predict(col_ab = "amox", col_date = "date", info = FALSE, minimum = 15) - ggplot(data, - aes(x = year)) + - geom_col(aes(y = value), + ggplot(data, + aes(x = year)) + + geom_col(aes(y = value), fill = "grey75") + - geom_errorbar(aes(ymin = se_min, + geom_errorbar(aes(ymin = se_min, ymax = se_max), colour = "grey50") + - scale_y_continuous(limits = c(0, 1), + scale_y_continuous(limits = c(0, 1), breaks = seq(0, 1, 0.1), labels = paste0(seq(0, 100, 10), "%")) + - labs(title = expression(paste("Forecast of amoxicillin resistance in ", + labs(title = expression(paste("Forecast of amoxicillin resistance in ", italic("E. coli"))), y = "%IR", x = "Year") + - theme_minimal(base_size = 13) + theme_minimal(base_size = 13) } # } @@ -388,6 +404,8 @@ On our website https://msberends.gitla
  • Value
  • +
  • Details
  • +
  • Read more on our website!
  • See also
  • diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 21751b23..c2f8bedc 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -138,14 +138,11 @@ https://msberends.gitlab.io/AMR/articles/G_test.html - - https://msberends.gitlab.io/AMR/articles/Predict.html - https://msberends.gitlab.io/AMR/articles/WHONET.html - https://msberends.gitlab.io/AMR/articles/ab_property.html + https://msberends.gitlab.io/AMR/articles/atc_property.html https://msberends.gitlab.io/AMR/articles/benchmarks.html @@ -156,4 +153,7 @@ https://msberends.gitlab.io/AMR/articles/mo_property.html + + https://msberends.gitlab.io/AMR/articles/resistance_predict.html + diff --git a/index.md b/index.md index 8317461a..85ed77ac 100644 --- a/index.md +++ b/index.md @@ -14,7 +14,7 @@ This R package is actively maintained and free software; you can freely use and This package can be used for: * Calculating antimicrobial resistance - * Calculating empiric susceptibility of both mono therapy and combination therapy + * Calculating empirical susceptibility of both mono therapy and combination therapy * Predicting future antimicrobial resistance using regression models * Getting properties for any microorganism (like Gram stain, species, genus or family) * Getting properties for any antibiotic (like name, ATC code, defined daily dose or trade name) diff --git a/man/resistance_predict.Rd b/man/resistance_predict.Rd index f27b6ce6..9459823f 100644 --- a/man/resistance_predict.Rd +++ b/man/resistance_predict.Rd @@ -21,7 +21,7 @@ rsi_predict(tbl, col_ab, col_date = NULL, year_min = NULL, main = paste("Resistance prediction of", attributes(x)$ab), ...) ggplot_rsi_predict(x, main = paste("Resistance prediction of", - attributes(x)$ab), ...) + attributes(x)$ab), ribbon = TRUE, ...) 
} \arguments{ \item{tbl}{a \code{data.frame} containing isolates.} @@ -53,6 +53,8 @@ ggplot_rsi_predict(x, main = paste("Resistance prediction of", \item{main}{title of the plot} \item{...}{parameters passed on to the \code{first_isolate} function} + +\item{ribbon}{a logical to indicate whether a ribbon should be shown (default) or error bars} } \value{ \code{data.frame} with extra class \code{"resistance_predict"} with columns: @@ -65,6 +67,7 @@ ggplot_rsi_predict(x, main = paste("Resistance prediction of", \item{\code{observed}, the original observed resistant percentages} \item{\code{estimated}, the estimated resistant percentages, calculated by the model} } +Furthermore, the model itself is available as an attribute: \code{attributes(x)$model}, see Examples. } \description{ Create a prediction model to predict antimicrobial resistance for the next years on statistical solid ground. Standard errors (SE) will be returned as columns \code{se_min} and \code{se_max}. See Examples for a real live example. @@ -97,7 +100,12 @@ x <- septic_patients \%>\% plot(x) -# create nice plots with ggplot yourself +# get the model from the object +mymodel <- attributes(x)$model +summary(mymodel) + + +# create nice plots with ggplot2 yourself if (!require(ggplot2)) { data <- septic_patients \%>\% diff --git a/pkgdown/extra.css b/pkgdown/extra.css index 7847e8ee..9d2cdc72 100644 --- a/pkgdown/extra.css +++ b/pkgdown/extra.css @@ -23,12 +23,12 @@ /* R for Data Science (r4ds) */ #r4ds a { - display: inline; + display: inline-flex; + align-items: center; + text-align: right; } -#r4ds * { - text-align: center; - display: block; - margin: 0 auto; +#r4ds img { + margin-left: 10px; } /* class for footer */ diff --git a/pkgdown/extra.js b/pkgdown/extra.js index dc4be477..10c05985 100644 --- a/pkgdown/extra.js +++ b/pkgdown/extra.js @@ -34,15 +34,19 @@ $( document ).ready(function() { window.location.replace(url_new); } - $('#sidebar').prepend( - '
    ' + - ' ' + - ' ' + - ' ' + - '

    Learn R reading this great book!

    ' + - '

    Or read it free online: r4ds.co.nz.

    ' + - '
    ' + - '
    '); + // PR for 'R for Data Science' on How To pages + if ($(".template-article").length > 0) { + $('#sidebar').prepend( + '
    ' + + ' Learn R reading this great book: R for Data Science.' + + '
    ' + + '
    ' + + ' Click to read it online - it was published for free.' + + ' ' + + '
    ' + + '
    ' + + '
    '); + } $('footer').html( '
    ' + diff --git a/vignettes/AMR.Rmd b/vignettes/AMR.Rmd index 56a16434..eb1a644e 100755 --- a/vignettes/AMR.Rmd +++ b/vignettes/AMR.Rmd @@ -17,9 +17,9 @@ editor_options: ```{r setup, include = FALSE, results = 'markup'} knitr::opts_chunk$set( collapse = TRUE, - comment = "#", + comment = "#>", fig.width = 7.5, - fig.height = 4.5 + fig.height = 5 ) ``` @@ -106,14 +106,21 @@ ab_interpretations <- c("S", "I", "R") Using the `sample()` function, we can randomly select items from all objects we defined earlier. To let our fake data reflect reality a bit, we will also approximately define the probabilities of bacteria and the antibiotic results with the `prob` parameter. ```{r merge data} -data <- data.frame(date = sample(dates, 5000, replace = TRUE), - patient_id = sample(patients, 5000, replace = TRUE), - hospital = sample(hospitals, 5000, replace = TRUE, prob = c(0.30, 0.35, 0.15, 0.20)), - bacteria = sample(bacteria, 5000, replace = TRUE, prob = c(0.50, 0.25, 0.15, 0.10)), - amox = sample(ab_interpretations, 5000, replace = TRUE, prob = c(0.60, 0.05, 0.35)), - amcl = sample(ab_interpretations, 5000, replace = TRUE, prob = c(0.75, 0.10, 0.15)), - cipr = sample(ab_interpretations, 5000, replace = TRUE, prob = c(0.80, 0.00, 0.20)), - gent = sample(ab_interpretations, 5000, replace = TRUE, prob = c(0.92, 0.00, 0.08)) +sample_size <- 20000 +data <- data.frame(date = sample(dates, size = sample_size, replace = TRUE), + patient_id = sample(patients, size = sample_size, replace = TRUE), + hospital = sample(hospitals, size = sample_size, replace = TRUE, + prob = c(0.30, 0.35, 0.15, 0.20)), + bacteria = sample(bacteria, size = sample_size, replace = TRUE, + prob = c(0.50, 0.25, 0.15, 0.10)), + amox = sample(ab_interpretations, size = sample_size, replace = TRUE, + prob = c(0.60, 0.05, 0.35)), + amcl = sample(ab_interpretations, size = sample_size, replace = TRUE, + prob = c(0.75, 0.10, 0.15)), + cipr = sample(ab_interpretations, size = sample_size, replace = TRUE, + prob = 
c(0.80, 0.00, 0.20)), + gent = sample(ab_interpretations, size = sample_size, replace = TRUE, + prob = c(0.92, 0.00, 0.08)) ) ``` @@ -124,6 +131,7 @@ data <- data %>% left_join(patients_table) ``` The resulting data set contains 5,000 blood culture isolates. With the `head()` function we can preview the first 6 values of this data set: + ```{r preview data set 1, eval = FALSE} head(data) ``` @@ -148,6 +156,7 @@ data %>% freq(gender, markdown = FALSE, header = TRUE) So, we can draw at least two conclusions immediately. From a data scientist perspective, the data looks clean: only values `M` and `F`. From a researcher perspective: there are slightly more men. Nothing we didn't already know. The data is already quite clean, but we still need to transform some variables. The `bacteria` column now consists of text, and we want to add more variables based on microbial IDs later on. So, we will transform this column to valid IDs. The `mutate()` function of the `dplyr` package makes this really easy: + ```{r transform mo 1} data <- data %>% mutate(bacteria = as.mo(bacteria)) @@ -202,6 +211,7 @@ data_1st <- data %>% ``` For future use, the above two syntaxes can be shortened with the `filter_first_isolate()` function: + ```{r 1st isolate filter 2, eval = FALSE} data_1st <- data %>% filter_first_isolate() @@ -263,6 +273,7 @@ data_1st <- data %>% So we end up with `r format(nrow(data_1st), big.mark = ",")` isolates for analysis. We can remove unneeded columns: + ```{r} data_1st <- data_1st %>% select(-c(first, keyab)) @@ -359,6 +370,7 @@ data_1st %>% ``` To make a transition to the next part, let's see how this difference could be plotted: + ```{r plot 1} data_1st %>% group_by(genus) %>% @@ -391,6 +403,7 @@ ggplot(a_data_set, ``` The `AMR` package contains functions to extend this `ggplot2` package, for example `geom_rsi()`. It automatically transforms data with `count_df()` or `portion_df()` and show results in stacked bars. 
Its simplest and shortest example: + ```{r plot 3} ggplot(data_1st) + geom_rsi(translate_ab = FALSE) @@ -424,6 +437,7 @@ ggplot(data_1st %>% group_by(genus)) + ``` To simplify this, we also created the `ggplot_rsi()` function, which combines almost all above functions: + ```{r plot 5} data_1st %>% group_by(genus) %>% diff --git a/vignettes/EUCAST.Rmd b/vignettes/EUCAST.Rmd index 4db83445..442793e1 100644 --- a/vignettes/EUCAST.Rmd +++ b/vignettes/EUCAST.Rmd @@ -17,7 +17,7 @@ editor_options: ```{r setup, include = FALSE, results = 'markup'} knitr::opts_chunk$set( collapse = TRUE, - comment = "#", + comment = "#>", fig.width = 7.5, fig.height = 4.5 ) diff --git a/vignettes/ab_property.Rmd b/vignettes/atc_property.Rmd similarity index 97% rename from vignettes/ab_property.Rmd rename to vignettes/atc_property.Rmd index bf17254f..e6cdc041 100755 --- a/vignettes/ab_property.Rmd +++ b/vignettes/atc_property.Rmd @@ -16,7 +16,7 @@ editor_options: ```{r setup, include = FALSE, results = 'markup'} knitr::opts_chunk$set( collapse = TRUE, - comment = "#" + comment = "#>" ) # set to original language (English) Sys.setlocale(locale = "C") diff --git a/vignettes/mo_property.Rmd b/vignettes/mo_property.Rmd index 71aee34c..527314fb 100755 --- a/vignettes/mo_property.Rmd +++ b/vignettes/mo_property.Rmd @@ -16,8 +16,10 @@ editor_options: ```{r setup, include = FALSE, results = 'markup'} knitr::opts_chunk$set( collapse = TRUE, - comment = "#" + comment = "#>" ) +# set to original language (English) +Sys.setlocale(locale = "C") ``` *(will be available soon)* diff --git a/vignettes/Predict.Rmd b/vignettes/resistance_predict.Rmd similarity index 73% rename from vignettes/Predict.Rmd rename to vignettes/resistance_predict.Rmd index 511e8410..463d1ae6 100755 --- a/vignettes/Predict.Rmd +++ b/vignettes/resistance_predict.Rmd @@ -16,9 +16,9 @@ editor_options: ```{r setup, include = FALSE, results = 'markup'} knitr::opts_chunk$set( collapse = TRUE, - comment = "#", + comment = "#>", fig.width 
= 7.5, - fig.height = 4.5 + fig.height = 4.75 ) ``` @@ -37,7 +37,7 @@ library(AMR) ``` ## Prediction analysis -Our package contains a function `resistance_predict()`, which takes the same input as functions for [other AMR analysis](./articles/AMR.html). Based on a date column, it calculates cases per year and uses a regression model to predict antimicrobial resistance. +Our package contains a function `resistance_predict()`, which takes the same input as functions for [other AMR analysis](./AMR.html). Based on a date column, it calculates cases per year and uses a regression model to predict antimicrobial resistance. It is basically as easy as: ```{r, eval = FALSE} @@ -53,27 +53,36 @@ predict_pita <- septic_patients %>% resistance_predict(col_ab = "pita") ``` +The function will look for a date column itself if `col_date` is not set. + +When running any of these commands, a summary of the regression model will be printed unless using `resistance_predict(..., info = FALSE)`. + ```{r, echo = FALSE} predict_pita <- septic_patients %>% resistance_predict(col_ab = "pita") ``` -The function will look for a data column itself if `col_date` is not set. The result is nothing more than a `data.frame`, containing the years, number of observations, actual observed resistance, the estimated resistance and the standard error below and above the estimation: +This text is only a printed summary - the actual result (output) of the function is a `data.frame` containing for each year: the number of observations, the actual observed resistance, the estimated resistance and the standard error below and above the estimation: + ```{r} predict_pita ``` The function `plot` is available in base R, and can be extended by other packages to depend the output based on the type of input. 
We extended its function to cope with resistance predictions: -```{r} +```{r, fig.height = 5.5} plot(predict_pita) ``` -We also support the `ggplot2` package with the function `ggplot_rsi_predict()`: +This is the fastest way to plot the result. It automatically adds the right axes, error bars, titles, number of available observations and type of model. + +We also support the `ggplot2` package with our custom function `ggplot_rsi_predict()` to create more appealing plots: ```{r} -library(ggplot2) ggplot_rsi_predict(predict_pita) + +# choose error bars instead of a ribbon +ggplot_rsi_predict(predict_pita, ribbon = FALSE) ``` ### Choosing the right model @@ -84,7 +93,7 @@ Resistance is not easily predicted; if we look at vancomycin resistance in Gram septic_patients %>% filter(mo_gramstain(mo) == "Gram positive") %>% resistance_predict(col_ab = "vanc", year_min = 2010, info = FALSE) %>% - plot() + ggplot_rsi_predict() ``` Vancomycin resistance could be 100% in ten years, but might also stay around 0%. @@ -99,13 +108,22 @@ Valid values are: | `"loglin"` or `"poisson"` | `glm(..., family = poisson)` | Generalised linear model with poisson distribution | | `"lin"` or `"linear"` | `lm()` | Linear model | -For the vancomycin resistance in Gram positive bacteria, a linear model might be more appropriate since no (left half of a) binomial distribution is to be expected based on observed years: +For the vancomycin resistance in Gram positive bacteria, a linear model might be more appropriate since no (left half of a) binomial distribution is to be expected based on the observed years: ```{r} septic_patients %>% filter(mo_gramstain(mo) == "Gram positive") %>% resistance_predict(col_ab = "vanc", year_min = 2010, info = FALSE, model = "linear") %>% - plot() + ggplot_rsi_predict() ``` This seems more likely, doesn't it? 
+ +The model itself is also available from the object, as an `attribute`: +```{r} +model <- attributes(predict_pita)$model + +summary(model)$family + +summary(model)$coefficients +```