diff --git a/DESCRIPTION b/DESCRIPTION index 5ef11236..3638d34f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR -Version: 0.5.0.9014 -Date: 2019-01-28 +Version: 0.5.0.9015 +Date: 2019-01-29 Title: Antimicrobial Resistance Analysis Authors@R: c( person( diff --git a/NEWS.md b/NEWS.md index 75468aa8..e424788b 100755 --- a/NEWS.md +++ b/NEWS.md @@ -3,6 +3,9 @@ #### New * **BREAKING**: removed deprecated functions, parameters and references to 'bactid'. Use `as.mo()` to identify an MO code. +* Support for data from [WHONET](https://whonet.org/) and [EARS-Net](https://ecdc.europa.eu/en/about-us/partnerships-and-networks/disease-and-laboratory-networks/ears-net) (European Antimicrobial Resistance Surveillance Network): + * Exported files from WHONET can be read and used in this package. For functions like `first_isolate()` and `eucast_rules()`, all parameters will be filled in automatically. + * This package now knows all antibiotic abbrevations by EARS-Net (which are also being used by WHONET) - the `antibiotics` data set now contains a column `ears_net`. * All `ab_*` functions are deprecated and replaced by `atc_*` functions: ```r ab_property -> atc_property() @@ -75,10 +78,11 @@ * Merged data sets `microorganisms.certe` and `microorganisms.umcg` into `microorganisms.codes` * Function `mo_taxonomy()` now contains the kingdom too * Reduce false positives for `is.rsi.eligible()` +* New colours for `scale_rsi_colours()` * Summaries of class `mo` will now return the top 3 and the unique count, e.g. using `summary(mo)` * Small text updates to summaries of class `rsi` and `mic` * Frequency tables (`freq()` function): - * Support for tidyverse quasiquotation! So now you can create frequency tables of function outcomes: + * Support for tidyverse quasiquotation! Now you can create frequency tables of function outcomes: ```r # Determine genus of microorganisms (mo) in `septic_patients` data set: # OLD WAY diff --git a/R/atc.R b/R/atc.R index bcc791c0..b609dceb 100755 --- a/R/atc.R +++ b/R/atc.R @@ -60,10 +60,15 @@ as.atc <- function(x) { x[!x %like% "[A-Z][0-9]{2}[A-Z]{2}[0-9]{2}"] <- gsub("[^a-zA-Z]+", "", x[!x %like% "[A-Z][0-9]{2}[A-Z]{2}[0-9]{2}"]) x.bak <- x - x <- unique(x[!is.na(x)]) + x <- unique(x) failures <- character(0) for (i in 1:length(x)) { + if (is.na(x[i]) | is.null(x[i]) | identical(x[i], "")) { + x.new[i] <- x[i] + next + } + fail <- TRUE # first try atc @@ -80,6 +85,13 @@ as.atc <- function(x) { x.new[is.na(x.new) & x.bak == x[i]] <- x[i] } + # try abbreviation of EARS-Net/WHONET + found <- AMR::antibiotics[which(tolower(AMR::antibiotics$ears_net) == tolower(x[i])),]$atc + if (length(found) > 0) { + fail <- FALSE + x.new[is.na(x.new) & x.bak == x[i]] <- found[1L] + } + # try abbreviation of certe and glims found <- AMR::antibiotics[which(tolower(AMR::antibiotics$certe) == tolower(x[i])),]$atc if (length(found) > 0) { diff --git a/R/data.R b/R/data.R index 3793064e..0843ee78 100755 --- a/R/data.R +++ b/R/data.R @@ -22,9 +22,10 @@ #' Data set with ~500 antibiotics #' #' A data set containing all antibiotics with a J0 code and some other antimicrobial agents, with their DDDs. Except for trade names and abbreviations, all properties were downloaded from the WHO, see Source. -#' @format A \code{\link{data.frame}} with 488 observations and 16 variables: +#' @format A \code{\link{data.frame}} with 488 observations and 17 variables: #' \describe{ -#' \item{\code{atc}}{ATC code, like \code{J01CR02}} +#' \item{\code{atc}}{ATC code (Anatomical Therapeutic Chemical), like \code{J01CR02}} +#' \item{\code{ears_net}}{EARS-Net code (European Antimicrobial Resistance Surveillance Network), like \code{AMC}} #' \item{\code{certe}}{Certe code, like \code{amcl}} #' \item{\code{umcg}}{UMCG code, like \code{AMCL}} #' \item{\code{abbr}}{Abbreviation as used by many countries, used internally by \code{\link{as.atc}}} @@ -43,6 +44,8 @@ #' } #' @source - World Health Organization (WHO) Collaborating Centre for Drug Statistics Methodology: \url{https://www.whocc.no/atc_ddd_index/} #' +#' Table antibiotic coding EARSS (from WHONET 5.3): \url{http://www.madsonline.dk/Tutorials/landskoder_antibiotika_WM.pdf} +#' #' EUCAST Expert Rules, Intrinsic Resistance and Exceptional Phenotypes Tables. Version 3.1, 2016: \url{http://www.eucast.org/fileadmin/src/media/PDFs/EUCAST_files/Expert_Rules/Expert_rules_intrinsic_exceptional_V3.1.pdf} #' #' European Commission Public Health PHARMACEUTICALS - COMMUNITY REGISTER: \url{http://ec.europa.eu/health/documents/community-register/html/atc.htm} diff --git a/R/first_isolate.R b/R/first_isolate.R index 20335186..8b5c13f8 100755 --- a/R/first_isolate.R +++ b/R/first_isolate.R @@ -220,6 +220,14 @@ first_isolate <- function(tbl, col_keyantibiotics <- NULL } + # -- specimen + if (is.null(col_specimen)) { + col_specimen <- search_type_in_df(tbl = tbl, type = "specimen") + } + if (isFALSE(col_specimen)) { + col_specimen <- NULL + } + # check if columns exist check_columns_existance <- function(column, tblname = tbl) { if (NROW(tblname) <= 1 | NCOL(tblname) <= 1) { diff --git a/R/ggplot_rsi.R b/R/ggplot_rsi.R index b5bccfc4..bb0961b2 100755 --- a/R/ggplot_rsi.R +++ b/R/ggplot_rsi.R @@ -313,8 +313,8 @@ scale_y_percent <- function(breaks = seq(0, 1, 0.1), limits = NULL) { #' @rdname ggplot_rsi #' @export scale_rsi_colours <- function() { - ggplot2::scale_fill_brewer(palette = "RdYlGn") - #ggplot2::scale_fill_gradient2(low = "#d5613e", mid = "#ae5ac0", high = "#7daf44") + #ggplot2::scale_fill_brewer(palette = "RdYlGn") + ggplot2::scale_fill_manual(values = c("#b22222", "#ae9c20", "#7cfc00")) } #' @rdname ggplot_rsi diff --git a/R/guess_ab_col.R b/R/guess_ab_col.R index ef25dc00..1c3e3b29 100755 --- a/R/guess_ab_col.R +++ b/R/guess_ab_col.R @@ -21,7 +21,7 @@ #' Guess antibiotic column #' -#' This tries to find a column name in a data set based on information from the \code{\link{antibiotics}} data set. You can look for an antibiotic (trade) name or abbreviation and it will search the \code{data.frame} for any column containing a name or ATC code of that antibiotic. +#' This tries to find a column name in a data set based on information from the \code{\link{antibiotics}} data set. Also supports WHONET abbreviations. You can look for an antibiotic (trade) name or abbreviation and it will search the \code{data.frame} for any column containing a name or ATC code of that antibiotic. #' @param tbl a \code{data.frame} #' @param col a character to look for #' @param verbose a logical to indicate whether additional info should be printed @@ -40,6 +40,16 @@ #' guess_ab_col(df, "J01AA07", verbose = TRUE) #' # using column `tetr` for col "J01AA07" #' # [1] "tetr" +#' +#' # WHONET codes +#' df <- data.frame(AMP_ND10 = "R", +#' AMC_ED20 = "S") +#' guess_ab_col(df, "ampicillin") +#' # [1] "AMP_ND10" +#' guess_ab_col(df, "J01CR02") +#' # [1] "AMC_ED20" +#' guess_ab_col(df, as.atc("augmentin")) +#' # [1] "AMC_ED20" guess_ab_col <- function(tbl = NULL, col = NULL, verbose = FALSE) { if (is.null(tbl) & is.null(col)) { return(as.name("guess_ab_col")) @@ -54,6 +64,11 @@ guess_ab_col <- function(tbl = NULL, col = NULL, verbose = FALSE) { } tbl_names <- colnames(tbl) + tbl_names_stripped <- colnames(tbl) %>% + strsplit("_") %>% + lapply(function(x) {x[1]}) %>% + unlist() + if (col %in% tbl_names) { return(col) } @@ -70,6 +85,15 @@ guess_ab_col <- function(tbl = NULL, col = NULL, verbose = FALSE) { filter_all(any_vars(. %in% tbl_names)) } + # WHONET + if (nrow(ab_result) == 0) { + # use like when col >= 5 characters + ab_result <- antibiotics %>% + select(atc:trade_name) %>% + filter_all(any_vars(tolower(.) == tolower(col))) %>% + filter_all(any_vars(. %in% tbl_names_stripped)) + } + if (nrow(ab_result) > 1) { # looking more and more for reliable hit ab_result_1 <- ab_result %>% filter(tolower(atc) == tolower(col)) @@ -95,6 +119,9 @@ guess_ab_col <- function(tbl = NULL, col = NULL, verbose = FALSE) { return(NULL) } else { result <- tbl_names[tbl_names %in% ab_result] + if (length(result) == 0) { + result <- tbl_names[tbl_names_stripped %in% ab_result] + } if (length(result) == 0) { if (verbose == TRUE) { message('no result found for col "', col, '"') diff --git a/R/misc.R b/R/misc.R index 846ebefe..4b0a696f 100755 --- a/R/misc.R +++ b/R/misc.R @@ -130,11 +130,18 @@ search_type_in_df <- function(tbl, type) { # try to find columns based on type found <- NULL + colnames(tbl) <- trimws(colnames(tbl)) + # -- mo if (type == "mo") { if ("mo" %in% lapply(tbl, class)) { found <- colnames(tbl)[lapply(tbl, class) == "mo"][1] + } else if (any(colnames(tbl) %like% "^(mo|microorganism|organism|bacteria)")) { + found <- colnames(tbl)[colnames(tbl) %like% "^(mo|microorganism|organism|bacteria)"][1] + } else if (any(colnames(tbl) %like% "species")) { + found <- colnames(tbl)[colnames(tbl) %like% "species"][1] } + } # -- key antibiotics if (type == "keyantibiotics") { @@ -154,15 +161,23 @@ search_type_in_df <- function(tbl, type) { } # -- patient id if (type == "patient_id") { - if (any(colnames(tbl) %like% "^(patient|patid)")) { - found <- colnames(tbl)[colnames(tbl) %like% "^(patient|patid)"][1] + if (any(colnames(tbl) %like% "^(identification |patient|patid)")) { + found <- colnames(tbl)[colnames(tbl) %like% "^(identification |patient|patid)"][1] + } + } + # -- specimen + if (type == "specimen") { + if (any(colnames(tbl) %like% "(specimen type)")) { + found <- colnames(tbl)[colnames(tbl) %like% "(specimen type)"][1] + } else if (any(colnames(tbl) %like% "^(specimen)")) { + found <- colnames(tbl)[colnames(tbl) %like% "^(specimen)"][1] } } if (!is.null(found)) { msg <- paste0("NOTE: Using column `", bold(found), "` as input for `col_", type, "`.") - if (type == "keyantibiotics") { - msg <- paste(msg, "Use", bold("col_keyantibiotics = FALSE"), "to prevent this.") + if (type %in% c("keyantibiotics", "specimen")) { + msg <- paste(msg, "Use", bold(paste0("col_", type), " = FALSE"), "to prevent this.") } message(blue(msg)) } diff --git a/R/mo_source.R b/R/mo_source.R index 60128ced..9748bd29 100644 --- a/R/mo_source.R +++ b/R/mo_source.R @@ -107,11 +107,7 @@ set_mo_source <- function(path) { if (!"readxl" %in% utils::installed.packages()) { stop("Install the 'readxl' package first.") } - if (path %like% '[.]xlsx$') { - df <- readxl::read_xlsx(path) - } else { - df <- readxl::read_xls(path) - } + df <- readxl::read_excel(path) } else { # try comma first diff --git a/_pkgdown.yml b/_pkgdown.yml index 29f376b6..c896b0ff 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -38,6 +38,9 @@ navbar: - text: 'Predict antimicrobial resistance' icon: 'fa-dice' href: 'articles/Predict.html' + - text: 'Work with WHONET data' + icon: 'fa-globe-americas' + href: 'articles/WHONET.html' - text: 'Apply EUCAST rules' icon: 'fa-exchange-alt' href: 'articles/EUCAST.html' diff --git a/data/antibiotics.rda b/data/antibiotics.rda index f5015913..6782d134 100755 Binary files a/data/antibiotics.rda and b/data/antibiotics.rda differ diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index 79527f39..8c3c0770 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -78,7 +78,7 @@
@@ -114,6 +114,13 @@ Predict antimicrobial resistance +AMR.Rmd
Note: values on this page will change with every website update since they are based on randomly created values and the page was written in RMarkdown. However, the methodology remains unchanged. This page was generated on 27 January 2019.
+Note: values on this page will change with every website update since they are based on randomly created values and the page was written in RMarkdown. However, the methodology remains unchanged. This page was generated on 28 January 2019.
As with many uses in R, we need some additional packages for AMR analysis. Our package works closely together with the tidyverse packages dplyr
and ggplot2
by Dr Hadley Wickham. The tidyverse tremendously improves the way we conduct data science - it allows for a very natural way of writing syntaxes and creating beautiful plots in R.
Our AMR
package depends on these packages and even extends their use and functions.
library(dplyr)
-library(ggplot2)
-library(AMR)
-
-# (if not yet installed, install with:)
-# install.packages(c("tidyverse", "AMR"))
library(dplyr)
+library(ggplot2)
+library(AMR)
+
+# (if not yet installed, install with:)
+# install.packages(c("tidyverse", "AMR"))
To start with patients, we need a unique list of patients.
- +The LETTERS
object is available in R - it’s a vector with 26 characters: A
to Z
. The patients
object we just created is now a vector of length 260, with values (patient IDs) varying from A1
to Z10
. Now we we also set the gender of our patients, by putting the ID and the gender in a table:
patients_table <- data.frame(patient_id = patients,
- gender = c(rep("M", 135),
- rep("F", 125)))
The first 135 patient IDs are now male, the other 125 are female.
Let’s pretend that our data consists of blood cultures isolates from 1 January 2010 until 1 January 2018.
- +This dates
object now contains all days in our date range.
For this tutorial, we will uses four different microorganisms: Escherichia coli, Staphylococcus aureus, Streptococcus pneumoniae, and Klebsiella pneumoniae:
-bacteria <- c("Escherichia coli", "Staphylococcus aureus",
- "Streptococcus pneumoniae", "Klebsiella pneumoniae")
For completeness, we can also add the hospital where the patients was admitted and we need to define valid antibmicrobial results for our randomisation:
- +Using the sample()
function, we can randomly select items from all objects we defined earlier. To let our fake data reflect reality a bit, we will also approximately define the probabilities of bacteria and the antibiotic results with the prob
parameter.
data <- data.frame(date = sample(dates, 5000, replace = TRUE),
- patient_id = sample(patients, 5000, replace = TRUE),
- hospital = sample(hospitals, 5000, replace = TRUE, prob = c(0.30, 0.35, 0.15, 0.20)),
- bacteria = sample(bacteria, 5000, replace = TRUE, prob = c(0.50, 0.25, 0.15, 0.10)),
- amox = sample(ab_interpretations, 5000, replace = TRUE, prob = c(0.60, 0.05, 0.35)),
- amcl = sample(ab_interpretations, 5000, replace = TRUE, prob = c(0.75, 0.10, 0.15)),
- cipr = sample(ab_interpretations, 5000, replace = TRUE, prob = c(0.80, 0.00, 0.20)),
- gent = sample(ab_interpretations, 5000, replace = TRUE, prob = c(0.92, 0.00, 0.08))
- )
Using the left_join()
function from the dplyr
package, we can ‘map’ the gender to the patient ID using the patients_table
object we created earlier:
data <- data %>% left_join(patients_table)
Using the sample()
function, we can randomly select items from all objects we defined earlier. To let our fake data reflect reality a bit, we will also approximately define the probabilities of bacteria and the antibiotic results with the prob
parameter.
data <- data.frame(date = sample(dates, 5000, replace = TRUE),
+ patient_id = sample(patients, 5000, replace = TRUE),
+ hospital = sample(hospitals, 5000, replace = TRUE, prob = c(0.30, 0.35, 0.15, 0.20)),
+ bacteria = sample(bacteria, 5000, replace = TRUE, prob = c(0.50, 0.25, 0.15, 0.10)),
+ amox = sample(ab_interpretations, 5000, replace = TRUE, prob = c(0.60, 0.05, 0.35)),
+ amcl = sample(ab_interpretations, 5000, replace = TRUE, prob = c(0.75, 0.10, 0.15)),
+ cipr = sample(ab_interpretations, 5000, replace = TRUE, prob = c(0.80, 0.00, 0.20)),
+ gent = sample(ab_interpretations, 5000, replace = TRUE, prob = c(0.92, 0.00, 0.08))
+ )
Using the left_join()
function from the dplyr
package, we can ‘map’ the gender to the patient ID using the patients_table
object we created earlier:
The resulting data set contains 5,000 blood culture isolates. With the head()
function we can preview the first 6 values of this data set:
head(data)