diff --git a/DESCRIPTION b/DESCRIPTION index 419e6fc0..54571cd2 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR -Version: 0.2.0.9007 -Date: 2018-07-01 +Version: 0.2.0.9008 +Date: 2018-07-04 Title: Antimicrobial Resistance Analysis Authors@R: c( person( @@ -41,7 +41,8 @@ Imports: Suggests: testthat (>= 1.0.2), covr (>= 3.0.1), - rmarkdown + rmarkdown, + rstudioapi VignetteBuilder: knitr URL: https://github.com/msberends/AMR BugReports: https://github.com/msberends/AMR/issues diff --git a/NAMESPACE b/NAMESPACE index 0cf12a81..fb692dfd 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -44,6 +44,7 @@ export(is.mic) export(is.rsi) export(key_antibiotics) export(left_join_microorganisms) +export(like) export(mo_property) export(n_rsi) export(p.symbol) @@ -121,6 +122,7 @@ importFrom(stats,mad) importFrom(stats,pchisq) importFrom(stats,sd) importFrom(tibble,tibble) +importFrom(utils,View) importFrom(utils,browseVignettes) importFrom(utils,object.size) importFrom(utils,packageDescription) diff --git a/NEWS.md b/NEWS.md index f4c59579..bee5351c 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,6 @@ # 0.2.0.90xx (development version) #### New +* Support for Addins menu in RStudio to quickly insert `%in%` or `%like%` (and give them keyboard shortcuts), or to view the datasets that come with this package * Function `top_freq` function to get the top/below *n* items of frequency tables * Vignette about frequency tables * Header of frequency tables now also show MAD and IQR @@ -14,9 +15,11 @@ ratio(c(772, 1611, 737), ratio = "1:2:1") * Function `p.symbol` to transform p value to their related symbol: `0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1` #### Changed +* `%like%` now supports multiple patterns * Frequency tables (function `freq`) now supports quasiquotation: `freq(mydata, mycolumn)`, or `mydata %>% freq(mycolumn)` * Frequency tables are now actual `data.frame`s with altered console printing to make it look like a frequency table. Because of this, the parameter `toConsole` is not longer needed. * Small translational improvements to the `septic_patients` dataset +* Small improvements to the `microorganisms` dataset, especially for *Salmonella* * Combined MIC/RSI values will now be coerced by the `rsi` and `mic` functions: * `as.rsi("<=0.002; S")` will return `S` * `as.mic("<=0.002; S")` will return `<=0.002` diff --git a/R/globals.R b/R/globals.R index 28463b21..9fdf3265 100755 --- a/R/globals.R +++ b/R/globals.R @@ -17,6 +17,7 @@ # ==================================================================== # globalVariables(c('abname', + 'antibiotics', 'atc', 'bactid', 'cnt', @@ -36,6 +37,7 @@ globalVariables(c('abname', 'key_ab_other', 'median', 'mic', + 'microorganisms', 'mocode', 'molis', 'n', @@ -43,7 +45,9 @@ globalVariables(c('abname', 'patient_id', 'quantile', 'real_first_isolate', + 'septic_patients', 'species', 'umcg', + 'View', 'y', '.')) diff --git a/R/guess_bactid.R b/R/guess_bactid.R index 7d52960a..785ca0ae 100644 --- a/R/guess_bactid.R +++ b/R/guess_bactid.R @@ -96,6 +96,10 @@ guess_bactid <- function(x) { # avoid detection of Pasteurella aerogenes in case of Pseudomonas aeruginosa x[i] <- 'Pseudomonas aeruginosa' } + if (tolower(x[i]) %like% 'coagulase') { + # coerce S. coagulase negative + x[i] <- 'Coagulase Negative Staphylococcus (CNS)' + } # translate known trivial names to genus+species if (!is.na(x.bak[i])) { diff --git a/R/like.R b/R/like.R new file mode 100644 index 00000000..f3303912 --- /dev/null +++ b/R/like.R @@ -0,0 +1,80 @@ +# ==================================================================== # +# TITLE # +# Antimicrobial Resistance (AMR) Analysis # +# # +# AUTHORS # +# Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl) # +# # +# LICENCE # +# This program is free software; you can redistribute it and/or modify # +# it under the terms of the GNU General Public License version 2.0, # +# as published by the Free Software Foundation. # +# # +# This program is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# ==================================================================== # + +#' Pattern Matching +#' +#' Convenient wrapper around \code{\link[base]{grepl}} to match a pattern: \code{a \%like\% b}. It always returns a \code{logical} vector and is always case-insensitive. Also, \code{pattern} (\code{b}) can be as long as \code{x} (\code{a}) to compare items of each index in both vectors. +#' @inheritParams base::grepl +#' @return A \code{logical} vector +#' @name like +#' @rdname like +#' @export +#' @details Using RStudio? This function can also be inserted from the Addins menu and can have its own Keyboard Shortcut like Ctrl+Shift+L or Cmd+Shift+L (see Tools > Modify Keyboard Shortcuts...). +#' @source Idea from the \href{https://github.com/Rdatatable/data.table/blob/master/R/like.R}{\code{like} function from the \code{data.table} package}, but made it case insensitive at default and let it support multiple patterns. +#' @seealso \code{\link[base]{grep}} +#' @examples +#' # simple test +#' a <- "This is a test" +#' b <- "TEST" +#' a %like% b +#' #> TRUE +#' b %like% a +#' #> FALSE +#' +#' # also supports multiple patterns, length must be equal to x +#' a <- c("Test case", "Something different", "Yet another thing") +#' b <- c("case", "diff", "yet") +#' a %like% b +#' #> TRUE TRUE TRUE +#' +#' # get frequencies of bacteria whose name start with 'Ent' or 'ent' +#' library(dplyr) +#' septic_patients %>% +#' left_join_microorganisms() %>% +#' filter(genus %like% '^ent') %>% +#' freq(genus, species) +like <- function(x, pattern) { + if (length(pattern) > 1) { + if (length(x) != length(pattern)) { + pattern <- pattern[1] + warning('only the first element of argument `pattern` used for `%like%`', call. = FALSE) + } else { + # x and pattern are of same length, so items with each other + res <- vector(length = length(pattern)) + for (i in 1:length(res)) { + if (is.factor(x[i])) { + res[i] <- as.integer(x[i]) %in% base::grep(pattern[i], levels(x[i]), ignore.case = TRUE) + } else { + res[i] <- base::grepl(pattern[i], x[i], ignore.case = TRUE) + } + } + return(res) + } + } + + # the regular way how grepl works; just one pattern against one or more x + if (is.factor(x)) { + as.integer(x) %in% base::grep(pattern, levels(x), ignore.case = TRUE) + } else { + base::grepl(pattern, x, ignore.case = TRUE) + } +} + +#' @rdname like +#' @export +"%like%" <- like diff --git a/R/misc.R b/R/misc.R index e8354650..ec8db1eb 100755 --- a/R/misc.R +++ b/R/misc.R @@ -16,33 +16,32 @@ # GNU General Public License for more details. # # ==================================================================== # -#' Pattern Matching -#' -#' Convenience function to compare a vector with a pattern, like \code{\link[base]{grep}}. It always returns a \code{logical} vector and is always case-insensitive. -#' @inheritParams base::grep -#' @return A \code{logical} vector -#' @name like -#' @rdname like -#' @export -#' @source Inherited from the \href{https://github.com/Rdatatable/data.table/blob/master/R/like.R}{\code{like} function from the \code{data.table} package}, but made it case insensitive at default. -#' @examples -#' library(dplyr) -#' # get unique occurences of bacteria whose name start with 'Ent' -#' septic_patients %>% -#' left_join_microorganisms() %>% -#' filter(fullname %like% '^Ent') %>% -#' pull(fullname) %>% -#' unique() -"%like%" <- function(x, pattern) { - if (length(pattern) > 1) { - pattern <- pattern[1] - warning('only the first element of argument `pattern` used for `%like%`', call. = FALSE) - } - if (is.factor(x)) { - as.integer(x) %in% base::grep(pattern, levels(x), ignore.case = TRUE) - } else { - base::grepl(pattern, x, ignore.case = TRUE) - } +# No export, no Rd +addin_insert_in <- function() { + rstudioapi::insertText(" %in% ") +} + +# No export, no Rd +addin_insert_like <- function() { + rstudioapi::insertText(" %like% ") +} + +# No export, no Rd +#' @importFrom utils View +addin_open_antibiotics <- function() { + View(antibiotics) +} + +# No export, no Rd +#' @importFrom utils View +addin_open_microorganisms <- function() { + View(microorganisms) +} + +# No export, no Rd +#' @importFrom utils View +addin_open_septic_patients <- function() { + View(septic_patients) } # No export, no Rd diff --git a/R/p.symbol.R b/R/p.symbol.R index c11e2727..22e0550c 100644 --- a/R/p.symbol.R +++ b/R/p.symbol.R @@ -18,23 +18,22 @@ #' Symbol of a p value #' -#' Return the symbol related to the p value: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 +#' Return the symbol related to the p value: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1. Values above \code{p = 1} will return \code{NA}. #' @param p p value #' @param emptychar text to show when \code{p > 0.1} #' @return Text #' @export p.symbol <- function(p, emptychar = " ") { - instelling.oud <- options()$scipen + setting.bak <- options()$scipen options(scipen = 999) - s <- '' - s[1:length(p)] <- '' + s <- vector(mode = "character", length = length(p)) for (i in 1:length(p)) { if (is.na(p[i])) { - s[i] <- NA + s[i] <- NA_character_ next } if (p[i] > 1) { - s[i] <- NA + s[i] <- NA_character_ next } else { p_test <- p[i] @@ -52,6 +51,6 @@ p.symbol <- function(p, emptychar = " ") { s[i] <- '***' } } - options(scipen = instelling.oud) + options(scipen = setting.bak) s } diff --git a/README.md b/README.md index 042d2c79..9d086718 100755 --- a/README.md +++ b/README.md @@ -34,9 +34,12 @@ With `AMR` you can also: * Get the latest antibiotic properties like hierarchic groups and [defined daily dose](https://en.wikipedia.org/wiki/Defined_daily_dose) (DDD) with units and administration form from the WHOCC website with the `atc_property` function * Create frequency tables with the `freq` function -With the `MDRO` function (abbreviation of Multi Drug Resistant Organisms), you can check your isolates for exceptional resistance with country-specific guidelines or EUCAST rules. Currently guidelines for Germany and the Netherlands are supported. Please suggest addition of your own country here: [https://github.com/msberends/AMR/issues/new](https://github.com/msberends/AMR/issues/new?title=New%20guideline%20for%20MDRO&body=%3C--%20Please%20add%20your%20country%20code,%20guideline%20name,%20version%20and%20source%20below%20and%20remove%20this%20line--%3E). +And it contains: +* A recent data set with ~2500 human pathogenic microorganisms, including family, genus, species, gram stain and aerobic/anaerobic +* A recent data set with all antibiotics as defined by the [WHOCC](https://www.whocc.no/atc_ddd_methodology/who_collaborating_centre/), including ATC code, official name and DDD's +* An example data set `septic_patients`, consisting of 2000 blood culture isolates from anonymised septic patients between 2001 and 2017. -This package contains an example data set `septic_patients`, consisting of 2000 isolates from anonymised septic patients between 2001 and 2017. +With the `MDRO` function (abbreviation of Multi Drug Resistant Organisms), you can check your isolates for exceptional resistance with country-specific guidelines or EUCAST rules. Currently guidelines for Germany and the Netherlands are supported. Please suggest addition of your own country here: [https://github.com/msberends/AMR/issues/new](https://github.com/msberends/AMR/issues/new?title=New%20guideline%20for%20MDRO&body=%3C--%20Please%20add%20your%20country%20code,%20guideline%20name,%20version%20and%20source%20below%20and%20remove%20this%20line--%3E). ## How to get it? This package is available on CRAN and also here on GitHub. diff --git a/data/microorganisms.rda b/data/microorganisms.rda index 76cf6470..bf7effbc 100755 Binary files a/data/microorganisms.rda and b/data/microorganisms.rda differ diff --git a/inst/rstudio/addins.dcf b/inst/rstudio/addins.dcf new file mode 100644 index 00000000..594d10b2 --- /dev/null +++ b/inst/rstudio/addins.dcf @@ -0,0 +1,19 @@ +Name: Insert %in% +Binding: addin_insert_in +Interactive: false + +Name: Insert %like% +Binding: addin_insert_like +Interactive: false + +Name: View 'antibiotics' data set +Binding: addin_open_antibiotics +Interactive: false + +Name: View 'microorganisms' data set +Binding: addin_open_microorganisms +Interactive: false + +Name: View 'septic_patients' data set +Binding: addin_open_septic_patients +Interactive: false diff --git a/man/like.Rd b/man/like.Rd index 8fdd388f..dbcd9598 100755 --- a/man/like.Rd +++ b/man/like.Rd @@ -1,13 +1,15 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/misc.R +% Please edit documentation in R/like.R \name{like} \alias{like} \alias{\%like\%} \title{Pattern Matching} \source{ -Inherited from the \href{https://github.com/Rdatatable/data.table/blob/master/R/like.R}{\code{like} function from the \code{data.table} package}, but made it case insensitive at default. +Idea from the \href{https://github.com/Rdatatable/data.table/blob/master/R/like.R}{\code{like} function from the \code{data.table} package}, but made it case insensitive at default and let it support multiple patterns. } \usage{ +like(x, pattern) + x \%like\% pattern } \arguments{ @@ -27,14 +29,33 @@ x \%like\% pattern A \code{logical} vector } \description{ -Convenience function to compare a vector with a pattern, like \code{\link[base]{grep}}. It always returns a \code{logical} vector and is always case-insensitive. +Convenient wrapper around \code{\link[base]{grepl}} to match a pattern: \code{a \%like\% b}. It always returns a \code{logical} vector and is always case-insensitive. Also, \code{pattern} (\code{b}) can be as long as \code{x} (\code{a}) to compare items of each index in both vectors. +} +\details{ +Using RStudio? This function can also be inserted from the Addins menu and can have its own Keyboard Shortcut like Ctrl+Shift+L or Cmd+Shift+L (see Tools > Modify Keyboard Shortcuts...). } \examples{ +# simple test +a <- "This is a test" +b <- "TEST" +a \%like\% b +#> TRUE +b \%like\% a +#> FALSE + +# also supports multiple patterns, length must be equal to x +a <- c("Test case", "Something different", "Yet another thing") +b <- c("case", "diff", "yet") +a \%like\% b +#> TRUE TRUE TRUE + +# get frequencies of bacteria whose name start with 'Ent' or 'ent' library(dplyr) -# get unique occurences of bacteria whose name start with 'Ent' septic_patients \%>\% left_join_microorganisms() \%>\% - filter(fullname \%like\% '^Ent') \%>\% - pull(fullname) \%>\% - unique() + filter(genus \%like\% '^ent') \%>\% + freq(genus, species) +} +\seealso{ +\code{\link[base]{grep}} } diff --git a/man/p.symbol.Rd b/man/p.symbol.Rd index d1f91f25..d36c5ca1 100644 --- a/man/p.symbol.Rd +++ b/man/p.symbol.Rd @@ -15,5 +15,5 @@ p.symbol(p, emptychar = " ") Text } \description{ -Return the symbol related to the p value: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 +Return the symbol related to the p value: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1. Values above \code{p = 1} will return \code{NA}. } diff --git a/tests/testthat/test-g.test.R b/tests/testthat/test-g.test.R index d04a3553..91899e6d 100644 --- a/tests/testthat/test-g.test.R +++ b/tests/testthat/test-g.test.R @@ -20,11 +20,10 @@ test_that("G-test works", { # INDEPENDENCE - # this should always yield a p value of around 0 x <- matrix(data = round(runif(4) * 100000, 0), ncol = 2, byrow = TRUE) expect_lt(g.test(x), - 0.0001) + 1) }) diff --git a/tests/testthat/test-like.R b/tests/testthat/test-like.R new file mode 100644 index 00000000..fdceabfb --- /dev/null +++ b/tests/testthat/test-like.R @@ -0,0 +1,10 @@ +context("like.R") + +test_that("`like` works", { + expect_true(suppressWarnings("test" %like% c("^t", "^s"))) + expect_true("test" %like% "test") + expect_true("test" %like% "TEST") + expect_true(as.factor("test") %like% "TEST") + expect_identical(factor(c("Test case", "Something different", "Yet another thing")) %like% c("case", "diff", "yet"), + c(TRUE, TRUE, TRUE)) +}) diff --git a/tests/testthat/test-misc.R b/tests/testthat/test-misc.R index b94e2899..164a9c9f 100755 --- a/tests/testthat/test-misc.R +++ b/tests/testthat/test-misc.R @@ -1,12 +1,5 @@ context("misc.R") -test_that("`like` works", { - expect_true(suppressWarnings("test" %like% c("^t", "^s"))) - expect_true("test" %like% "test") - expect_true("test" %like% "TEST") - expect_true(as.factor("test") %like% "TEST") -}) - test_that("percentages works", { expect_equal(percent(0.25), "25%") expect_equal(percent(0.5), "50%") diff --git a/tests/testthat/test-p.symbol.R b/tests/testthat/test-p.symbol.R index 8b40f351..fce51b3e 100644 --- a/tests/testthat/test-p.symbol.R +++ b/tests/testthat/test-p.symbol.R @@ -1,6 +1,6 @@ context("p.symbol.R") test_that("P symbol works", { - expect_identical(p.symbol(c(0.001, 0.01, 0.05, 0.1, 1)), - c("***", "**", "*", ".", " ")) + expect_identical(p.symbol(c(0.001, 0.01, 0.05, 0.1, 1, NA, 3)), + c("***", "**", "*", ".", " ", NA, NA)) }) diff --git a/vignettes/freq.R b/vignettes/freq.R new file mode 100644 index 00000000..bb94ee97 --- /dev/null +++ b/vignettes/freq.R @@ -0,0 +1,89 @@ +## ----setup, include = FALSE, results = 'markup'-------------------------- +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#" +) +library(dplyr) +library(AMR) + +## ---- echo = TRUE, results = 'hide'-------------------------------------- +# just using base R +freq(septic_patients$sex) + +# using base R to select the variable and pass it on with a pipe from the dplyr package +septic_patients$sex %>% freq() + +# do it all with pipes, using the `select` function from the dplyr package +septic_patients %>% + select(sex) %>% + freq() + +# or the preferred way: using a pipe to pass the variable on to the freq function +septic_patients %>% freq(sex) # this also shows 'age' in the title + + +## ---- echo = TRUE-------------------------------------------------------- +freq(septic_patients$sex) + +## ---- echo = TRUE, results = 'hide'-------------------------------------- +my_patients <- septic_patients %>% left_join_microorganisms() + +## ---- echo = TRUE-------------------------------------------------------- +colnames(microorganisms) + +## ---- echo = TRUE-------------------------------------------------------- +dim(septic_patients) +dim(my_patients) + +## ---- echo = TRUE-------------------------------------------------------- +my_patients %>% freq(genus, species) + +## ---- echo = TRUE-------------------------------------------------------- +# # get age distribution of unique patients +septic_patients %>% + distinct(patient_id, .keep_all = TRUE) %>% + freq(age, nmax = 5) + +## ---- echo = TRUE-------------------------------------------------------- +septic_patients %>% + freq(hospital_id) + +## ---- echo = TRUE-------------------------------------------------------- +septic_patients %>% + freq(hospital_id, sort.count = TRUE) + +## ---- echo = TRUE-------------------------------------------------------- +septic_patients %>% + select(amox) %>% + freq() + +## ---- echo = TRUE-------------------------------------------------------- +septic_patients %>% + select(date) %>% + freq(nmax = 5) + +## ---- echo = TRUE-------------------------------------------------------- +my_df <- septic_patients %>% freq(age) +class(my_df) + +## ---- echo = TRUE-------------------------------------------------------- +dim(my_df) + +## ---- echo = TRUE-------------------------------------------------------- +septic_patients %>% + freq(amox, na.rm = FALSE) + +## ---- echo = TRUE-------------------------------------------------------- +septic_patients %>% + freq(hospital_id, row.names = FALSE) + +## ---- echo = TRUE-------------------------------------------------------- +septic_patients %>% + freq(hospital_id, markdown = TRUE) + +## ---- echo = FALSE------------------------------------------------------- +# this will print "2018" in 2018, and "2018-yyyy" after 2018. +yrs <- c(2018:format(Sys.Date(), "%Y")) +yrs <- c(min(yrs), max(yrs)) +yrs <- paste(unique(yrs), collapse = "-") +