addins and small improvements to microorganisms dataset

2018-07-04 17:20:03 +02:00 · 2018-07-04 17:20:03 +02:00 · 34ee0247ac
parent 10fce8382c
commit 34ee0247ac
18 changed files with 284 additions and 58 deletions
--- a/7
+++ b/7
@ -1,6 +1,6 @@
 Package: AMR
-Version: 0.2.0.9007
+Version: 0.2.0.9008
-Date: 2018-07-01
+Date: 2018-07-04
 Title: Antimicrobial Resistance Analysis
 Authors@R: c(
    person(
@ -41,7 +41,8 @@ Imports:
 Suggests:
    testthat (>= 1.0.2),
    covr (>= 3.0.1),
-    rmarkdown
+    rmarkdown,
    rstudioapi
 VignetteBuilder: knitr
 URL: https://github.com/msberends/AMR
 BugReports: https://github.com/msberends/AMR/issues
--- a/2
+++ b/2
@ -44,6 +44,7 @@ export(is.mic)
 export(is.rsi)
 export(key_antibiotics)
 export(left_join_microorganisms)
 export(like)
 export(mo_property)
 export(n_rsi)
 export(p.symbol)
@ -121,6 +122,7 @@ importFrom(stats,mad)
 importFrom(stats,pchisq)
 importFrom(stats,sd)
 importFrom(tibble,tibble)
 importFrom(utils,View)
 importFrom(utils,browseVignettes)
 importFrom(utils,object.size)
 importFrom(utils,packageDescription)
--- a/NEWS.md
+++ b/NEWS.md
@ -1,5 +1,6 @@
 # 0.2.0.90xx (development version)
 #### New
 * Support for Addins menu in RStudio to quickly insert `%in%` or `%like%` (and give them keyboard shortcuts), or to view the datasets that come with this package
 * Function `top_freq` function to get the top/below *n* items of frequency tables
 * Vignette about frequency tables
 * Header of frequency tables now also show MAD and IQR
@ -14,9 +15,11 @@ ratio(c(772, 1611, 737), ratio = "1:2:1")
 * Function `p.symbol` to transform p value to their related symbol: `0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1`
 #### Changed
 * `%like%` now supports multiple patterns
 * Frequency tables (function `freq`) now supports quasiquotation: `freq(mydata, mycolumn)`, or `mydata %>% freq(mycolumn)`
 * Frequency tables are now actual `data.frame`s with altered console printing to make it look like a frequency table. Because of this, the parameter `toConsole` is not longer needed.
 * Small translational improvements to the `septic_patients` dataset
 * Small improvements to the `microorganisms` dataset, especially for *Salmonella*
 * Combined MIC/RSI values will now be coerced by the `rsi` and `mic` functions:
  * `as.rsi("<=0.002; S")` will return `S`
  * `as.mic("<=0.002; S")` will return `<=0.002`
--- a/R/globals.R
+++ b/R/globals.R
@ -17,6 +17,7 @@
 # ==================================================================== #
 globalVariables(c('abname',
                  'antibiotics',
                  'atc',
                  'bactid',
                  'cnt',
@ -36,6 +37,7 @@ globalVariables(c('abname',
                  'key_ab_other',
                  'median',
                  'mic',
                  'microorganisms',
                  'mocode',
                  'molis',
                  'n',
@ -43,7 +45,9 @@ globalVariables(c('abname',
                  'patient_id',
                  'quantile',
                  'real_first_isolate',
                  'septic_patients',
                  'species',
                  'umcg',
                  'View',
                  'y',
                  '.'))
--- a/R/guess_bactid.R
+++ b/R/guess_bactid.R
@ -96,6 +96,10 @@ guess_bactid <- function(x) {
      # avoid detection of Pasteurella aerogenes in case of Pseudomonas aeruginosa
      x[i] <- 'Pseudomonas aeruginosa'
    }
    if (tolower(x[i]) %like% 'coagulase') {
      # coerce S. coagulase negative
      x[i] <- 'Coagulase Negative Staphylococcus (CNS)'
    }
    # translate known trivial names to genus+species
    if (!is.na(x.bak[i])) {
--- a/R/like.R
+++ b/R/like.R
@ -0,0 +1,80 @@
 # ==================================================================== #
 # TITLE                                                                #
 # Antimicrobial Resistance (AMR) Analysis                              #
 #                                                                      #
 # AUTHORS                                                              #
 # Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl)           #
 #                                                                      #
 # LICENCE                                                              #
 # This program is free software; you can redistribute it and/or modify #
 # it under the terms of the GNU General Public License version 2.0,    #
 # as published by the Free Software Foundation.                        #
 #                                                                      #
 # This program is distributed in the hope that it will be useful,      #
 # but WITHOUT ANY WARRANTY; without even the implied warranty of       #
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the        #
 # GNU General Public License for more details.                         #
 # ==================================================================== #
 #' Pattern Matching
 #'
 #' Convenient wrapper around \code{\link[base]{grepl}} to match a pattern: \code{a \%like\% b}. It always returns a \code{logical} vector and is always case-insensitive. Also, \code{pattern} (\code{b}) can be as long as \code{x} (\code{a}) to compare items of each index in both vectors.
 #' @inheritParams base::grepl
 #' @return A \code{logical} vector
 #' @name like
 #' @rdname like
 #' @export
 #' @details Using RStudio? This function can also be inserted from the Addins menu and can have its own Keyboard Shortcut like Ctrl+Shift+L or Cmd+Shift+L (see Tools > Modify Keyboard Shortcuts...).
 #' @source Idea from the \href{https://github.com/Rdatatable/data.table/blob/master/R/like.R}{\code{like} function from the \code{data.table} package}, but made it case insensitive at default and let it support multiple patterns.
 #' @seealso \code{\link[base]{grep}}
 #' @examples
 #' # simple test
 #' a <- "This is a test"
 #' b <- "TEST"
 #' a %like% b
 #' #> TRUE
 #' b %like% a
 #' #> FALSE
 #'
 #' # also supports multiple patterns, length must be equal to x
 #' a <- c("Test case", "Something different", "Yet another thing")
 #' b <- c("case", "diff", "yet")
 #' a %like% b
 #' #> TRUE TRUE TRUE
 #'
 #' # get frequencies of bacteria whose name start with 'Ent' or 'ent'
 #' library(dplyr)
 #' septic_patients %>%
 #'   left_join_microorganisms() %>%
 #'   filter(genus %like% '^ent') %>%
 #'   freq(genus, species)
 like <- function(x, pattern) {
  if (length(pattern) > 1) {
    if (length(x) != length(pattern)) {
      pattern <- pattern[1]
      warning('only the first element of argument `pattern` used for `%like%`', call. = FALSE)
    } else {
      # x and pattern are of same length, so items with each other
      res <- vector(length = length(pattern))
      for (i in 1:length(res)) {
        if (is.factor(x[i])) {
          res[i] <- as.integer(x[i]) %in% base::grep(pattern[i], levels(x[i]), ignore.case = TRUE)
        } else {
          res[i] <- base::grepl(pattern[i], x[i], ignore.case = TRUE)
        }
      }
      return(res)
    }
  }
  # the regular way how grepl works; just one pattern against one or more x
  if (is.factor(x)) {
    as.integer(x) %in% base::grep(pattern, levels(x), ignore.case = TRUE)
  } else {
    base::grepl(pattern, x, ignore.case = TRUE)
  }
 }
 #' @rdname like
 #' @export
 "%like%" <- like
--- a/R/misc.R
+++ b/R/misc.R
@ -16,33 +16,32 @@
 # GNU General Public License for more details.                         #
 # ==================================================================== #
-#' Pattern Matching
+# No export, no Rd
-#'
+addin_insert_in <- function() {
-#' Convenience function to compare a vector with a pattern, like \code{\link[base]{grep}}. It always returns a \code{logical} vector and is always case-insensitive.
+  rstudioapi::insertText(" %in% ")
-#' @inheritParams base::grep
+}
-#' @return A \code{logical} vector
+
-#' @name like
+# No export, no Rd
-#' @rdname like
+addin_insert_like <- function() {
-#' @export
+  rstudioapi::insertText(" %like% ")
-#' @source Inherited from the \href{https://github.com/Rdatatable/data.table/blob/master/R/like.R}{\code{like} function from the \code{data.table} package}, but made it case insensitive at default.
+}
-#' @examples
+
-#' library(dplyr)
+#  No export, no Rd
-#' # get unique occurences of bacteria whose name start with 'Ent'
+#' @importFrom utils View
-#' septic_patients %>%
+addin_open_antibiotics <- function() {
-#'   left_join_microorganisms() %>%
+  View(antibiotics)
-#'   filter(fullname %like% '^Ent') %>%
+}
-#'   pull(fullname) %>%
+
-#'   unique()
+#  No export, no Rd
-"%like%" <- function(x, pattern) {
+#' @importFrom utils View
-  if (length(pattern) > 1) {
+addin_open_microorganisms <- function() {
-    pattern <- pattern[1]
+  View(microorganisms)
-    warning('only the first element of argument `pattern` used for `%like%`', call. = FALSE)
+}
-  }
+
-  if (is.factor(x)) {
+#  No export, no Rd
-    as.integer(x) %in% base::grep(pattern, levels(x), ignore.case = TRUE)
+#' @importFrom utils View
-  } else {
+addin_open_septic_patients <- function() {
-    base::grepl(pattern, x, ignore.case = TRUE)
+  View(septic_patients)
  }
 }
 # No export, no Rd
--- a/R/p.symbol.R
+++ b/R/p.symbol.R
@ -18,23 +18,22 @@
 #' Symbol of a p value
 #'
-#' Return the symbol related to the p value: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+#' Return the symbol related to the p value: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1. Values above \code{p = 1} will return \code{NA}.
 #' @param p p value
 #' @param emptychar text to show when \code{p > 0.1}
 #' @return Text
 #' @export
 p.symbol <- function(p, emptychar = " ") {
-  instelling.oud <- options()$scipen
+  setting.bak <- options()$scipen
  options(scipen = 999)
-  s <- ''
+  s <- vector(mode = "character", length = length(p))
  s[1:length(p)] <- ''
  for (i in 1:length(p)) {
    if (is.na(p[i])) {
-      s[i] <- NA
+      s[i] <- NA_character_
      next
    }
    if (p[i] > 1) {
-      s[i] <- NA
+      s[i] <- NA_character_
      next
    } else {
      p_test <- p[i]
@ -52,6 +51,6 @@ p.symbol <- function(p, emptychar = " ") {
      s[i] <- '***'
    }
  }
-  options(scipen = instelling.oud)
+  options(scipen = setting.bak)
  s
 }
--- a/README.md
+++ b/README.md
@ -34,9 +34,12 @@ With `AMR` you can also:
  * Get the latest antibiotic properties like hierarchic groups and [defined daily dose](https://en.wikipedia.org/wiki/Defined_daily_dose) (DDD) with units and administration form from the WHOCC website with the `atc_property` function
 * Create frequency tables with the `freq` function
-With the `MDRO` function (abbreviation of Multi Drug Resistant Organisms), you can check your isolates for exceptional resistance with country-specific guidelines or EUCAST rules. Currently guidelines for Germany and the Netherlands are supported. Please suggest addition of your own country here: [https://github.com/msberends/AMR/issues/new](https://github.com/msberends/AMR/issues/new?title=New%20guideline%20for%20MDRO&body=%3C--%20Please%20add%20your%20country%20code,%20guideline%20name,%20version%20and%20source%20below%20and%20remove%20this%20line--%3E).
+And it contains:
 * A recent data set with ~2500 human pathogenic microorganisms, including family, genus, species, gram stain and aerobic/anaerobic
 * A recent data set with all antibiotics as defined by the [WHOCC](https://www.whocc.no/atc_ddd_methodology/who_collaborating_centre/), including ATC code, official name and DDD's
 * An example data set `septic_patients`, consisting of 2000 blood culture isolates from anonymised septic patients between 2001 and 2017.
-This package contains an example data set `septic_patients`, consisting of 2000 isolates from anonymised septic patients between 2001 and 2017.
+With the `MDRO` function (abbreviation of Multi Drug Resistant Organisms), you can check your isolates for exceptional resistance with country-specific guidelines or EUCAST rules. Currently guidelines for Germany and the Netherlands are supported. Please suggest addition of your own country here: [https://github.com/msberends/AMR/issues/new](https://github.com/msberends/AMR/issues/new?title=New%20guideline%20for%20MDRO&body=%3C--%20Please%20add%20your%20country%20code,%20guideline%20name,%20version%20and%20source%20below%20and%20remove%20this%20line--%3E).
 ## How to get it?
 This package is available on CRAN and also here on GitHub.
--- a/data/microorganisms.rda
+++ b/data/microorganisms.rda
--- a/inst/rstudio/addins.dcf
+++ b/inst/rstudio/addins.dcf
@ -0,0 +1,19 @@
 Name: Insert %in%
 Binding: addin_insert_in
 Interactive: false
 Name: Insert %like%
 Binding: addin_insert_like
 Interactive: false
 Name: View 'antibiotics' data set
 Binding: addin_open_antibiotics
 Interactive: false
 Name: View 'microorganisms' data set
 Binding: addin_open_microorganisms
 Interactive: false
 Name: View 'septic_patients' data set
 Binding: addin_open_septic_patients
 Interactive: false
--- a/man/like.Rd
+++ b/man/like.Rd
@ -1,13 +1,15 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/misc.R
+% Please edit documentation in R/like.R
 \name{like}
 \alias{like}
 \alias{\%like\%}
 \title{Pattern Matching}
 \source{
-Inherited from the \href{https://github.com/Rdatatable/data.table/blob/master/R/like.R}{\code{like} function from the \code{data.table} package}, but made it case insensitive at default.
+Idea from the \href{https://github.com/Rdatatable/data.table/blob/master/R/like.R}{\code{like} function from the \code{data.table} package}, but made it case insensitive at default and let it support multiple patterns.
 }
 \usage{
 like(x, pattern)
 x \%like\% pattern
 }
 \arguments{
@ -27,14 +29,33 @@ x \%like\% pattern
 A \code{logical} vector
 }
 \description{
-Convenience function to compare a vector with a pattern, like \code{\link[base]{grep}}. It always returns a \code{logical} vector and is always case-insensitive.
+Convenient wrapper around \code{\link[base]{grepl}} to match a pattern: \code{a \%like\% b}. It always returns a \code{logical} vector and is always case-insensitive. Also, \code{pattern} (\code{b}) can be as long as \code{x} (\code{a}) to compare items of each index in both vectors.
 }
 \details{
 Using RStudio? This function can also be inserted from the Addins menu and can have its own Keyboard Shortcut like Ctrl+Shift+L or Cmd+Shift+L (see Tools > Modify Keyboard Shortcuts...).
 }
 \examples{
 # simple test
 a <- "This is a test"
 b <- "TEST"
 a \%like\% b
 #> TRUE
 b \%like\% a
 #> FALSE
 # also supports multiple patterns, length must be equal to x
 a <- c("Test case", "Something different", "Yet another thing")
 b <- c("case", "diff", "yet")
 a \%like\% b
 #> TRUE TRUE TRUE
 # get frequencies of bacteria whose name start with 'Ent' or 'ent'
 library(dplyr)
 # get unique occurences of bacteria whose name start with 'Ent'
 septic_patients \%>\%
  left_join_microorganisms() \%>\%
-  filter(fullname \%like\% '^Ent') \%>\%
+  filter(genus \%like\% '^ent') \%>\%
-  pull(fullname) \%>\%
+  freq(genus, species)
-  unique()
+}
 \seealso{
 \code{\link[base]{grep}}
 }
--- a/man/p.symbol.Rd
+++ b/man/p.symbol.Rd
@ -15,5 +15,5 @@ p.symbol(p, emptychar = " ")
 Text
 }
 \description{
-Return the symbol related to the p value: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+Return the symbol related to the p value: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1. Values above \code{p = 1} will return \code{NA}.
 }
--- a/tests/testthat/test-g.test.R
+++ b/tests/testthat/test-g.test.R
@ -20,11 +20,10 @@ test_that("G-test works", {
  # INDEPENDENCE
  # this should always yield a p value of around 0
  x <- matrix(data = round(runif(4) * 100000, 0),
              ncol = 2,
              byrow = TRUE)
  expect_lt(g.test(x),
-            0.0001)
+            1)
 })
--- a/tests/testthat/test-like.R
+++ b/tests/testthat/test-like.R
@ -0,0 +1,10 @@
 context("like.R")
 test_that("`like` works", {
  expect_true(suppressWarnings("test" %like% c("^t", "^s")))
  expect_true("test" %like% "test")
  expect_true("test" %like% "TEST")
  expect_true(as.factor("test") %like% "TEST")
  expect_identical(factor(c("Test case", "Something different", "Yet another thing")) %like% c("case", "diff", "yet"),
                   c(TRUE, TRUE, TRUE))
 })
--- a/tests/testthat/test-misc.R
+++ b/tests/testthat/test-misc.R
@ -1,12 +1,5 @@
 context("misc.R")
 test_that("`like` works", {
  expect_true(suppressWarnings("test" %like% c("^t", "^s")))
  expect_true("test" %like% "test")
  expect_true("test" %like% "TEST")
  expect_true(as.factor("test") %like% "TEST")
 })
 test_that("percentages works", {
  expect_equal(percent(0.25), "25%")
  expect_equal(percent(0.5), "50%")
--- a/tests/testthat/test-p.symbol.R
+++ b/tests/testthat/test-p.symbol.R
@ -1,6 +1,6 @@
 context("p.symbol.R")
 test_that("P symbol works", {
-  expect_identical(p.symbol(c(0.001, 0.01, 0.05, 0.1, 1)),
+  expect_identical(p.symbol(c(0.001, 0.01, 0.05, 0.1, 1, NA, 3)),
-                   c("***", "**", "*", ".", " "))
+                   c("***", "**", "*", ".", " ", NA, NA))
 })
--- a/vignettes/freq.R
+++ b/vignettes/freq.R
@ -0,0 +1,89 @@
 ## ----setup, include = FALSE, results = 'markup'--------------------------
 knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#"
 )
 library(dplyr)
 library(AMR)
 ## ---- echo = TRUE, results = 'hide'--------------------------------------
 # just using base R
 freq(septic_patients$sex)
 # using base R to select the variable and pass it on with a pipe from the dplyr package
 septic_patients$sex %>% freq()
 # do it all with pipes, using the `select` function from the dplyr package
 septic_patients %>%
  select(sex) %>%
  freq()
 # or the preferred way: using a pipe to pass the variable on to the freq function
 septic_patients %>% freq(sex) # this also shows 'age' in the title
 ## ---- echo = TRUE--------------------------------------------------------
 freq(septic_patients$sex)
 ## ---- echo = TRUE, results = 'hide'--------------------------------------
 my_patients <- septic_patients %>% left_join_microorganisms()
 ## ---- echo = TRUE--------------------------------------------------------
 colnames(microorganisms)
 ## ---- echo = TRUE--------------------------------------------------------
 dim(septic_patients)
 dim(my_patients)
 ## ---- echo = TRUE--------------------------------------------------------
 my_patients %>% freq(genus, species)
 ## ---- echo = TRUE--------------------------------------------------------
 # # get age distribution of unique patients
 septic_patients %>% 
  distinct(patient_id, .keep_all = TRUE) %>% 
  freq(age, nmax = 5)
 ## ---- echo = TRUE--------------------------------------------------------
 septic_patients %>%
  freq(hospital_id)
 ## ---- echo = TRUE--------------------------------------------------------
 septic_patients %>%
  freq(hospital_id, sort.count = TRUE)
 ## ---- echo = TRUE--------------------------------------------------------
 septic_patients %>%
  select(amox) %>% 
  freq()
 ## ---- echo = TRUE--------------------------------------------------------
 septic_patients %>%
  select(date) %>% 
  freq(nmax = 5)
 ## ---- echo = TRUE--------------------------------------------------------
 my_df <- septic_patients %>% freq(age)
 class(my_df)
 ## ---- echo = TRUE--------------------------------------------------------
 dim(my_df)
 ## ---- echo = TRUE--------------------------------------------------------
 septic_patients %>%
  freq(amox, na.rm = FALSE)
 ## ---- echo = TRUE--------------------------------------------------------
 septic_patients %>%
  freq(hospital_id, row.names = FALSE)
 ## ---- echo = TRUE--------------------------------------------------------
 septic_patients %>%
  freq(hospital_id, markdown = TRUE)
 ## ---- echo = FALSE-------------------------------------------------------
 # this will print "2018" in 2018, and "2018-yyyy" after 2018.
 yrs <- c(2018:format(Sys.Date(), "%Y"))
 yrs <- c(min(yrs), max(yrs))
 yrs <- paste(unique(yrs), collapse = "-")