addins and small improvements to microorganisms dataset

2025-07-23 06:23:15 +02:00 · 2018-07-04 17:20:03 +02:00
parent 10fce8382c
commit 34ee0247ac
18 changed files with 284 additions and 58 deletions
--- a/7
+++ b/7
@ -1,6 +1,6 @@
 Package: AMR
-Version: 0.2.0.9007
-Date: 2018-07-01
+Version: 0.2.0.9008
+Date: 2018-07-04
 Title: Antimicrobial Resistance Analysis
 Authors@R: c(
    person(
@ -41,7 +41,8 @@ Imports:
 Suggests:
    testthat (>= 1.0.2),
    covr (>= 3.0.1),
-    rmarkdown
+    rmarkdown,
+    rstudioapi
 VignetteBuilder: knitr
 URL: https://github.com/msberends/AMR
 BugReports: https://github.com/msberends/AMR/issues
--- a/2
+++ b/2
@ -44,6 +44,7 @@ export(is.mic)
 export(is.rsi)
 export(key_antibiotics)
 export(left_join_microorganisms)
+export(like)
 export(mo_property)
 export(n_rsi)
 export(p.symbol)
@ -121,6 +122,7 @@ importFrom(stats,mad)
 importFrom(stats,pchisq)
 importFrom(stats,sd)
 importFrom(tibble,tibble)
+importFrom(utils,View)
 importFrom(utils,browseVignettes)
 importFrom(utils,object.size)
 importFrom(utils,packageDescription)
--- a/NEWS.md
+++ b/NEWS.md
@ -1,5 +1,6 @@
 # 0.2.0.90xx (development version)
 #### New
+* Support for Addins menu in RStudio to quickly insert `%in%` or `%like%` (and give them keyboard shortcuts), or to view the datasets that come with this package
 * Function `top_freq` function to get the top/below *n* items of frequency tables
 * Vignette about frequency tables
 * Header of frequency tables now also show MAD and IQR
@ -14,9 +15,11 @@ ratio(c(772, 1611, 737), ratio = "1:2:1")
 * Function `p.symbol` to transform p value to their related symbol: `0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1`

 #### Changed
+* `%like%` now supports multiple patterns
 * Frequency tables (function `freq`) now supports quasiquotation: `freq(mydata, mycolumn)`, or `mydata %>% freq(mycolumn)`
 * Frequency tables are now actual `data.frame`s with altered console printing to make it look like a frequency table. Because of this, the parameter `toConsole` is not longer needed.
 * Small translational improvements to the `septic_patients` dataset
+* Small improvements to the `microorganisms` dataset, especially for *Salmonella*
 * Combined MIC/RSI values will now be coerced by the `rsi` and `mic` functions:
  * `as.rsi("<=0.002; S")` will return `S`
  * `as.mic("<=0.002; S")` will return `<=0.002`
--- a/R/globals.R
+++ b/R/globals.R
@ -17,6 +17,7 @@
 # ==================================================================== #

 globalVariables(c('abname',
+                  'antibiotics',
                  'atc',
                  'bactid',
                  'cnt',
@ -36,6 +37,7 @@ globalVariables(c('abname',
                  'key_ab_other',
                  'median',
                  'mic',
+                  'microorganisms',
                  'mocode',
                  'molis',
                  'n',
@ -43,7 +45,9 @@ globalVariables(c('abname',
                  'patient_id',
                  'quantile',
                  'real_first_isolate',
+                  'septic_patients',
                  'species',
                  'umcg',
+                  'View',
                  'y',
                  '.'))
--- a/R/guess_bactid.R
+++ b/R/guess_bactid.R
@ -96,6 +96,10 @@ guess_bactid <- function(x) {
      # avoid detection of Pasteurella aerogenes in case of Pseudomonas aeruginosa
      x[i] <- 'Pseudomonas aeruginosa'
    }
+    if (tolower(x[i]) %like% 'coagulase') {
+      # coerce S. coagulase negative
+      x[i] <- 'Coagulase Negative Staphylococcus (CNS)'
+    }

    # translate known trivial names to genus+species
    if (!is.na(x.bak[i])) {
--- a/R/like.R
+++ b/R/like.R
@ -0,0 +1,80 @@
+# ==================================================================== #
+# TITLE                                                                #
+# Antimicrobial Resistance (AMR) Analysis                              #
+#                                                                      #
+# AUTHORS                                                              #
+# Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl)           #
+#                                                                      #
+# LICENCE                                                              #
+# This program is free software; you can redistribute it and/or modify #
+# it under the terms of the GNU General Public License version 2.0,    #
+# as published by the Free Software Foundation.                        #
+#                                                                      #
+# This program is distributed in the hope that it will be useful,      #
+# but WITHOUT ANY WARRANTY; without even the implied warranty of       #
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the        #
+# GNU General Public License for more details.                         #
+# ==================================================================== #
+
+#' Pattern Matching
+#'
+#' Convenient wrapper around \code{\link[base]{grepl}} to match a pattern: \code{a \%like\% b}. It always returns a \code{logical} vector and is always case-insensitive. Also, \code{pattern} (\code{b}) can be as long as \code{x} (\code{a}) to compare items of each index in both vectors.
+#' @inheritParams base::grepl
+#' @return A \code{logical} vector
+#' @name like
+#' @rdname like
+#' @export
+#' @details Using RStudio? This function can also be inserted from the Addins menu and can have its own Keyboard Shortcut like Ctrl+Shift+L or Cmd+Shift+L (see Tools > Modify Keyboard Shortcuts...).
+#' @source Idea from the \href{https://github.com/Rdatatable/data.table/blob/master/R/like.R}{\code{like} function from the \code{data.table} package}, but made it case insensitive at default and let it support multiple patterns.
+#' @seealso \code{\link[base]{grep}}
+#' @examples
+#' # simple test
+#' a <- "This is a test"
+#' b <- "TEST"
+#' a %like% b
+#' #> TRUE
+#' b %like% a
+#' #> FALSE
+#'
+#' # also supports multiple patterns, length must be equal to x
+#' a <- c("Test case", "Something different", "Yet another thing")
+#' b <- c("case", "diff", "yet")
+#' a %like% b
+#' #> TRUE TRUE TRUE
+#'
+#' # get frequencies of bacteria whose name start with 'Ent' or 'ent'
+#' library(dplyr)
+#' septic_patients %>%
+#'   left_join_microorganisms() %>%
+#'   filter(genus %like% '^ent') %>%
+#'   freq(genus, species)
+like <- function(x, pattern) {
+  if (length(pattern) > 1) {
+    if (length(x) != length(pattern)) {
+      pattern <- pattern[1]
+      warning('only the first element of argument `pattern` used for `%like%`', call. = FALSE)
+    } else {
+      # x and pattern are of same length, so items with each other
+      res <- vector(length = length(pattern))
+      for (i in 1:length(res)) {
+        if (is.factor(x[i])) {
+          res[i] <- as.integer(x[i]) %in% base::grep(pattern[i], levels(x[i]), ignore.case = TRUE)
+        } else {
+          res[i] <- base::grepl(pattern[i], x[i], ignore.case = TRUE)
+        }
+      }
+      return(res)
+    }
+  }
+
+  # the regular way how grepl works; just one pattern against one or more x
+  if (is.factor(x)) {
+    as.integer(x) %in% base::grep(pattern, levels(x), ignore.case = TRUE)
+  } else {
+    base::grepl(pattern, x, ignore.case = TRUE)
+  }
+}
+
+#' @rdname like
+#' @export
+"%like%" <- like
--- a/R/misc.R
+++ b/R/misc.R
@ -16,33 +16,32 @@
 # GNU General Public License for more details.                         #
 # ==================================================================== #

-#' Pattern Matching
-#'
-#' Convenience function to compare a vector with a pattern, like \code{\link[base]{grep}}. It always returns a \code{logical} vector and is always case-insensitive.
-#' @inheritParams base::grep
-#' @return A \code{logical} vector
-#' @name like
-#' @rdname like
-#' @export
-#' @source Inherited from the \href{https://github.com/Rdatatable/data.table/blob/master/R/like.R}{\code{like} function from the \code{data.table} package}, but made it case insensitive at default.
-#' @examples
-#' library(dplyr)
-#' # get unique occurences of bacteria whose name start with 'Ent'
-#' septic_patients %>%
-#'   left_join_microorganisms() %>%
-#'   filter(fullname %like% '^Ent') %>%
-#'   pull(fullname) %>%
-#'   unique()
-"%like%" <- function(x, pattern) {
-  if (length(pattern) > 1) {
-    pattern <- pattern[1]
-    warning('only the first element of argument `pattern` used for `%like%`', call. = FALSE)
-  }
-  if (is.factor(x)) {
-    as.integer(x) %in% base::grep(pattern, levels(x), ignore.case = TRUE)
-  } else {
-    base::grepl(pattern, x, ignore.case = TRUE)
-  }
+# No export, no Rd
+addin_insert_in <- function() {
+  rstudioapi::insertText(" %in% ")
+}
+
+# No export, no Rd
+addin_insert_like <- function() {
+  rstudioapi::insertText(" %like% ")
+}
+
+#  No export, no Rd
+#' @importFrom utils View
+addin_open_antibiotics <- function() {
+  View(antibiotics)
+}
+
+#  No export, no Rd
+#' @importFrom utils View
+addin_open_microorganisms <- function() {
+  View(microorganisms)
+}
+
+#  No export, no Rd
+#' @importFrom utils View
+addin_open_septic_patients <- function() {
+  View(septic_patients)
 }

 # No export, no Rd
--- a/R/p.symbol.R
+++ b/R/p.symbol.R
@ -18,23 +18,22 @@

 #' Symbol of a p value
 #'
-#' Return the symbol related to the p value: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+#' Return the symbol related to the p value: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1. Values above \code{p = 1} will return \code{NA}.
 #' @param p p value
 #' @param emptychar text to show when \code{p > 0.1}
 #' @return Text
 #' @export
 p.symbol <- function(p, emptychar = " ") {
-  instelling.oud <- options()$scipen
+  setting.bak <- options()$scipen
  options(scipen = 999)
-  s <- ''
-  s[1:length(p)] <- ''
+  s <- vector(mode = "character", length = length(p))
  for (i in 1:length(p)) {
    if (is.na(p[i])) {
-      s[i] <- NA
+      s[i] <- NA_character_
      next
    }
    if (p[i] > 1) {
-      s[i] <- NA
+      s[i] <- NA_character_
      next
    } else {
      p_test <- p[i]
@ -52,6 +51,6 @@ p.symbol <- function(p, emptychar = " ") {
      s[i] <- '***'
    }
  }
-  options(scipen = instelling.oud)
+  options(scipen = setting.bak)
  s
 }
--- a/README.md
+++ b/README.md
@ -34,9 +34,12 @@ With `AMR` you can also:
  * Get the latest antibiotic properties like hierarchic groups and [defined daily dose](https://en.wikipedia.org/wiki/Defined_daily_dose) (DDD) with units and administration form from the WHOCC website with the `atc_property` function
 * Create frequency tables with the `freq` function

-With the `MDRO` function (abbreviation of Multi Drug Resistant Organisms), you can check your isolates for exceptional resistance with country-specific guidelines or EUCAST rules. Currently guidelines for Germany and the Netherlands are supported. Please suggest addition of your own country here: [https://github.com/msberends/AMR/issues/new](https://github.com/msberends/AMR/issues/new?title=New%20guideline%20for%20MDRO&body=%3C--%20Please%20add%20your%20country%20code,%20guideline%20name,%20version%20and%20source%20below%20and%20remove%20this%20line--%3E).
+And it contains:
+* A recent data set with ~2500 human pathogenic microorganisms, including family, genus, species, gram stain and aerobic/anaerobic
+* A recent data set with all antibiotics as defined by the [WHOCC](https://www.whocc.no/atc_ddd_methodology/who_collaborating_centre/), including ATC code, official name and DDD's
+* An example data set `septic_patients`, consisting of 2000 blood culture isolates from anonymised septic patients between 2001 and 2017.

-This package contains an example data set `septic_patients`, consisting of 2000 isolates from anonymised septic patients between 2001 and 2017.
+With the `MDRO` function (abbreviation of Multi Drug Resistant Organisms), you can check your isolates for exceptional resistance with country-specific guidelines or EUCAST rules. Currently guidelines for Germany and the Netherlands are supported. Please suggest addition of your own country here: [https://github.com/msberends/AMR/issues/new](https://github.com/msberends/AMR/issues/new?title=New%20guideline%20for%20MDRO&body=%3C--%20Please%20add%20your%20country%20code,%20guideline%20name,%20version%20and%20source%20below%20and%20remove%20this%20line--%3E).

 ## How to get it?
 This package is available on CRAN and also here on GitHub.
--- a/data/microorganisms.rda
+++ b/data/microorganisms.rda
--- a/inst/rstudio/addins.dcf
+++ b/inst/rstudio/addins.dcf
@ -0,0 +1,19 @@
+Name: Insert %in%
+Binding: addin_insert_in
+Interactive: false
+
+Name: Insert %like%
+Binding: addin_insert_like
+Interactive: false
+
+Name: View 'antibiotics' data set
+Binding: addin_open_antibiotics
+Interactive: false
+
+Name: View 'microorganisms' data set
+Binding: addin_open_microorganisms
+Interactive: false
+
+Name: View 'septic_patients' data set
+Binding: addin_open_septic_patients
+Interactive: false
--- a/man/like.Rd
+++ b/man/like.Rd
@ -1,13 +1,15 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/misc.R
+% Please edit documentation in R/like.R
 \name{like}
 \alias{like}
 \alias{\%like\%}
 \title{Pattern Matching}
 \source{
-Inherited from the \href{https://github.com/Rdatatable/data.table/blob/master/R/like.R}{\code{like} function from the \code{data.table} package}, but made it case insensitive at default.
+Idea from the \href{https://github.com/Rdatatable/data.table/blob/master/R/like.R}{\code{like} function from the \code{data.table} package}, but made it case insensitive at default and let it support multiple patterns.
 }
 \usage{
+like(x, pattern)
+
 x \%like\% pattern
 }
 \arguments{
@ -27,14 +29,33 @@ x \%like\% pattern
 A \code{logical} vector
 }
 \description{
-Convenience function to compare a vector with a pattern, like \code{\link[base]{grep}}. It always returns a \code{logical} vector and is always case-insensitive.
+Convenient wrapper around \code{\link[base]{grepl}} to match a pattern: \code{a \%like\% b}. It always returns a \code{logical} vector and is always case-insensitive. Also, \code{pattern} (\code{b}) can be as long as \code{x} (\code{a}) to compare items of each index in both vectors.
+}
+\details{
+Using RStudio? This function can also be inserted from the Addins menu and can have its own Keyboard Shortcut like Ctrl+Shift+L or Cmd+Shift+L (see Tools > Modify Keyboard Shortcuts...).
 }
 \examples{
+# simple test
+a <- "This is a test"
+b <- "TEST"
+a \%like\% b
+#> TRUE
+b \%like\% a
+#> FALSE
+
+# also supports multiple patterns, length must be equal to x
+a <- c("Test case", "Something different", "Yet another thing")
+b <- c("case", "diff", "yet")
+a \%like\% b
+#> TRUE TRUE TRUE
+
+# get frequencies of bacteria whose name start with 'Ent' or 'ent'
 library(dplyr)
-# get unique occurences of bacteria whose name start with 'Ent'
 septic_patients \%>\%
  left_join_microorganisms() \%>\%
-  filter(fullname \%like\% '^Ent') \%>\%
-  pull(fullname) \%>\%
-  unique()
+  filter(genus \%like\% '^ent') \%>\%
+  freq(genus, species)
+}
+\seealso{
+\code{\link[base]{grep}}
 }
--- a/man/p.symbol.Rd
+++ b/man/p.symbol.Rd
@ -15,5 +15,5 @@ p.symbol(p, emptychar = " ")
 Text
 }
 \description{
-Return the symbol related to the p value: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+Return the symbol related to the p value: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1. Values above \code{p = 1} will return \code{NA}.
 }
--- a/tests/testthat/test-g.test.R
+++ b/tests/testthat/test-g.test.R
@ -20,11 +20,10 @@ test_that("G-test works", {

  # INDEPENDENCE

-  # this should always yield a p value of around 0
  x <- matrix(data = round(runif(4) * 100000, 0),
              ncol = 2,
              byrow = TRUE)
  expect_lt(g.test(x),
-            0.0001)
+            1)

 })
--- a/tests/testthat/test-like.R
+++ b/tests/testthat/test-like.R
@ -0,0 +1,10 @@
+context("like.R")
+
+test_that("`like` works", {
+  expect_true(suppressWarnings("test" %like% c("^t", "^s")))
+  expect_true("test" %like% "test")
+  expect_true("test" %like% "TEST")
+  expect_true(as.factor("test") %like% "TEST")
+  expect_identical(factor(c("Test case", "Something different", "Yet another thing")) %like% c("case", "diff", "yet"),
+                   c(TRUE, TRUE, TRUE))
+})
--- a/tests/testthat/test-misc.R
+++ b/tests/testthat/test-misc.R
@ -1,12 +1,5 @@
 context("misc.R")

-test_that("`like` works", {
-  expect_true(suppressWarnings("test" %like% c("^t", "^s")))
-  expect_true("test" %like% "test")
-  expect_true("test" %like% "TEST")
-  expect_true(as.factor("test") %like% "TEST")
-})
-
 test_that("percentages works", {
  expect_equal(percent(0.25), "25%")
  expect_equal(percent(0.5), "50%")
--- a/tests/testthat/test-p.symbol.R
+++ b/tests/testthat/test-p.symbol.R
@ -1,6 +1,6 @@
 context("p.symbol.R")

 test_that("P symbol works", {
-  expect_identical(p.symbol(c(0.001, 0.01, 0.05, 0.1, 1)),
-                   c("***", "**", "*", ".", " "))
+  expect_identical(p.symbol(c(0.001, 0.01, 0.05, 0.1, 1, NA, 3)),
+                   c("***", "**", "*", ".", " ", NA, NA))
 })
--- a/vignettes/freq.R
+++ b/vignettes/freq.R
@ -0,0 +1,89 @@
+## ----setup, include = FALSE, results = 'markup'--------------------------
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#"
+)
+library(dplyr)
+library(AMR)
+
+## ---- echo = TRUE, results = 'hide'--------------------------------------
+# just using base R
+freq(septic_patients$sex)
+
+# using base R to select the variable and pass it on with a pipe from the dplyr package
+septic_patients$sex %>% freq()
+
+# do it all with pipes, using the `select` function from the dplyr package
+septic_patients %>%
+  select(sex) %>%
+  freq()
+
+# or the preferred way: using a pipe to pass the variable on to the freq function
+septic_patients %>% freq(sex) # this also shows 'age' in the title
+
+
+## ---- echo = TRUE--------------------------------------------------------
+freq(septic_patients$sex)
+
+## ---- echo = TRUE, results = 'hide'--------------------------------------
+my_patients <- septic_patients %>% left_join_microorganisms()
+
+## ---- echo = TRUE--------------------------------------------------------
+colnames(microorganisms)
+
+## ---- echo = TRUE--------------------------------------------------------
+dim(septic_patients)
+dim(my_patients)
+
+## ---- echo = TRUE--------------------------------------------------------
+my_patients %>% freq(genus, species)
+
+## ---- echo = TRUE--------------------------------------------------------
+# # get age distribution of unique patients
+septic_patients %>% 
+  distinct(patient_id, .keep_all = TRUE) %>% 
+  freq(age, nmax = 5)
+
+## ---- echo = TRUE--------------------------------------------------------
+septic_patients %>%
+  freq(hospital_id)
+
+## ---- echo = TRUE--------------------------------------------------------
+septic_patients %>%
+  freq(hospital_id, sort.count = TRUE)
+
+## ---- echo = TRUE--------------------------------------------------------
+septic_patients %>%
+  select(amox) %>% 
+  freq()
+
+## ---- echo = TRUE--------------------------------------------------------
+septic_patients %>%
+  select(date) %>% 
+  freq(nmax = 5)
+
+## ---- echo = TRUE--------------------------------------------------------
+my_df <- septic_patients %>% freq(age)
+class(my_df)
+
+## ---- echo = TRUE--------------------------------------------------------
+dim(my_df)
+
+## ---- echo = TRUE--------------------------------------------------------
+septic_patients %>%
+  freq(amox, na.rm = FALSE)
+
+## ---- echo = TRUE--------------------------------------------------------
+septic_patients %>%
+  freq(hospital_id, row.names = FALSE)
+
+## ---- echo = TRUE--------------------------------------------------------
+septic_patients %>%
+  freq(hospital_id, markdown = TRUE)
+
+## ---- echo = FALSE-------------------------------------------------------
+# this will print "2018" in 2018, and "2018-yyyy" after 2018.
+yrs <- c(2018:format(Sys.Date(), "%Y"))
+yrs <- c(min(yrs), max(yrs))
+yrs <- paste(unique(yrs), collapse = "-")
+