addins and small improvements to microorganisms dataset

This commit is contained in:
dr. M.S. (Matthijs) Berends 2018-07-04 17:20:03 +02:00
parent 10fce8382c
commit 34ee0247ac
18 changed files with 284 additions and 58 deletions

View File

@ -1,6 +1,6 @@
Package: AMR Package: AMR
Version: 0.2.0.9007 Version: 0.2.0.9008
Date: 2018-07-01 Date: 2018-07-04
Title: Antimicrobial Resistance Analysis Title: Antimicrobial Resistance Analysis
Authors@R: c( Authors@R: c(
person( person(
@ -41,7 +41,8 @@ Imports:
Suggests: Suggests:
testthat (>= 1.0.2), testthat (>= 1.0.2),
covr (>= 3.0.1), covr (>= 3.0.1),
rmarkdown rmarkdown,
rstudioapi
VignetteBuilder: knitr VignetteBuilder: knitr
URL: https://github.com/msberends/AMR URL: https://github.com/msberends/AMR
BugReports: https://github.com/msberends/AMR/issues BugReports: https://github.com/msberends/AMR/issues

View File

@ -44,6 +44,7 @@ export(is.mic)
export(is.rsi) export(is.rsi)
export(key_antibiotics) export(key_antibiotics)
export(left_join_microorganisms) export(left_join_microorganisms)
export(like)
export(mo_property) export(mo_property)
export(n_rsi) export(n_rsi)
export(p.symbol) export(p.symbol)
@ -121,6 +122,7 @@ importFrom(stats,mad)
importFrom(stats,pchisq) importFrom(stats,pchisq)
importFrom(stats,sd) importFrom(stats,sd)
importFrom(tibble,tibble) importFrom(tibble,tibble)
importFrom(utils,View)
importFrom(utils,browseVignettes) importFrom(utils,browseVignettes)
importFrom(utils,object.size) importFrom(utils,object.size)
importFrom(utils,packageDescription) importFrom(utils,packageDescription)

View File

@ -1,5 +1,6 @@
# 0.2.0.90xx (development version) # 0.2.0.90xx (development version)
#### New #### New
* Support for Addins menu in RStudio to quickly insert `%in%` or `%like%` (and give them keyboard shortcuts), or to view the datasets that come with this package
* Function `top_freq` function to get the top/below *n* items of frequency tables * Function `top_freq` function to get the top/below *n* items of frequency tables
* Vignette about frequency tables * Vignette about frequency tables
* Header of frequency tables now also show MAD and IQR * Header of frequency tables now also show MAD and IQR
@ -14,9 +15,11 @@ ratio(c(772, 1611, 737), ratio = "1:2:1")
* Function `p.symbol` to transform p value to their related symbol: `0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1` * Function `p.symbol` to transform p value to their related symbol: `0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1`
#### Changed #### Changed
* `%like%` now supports multiple patterns
* Frequency tables (function `freq`) now supports quasiquotation: `freq(mydata, mycolumn)`, or `mydata %>% freq(mycolumn)` * Frequency tables (function `freq`) now supports quasiquotation: `freq(mydata, mycolumn)`, or `mydata %>% freq(mycolumn)`
* Frequency tables are now actual `data.frame`s with altered console printing to make it look like a frequency table. Because of this, the parameter `toConsole` is not longer needed. * Frequency tables are now actual `data.frame`s with altered console printing to make it look like a frequency table. Because of this, the parameter `toConsole` is not longer needed.
* Small translational improvements to the `septic_patients` dataset * Small translational improvements to the `septic_patients` dataset
* Small improvements to the `microorganisms` dataset, especially for *Salmonella*
* Combined MIC/RSI values will now be coerced by the `rsi` and `mic` functions: * Combined MIC/RSI values will now be coerced by the `rsi` and `mic` functions:
* `as.rsi("<=0.002; S")` will return `S` * `as.rsi("<=0.002; S")` will return `S`
* `as.mic("<=0.002; S")` will return `<=0.002` * `as.mic("<=0.002; S")` will return `<=0.002`

View File

@ -17,6 +17,7 @@
# ==================================================================== # # ==================================================================== #
globalVariables(c('abname', globalVariables(c('abname',
'antibiotics',
'atc', 'atc',
'bactid', 'bactid',
'cnt', 'cnt',
@ -36,6 +37,7 @@ globalVariables(c('abname',
'key_ab_other', 'key_ab_other',
'median', 'median',
'mic', 'mic',
'microorganisms',
'mocode', 'mocode',
'molis', 'molis',
'n', 'n',
@ -43,7 +45,9 @@ globalVariables(c('abname',
'patient_id', 'patient_id',
'quantile', 'quantile',
'real_first_isolate', 'real_first_isolate',
'septic_patients',
'species', 'species',
'umcg', 'umcg',
'View',
'y', 'y',
'.')) '.'))

View File

@ -96,6 +96,10 @@ guess_bactid <- function(x) {
# avoid detection of Pasteurella aerogenes in case of Pseudomonas aeruginosa # avoid detection of Pasteurella aerogenes in case of Pseudomonas aeruginosa
x[i] <- 'Pseudomonas aeruginosa' x[i] <- 'Pseudomonas aeruginosa'
} }
if (tolower(x[i]) %like% 'coagulase') {
# coerce S. coagulase negative
x[i] <- 'Coagulase Negative Staphylococcus (CNS)'
}
# translate known trivial names to genus+species # translate known trivial names to genus+species
if (!is.na(x.bak[i])) { if (!is.na(x.bak[i])) {

80
R/like.R Normal file
View File

@ -0,0 +1,80 @@
# ==================================================================== #
# TITLE #
# Antimicrobial Resistance (AMR) Analysis #
# #
# AUTHORS #
# Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl) #
# #
# LICENCE #
# This program is free software; you can redistribute it and/or modify #
# it under the terms of the GNU General Public License version 2.0, #
# as published by the Free Software Foundation. #
# #
# This program is distributed in the hope that it will be useful, #
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
# GNU General Public License for more details. #
# ==================================================================== #
#' Pattern Matching
#'
#' Convenient wrapper around \code{\link[base]{grepl}} to match a pattern: \code{a \%like\% b}. It always returns a \code{logical} vector and is always case-insensitive. Also, \code{pattern} (\code{b}) can be as long as \code{x} (\code{a}) to compare items of each index in both vectors.
#' @inheritParams base::grepl
#' @return A \code{logical} vector
#' @name like
#' @rdname like
#' @export
#' @details Using RStudio? This function can also be inserted from the Addins menu and can have its own Keyboard Shortcut like Ctrl+Shift+L or Cmd+Shift+L (see Tools > Modify Keyboard Shortcuts...).
#' @source Idea from the \href{https://github.com/Rdatatable/data.table/blob/master/R/like.R}{\code{like} function from the \code{data.table} package}, but made it case insensitive at default and let it support multiple patterns.
#' @seealso \code{\link[base]{grep}}
#' @examples
#' # simple test
#' a <- "This is a test"
#' b <- "TEST"
#' a %like% b
#' #> TRUE
#' b %like% a
#' #> FALSE
#'
#' # also supports multiple patterns, length must be equal to x
#' a <- c("Test case", "Something different", "Yet another thing")
#' b <- c("case", "diff", "yet")
#' a %like% b
#' #> TRUE TRUE TRUE
#'
#' # get frequencies of bacteria whose name start with 'Ent' or 'ent'
#' library(dplyr)
#' septic_patients %>%
#' left_join_microorganisms() %>%
#' filter(genus %like% '^ent') %>%
#' freq(genus, species)
like <- function(x, pattern) {
if (length(pattern) > 1) {
if (length(x) != length(pattern)) {
pattern <- pattern[1]
warning('only the first element of argument `pattern` used for `%like%`', call. = FALSE)
} else {
# x and pattern are of same length, so items with each other
res <- vector(length = length(pattern))
for (i in 1:length(res)) {
if (is.factor(x[i])) {
res[i] <- as.integer(x[i]) %in% base::grep(pattern[i], levels(x[i]), ignore.case = TRUE)
} else {
res[i] <- base::grepl(pattern[i], x[i], ignore.case = TRUE)
}
}
return(res)
}
}
# the regular way how grepl works; just one pattern against one or more x
if (is.factor(x)) {
as.integer(x) %in% base::grep(pattern, levels(x), ignore.case = TRUE)
} else {
base::grepl(pattern, x, ignore.case = TRUE)
}
}
#' @rdname like
#' @export
"%like%" <- like

View File

@ -16,33 +16,32 @@
# GNU General Public License for more details. # # GNU General Public License for more details. #
# ==================================================================== # # ==================================================================== #
#' Pattern Matching # No export, no Rd
#' addin_insert_in <- function() {
#' Convenience function to compare a vector with a pattern, like \code{\link[base]{grep}}. It always returns a \code{logical} vector and is always case-insensitive. rstudioapi::insertText(" %in% ")
#' @inheritParams base::grep }
#' @return A \code{logical} vector
#' @name like # No export, no Rd
#' @rdname like addin_insert_like <- function() {
#' @export rstudioapi::insertText(" %like% ")
#' @source Inherited from the \href{https://github.com/Rdatatable/data.table/blob/master/R/like.R}{\code{like} function from the \code{data.table} package}, but made it case insensitive at default. }
#' @examples
#' library(dplyr) # No export, no Rd
#' # get unique occurences of bacteria whose name start with 'Ent' #' @importFrom utils View
#' septic_patients %>% addin_open_antibiotics <- function() {
#' left_join_microorganisms() %>% View(antibiotics)
#' filter(fullname %like% '^Ent') %>% }
#' pull(fullname) %>%
#' unique() # No export, no Rd
"%like%" <- function(x, pattern) { #' @importFrom utils View
if (length(pattern) > 1) { addin_open_microorganisms <- function() {
pattern <- pattern[1] View(microorganisms)
warning('only the first element of argument `pattern` used for `%like%`', call. = FALSE) }
}
if (is.factor(x)) { # No export, no Rd
as.integer(x) %in% base::grep(pattern, levels(x), ignore.case = TRUE) #' @importFrom utils View
} else { addin_open_septic_patients <- function() {
base::grepl(pattern, x, ignore.case = TRUE) View(septic_patients)
}
} }
# No export, no Rd # No export, no Rd

View File

@ -18,23 +18,22 @@
#' Symbol of a p value #' Symbol of a p value
#' #'
#' Return the symbol related to the p value: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 #' Return the symbol related to the p value: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1. Values above \code{p = 1} will return \code{NA}.
#' @param p p value #' @param p p value
#' @param emptychar text to show when \code{p > 0.1} #' @param emptychar text to show when \code{p > 0.1}
#' @return Text #' @return Text
#' @export #' @export
p.symbol <- function(p, emptychar = " ") { p.symbol <- function(p, emptychar = " ") {
instelling.oud <- options()$scipen setting.bak <- options()$scipen
options(scipen = 999) options(scipen = 999)
s <- '' s <- vector(mode = "character", length = length(p))
s[1:length(p)] <- ''
for (i in 1:length(p)) { for (i in 1:length(p)) {
if (is.na(p[i])) { if (is.na(p[i])) {
s[i] <- NA s[i] <- NA_character_
next next
} }
if (p[i] > 1) { if (p[i] > 1) {
s[i] <- NA s[i] <- NA_character_
next next
} else { } else {
p_test <- p[i] p_test <- p[i]
@ -52,6 +51,6 @@ p.symbol <- function(p, emptychar = " ") {
s[i] <- '***' s[i] <- '***'
} }
} }
options(scipen = instelling.oud) options(scipen = setting.bak)
s s
} }

View File

@ -34,9 +34,12 @@ With `AMR` you can also:
* Get the latest antibiotic properties like hierarchic groups and [defined daily dose](https://en.wikipedia.org/wiki/Defined_daily_dose) (DDD) with units and administration form from the WHOCC website with the `atc_property` function * Get the latest antibiotic properties like hierarchic groups and [defined daily dose](https://en.wikipedia.org/wiki/Defined_daily_dose) (DDD) with units and administration form from the WHOCC website with the `atc_property` function
* Create frequency tables with the `freq` function * Create frequency tables with the `freq` function
With the `MDRO` function (abbreviation of Multi Drug Resistant Organisms), you can check your isolates for exceptional resistance with country-specific guidelines or EUCAST rules. Currently guidelines for Germany and the Netherlands are supported. Please suggest addition of your own country here: [https://github.com/msberends/AMR/issues/new](https://github.com/msberends/AMR/issues/new?title=New%20guideline%20for%20MDRO&body=%3C--%20Please%20add%20your%20country%20code,%20guideline%20name,%20version%20and%20source%20below%20and%20remove%20this%20line--%3E). And it contains:
* A recent data set with ~2500 human pathogenic microorganisms, including family, genus, species, gram stain and aerobic/anaerobic
* A recent data set with all antibiotics as defined by the [WHOCC](https://www.whocc.no/atc_ddd_methodology/who_collaborating_centre/), including ATC code, official name and DDD's
* An example data set `septic_patients`, consisting of 2000 blood culture isolates from anonymised septic patients between 2001 and 2017.
This package contains an example data set `septic_patients`, consisting of 2000 isolates from anonymised septic patients between 2001 and 2017. With the `MDRO` function (abbreviation of Multi Drug Resistant Organisms), you can check your isolates for exceptional resistance with country-specific guidelines or EUCAST rules. Currently guidelines for Germany and the Netherlands are supported. Please suggest addition of your own country here: [https://github.com/msberends/AMR/issues/new](https://github.com/msberends/AMR/issues/new?title=New%20guideline%20for%20MDRO&body=%3C--%20Please%20add%20your%20country%20code,%20guideline%20name,%20version%20and%20source%20below%20and%20remove%20this%20line--%3E).
## How to get it? ## How to get it?
This package is available on CRAN and also here on GitHub. This package is available on CRAN and also here on GitHub.

Binary file not shown.

19
inst/rstudio/addins.dcf Normal file
View File

@ -0,0 +1,19 @@
Name: Insert %in%
Binding: addin_insert_in
Interactive: false
Name: Insert %like%
Binding: addin_insert_like
Interactive: false
Name: View 'antibiotics' data set
Binding: addin_open_antibiotics
Interactive: false
Name: View 'microorganisms' data set
Binding: addin_open_microorganisms
Interactive: false
Name: View 'septic_patients' data set
Binding: addin_open_septic_patients
Interactive: false

View File

@ -1,13 +1,15 @@
% Generated by roxygen2: do not edit by hand % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/misc.R % Please edit documentation in R/like.R
\name{like} \name{like}
\alias{like} \alias{like}
\alias{\%like\%} \alias{\%like\%}
\title{Pattern Matching} \title{Pattern Matching}
\source{ \source{
Inherited from the \href{https://github.com/Rdatatable/data.table/blob/master/R/like.R}{\code{like} function from the \code{data.table} package}, but made it case insensitive at default. Idea from the \href{https://github.com/Rdatatable/data.table/blob/master/R/like.R}{\code{like} function from the \code{data.table} package}, but made it case insensitive at default and let it support multiple patterns.
} }
\usage{ \usage{
like(x, pattern)
x \%like\% pattern x \%like\% pattern
} }
\arguments{ \arguments{
@ -27,14 +29,33 @@ x \%like\% pattern
A \code{logical} vector A \code{logical} vector
} }
\description{ \description{
Convenience function to compare a vector with a pattern, like \code{\link[base]{grep}}. It always returns a \code{logical} vector and is always case-insensitive. Convenient wrapper around \code{\link[base]{grepl}} to match a pattern: \code{a \%like\% b}. It always returns a \code{logical} vector and is always case-insensitive. Also, \code{pattern} (\code{b}) can be as long as \code{x} (\code{a}) to compare items of each index in both vectors.
}
\details{
Using RStudio? This function can also be inserted from the Addins menu and can have its own Keyboard Shortcut like Ctrl+Shift+L or Cmd+Shift+L (see Tools > Modify Keyboard Shortcuts...).
} }
\examples{ \examples{
# simple test
a <- "This is a test"
b <- "TEST"
a \%like\% b
#> TRUE
b \%like\% a
#> FALSE
# also supports multiple patterns, length must be equal to x
a <- c("Test case", "Something different", "Yet another thing")
b <- c("case", "diff", "yet")
a \%like\% b
#> TRUE TRUE TRUE
# get frequencies of bacteria whose name start with 'Ent' or 'ent'
library(dplyr) library(dplyr)
# get unique occurences of bacteria whose name start with 'Ent'
septic_patients \%>\% septic_patients \%>\%
left_join_microorganisms() \%>\% left_join_microorganisms() \%>\%
filter(fullname \%like\% '^Ent') \%>\% filter(genus \%like\% '^ent') \%>\%
pull(fullname) \%>\% freq(genus, species)
unique() }
\seealso{
\code{\link[base]{grep}}
} }

View File

@ -15,5 +15,5 @@ p.symbol(p, emptychar = " ")
Text Text
} }
\description{ \description{
Return the symbol related to the p value: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 Return the symbol related to the p value: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1. Values above \code{p = 1} will return \code{NA}.
} }

View File

@ -20,11 +20,10 @@ test_that("G-test works", {
# INDEPENDENCE # INDEPENDENCE
# this should always yield a p value of around 0
x <- matrix(data = round(runif(4) * 100000, 0), x <- matrix(data = round(runif(4) * 100000, 0),
ncol = 2, ncol = 2,
byrow = TRUE) byrow = TRUE)
expect_lt(g.test(x), expect_lt(g.test(x),
0.0001) 1)
}) })

View File

@ -0,0 +1,10 @@
context("like.R")
test_that("`like` works", {
expect_true(suppressWarnings("test" %like% c("^t", "^s")))
expect_true("test" %like% "test")
expect_true("test" %like% "TEST")
expect_true(as.factor("test") %like% "TEST")
expect_identical(factor(c("Test case", "Something different", "Yet another thing")) %like% c("case", "diff", "yet"),
c(TRUE, TRUE, TRUE))
})

View File

@ -1,12 +1,5 @@
context("misc.R") context("misc.R")
test_that("`like` works", {
expect_true(suppressWarnings("test" %like% c("^t", "^s")))
expect_true("test" %like% "test")
expect_true("test" %like% "TEST")
expect_true(as.factor("test") %like% "TEST")
})
test_that("percentages works", { test_that("percentages works", {
expect_equal(percent(0.25), "25%") expect_equal(percent(0.25), "25%")
expect_equal(percent(0.5), "50%") expect_equal(percent(0.5), "50%")

View File

@ -1,6 +1,6 @@
context("p.symbol.R") context("p.symbol.R")
test_that("P symbol works", { test_that("P symbol works", {
expect_identical(p.symbol(c(0.001, 0.01, 0.05, 0.1, 1)), expect_identical(p.symbol(c(0.001, 0.01, 0.05, 0.1, 1, NA, 3)),
c("***", "**", "*", ".", " ")) c("***", "**", "*", ".", " ", NA, NA))
}) })

89
vignettes/freq.R Normal file
View File

@ -0,0 +1,89 @@
## ----setup, include = FALSE, results = 'markup'--------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#"
)
library(dplyr)
library(AMR)
## ---- echo = TRUE, results = 'hide'--------------------------------------
# just using base R
freq(septic_patients$sex)
# using base R to select the variable and pass it on with a pipe from the dplyr package
septic_patients$sex %>% freq()
# do it all with pipes, using the `select` function from the dplyr package
septic_patients %>%
select(sex) %>%
freq()
# or the preferred way: using a pipe to pass the variable on to the freq function
septic_patients %>% freq(sex) # this also shows 'age' in the title
## ---- echo = TRUE--------------------------------------------------------
freq(septic_patients$sex)
## ---- echo = TRUE, results = 'hide'--------------------------------------
my_patients <- septic_patients %>% left_join_microorganisms()
## ---- echo = TRUE--------------------------------------------------------
colnames(microorganisms)
## ---- echo = TRUE--------------------------------------------------------
dim(septic_patients)
dim(my_patients)
## ---- echo = TRUE--------------------------------------------------------
my_patients %>% freq(genus, species)
## ---- echo = TRUE--------------------------------------------------------
# # get age distribution of unique patients
septic_patients %>%
distinct(patient_id, .keep_all = TRUE) %>%
freq(age, nmax = 5)
## ---- echo = TRUE--------------------------------------------------------
septic_patients %>%
freq(hospital_id)
## ---- echo = TRUE--------------------------------------------------------
septic_patients %>%
freq(hospital_id, sort.count = TRUE)
## ---- echo = TRUE--------------------------------------------------------
septic_patients %>%
select(amox) %>%
freq()
## ---- echo = TRUE--------------------------------------------------------
septic_patients %>%
select(date) %>%
freq(nmax = 5)
## ---- echo = TRUE--------------------------------------------------------
my_df <- septic_patients %>% freq(age)
class(my_df)
## ---- echo = TRUE--------------------------------------------------------
dim(my_df)
## ---- echo = TRUE--------------------------------------------------------
septic_patients %>%
freq(amox, na.rm = FALSE)
## ---- echo = TRUE--------------------------------------------------------
septic_patients %>%
freq(hospital_id, row.names = FALSE)
## ---- echo = TRUE--------------------------------------------------------
septic_patients %>%
freq(hospital_id, markdown = TRUE)
## ---- echo = FALSE-------------------------------------------------------
# this will print "2018" in 2018, and "2018-yyyy" after 2018.
yrs <- c(2018:format(Sys.Date(), "%Y"))
yrs <- c(min(yrs), max(yrs))
yrs <- paste(unique(yrs), collapse = "-")