addins and small improvements to microorganisms dataset

This commit is contained in:
dr. M.S. (Matthijs) Berends 2018-07-04 17:20:03 +02:00
parent 10fce8382c
commit 34ee0247ac
18 changed files with 284 additions and 58 deletions

View File

@ -1,6 +1,6 @@
Package: AMR
Version: 0.2.0.9007
Date: 2018-07-01
Version: 0.2.0.9008
Date: 2018-07-04
Title: Antimicrobial Resistance Analysis
Authors@R: c(
person(
@ -41,7 +41,8 @@ Imports:
Suggests:
testthat (>= 1.0.2),
covr (>= 3.0.1),
rmarkdown
rmarkdown,
rstudioapi
VignetteBuilder: knitr
URL: https://github.com/msberends/AMR
BugReports: https://github.com/msberends/AMR/issues

View File

@ -44,6 +44,7 @@ export(is.mic)
export(is.rsi)
export(key_antibiotics)
export(left_join_microorganisms)
export(like)
export(mo_property)
export(n_rsi)
export(p.symbol)
@ -121,6 +122,7 @@ importFrom(stats,mad)
importFrom(stats,pchisq)
importFrom(stats,sd)
importFrom(tibble,tibble)
importFrom(utils,View)
importFrom(utils,browseVignettes)
importFrom(utils,object.size)
importFrom(utils,packageDescription)

View File

@ -1,5 +1,6 @@
# 0.2.0.90xx (development version)
#### New
* Support for Addins menu in RStudio to quickly insert `%in%` or `%like%` (and give them keyboard shortcuts), or to view the datasets that come with this package
* Function `top_freq` function to get the top/below *n* items of frequency tables
* Vignette about frequency tables
* Header of frequency tables now also show MAD and IQR
@ -14,9 +15,11 @@ ratio(c(772, 1611, 737), ratio = "1:2:1")
* Function `p.symbol` to transform p value to their related symbol: `0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1`
#### Changed
* `%like%` now supports multiple patterns
* Frequency tables (function `freq`) now supports quasiquotation: `freq(mydata, mycolumn)`, or `mydata %>% freq(mycolumn)`
* Frequency tables are now actual `data.frame`s with altered console printing to make it look like a frequency table. Because of this, the parameter `toConsole` is not longer needed.
* Small translational improvements to the `septic_patients` dataset
* Small improvements to the `microorganisms` dataset, especially for *Salmonella*
* Combined MIC/RSI values will now be coerced by the `rsi` and `mic` functions:
* `as.rsi("<=0.002; S")` will return `S`
* `as.mic("<=0.002; S")` will return `<=0.002`

View File

@ -17,6 +17,7 @@
# ==================================================================== #
globalVariables(c('abname',
'antibiotics',
'atc',
'bactid',
'cnt',
@ -36,6 +37,7 @@ globalVariables(c('abname',
'key_ab_other',
'median',
'mic',
'microorganisms',
'mocode',
'molis',
'n',
@ -43,7 +45,9 @@ globalVariables(c('abname',
'patient_id',
'quantile',
'real_first_isolate',
'septic_patients',
'species',
'umcg',
'View',
'y',
'.'))

View File

@ -96,6 +96,10 @@ guess_bactid <- function(x) {
# avoid detection of Pasteurella aerogenes in case of Pseudomonas aeruginosa
x[i] <- 'Pseudomonas aeruginosa'
}
if (tolower(x[i]) %like% 'coagulase') {
# coerce S. coagulase negative
x[i] <- 'Coagulase Negative Staphylococcus (CNS)'
}
# translate known trivial names to genus+species
if (!is.na(x.bak[i])) {

80
R/like.R Normal file
View File

@ -0,0 +1,80 @@
# ==================================================================== #
# TITLE #
# Antimicrobial Resistance (AMR) Analysis #
# #
# AUTHORS #
# Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl) #
# #
# LICENCE #
# This program is free software; you can redistribute it and/or modify #
# it under the terms of the GNU General Public License version 2.0, #
# as published by the Free Software Foundation. #
# #
# This program is distributed in the hope that it will be useful, #
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
# GNU General Public License for more details. #
# ==================================================================== #
#' Pattern Matching
#'
#' Convenient wrapper around \code{\link[base]{grepl}} to match a pattern: \code{a \%like\% b}. It always returns a \code{logical} vector and is always case-insensitive. Also, \code{pattern} (\code{b}) can be as long as \code{x} (\code{a}) to compare items of each index in both vectors.
#' @inheritParams base::grepl
#' @return A \code{logical} vector
#' @name like
#' @rdname like
#' @export
#' @details Using RStudio? This function can also be inserted from the Addins menu and can have its own Keyboard Shortcut like Ctrl+Shift+L or Cmd+Shift+L (see Tools > Modify Keyboard Shortcuts...).
#' @source Idea from the \href{https://github.com/Rdatatable/data.table/blob/master/R/like.R}{\code{like} function from the \code{data.table} package}, but made it case insensitive at default and let it support multiple patterns.
#' @seealso \code{\link[base]{grep}}
#' @examples
#' # simple test
#' a <- "This is a test"
#' b <- "TEST"
#' a %like% b
#' #> TRUE
#' b %like% a
#' #> FALSE
#'
#' # also supports multiple patterns, length must be equal to x
#' a <- c("Test case", "Something different", "Yet another thing")
#' b <- c("case", "diff", "yet")
#' a %like% b
#' #> TRUE TRUE TRUE
#'
#' # get frequencies of bacteria whose name start with 'Ent' or 'ent'
#' library(dplyr)
#' septic_patients %>%
#' left_join_microorganisms() %>%
#' filter(genus %like% '^ent') %>%
#' freq(genus, species)
like <- function(x, pattern) {
if (length(pattern) > 1) {
if (length(x) != length(pattern)) {
pattern <- pattern[1]
warning('only the first element of argument `pattern` used for `%like%`', call. = FALSE)
} else {
# x and pattern are of same length, so items with each other
res <- vector(length = length(pattern))
for (i in 1:length(res)) {
if (is.factor(x[i])) {
res[i] <- as.integer(x[i]) %in% base::grep(pattern[i], levels(x[i]), ignore.case = TRUE)
} else {
res[i] <- base::grepl(pattern[i], x[i], ignore.case = TRUE)
}
}
return(res)
}
}
# the regular way how grepl works; just one pattern against one or more x
if (is.factor(x)) {
as.integer(x) %in% base::grep(pattern, levels(x), ignore.case = TRUE)
} else {
base::grepl(pattern, x, ignore.case = TRUE)
}
}
#' @rdname like
#' @export
"%like%" <- like

View File

@ -16,33 +16,32 @@
# GNU General Public License for more details. #
# ==================================================================== #
#' Pattern Matching
#'
#' Convenience function to compare a vector with a pattern, like \code{\link[base]{grep}}. It always returns a \code{logical} vector and is always case-insensitive.
#' @inheritParams base::grep
#' @return A \code{logical} vector
#' @name like
#' @rdname like
#' @export
#' @source Inherited from the \href{https://github.com/Rdatatable/data.table/blob/master/R/like.R}{\code{like} function from the \code{data.table} package}, but made it case insensitive at default.
#' @examples
#' library(dplyr)
#' # get unique occurences of bacteria whose name start with 'Ent'
#' septic_patients %>%
#' left_join_microorganisms() %>%
#' filter(fullname %like% '^Ent') %>%
#' pull(fullname) %>%
#' unique()
"%like%" <- function(x, pattern) {
if (length(pattern) > 1) {
pattern <- pattern[1]
warning('only the first element of argument `pattern` used for `%like%`', call. = FALSE)
}
if (is.factor(x)) {
as.integer(x) %in% base::grep(pattern, levels(x), ignore.case = TRUE)
} else {
base::grepl(pattern, x, ignore.case = TRUE)
}
# No export, no Rd
addin_insert_in <- function() {
rstudioapi::insertText(" %in% ")
}
# No export, no Rd
addin_insert_like <- function() {
rstudioapi::insertText(" %like% ")
}
# No export, no Rd
#' @importFrom utils View
addin_open_antibiotics <- function() {
View(antibiotics)
}
# No export, no Rd
#' @importFrom utils View
addin_open_microorganisms <- function() {
View(microorganisms)
}
# No export, no Rd
#' @importFrom utils View
addin_open_septic_patients <- function() {
View(septic_patients)
}
# No export, no Rd

View File

@ -18,23 +18,22 @@
#' Symbol of a p value
#'
#' Return the symbol related to the p value: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#' Return the symbol related to the p value: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1. Values above \code{p = 1} will return \code{NA}.
#' @param p p value
#' @param emptychar text to show when \code{p > 0.1}
#' @return Text
#' @export
p.symbol <- function(p, emptychar = " ") {
instelling.oud <- options()$scipen
setting.bak <- options()$scipen
options(scipen = 999)
s <- ''
s[1:length(p)] <- ''
s <- vector(mode = "character", length = length(p))
for (i in 1:length(p)) {
if (is.na(p[i])) {
s[i] <- NA
s[i] <- NA_character_
next
}
if (p[i] > 1) {
s[i] <- NA
s[i] <- NA_character_
next
} else {
p_test <- p[i]
@ -52,6 +51,6 @@ p.symbol <- function(p, emptychar = " ") {
s[i] <- '***'
}
}
options(scipen = instelling.oud)
options(scipen = setting.bak)
s
}

View File

@ -34,9 +34,12 @@ With `AMR` you can also:
* Get the latest antibiotic properties like hierarchic groups and [defined daily dose](https://en.wikipedia.org/wiki/Defined_daily_dose) (DDD) with units and administration form from the WHOCC website with the `atc_property` function
* Create frequency tables with the `freq` function
With the `MDRO` function (abbreviation of Multi Drug Resistant Organisms), you can check your isolates for exceptional resistance with country-specific guidelines or EUCAST rules. Currently guidelines for Germany and the Netherlands are supported. Please suggest addition of your own country here: [https://github.com/msberends/AMR/issues/new](https://github.com/msberends/AMR/issues/new?title=New%20guideline%20for%20MDRO&body=%3C--%20Please%20add%20your%20country%20code,%20guideline%20name,%20version%20and%20source%20below%20and%20remove%20this%20line--%3E).
And it contains:
* A recent data set with ~2500 human pathogenic microorganisms, including family, genus, species, gram stain and aerobic/anaerobic
* A recent data set with all antibiotics as defined by the [WHOCC](https://www.whocc.no/atc_ddd_methodology/who_collaborating_centre/), including ATC code, official name and DDD's
* An example data set `septic_patients`, consisting of 2000 blood culture isolates from anonymised septic patients between 2001 and 2017.
This package contains an example data set `septic_patients`, consisting of 2000 isolates from anonymised septic patients between 2001 and 2017.
With the `MDRO` function (abbreviation of Multi Drug Resistant Organisms), you can check your isolates for exceptional resistance with country-specific guidelines or EUCAST rules. Currently guidelines for Germany and the Netherlands are supported. Please suggest addition of your own country here: [https://github.com/msberends/AMR/issues/new](https://github.com/msberends/AMR/issues/new?title=New%20guideline%20for%20MDRO&body=%3C--%20Please%20add%20your%20country%20code,%20guideline%20name,%20version%20and%20source%20below%20and%20remove%20this%20line--%3E).
## How to get it?
This package is available on CRAN and also here on GitHub.

Binary file not shown.

19
inst/rstudio/addins.dcf Normal file
View File

@ -0,0 +1,19 @@
Name: Insert %in%
Binding: addin_insert_in
Interactive: false
Name: Insert %like%
Binding: addin_insert_like
Interactive: false
Name: View 'antibiotics' data set
Binding: addin_open_antibiotics
Interactive: false
Name: View 'microorganisms' data set
Binding: addin_open_microorganisms
Interactive: false
Name: View 'septic_patients' data set
Binding: addin_open_septic_patients
Interactive: false

View File

@ -1,13 +1,15 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/misc.R
% Please edit documentation in R/like.R
\name{like}
\alias{like}
\alias{\%like\%}
\title{Pattern Matching}
\source{
Inherited from the \href{https://github.com/Rdatatable/data.table/blob/master/R/like.R}{\code{like} function from the \code{data.table} package}, but made it case insensitive at default.
Idea from the \href{https://github.com/Rdatatable/data.table/blob/master/R/like.R}{\code{like} function from the \code{data.table} package}, but made it case insensitive at default and let it support multiple patterns.
}
\usage{
like(x, pattern)
x \%like\% pattern
}
\arguments{
@ -27,14 +29,33 @@ x \%like\% pattern
A \code{logical} vector
}
\description{
Convenience function to compare a vector with a pattern, like \code{\link[base]{grep}}. It always returns a \code{logical} vector and is always case-insensitive.
Convenient wrapper around \code{\link[base]{grepl}} to match a pattern: \code{a \%like\% b}. It always returns a \code{logical} vector and is always case-insensitive. Also, \code{pattern} (\code{b}) can be as long as \code{x} (\code{a}) to compare items of each index in both vectors.
}
\details{
Using RStudio? This function can also be inserted from the Addins menu and can have its own Keyboard Shortcut like Ctrl+Shift+L or Cmd+Shift+L (see Tools > Modify Keyboard Shortcuts...).
}
\examples{
# simple test
a <- "This is a test"
b <- "TEST"
a \%like\% b
#> TRUE
b \%like\% a
#> FALSE
# also supports multiple patterns, length must be equal to x
a <- c("Test case", "Something different", "Yet another thing")
b <- c("case", "diff", "yet")
a \%like\% b
#> TRUE TRUE TRUE
# get frequencies of bacteria whose name start with 'Ent' or 'ent'
library(dplyr)
# get unique occurences of bacteria whose name start with 'Ent'
septic_patients \%>\%
left_join_microorganisms() \%>\%
filter(fullname \%like\% '^Ent') \%>\%
pull(fullname) \%>\%
unique()
filter(genus \%like\% '^ent') \%>\%
freq(genus, species)
}
\seealso{
\code{\link[base]{grep}}
}

View File

@ -15,5 +15,5 @@ p.symbol(p, emptychar = " ")
Text
}
\description{
Return the symbol related to the p value: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Return the symbol related to the p value: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1. Values above \code{p = 1} will return \code{NA}.
}

View File

@ -20,11 +20,10 @@ test_that("G-test works", {
# INDEPENDENCE
# this should always yield a p value of around 0
x <- matrix(data = round(runif(4) * 100000, 0),
ncol = 2,
byrow = TRUE)
expect_lt(g.test(x),
0.0001)
1)
})

View File

@ -0,0 +1,10 @@
context("like.R")
test_that("`like` works", {
expect_true(suppressWarnings("test" %like% c("^t", "^s")))
expect_true("test" %like% "test")
expect_true("test" %like% "TEST")
expect_true(as.factor("test") %like% "TEST")
expect_identical(factor(c("Test case", "Something different", "Yet another thing")) %like% c("case", "diff", "yet"),
c(TRUE, TRUE, TRUE))
})

View File

@ -1,12 +1,5 @@
context("misc.R")
test_that("`like` works", {
expect_true(suppressWarnings("test" %like% c("^t", "^s")))
expect_true("test" %like% "test")
expect_true("test" %like% "TEST")
expect_true(as.factor("test") %like% "TEST")
})
test_that("percentages works", {
expect_equal(percent(0.25), "25%")
expect_equal(percent(0.5), "50%")

View File

@ -1,6 +1,6 @@
context("p.symbol.R")
test_that("P symbol works", {
expect_identical(p.symbol(c(0.001, 0.01, 0.05, 0.1, 1)),
c("***", "**", "*", ".", " "))
expect_identical(p.symbol(c(0.001, 0.01, 0.05, 0.1, 1, NA, 3)),
c("***", "**", "*", ".", " ", NA, NA))
})

89
vignettes/freq.R Normal file
View File

@ -0,0 +1,89 @@
## ----setup, include = FALSE, results = 'markup'--------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#"
)
library(dplyr)
library(AMR)
## ---- echo = TRUE, results = 'hide'--------------------------------------
# just using base R
freq(septic_patients$sex)
# using base R to select the variable and pass it on with a pipe from the dplyr package
septic_patients$sex %>% freq()
# do it all with pipes, using the `select` function from the dplyr package
septic_patients %>%
select(sex) %>%
freq()
# or the preferred way: using a pipe to pass the variable on to the freq function
septic_patients %>% freq(sex) # this also shows 'age' in the title
## ---- echo = TRUE--------------------------------------------------------
freq(septic_patients$sex)
## ---- echo = TRUE, results = 'hide'--------------------------------------
my_patients <- septic_patients %>% left_join_microorganisms()
## ---- echo = TRUE--------------------------------------------------------
colnames(microorganisms)
## ---- echo = TRUE--------------------------------------------------------
dim(septic_patients)
dim(my_patients)
## ---- echo = TRUE--------------------------------------------------------
my_patients %>% freq(genus, species)
## ---- echo = TRUE--------------------------------------------------------
# # get age distribution of unique patients
septic_patients %>%
distinct(patient_id, .keep_all = TRUE) %>%
freq(age, nmax = 5)
## ---- echo = TRUE--------------------------------------------------------
septic_patients %>%
freq(hospital_id)
## ---- echo = TRUE--------------------------------------------------------
septic_patients %>%
freq(hospital_id, sort.count = TRUE)
## ---- echo = TRUE--------------------------------------------------------
septic_patients %>%
select(amox) %>%
freq()
## ---- echo = TRUE--------------------------------------------------------
septic_patients %>%
select(date) %>%
freq(nmax = 5)
## ---- echo = TRUE--------------------------------------------------------
my_df <- septic_patients %>% freq(age)
class(my_df)
## ---- echo = TRUE--------------------------------------------------------
dim(my_df)
## ---- echo = TRUE--------------------------------------------------------
septic_patients %>%
freq(amox, na.rm = FALSE)
## ---- echo = TRUE--------------------------------------------------------
septic_patients %>%
freq(hospital_id, row.names = FALSE)
## ---- echo = TRUE--------------------------------------------------------
septic_patients %>%
freq(hospital_id, markdown = TRUE)
## ---- echo = FALSE-------------------------------------------------------
# this will print "2018" in 2018, and "2018-yyyy" after 2018.
yrs <- c(2018:format(Sys.Date(), "%Y"))
yrs <- c(min(yrs), max(yrs))
yrs <- paste(unique(yrs), collapse = "-")