1
0
mirror of https://github.com/msberends/AMR.git synced 2024-12-26 06:06:12 +01:00

MDRO, freq tables, new print format for tibbles

This commit is contained in:
dr. M.S. (Matthijs) Berends 2018-04-18 12:24:54 +02:00
parent 3165c50d06
commit 2509e2413d
23 changed files with 1453 additions and 85 deletions

View File

@ -1,6 +1,6 @@
Package: AMR Package: AMR
Version: 0.1.2 Version: 0.2.0
Date: 2018-04-02 Date: 2018-04-18
Title: Antimicrobial Resistance Analysis Title: Antimicrobial Resistance Analysis
Authors@R: c( Authors@R: c(
person( person(
@ -28,9 +28,12 @@ Depends:
R (>= 3.2.0) R (>= 3.2.0)
Imports: Imports:
dplyr (>= 0.7.0), dplyr (>= 0.7.0),
data.table (>= 1.10.0),
reshape2 (>= 1.4.0), reshape2 (>= 1.4.0),
xml2 (>= 1.0.0), xml2 (>= 1.0.0),
rvest (>= 0.3.2) knitr (>= 1.0.0),
rvest (>= 0.3.2),
tibble
Suggests: Suggests:
testthat (>= 1.0.2), testthat (>= 1.0.2),
covr (>= 3.0.1) covr (>= 3.0.1)

View File

@ -7,11 +7,18 @@ S3method(barplot,mic)
S3method(barplot,rsi) S3method(barplot,rsi)
S3method(plot,mic) S3method(plot,mic)
S3method(plot,rsi) S3method(plot,rsi)
S3method(print,data.table)
S3method(print,mic) S3method(print,mic)
S3method(print,rsi) S3method(print,rsi)
S3method(print,tbl)
S3method(print,tbl_df)
S3method(summary,mic) S3method(summary,mic)
S3method(summary,rsi) S3method(summary,rsi)
export("%like%")
export(BRMO)
export(EUCAST_rules) export(EUCAST_rules)
export(MDRO)
export(MRGN)
export(abname) export(abname)
export(anti_join_microorganisms) export(anti_join_microorganisms)
export(as.mic) export(as.mic)
@ -20,6 +27,8 @@ export(atc_property)
export(clipboard_export) export(clipboard_export)
export(clipboard_import) export(clipboard_import)
export(first_isolate) export(first_isolate)
export(freq)
export(frequency_tbl)
export(full_join_microorganisms) export(full_join_microorganisms)
export(guess_bactid) export(guess_bactid)
export(inner_join_microorganisms) export(inner_join_microorganisms)
@ -41,8 +50,11 @@ exportMethods(barplot.mic)
exportMethods(barplot.rsi) exportMethods(barplot.rsi)
exportMethods(plot.mic) exportMethods(plot.mic)
exportMethods(plot.rsi) exportMethods(plot.rsi)
exportMethods(print.data.table)
exportMethods(print.mic) exportMethods(print.mic)
exportMethods(print.rsi) exportMethods(print.rsi)
exportMethods(print.tbl)
exportMethods(print.tbl_df)
exportMethods(summary.mic) exportMethods(summary.mic)
exportMethods(summary.rsi) exportMethods(summary.rsi)
importFrom(dplyr,"%>%") importFrom(dplyr,"%>%")
@ -52,15 +64,19 @@ importFrom(dplyr,arrange)
importFrom(dplyr,arrange_at) importFrom(dplyr,arrange_at)
importFrom(dplyr,as_tibble) importFrom(dplyr,as_tibble)
importFrom(dplyr,between) importFrom(dplyr,between)
importFrom(dplyr,desc)
importFrom(dplyr,filter) importFrom(dplyr,filter)
importFrom(dplyr,filter_at) importFrom(dplyr,filter_at)
importFrom(dplyr,group_by) importFrom(dplyr,group_by)
importFrom(dplyr,group_by_at) importFrom(dplyr,group_by_at)
importFrom(dplyr,group_size)
importFrom(dplyr,group_vars)
importFrom(dplyr,if_else) importFrom(dplyr,if_else)
importFrom(dplyr,lag) importFrom(dplyr,lag)
importFrom(dplyr,left_join) importFrom(dplyr,left_join)
importFrom(dplyr,mutate) importFrom(dplyr,mutate)
importFrom(dplyr,n_distinct) importFrom(dplyr,n_distinct)
importFrom(dplyr,n_groups)
importFrom(dplyr,progress_estimated) importFrom(dplyr,progress_estimated)
importFrom(dplyr,pull) importFrom(dplyr,pull)
importFrom(dplyr,row_number) importFrom(dplyr,row_number)
@ -69,6 +85,7 @@ importFrom(dplyr,slice)
importFrom(dplyr,summarise) importFrom(dplyr,summarise)
importFrom(dplyr,tibble) importFrom(dplyr,tibble)
importFrom(dplyr,vars) importFrom(dplyr,vars)
importFrom(grDevices,boxplot.stats)
importFrom(graphics,axis) importFrom(graphics,axis)
importFrom(graphics,barplot) importFrom(graphics,barplot)
importFrom(graphics,plot) importFrom(graphics,plot)
@ -76,6 +93,9 @@ importFrom(graphics,text)
importFrom(reshape2,dcast) importFrom(reshape2,dcast)
importFrom(rvest,html_nodes) importFrom(rvest,html_nodes)
importFrom(rvest,html_table) importFrom(rvest,html_table)
importFrom(stats,fivenum)
importFrom(stats,quantile)
importFrom(stats,sd)
importFrom(utils,object.size) importFrom(utils,object.size)
importFrom(utils,packageDescription) importFrom(utils,packageDescription)
importFrom(utils,read.delim) importFrom(utils,read.delim)

28
NEWS
View File

@ -1,28 +0,0 @@
## 0.1.2
- Added full support for Windows, Linux and macOS
- New function `guess_bactid` to determine the ID of a microorganism based on genus/species or known abbreviations like MRSA
- New functions `clipboard_import` and `clipboard_export` as helper functions to quickly copy and paste from/to software like Excel and SPSS
- New algorithm to determine weighted isolates, can now be `"points"` or `"keyantibiotics"`, see `?first_isolate`
- Renamed dataset `ablist` to `antibiotics`
- Renamed dataset `bactlist` to `microorganisms`
- Added more microorganisms to `bactlist`
- Added analysis examples on help page of dataset `septic_patients`
- Added support for character vector in join functions
- Added warnings when applying a join results in more rows after than before the join
- Altered `%like%` to make it case insensitive
- For parameters of functions `first_isolate`, `EUCAST_rules` the column names are now case-insensitive
- Functions `as.rsi` and `as.mic` now add the package name and version as attribute
- Expanded README.md
- Added unit testing with the `testthat` package
- Added build tests for Linux and macOS using Travis CI (https://travis-ci.org/msberends/AMR)
- Added Line coverage checking using CodeCov (https://codecov.io/gh/msberends/AMR/tree/master/R)
## 0.1.1
- `EUCAST_rules` applies for amoxicillin even if ampicillin is missing
- Edited column names to comply with GLIMS, the laboratory information system
- Added more valid MIC values
- Renamed 'Daily Defined Dose' to 'Defined Daily Dose'
- Added barplots for `rsi` and `mic` classes
## 0.1.0
- First submission to CRAN.

37
NEWS.md Normal file
View File

@ -0,0 +1,37 @@
## 0.2.0
#### New
- Full support for Windows, Linux and macOS
- Function `guess_bactid` to determine the ID of a microorganism based on genus/species or known abbreviations like MRSA
- Functions `clipboard_import` and `clipboard_export` as helper functions to quickly copy and paste from/to software like Excel and SPSS
- Function `MDRO` to determine Multi Drug Resistant Organisms (MDRO) with support for country-specific guidelines. Suggest your own via https://github.com/msberends/AMR/issues/new. Functions `BRMO` and `MRGN` are wrappers for Dutch and German guidelines, respectively
- Function `freq` to create frequency tables, with additional info in a header
- New algorithm to determine weighted isolates, can now be `"points"` or `"keyantibiotics"`, see `?first_isolate`
- New print format for tibbles and data.tables
#### Changed
- Renamed dataset `ablist` to `antibiotics`
- Renamed dataset `bactlist` to `microorganisms`
- Added more microorganisms to `bactlist`
- Added analysis examples on help page of dataset `septic_patients`
- Added support for character vector in `join` functions
- Added warnings when a join results in more rows after than before the join
- Altered `%like%` to make it case insensitive
- For parameters of functions `first_isolate` and `EUCAST_rules` the column names are now case-insensitive
- Functions `as.rsi` and `as.mic` now add the package name and version as attribute
#### Other
- Expanded README.md with more examples
- Added ORC IDs of authors to DESCRIPTION file
- Added unit testing with the `testthat` package
- Added build tests for Linux and macOS using Travis CI (https://travis-ci.org/msberends/AMR)
- Added Line coverage checking using CodeCov (https://codecov.io/gh/msberends/AMR/tree/master/R)
## 0.1.1
- `EUCAST_rules` applies for amoxicillin even if ampicillin is missing
- Edited column names to comply with GLIMS, the laboratory information system
- Added more valid MIC values
- Renamed 'Daily Defined Dose' to 'Defined Daily Dose'
- Added barplots for `rsi` and `mic` classes
## 0.1.0
- First submission to CRAN.

View File

@ -439,45 +439,26 @@ key_antibiotics <- function(tbl,
col.list <- c(amox, cfot, cfta, cftr, cfur, cipr, clar, col.list <- c(amox, cfot, cfta, cftr, cfur, cipr, clar,
clin, clox, doxy, gent, line, mero, peni, clin, clox, doxy, gent, line, mero, peni,
pita, rifa, teic, trsu, vanc) pita, rifa, teic, trsu, vanc)
col.list <- col.list[!is.na(col.list)] col.list <- check_available_columns(tbl = tbl, col.list = col.list, info = info)
col.list.bak <- col.list amox <- col.list[amox]
# are they available as upper case or lower case then? cfot <- col.list[cfot]
for (i in 1:length(col.list)) { cfta <- col.list[cfta]
if (toupper(col.list[i]) %in% colnames(tbl)) { cftr <- col.list[cftr]
col.list[i] <- toupper(col.list[i]) cfur <- col.list[cfur]
} else if (tolower(col.list[i]) %in% colnames(tbl)) { cipr <- col.list[cipr]
col.list[i] <- tolower(col.list[i]) clar <- col.list[clar]
} else if (!col.list[i] %in% colnames(tbl)) { clin <- col.list[clin]
col.list[i] <- NA clox <- col.list[clox]
} doxy <- col.list[doxy]
} gent <- col.list[gent]
if (!all(col.list %in% colnames(tbl))) { line <- col.list[line]
if (info == TRUE) { mero <- col.list[mero]
warning('These columns do not exist and will be ignored: ', peni <- col.list[peni]
col.list.bak[!(col.list %in% colnames(tbl))] %>% toString(), pita <- col.list[pita]
immediate. = TRUE, rifa <- col.list[rifa]
call. = FALSE) teic <- col.list[teic]
} trsu <- col.list[trsu]
} vanc <- col.list[vanc]
amox <- col.list[1]
cfot <- col.list[2]
cfta <- col.list[3]
cftr <- col.list[4]
cfur <- col.list[5]
cipr <- col.list[6]
clar <- col.list[7]
clin <- col.list[8]
clox <- col.list[9]
doxy <- col.list[10]
gent <- col.list[11]
line <- col.list[12]
mero <- col.list[13]
peni <- col.list[14]
pita <- col.list[15]
rifa <- col.list[16]
teic <- col.list[17]
trsu <- col.list[18]
vanc <- col.list[19]
# join microorganisms # join microorganisms
tbl <- tbl %>% left_join_microorganisms(col_bactid) tbl <- tbl %>% left_join_microorganisms(col_bactid)

361
R/freq.R Normal file
View File

@ -0,0 +1,361 @@
# ==================================================================== #
# TITLE #
# Antimicrobial Resistance (AMR) Analysis #
# #
# AUTHORS #
# Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl) #
# #
# LICENCE #
# This program is free software; you can redistribute it and/or modify #
# it under the terms of the GNU General Public License version 2.0, #
# as published by the Free Software Foundation. #
# #
# This program is distributed in the hope that it will be useful, #
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
# GNU General Public License for more details. #
# ==================================================================== #
#' Frequency table
#'
#' Create a frequency table of a vector of data, a single column or a maximum of 9 columns of a data frame. Supports markdown for reports.
#' @param x data
#' @param sort.count Sort on count. Use \code{FALSE} to sort alphabetically on item.
#' @param nmax number of row to print. Use \code{nmax = 0} or \code{nmax = NA} to print all rows.
#' @param na.rm a logical value indicating whether NA values should be removed from the frequency table. The header will always print the amount of\code{NA}s.
#' @param markdown print table in markdown format (this forces \code{nmax = NA})
#' @param toConsole Print table to the console. Use \code{FALSE} to assign the table to an object.
#' @param digits how many significant digits are to be used for numeric values (not for the items themselves, that depends on \code{\link{getOption}("digits")})
#' @param sep a character string to separate the terms when selecting multiple columns
#' @details For numeric values, the next values will be calculated and shown into the header:
#' \itemize{
#' \item{Mean, using \code{\link[base]{mean}}}
#' \item{Standard deviation, using \code{\link[stats]{sd}}}
#' \item{Five numbers of Tukey (min, Q1, median, Q3, max), using \code{\link[stats]{fivenum}}}
#' \item{Outliers (count and list), using \code{\link{boxplot.stats}}}
#' \item{Coefficient of variation (CV), the standard deviation divided by the mean}
#' \item{Coefficient of quartile variation (CQV, sometimes called coefficient of dispersion), calculated as \code{(Q3 - Q1) / (Q3 + Q1)} using \code{\link{quantile}} with \code{type = 6} as quantile algorithm to comply with SPSS standards}
#' }
#' @importFrom stats fivenum sd quantile
#' @importFrom grDevices boxplot.stats
#' @importFrom dplyr %>% select pull n_distinct group_by arrange desc mutate summarise
#' @keywords summary summarise frequency freq
#' @rdname freq
#' @export
#' @examples
#' library(dplyr)
#'
#' freq(septic_patients$hospital_id)
#'
#' septic_patients %>%
#' filter(hospital_id == "A") %>%
#' select(bactid) %>%
#' freq()
#'
#' # select multiple columns; they will be pasted together
#' septic_patients %>%
#' left_join_microorganisms %>%
#' filter(hospital_id == "A") %>%
#' select(genus, species) %>%
#' freq()
#'
#' # save frequency table to an object
#' years <- septic_patients %>%
#' mutate(year = format(date, "%Y")) %>%
#' select(year) %>%
#' freq(toConsole = FALSE)
freq <- function(x,
sort.count = TRUE,
nmax = 15,
na.rm = TRUE,
markdown = FALSE,
toConsole = TRUE,
digits = 2,
sep = " ") {
mult.columns <- 0
if (NROW(x) == 0) {
cat('\nNo observations.\n')
return(invisible())
}
if (!is.null(ncol(x))) {
if (ncol(x) == 1 & any(class(x) == 'data.frame')) {
x <- x %>% pull(1)
} else if (ncol(x) < 10) {
mult.columns <- ncol(x)
colnames(x) <- LETTERS[1:ncol(x)]
if (ncol(x) == 2) {
x$total <- paste(x$A %>% as.character(),
x$B %>% as.character(),
sep = sep)
} else if (ncol(x) == 3) {
x$total <- paste(x$A %>% as.character(),
x$B %>% as.character(),
x$C %>% as.character(),
sep = sep)
} else if (ncol(x) == 4) {
x$total <- paste(x$A %>% as.character(),
x$B %>% as.character(),
x$C %>% as.character(),
x$D %>% as.character(),
sep = sep)
} else if (ncol(x) == 5) {
x$total <- paste(x$A %>% as.character(),
x$B %>% as.character(),
x$C %>% as.character(),
x$D %>% as.character(),
x$E %>% as.character(),
sep = sep)
} else if (ncol(x) == 6) {
x$total <- paste(x$A %>% as.character(),
x$B %>% as.character(),
x$C %>% as.character(),
x$D %>% as.character(),
x$E %>% as.character(),
x$F %>% as.character(),
sep = sep)
} else if (ncol(x) == 7) {
x$total <- paste(x$A %>% as.character(),
x$B %>% as.character(),
x$C %>% as.character(),
x$D %>% as.character(),
x$E %>% as.character(),
x$F %>% as.character(),
x$G %>% as.character(),
sep = sep)
} else if (ncol(x) == 8) {
x$total <- paste(x$A %>% as.character(),
x$B %>% as.character(),
x$C %>% as.character(),
x$D %>% as.character(),
x$E %>% as.character(),
x$F %>% as.character(),
x$G %>% as.character(),
x$H %>% as.character(),
sep = sep)
} else if (ncol(x) == 9) {
x$total <- paste(x$A %>% as.character(),
x$B %>% as.character(),
x$C %>% as.character(),
x$D %>% as.character(),
x$E %>% as.character(),
x$F %>% as.character(),
x$G %>% as.character(),
x$H %>% as.character(),
x$I %>% as.character(),
sep = sep)
}
x <- x$total
} else {
stop('A maximum of 9 columns can be analysed at the same time.', call. = FALSE)
}
}
if (markdown == TRUE & toConsole == FALSE) {
warning('`toConsole = FALSE` will be ignored when `markdown = TRUE`.')
}
if (mult.columns > 1) {
NAs <- x[is.na(x) | x == trimws(strrep('NA ', mult.columns))]
} else {
NAs <- x[is.na(x)]
}
if (na.rm == TRUE) {
x <- x[!x %in% NAs]
}
if (missing(sort.count) & any(class(x) %in% c('double', 'integer', 'numeric', 'raw', 'single', 'factor'))) {
# sort on item/level at default when x is numeric or a factor and sort.count is not set
sort.count <- FALSE
}
header <- character(0)
markdown_line <- ''
if (markdown == TRUE) {
markdown_line <- '\n'
}
x_align <- 'l'
if (mult.columns > 0) {
header <- header %>% paste0(markdown_line, 'Columns: ', mult.columns)
} else {
header <- header %>% paste0(markdown_line, 'Class: ', class(x) %>% rev() %>% paste(collapse = " > "))
}
if (is.list(x) | is.matrix(x) | is.environment(x) | is.function(x)) {
cat(header, "\n")
stop('`freq()` does not support lists, matrices, environments or functions.', call. = FALSE)
}
header <- header %>% paste0(markdown_line, '\nLength: ', (NAs %>% length() + x %>% length()) %>% format(),
' (of which NA: ', NAs %>% length() %>% format(),
' = ', (NAs %>% length() / (NAs %>% length() + x %>% length())) %>% percent(force_zero = TRUE), ')')
header <- header %>% paste0(markdown_line, '\nUnique: ', x %>% n_distinct() %>% format())
header.numbers.done <- FALSE
if (any(class(x) %in% c('double', 'integer', 'numeric', 'raw', 'single'))) {
# right align number
x_align <- 'r'
header <- header %>% paste0('\n')
header <- header %>% paste(markdown_line, '\nMean: ', x %>% base::mean(na.rm = TRUE) %>% format(digits = digits))
header <- header %>% paste0(markdown_line, '\nStd. dev.: ', x %>% stats::sd(na.rm = TRUE) %>% format(digits = digits),
' (CV: ', x %>% cv(na.rm = TRUE) %>% format(digits = digits), ')')
header <- header %>% paste0(markdown_line, '\nFive-Num: ', x %>% stats::fivenum(na.rm = TRUE) %>% format(digits = digits) %>% trimws() %>% paste(collapse = ' | '),
' (CQV: ', x %>% cqv(na.rm = TRUE) %>% format(digits = digits), ')')
outlier_length <- length(boxplot.stats(x)$out)
header <- header %>% paste0(markdown_line, '\nOutliers: ', outlier_length)
if (outlier_length > 0) {
header <- header %>% paste0(' (unique: ', boxplot.stats(x)$out %>% unique() %>% length(), ')')
}
}
formatdates <- "%e %B %Y" # = d mmmm yyyy
if (any(class(x) == 'hms')) {
x <- x %>% as.POSIXlt()
formatdates <- "%H:%M:%S"
}
if (any(class(x) %in% c('Date', 'POSIXct', 'POSIXlt'))) {
header <- header %>% paste0('\n')
mindatum <- x %>% min()
maxdatum <- x %>% max()
header <- header %>% paste0(markdown_line, '\nOldest: ', mindatum %>% format(formatdates) %>% trimws())
header <- header %>% paste0(markdown_line, '\nNewest: ', maxdatum %>% format(formatdates) %>% trimws(),
' (+', difftime(maxdatum, mindatum, units = 'auto') %>% as.double() %>% format(), ')')
}
if (any(class(x) == 'POSIXlt')) {
x <- x %>% format(formatdates)
}
if (toConsole == TRUE) {
cat(header)
}
if (all(is.na(x))) {
cat('\n\nNo observations.\n')
return(invisible())
}
if (n_distinct(x) == length(x)) {
warning('All observations are unique.', call. = FALSE)
}
if (nmax == 0 | is.na(nmax)) {
nmax <- length(x)
}
nmax.1 <- min(length(x), nmax + 1)
# create table with counts and percentages
if (any(class(x) == 'factor')) {
df <- tibble::tibble(Item = x,
Fctlvl = x %>% as.integer()) %>%
group_by(Item, Fctlvl)
column_names <- c('Item', 'Count', 'Percent', 'Cum. Count', 'Cum. Percent', '(Factor Level)')
column_align <- c('l', 'r', 'r', 'r', 'r', 'r')
} else {
df <- tibble::tibble(Item = x) %>%
group_by(Item)
column_names <- c('Item', 'Count', 'Percent', 'Cum. Count', 'Cum. Percent')
column_align <- c(x_align, 'r', 'r', 'r', 'r')
}
df <- df %>%
summarise(Count = n(),
Percent = (n() / length(x)) %>% percent(force_zero = TRUE))
if (df$Item %>% paste(collapse = ',') %like% '\033') {
df <- df %>%
mutate(Item = Item %>%
# remove escape char
# see https://en.wikipedia.org/wiki/Escape_character#ASCII_escape_character
gsub('\033', ' ', ., fixed = TRUE))
}
# sort according to setting
if (sort.count == TRUE) {
df <- df %>% arrange(desc(Count))
} else {
if (any(class(x) == 'factor')) {
df <- df %>% arrange(Fctlvl)
} else {
df <- df %>% arrange(Item)
}
}
# add cumulative values
df$Cum <- cumsum(df$Count)
df$CumTot <- (df$Cum / sum(df$Count, na.rm = TRUE)) %>% percent(force_zero = TRUE)
df$Cum <- df$Cum %>% format()
if (any(class(x) == 'factor')) {
# put factor last
df <- df %>% select(Item, Count, Percent, Cum, CumTot, Fctlvl)
}
if (markdown == TRUE) {
tblformat <- 'markdown'
} else {
tblformat <- 'pandoc'
}
if (toConsole == FALSE) {
# assign to object
df[, 3] <- df[, 2] / sum(df[, 2], na.rm = TRUE)
df[, 4] <- cumsum(df[, 2])
df[, 5] <- df[, 4] / sum(df[, 2], na.rm = TRUE)
return(df)
} else {
# save old NA setting for kable
opt.old <- options()$knitr.kable.NA
options(knitr.kable.NA = "<NA>")
Count.rest <- sum(df[nmax.1:nrow(df), 'Count'], na.rm = TRUE)
if (any(class(x) %in% c('double', 'integer', 'numeric', 'raw', 'single'))) {
df <- df %>% mutate(Item = format(Item))
}
df <- df %>% mutate(Count = format(Count))
if (nrow(df) > nmax.1 & markdown == FALSE) {
df2 <- df[1:nmax,]
print(
knitr::kable(df2,
format = tblformat,
col.names = column_names,
align = column_align,
padding = 1)
)
cat('... and ',
format(nrow(df) - nmax),
' more ',
paste0('(n = ',
format(Count.rest),
'; ',
(Count.rest / length(x)) %>% percent(force_zero = TRUE),
')'),
'. Use `nmax` to show more rows.\n\n', sep = '')
} else {
print(
knitr::kable(df,
format = tblformat,
col.names = column_names,
align = column_align,
padding = 1)
)
}
cat('\n')
# reset old kable setting
options(knitr.kable.NA = opt.old)
return(invisible())
}
}
#' @rdname freq
#' @export
frequency_tbl <- freq

View File

@ -16,16 +16,20 @@
# GNU General Public License for more details. # # GNU General Public License for more details. #
# ==================================================================== # # ==================================================================== #
globalVariables(c('.', globalVariables(c('abname',
'abname',
'bactid', 'bactid',
'cnt', 'cnt',
'Count',
'Cum',
'CumTot',
'date_lab', 'date_lab',
'days_diff', 'days_diff',
'Fctlvl',
'first_isolate_row_index', 'first_isolate_row_index',
'fullname', 'fullname',
'genus', 'genus',
'gramstain', 'gramstain',
'Item',
'key_ab', 'key_ab',
'key_ab_lag', 'key_ab_lag',
'key_ab_other', 'key_ab_other',
@ -34,6 +38,9 @@ globalVariables(c('.',
'n', 'n',
'other_pat_or_mo', 'other_pat_or_mo',
'patient_id', 'patient_id',
'Percent',
'quantile',
'real_first_isolate', 'real_first_isolate',
'species', 'species',
'y')) 'y',
'.'))

203
R/mdro.R Normal file
View File

@ -0,0 +1,203 @@
# ==================================================================== #
# TITLE #
# Antimicrobial Resistance (AMR) Analysis #
# #
# AUTHORS #
# Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl) #
# #
# LICENCE #
# This program is free software; you can redistribute it and/or modify #
# it under the terms of the GNU General Public License version 2.0, #
# as published by the Free Software Foundation. #
# #
# This program is distributed in the hope that it will be useful, #
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
# GNU General Public License for more details. #
# ==================================================================== #
#' Determine multidrug-resistant organisms (MDRO)
#'
#' Determine which isolates are multidrug-resistant organisms (MDRO) according to country-specific guidelines.
#' @param tbl table with antibiotic columns, like e.g. \code{amox} and \code{amcl}
#' @param country country to determine guidelines. Should be a code from the \href{https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2#Officially_assigned_code_elements}{list of ISO 3166-1 alpha-2 country codes}. Case-insensitive. Currently supported are \code{de} (Germany) and \code{nl} (the Netherlands).
#' @param col_bactid column name of the bacteria ID in \code{tbl} - values of this column should be present in \code{microorganisms$bactid}, see \code{\link{microorganisms}}
#' @param info print progress
#' @param aminoglycosides,quinolones,carbapenems character vector with column names of antibiotics
#' @param ceftazidime,piperacillin,trimethoprim_sulfa,penicillin,vancomycin column names of antibiotics
#' @param ... parameters that are passed on to \code{MDR}
#' @return Ordered factor with values \code{Positive}, \code{Unconfirmed}, \code{Negative}.
#' @rdname MDRO
#' @export
MDRO <- function(tbl,
country,
col_bactid = 'bactid',
info = TRUE,
aminoglycosides = c('gent', 'tobr', 'kana'),
quinolones = c('cipr', 'norf'),
carbapenems = c('imip', 'mero', 'erta'),
ceftazidime = 'cfta',
piperacillin = 'pita',
trimethoprim_sulfa = 'trsu',
penicillin = 'peni',
vancomycin = 'vanc') {
# strip whitespaces
country <- trimws(country)
if (length(country) > 1) {
stop('`country` must be a length one character string.', call. = FALSE)
}
if (!country %like% '^[a-z]{2}$') {
stop('This is not a valid ISO 3166-1 alpha-2 country code: "', country, '". Please see ?MDRO.', call. = FALSE)
}
# create list and make country code case-independent
guideline <- list(country = list(code = tolower(country)))
# support per country
if (guideline$country$code == 'de') {
guideline$country$name <- 'Germany'
guideline$name <- ''
guideline$version <- ''
guideline$source <- ''
} else if (guideline$country$code == 'nl') {
guideline$country$name <- 'The Netherlands'
guideline$name <- 'WIP-Richtlijn BRMO'
guideline$version <- 'Revision of December 2017'
guideline$source <- 'https://www.rivm.nl/Documenten_en_publicaties/Professioneel_Praktisch/Richtlijnen/Infectieziekten/WIP_Richtlijnen/WIP_Richtlijnen/Ziekenhuizen/WIP_richtlijn_BRMO_Bijzonder_Resistente_Micro_Organismen_ZKH'
# add here more countries like this:
# } else if (country$code == 'AA') {
# country$name <- 'country name'
} else {
stop('This country code is currently unsupported: ', guideline$country$code, call. = FALSE)
}
# Console colours
# source: http://www.tldp.org/HOWTO/Bash-Prompt-HOWTO/x329.html
ANSI_red <- "\033[31m"
ANSI_blue <- "\033[34m"
ANSI_reset <- "\033[0m"
if (info == TRUE) {
cat("Determining Highly Resistant Microorganisms (MDRO), according to:\n",
"Guideline: ", ANSI_red, guideline$name, ", ", guideline$version, ANSI_reset, "\n",
"Country : ", ANSI_red, guideline$country$name, ANSI_reset, "\n",
"Source : ", ANSI_blue, guideline$source, ANSI_reset, "\n",
"\n", sep = "")
}
# join microorganisms
tbl <- tbl %>% left_join_microorganisms(col_bactid)
tbl$MDRO <- 1
if (guideline$country$code == 'nl') {
# BRMO; Bijzonder Resistente Micro-Organismen
aminoglycosides <- aminoglycosides[aminoglycosides %in% colnames(tbl)]
quinolones <- quinolones[quinolones %in% colnames(tbl)]
carbapenems <- carbapenems[carbapenems %in% colnames(tbl)]
if (!ceftazidime %in% colnames(tbl)) { ceftazidime <- NA }
if (!piperacillin %in% colnames(tbl)) { piperacillin <- NA }
if (!trimethoprim_sulfa %in% colnames(tbl)) { trimethoprim_sulfa <- NA }
if (!penicillin %in% colnames(tbl)) { penicillin <- NA }
if (!vancomycin %in% colnames(tbl)) { vancomycin <- NA }
# Table 1
tbl[which(
tbl$family == 'Enterobacteriaceae'
& rowSums(tbl[, aminoglycosides] == 'R', na.rm = TRUE) >= 1
& rowSums(tbl[, quinolones] == 'R', na.rm = TRUE) >= 1
), 'MDRO'] <- 4
tbl[which(
tbl$family == 'Enterobacteriaceae'
& rowSums(tbl[, carbapenems] == 'R', na.rm = TRUE) >= 1
), 'MDRO'] <- 3
# rest is negative
tbl[which(
tbl$family == 'Enterobacteriaceae'
& tbl$MDRO == 1
), 'MDRO'] <- 2
# Table 2
tbl[which(
tbl$genus == 'Acinetobacter'
& rowSums(tbl[, carbapenems] == 'R', na.rm = TRUE) >= 1
), 'MDRO'] <- 3
tbl[which(
tbl$genus == 'Acinetobacter'
& rowSums(tbl[, aminoglycosides] == 'R', na.rm = TRUE) >= 1
& rowSums(tbl[, quinolones] == 'R', na.rm = TRUE) >= 1
), 'MDRO'] <- 4
# rest of Acinetobacter is negative
tbl[which(
tbl$genus == 'Acinetobacter'
& tbl$MDRO == 1
), 'MDRO'] <- 2
tbl[which(
tbl$fullname %like% 'Stenotrophomonas maltophilia'
& tbl[, trimethoprim_sulfa] == 'R'
), 'MDRO'] <- 4
# rest of Stenotrophomonas is negative
tbl[which(
tbl$fullname %like% 'Stenotrophomonas maltophilia'
& tbl$MDRO == 1
), 'MDRO'] <- 2
tbl[which(
tbl$fullname %like% 'Pseudomonas aeruginosa'
& sum(rowSums(tbl[, carbapenems] == 'R', na.rm = TRUE) >= 1,
rowSums(tbl[, aminoglycosides] == 'R', na.rm = TRUE) >= 1,
rowSums(tbl[, quinolones] == 'R', na.rm = TRUE) >= 1,
tbl[, ceftazidime] == 'R',
tbl[, piperacillin] == 'R') >= 3
), 'MDRO'] <- 4
# rest of Pseudomonas is negative
tbl[which(
tbl$fullname %like% 'Pseudomonas aeruginosa'
& tbl$MDRO == 1
), 'MDRO'] <- 2
# Table 3
tbl[which(
tbl$fullname %like% 'Streptococcus pneumoniae'
& tbl[, penicillin] == 'R'
), 'MDRO'] <- 4
tbl[which(
tbl$fullname %like% 'Streptococcus pneumoniae'
& tbl[, vancomycin] == 'R'
), 'MDRO'] <- 4
# rest of Streptococcus pneumoniae is negative
tbl[which(
tbl$fullname %like% 'Streptococcus pneumoniae'
& tbl$MDRO == 1
), 'MDRO'] <- 2
tbl[which(
tbl$fullname %like% 'Enterococcus faecium'
& rowSums(tbl[, c(penicillin, vancomycin)] == 'R', na.rm = TRUE) >= 1
), 'MDRO'] <- 4
# rest of Enterococcus faecium is negative
tbl[which(
tbl$fullname %like% 'Enterococcus faecium'
& tbl$MDRO == 1
), 'MDRO'] <- 2
}
factor(x = tbl$MDRO,
levels = c(1:4),
labels = c('Unknown', 'Negative', 'Unconfirmed', 'Positive'),
ordered = TRUE)
}
#' @rdname MDRO
#' @export
BRMO <- function(tbl, country = "nl", ...) {
MDRO(tbl = tbl, country = country, ...)
}
#' @rdname MDRO
#' @export
MRGN <- function(tbl, country = "de", ...) {
MDRO(tbl = tbl, country = country, ...)
}

View File

@ -16,18 +16,98 @@
# GNU General Public License for more details. # # GNU General Public License for more details. #
# ==================================================================== # # ==================================================================== #
# No export, no Rd #' Pattern Matching
"%like%" <- function(vector, pattern) { #'
# Source: https://github.com/Rdatatable/data.table/blob/master/R/like.R #' Convenience function to compare a vector with a pattern, like \code{\link[base]{grep}}. It always returns a \code{logical} vector and is always case-insensitive.
# But made it case insensitive #' @inheritParams base::grep
if (is.factor(vector)) { #' @return A \code{logical} vector
as.integer(vector) %in% grep(pattern, levels(vector), ignore.case = TRUE) #' @name like
#' @rdname like
#' @export
#' @source Inherited from the \href{https://github.com/Rdatatable/data.table/blob/master/R/like.R}{\code{like} function from the \code{data.table} package}, but made it case insensitive at default.
#' @examples
#' library(dplyr)
#' # get unique occurences of bacteria whose name start with 'Ent'
#' septic_patients %>%
#' left_join_microorganisms() %>%
#' filter(fullname %like% '^Ent') %>%
#' pull(fullname) %>%
#' unique()
"%like%" <- function(x, pattern) {
if (length(pattern) > 1) {
pattern <- pattern[1]
warning('only the first element of argument `pattern` used for `%like%`', call. = FALSE)
}
if (is.factor(x)) {
as.integer(x) %in% base::grep(pattern, levels(x), ignore.case = TRUE)
} else { } else {
grepl(pattern, vector, ignore.case = TRUE) base::grepl(pattern, x, ignore.case = TRUE)
} }
} }
# No export, no Rd # No export, no Rd
percent <- function(x, round = 1, ...) { percent <- function(x, round = 1, force_zero = FALSE, ...) {
base::paste0(base::round(x * 100, digits = round), "%") val <- base::round(x * 100, digits = round)
if (force_zero & any(val == as.integer(val))) {
val[val == as.integer(val)] <- paste0(val[val == as.integer(val)], ".", base::strrep(0, round))
}
base::paste0(val, "%")
}
check_available_columns <- function(tbl, col.list, info = TRUE) {
# check columns
col.list <- col.list[!is.na(col.list)]
names(col.list) <- col.list
col.list.bak <- col.list
# are they available as upper case or lower case then?
for (i in 1:length(col.list)) {
if (toupper(col.list[i]) %in% colnames(tbl)) {
col.list[i] <- toupper(col.list[i])
} else if (tolower(col.list[i]) %in% colnames(tbl)) {
col.list[i] <- tolower(col.list[i])
} else if (!col.list[i] %in% colnames(tbl)) {
col.list[i] <- NA
}
}
if (!all(col.list %in% colnames(tbl))) {
if (info == TRUE) {
warning('These columns do not exist and will be ignored: ',
col.list.bak[!(col.list %in% colnames(tbl))] %>% toString(),
immediate. = TRUE,
call. = FALSE)
}
}
col.list
}
# Coefficient of variation (CV)
cv <- function(x, na.rm = TRUE) {
cv.x <- sd(x, na.rm = na.rm) / abs(mean(x, na.rm = na.rm))
cv.x
}
# Coefficient of dispersion, or coefficient of quartile variation (CQV).
# (Bonett et al., 2006: Confidence interval for a coefficient of quartile variation).
cqv <- function(x, na.rm = TRUE) {
cqv.x <-
(quantile(x, 0.75, na.rm = na.rm, type = 6) - quantile(x, 0.25, na.rm = na.rm, type = 6)) /
(quantile(x, 0.75, na.rm = na.rm, type = 6) + quantile(x, 0.25, na.rm = na.rm, type = 6))
unname(cqv.x)
}
# show bytes as kB/MB/GB
# size_humanreadable(123456) # 121 kB
# size_humanreadable(12345678) # 11.8 MB
size_humanreadable <- function(bytes, decimals = 1) {
bytes <- bytes %>% as.double()
# Adapted from:
# http://jeffreysambells.com/2012/10/25/human-readable-filesize-php
size <- c('B','kB','MB','GB','TB','PB','EB','ZB','YB')
factor <- floor((nchar(bytes) - 1) / 3)
# added slight improvement; no decimals for B and kB:
decimals <- rep(decimals, length(bytes))
decimals[size[factor + 1] %in% c('B', 'kB')] <- 0
out <- paste(sprintf(paste0("%.", decimals, "f"), bytes / (1024 ^ factor)), size[factor + 1])
out
} }

349
R/print.R Normal file
View File

@ -0,0 +1,349 @@
# ==================================================================== #
# TITLE #
# Antimicrobial Resistance (AMR) Analysis #
# #
# AUTHORS #
# Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl) #
# #
# LICENCE #
# This program is free software; you can redistribute it and/or modify #
# it under the terms of the GNU General Public License version 2.0, #
# as published by the Free Software Foundation. #
# #
# This program is distributed in the hope that it will be useful, #
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
# GNU General Public License for more details. #
# ==================================================================== #
#' Printing Data Tables and Tibbles
#'
#' Print a data table or tibble. It prints: \cr- The \strong{first and last rows} like \code{data.table}s are printed by the \code{data.table} package,\cr- A \strong{header} and \strong{left aligned text} like \code{tibble}s are printed by the \code{tibble} package with info about grouped variables,\cr- \strong{Unchanged values} and \strong{support for row names} like \code{data.frame}s are printed by the \code{base} package.
#' @inheritParams base::print.data.frame
#' @param nmax amount of rows to print in total. When the total amount of rows exceeds this limit, the first and last \code{nmax / 2} rows will be printed. Use \code{nmax = NA} to print all rows.
#' @param header print header with information about data size and tibble grouping
#' @param print.keys print keys for \code{data.table}
#' @param na value to print instead of NA
#' @param width amount of white spaces to keep between columns, must be at least 1
#' @rdname print
#' @name print
#' @importFrom dplyr %>% n_groups group_vars group_size filter pull select
#' @exportMethod print.tbl_df
#' @export
#' @examples
#' # still showing all values unchanged:
#' library(dplyr)
#' starwars
#' print(starwars, width = 3)
#'
#' # compare the significance notation of this
#' starwars %>% select(birth_year)
#' # with this
#' tibble:::print.tbl_df(starwars %>% select(birth_year))
#'
#' # supports info about groups (look at header)
#' starwars %>% group_by(homeworld, gender)
print.tbl_df <- function(x,
nmax = 10,
header = TRUE,
row.names = TRUE,
right = FALSE,
width = 1,
na = "<NA>",
...) {
prettyprint_df(x = x,
nmax = nmax,
header = header,
row.names = row.names,
print.keys = FALSE,
right = right,
width = width,
na = na,
...)
}
#' @rdname print
#' @exportMethod print.tbl
#' @export
print.tbl <- function(x, ...) {
prettyprint_df(x, ...)
}
#' @rdname print
#' @exportMethod print.data.table
#' @export
print.data.table <- function(x,
nmax = 10,
header = TRUE,
row.names = TRUE,
print.keys = FALSE,
right = FALSE,
width = 1,
na = "<NA>",
...) {
prettyprint_df(x = x,
nmax = nmax,
header = header,
row.names = row.names,
print.keys = print.keys,
right = right,
width = width,
na = na,
...)
}
printDT <- data.table:::print.data.table
prettyprint_df <- function(x,
nmax = 10,
header = TRUE,
row.names = TRUE,
print.keys = FALSE,
right = FALSE,
width = 1,
na = "<NA>",
...) {
ansi_reset <- "\u001B[0m"
ansi_black <- "\u001B[30m"
ansi_red <- "\u001B[31m"
ansi_green <- "\u001B[32m"
ansi_yellow <- "\u001B[33m"
ansi_blue <- "\u001B[34m"
ansi_purple <- "\u001B[35m"
ansi_cyan <- "\u001B[36m"
ansi_white <- "\u001B[37m"
ansi_gray <- "\u001B[38;5;246m"
if (width < 1) {
stop('`width` must be at least 1.', call. = FALSE)
}
if (is.na(nmax)) {
nmax <- NROW(x)
}
n <- nmax
if (n %% 2 == 1) {
# odd number; add 1
n <- n + 1
}
width <- width - 1
if ('tbl_df' %in% class(x)) {
type <- 'tibble'
} else if ('data.table' %in% class(x)) {
type <- 'data.table'
} else {
type <- 'data.frame'
}
if (header == TRUE) {
if (NCOL(x) == 1) {
vars <- 'variable'
} else {
vars <- 'variables'
}
size <- object.size(x) %>% as.double() %>% size_humanreadable()
cat(paste0("A ", type,": ",
format(NROW(x)),
" obs. of ",
format(NCOL(x)),
" ", vars,
ansi_gray, " (", size, ")\n", ansi_reset))
if ('grouped_df' %in% class(x) & n_groups(x) > 0) {
cat(paste0("Grouped by ",
x %>% group_vars() %>% paste0(ansi_red, ., ansi_reset) %>% paste0(collapse = " and "),
ansi_gray,
" (",
x %>% n_groups(),
" groups with sizes between ",
x %>% group_size() %>% min(),
" and ",
x %>% group_size() %>% max(),
")\n",
ansi_reset))
}
if (!is.null(attributes(x)$qry)) {
cat(paste0(ansi_gray, "This data contains a query. Use qry() to view it.\n", ansi_reset))
}
cat("\n")
}
# data.table where keys should be printed
if (print.keys == TRUE) {
printDT(x,
class = header,
row.names = row.names,
print.keys = TRUE,
right = right,
...
)
return(invisible())
}
# tibbles give warning when setting column names
x <- x %>% base::as.data.frame(stringsAsFactors = FALSE)
# extra space of 3 chars, right to row name or number
if (NROW(x) > 0) {
maxrowchars <- rownames(x) %>% nchar() %>% max() + 3
rownames(x) <- paste0(rownames(x), strrep(" ", maxrowchars - nchar(rownames(x))))
} else {
maxrowchars <- 0
}
x.bak <- x
if (n + 1 < nrow(x)) {
# remove in between part, 1 extra for ~~~~ between first and last part
rows_list <- c(1:(n / 2 + 1), (nrow(x) - (n / 2) + 1):nrow(x))
x <- x %>% filter(row_number() %in% rows_list)
rownames(x) <- rownames(x.bak)[rows_list]
# set inbetweener between parts
rownames(x)[n / 2 + 1] <- strrep("~", maxrowchars)
}
if (header == TRUE) {
# add 1 row for classes
# class will be marked up per column
if (NROW(x.bak) > 0) {
rownames.x <- rownames(x)
x <- x %>% filter(row_number() == 1) %>% rbind(x, stringsAsFactors = FALSE)
rownames(x) <- c('*', rownames.x)
}
# select 1st class per column and abbreviate
classes <- x.bak %>%
sapply(class) %>%
lapply(
function(c) {
# do print all POSIX classes like "POSct/t"
if ('POSIXct' %in% c) {
paste0('POS',
c %>%
gsub('POSIX', '', .) %>%
paste0(collapse = '/'))
} else {
c[[1]]
}
}) %>%
unlist() %>%
gsub("character", "chr", ., fixed = TRUE) %>%
gsub("complex", "cplx", ., fixed = TRUE) %>%
gsub("Date", "Date", ., fixed = TRUE) %>%
gsub("double", "dbl", ., fixed = TRUE) %>%
gsub("expression", "expr", ., fixed = TRUE) %>%
gsub("factor", "fct", ., fixed = TRUE) %>%
gsub("IDate", "IDat", ., fixed = TRUE) %>%
gsub("integer", "int", ., fixed = TRUE) %>%
gsub("integer64", "i64", ., fixed = TRUE) %>%
gsub("list", "list", ., fixed = TRUE) %>%
gsub("logical", "lgl", ., fixed = TRUE) %>%
gsub("numeric", "dbl", ., fixed = TRUE) %>%
gsub("ordered", "ord", ., fixed = TRUE) %>%
gsub("percent", "pct", ., fixed = TRUE) %>%
gsub("single", "sgl", ., fixed = TRUE) %>%
paste0("<", ., ">")
}
# markup cols
for (i in 1:ncol(x)) {
if (all(!class(x[, i]) %in% class(x.bak[, i]))) {
class(x[, i]) <- class(x.bak[, i])
}
try(x[, i] <- format(x %>% pull(i)), silent = TRUE)
# replace NAs
if (nchar(na) < 2) {
# make as long as the text "NA"
na <- paste0(na, strrep(" ", 2 - nchar(na)))
}
try(x[, i] <- gsub("^NA$", na, trimws(x[, i], 'both')), silent = TRUE)
# place class into 1st row
if (header == TRUE) {
x[1, i] <- classes[i]
}
# dashes between two parts when exceeding nmax
maxvalchars <- max(colnames(x)[i] %>% nchar(), x[, i] %>% nchar() %>% max())
if (n + 1 < nrow(x.bak)) {
x[n / 2 + if_else(header == TRUE, 2, 1), i] <- strrep("~", maxvalchars)
}
# align according to `right` parameter, but only factors and text, but not MICs
if (any(x.bak %>% pull(i) %>% class() %in% c('factor', 'character'))
& !("mic" %in% (x.bak %>% pull(i) %>% class()))) {
vals <- x %>% pull(i) %>% trimws('both')
colname <- colnames(x)[i] %>% trimws('both')
if (right == FALSE) {
vals <- paste0(vals, strrep(" ", maxvalchars - nchar(vals)))
colname <- paste0(colname, strrep(" ", maxvalchars - nchar(colname)))
} else {
vals <- paste0(strrep(" ", maxvalchars - nchar(vals)), vals)
colname <- paste0(strrep(" ", maxvalchars - nchar(colname)), colname)
}
x[, i] <- vals
colnames(x)[i] <- colname
}
# add left padding according to `width` parameter
# but not in 1st col when row names are off
if (row.names == TRUE | i > 1) {
x[, i] <- paste0(strrep(" ", width), x[, i])
colnames(x)[i] <- paste0(strrep(" ", width), colnames(x)[i])
}
# strip columns that do not fit (3 chars as margin)
width_console <- options()$width
width_until_col <- x %>%
select(1:i) %>%
apply(1, paste, collapse = strrep(" ", width + 1)) %>%
nchar() %>%
max()
width_until_col_before <- x %>%
select(1:(max(i, 2) - 1)) %>%
apply(1, paste, collapse = strrep(" ", width + 1)) %>%
nchar() %>%
max()
extraspace <- maxrowchars + nchar(rownames(x)[length(rownames(x))])
width_until_colnames <- colnames(x)[1:i] %>% paste0(collapse = strrep(" ", width + 1)) %>% nchar() + extraspace
width_until_colnames_before <- colnames(x)[1:(max(i, 2) - 1)] %>% paste0(collapse = strrep(" ", width + 1)) %>% nchar() + extraspace
if (i > 1 &
(width_until_col > width_console
| width_until_colnames > width_console)) {
if (width_until_col_before > width_console
| width_until_colnames_before > width_console) {
x <- x[, 1:(i - 2)]
} else {
x <- x[, 1:(i - 1)]
}
break
}
}
# empty table, row name of header should be "*"
if (NROW(x.bak) == 0) {
rownames(x) <- '* '
}
# and here it is...
suppressWarnings(
base::print.data.frame(x, row.names = row.names, ...)
)
# print rest of col names when they were stripped
if (ncol(x) < ncol(x.bak)) {
x.notshown <- x.bak %>% select((ncol(x) + 1):ncol(x.bak))
if (ncol(x.notshown) == 1) {
cat('...and 1 more column: ')
} else {
cat('...and', ncol(x.notshown), 'more columns: ')
}
cat(x.notshown %>%
colnames() %>%
paste0(' ', ansi_gray, classes[(ncol(x) + 1):ncol(x.bak)], ansi_reset) %>%
paste0(collapse = ", "), '\n')
}
}

View File

@ -90,6 +90,104 @@ after
# 5 PSEAER R R - - R # 5 PSEAER R R - - R
``` ```
### Frequency tables
Base R lacks a simple function to create frequency tables. We created such a function that works with almost all data types: `freq()` (or `frequency_tbl()`).
```r
## Factors sort on item by default:
freq(septic_patients$hospital_id)
# Class: factor
# Length: 2000 (of which NA: 0 = 0.0%)
# Unique: 5
#
# Item Count Percent Cum. Count Cum. Percent (Factor Level)
# ----- ------ -------- ----------- ------------- ---------------
# A 233 11.7% 233 11.7% 1
# B 583 29.1% 816 40.8% 2
# C 221 11.1% 1037 51.8% 3
# D 650 32.5% 1687 84.4% 4
# E 313 15.7% 2000 100.0% 5
## This can be changed with the `sort.count` parameter:
freq(septic_patients$hospital_id, sort.count = TRUE)
# Class: factor
# Length: 2000 (of which NA: 0 = 0.0%)
# Unique: 5
#
# Item Count Percent Cum. Count Cum. Percent (Factor Level)
# ----- ------ -------- ----------- ------------- ---------------
# D 650 32.5% 650 32.5% 4
# B 583 29.1% 1233 61.7% 2
# E 313 15.7% 1546 77.3% 5
# A 233 11.7% 1779 88.9% 1
# C 221 11.1% 2000 100.0% 3
## Other types, like numbers or dates, sort on count by default:
> freq(septic_patients$date)
# Class: Date
# Length: 2000 (of which NA: 0 = 0.0%)
# Unique: 1662
#
# Oldest: 2 January 2001
# Newest: 18 October 2017 (+6133)
#
# Item Count Percent Cum. Count Cum. Percent
# ----------- ------ -------- ----------- -------------
# 2008-12-24 5 0.2% 5 0.2%
# 2010-12-10 4 0.2% 9 0.4%
# 2011-03-03 4 0.2% 13 0.6%
# 2013-06-24 4 0.2% 17 0.8%
# 2017-09-01 4 0.2% 21 1.1%
# 2002-09-02 3 0.2% 24 1.2%
# 2003-10-14 3 0.2% 27 1.4%
# 2004-06-25 3 0.2% 30 1.5%
# 2004-06-27 3 0.2% 33 1.7%
# 2004-10-29 3 0.2% 36 1.8%
# 2005-09-27 3 0.2% 39 2.0%
# 2006-08-01 3 0.2% 42 2.1%
# 2006-10-10 3 0.2% 45 2.2%
# 2007-11-16 3 0.2% 48 2.4%
# 2008-03-09 3 0.2% 51 2.5%
# ... and 1647 more (n = 1949; 97.5%). Use `nmax` to show more rows.
## For numeric values, some extra descriptive statistics will be calculated:
> freq(runif(n = 10, min = 1, max = 5))
# Class: numeric
# Length: 10 (of which NA: 0 = 0.0%)
# Unique: 10
#
# Mean: 3
# Std. dev.: 0.93 (CV: 0.31)
# Five-Num: 1.1 | 2.3 | 3.1 | 3.8 | 4.0 (CQV: 0.25)
# Outliers: 0
#
# Item Count Percent Cum. Count Cum. Percent
# --------- ------ -------- ----------- -------------
# 1.132033 1 10.0% 1 10.0%
# 2.226903 1 10.0% 2 20.0%
# 2.280779 1 10.0% 3 30.0%
# 2.640898 1 10.0% 4 40.0%
# 2.913462 1 10.0% 5 50.0%
# 3.364201 1 10.0% 6 60.0%
# 3.771975 1 10.0% 7 70.0%
# 3.802861 1 10.0% 8 80.0%
# 3.803547 1 10.0% 9 90.0%
# 3.985691 1 10.0% 10 100.0%
#
# Warning message:
# All observations are unique.
```
Learn more about this function with:
```r
?freq
```
### New classes ### New classes
This package contains two new S3 classes: `mic` for MIC values (e.g. from Vitek or Phoenix) and `rsi` for antimicrobial drug interpretations (i.e. S, I and R). Both are actually ordered factors under the hood (an MIC of `2` being higher than `<=1` but lower than `>=32`, and for class `rsi` factors are ordered as `S < I < R`). This package contains two new S3 classes: `mic` for MIC values (e.g. from Vitek or Phoenix) and `rsi` for antimicrobial drug interpretations (i.e. S, I and R). Both are actually ordered factors under the hood (an MIC of `2` being higher than `<=1` but lower than `>=32`, and for class `rsi` factors are ordered as `S < I < R`).
Both classes have extensions for existing generic functions like `print`, `summary` and `plot`. Both classes have extensions for existing generic functions like `print`, `summary` and `plot`.

View File

@ -1,5 +1,5 @@
% Generated by roxygen2: do not edit by hand % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/EUCAST.R % Please edit documentation in R/eucast.R
\name{EUCAST_rules} \name{EUCAST_rules}
\alias{EUCAST_rules} \alias{EUCAST_rules}
\alias{interpretive_reading} \alias{interpretive_reading}

39
man/MDRO.Rd Normal file
View File

@ -0,0 +1,39 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/mdro.R
\name{MDRO}
\alias{MDRO}
\alias{BRMO}
\alias{MRGN}
\title{Determine multidrug-resistant organisms (MDRO)}
\usage{
MDRO(tbl, country, col_bactid = "bactid", info = TRUE,
aminoglycosides = c("gent", "tobr", "kana"), quinolones = c("cipr",
"norf"), carbapenems = c("imip", "mero", "erta"), ceftazidime = "cfta",
piperacillin = "pita", trimethoprim_sulfa = "trsu", penicillin = "peni",
vancomycin = "vanc")
BRMO(tbl, country = "nl", ...)
MRGN(tbl, country = "de", ...)
}
\arguments{
\item{tbl}{table with antibiotic columns, like e.g. \code{amox} and \code{amcl}}
\item{country}{country to determine guidelines. Should be a code from the \href{https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2#Officially_assigned_code_elements}{list of ISO 3166-1 alpha-2 country codes}. Case-insensitive. Currently supported are \code{de} (Germany) and \code{nl} (the Netherlands).}
\item{col_bactid}{column name of the bacteria ID in \code{tbl} - values of this column should be present in \code{microorganisms$bactid}, see \code{\link{microorganisms}}}
\item{info}{print progress}
\item{aminoglycosides, quinolones, carbapenems}{character vector with column names of antibiotics}
\item{ceftazidime, piperacillin, trimethoprim_sulfa, penicillin, vancomycin}{column names of antibiotics}
\item{...}{parameters that are passed on to \code{MDR}}
}
\value{
Ordered factor with values \code{Positive}, \code{Unconfirmed}, \code{Negative}.
}
\description{
Determine which isolates are multidrug-resistant organisms (MDRO) according to country-specific guidelines.
}

71
man/freq.Rd Normal file
View File

@ -0,0 +1,71 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/freq.R
\name{freq}
\alias{freq}
\alias{frequency_tbl}
\title{Frequency table}
\usage{
freq(x, sort.count = TRUE, nmax = 15, na.rm = TRUE, markdown = FALSE,
toConsole = TRUE, digits = 2, sep = " ")
frequency_tbl(x, sort.count = TRUE, nmax = 15, na.rm = TRUE,
markdown = FALSE, toConsole = TRUE, digits = 2, sep = " ")
}
\arguments{
\item{x}{data}
\item{sort.count}{Sort on count. Use \code{FALSE} to sort alphabetically on item.}
\item{nmax}{number of row to print. Use \code{nmax = 0} or \code{nmax = NA} to print all rows.}
\item{na.rm}{a logical value indicating whether NA values should be removed from the frequency table. The header will always print the amount of\code{NA}s.}
\item{markdown}{print table in markdown format (this forces \code{nmax = NA})}
\item{toConsole}{Print table to the console. Use \code{FALSE} to assign the table to an object.}
\item{digits}{how many significant digits are to be used for numeric values (not for the items themselves, that depends on \code{\link{getOption}("digits")})}
\item{sep}{a character string to separate the terms when selecting multiple columns}
}
\description{
Create a frequency table of a vector of data, a single column or a maximum of 9 columns of a data frame. Supports markdown for reports.
}
\details{
For numeric values, the next values will be calculated and shown into the header:
\itemize{
\item{Mean, using \code{\link[base]{mean}}}
\item{Standard deviation, using \code{\link[stats]{sd}}}
\item{Five numbers of Tukey (min, Q1, median, Q3, max), using \code{\link[stats]{fivenum}}}
\item{Outliers (count and list), using \code{\link{boxplot.stats}}}
\item{Coefficient of variation (CV), the standard deviation divided by the mean}
\item{Coefficient of quartile variation (CQV, sometimes called coefficient of dispersion), calculated as \code{(Q3 - Q1) / (Q3 + Q1)} using \code{\link{quantile}} with \code{type = 6} as quantile algorithm to comply with SPSS standards}
}
}
\examples{
library(dplyr)
freq(septic_patients$hospital_id)
septic_patients \%>\%
filter(hospital_id == "A") \%>\%
select(bactid) \%>\%
freq()
# select multiple columns; they will be pasted together
septic_patients \%>\%
left_join_microorganisms \%>\%
filter(hospital_id == "A") \%>\%
select(genus, species) \%>\%
freq()
# save frequency table to an object
years <- septic_patients \%>\%
mutate(year = format(date, "\%Y")) \%>\%
select(year) \%>\%
freq(toConsole = FALSE)
}
\keyword{freq}
\keyword{frequency}
\keyword{summarise}
\keyword{summary}

40
man/like.Rd Normal file
View File

@ -0,0 +1,40 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/misc.R
\name{like}
\alias{like}
\alias{\%like\%}
\title{Pattern Matching}
\source{
Inherited from the \href{https://github.com/Rdatatable/data.table/blob/master/R/like.R}{\code{like} function from the \code{data.table} package}, but made it case insensitive at default.
}
\usage{
x \%like\% pattern
}
\arguments{
\item{x}{a character vector where matches are sought, or an
object which can be coerced by \code{as.character} to a character
vector. \link{Long vectors} are supported.}
\item{pattern}{character string containing a \link{regular expression}
(or character string for \code{fixed = TRUE}) to be matched
in the given character vector. Coerced by
\code{\link{as.character}} to a character string if possible. If a
character vector of length 2 or more is supplied, the first element
is used with a warning. Missing values are allowed except for
\code{regexpr} and \code{gregexpr}.}
}
\value{
A \code{logical} vector
}
\description{
Convenience function to compare a vector with a pattern, like \code{\link[base]{grep}}. It always returns a \code{logical} vector and is always case-insensitive.
}
\examples{
library(dplyr)
# get unique occurences of bacteria whose name start with 'Ent'
septic_patients \%>\%
left_join_microorganisms() \%>\%
filter(fullname \%like\% '^Ent') \%>\%
pull(fullname) \%>\%
unique()
}

View File

@ -1,5 +1,5 @@
% Generated by roxygen2: do not edit by hand % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/EUCAST.R % Please edit documentation in R/eucast.R
\name{mo_property} \name{mo_property}
\alias{mo_property} \alias{mo_property}
\title{Poperties of a microorganism} \title{Poperties of a microorganism}

55
man/print.Rd Normal file
View File

@ -0,0 +1,55 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/print.R
\name{print}
\alias{print}
\alias{print.tbl_df}
\alias{print.tbl}
\alias{print.data.table}
\title{Printing Data Tables and Tibbles}
\usage{
\method{print}{tbl_df}(x, nmax = 10, header = TRUE, row.names = TRUE,
right = FALSE, width = 1, na = "<NA>", ...)
\method{print}{tbl}(x, ...)
\method{print}{data.table}(x, nmax = 10, header = TRUE, row.names = TRUE,
print.keys = FALSE, right = FALSE, width = 1, na = "<NA>", ...)
}
\arguments{
\item{x}{object of class \code{data.frame}.}
\item{nmax}{amount of rows to print in total. When the total amount of rows exceeds this limit, the first and last \code{nmax / 2} rows will be printed. Use \code{nmax = NA} to print all rows.}
\item{header}{print header with information about data size and tibble grouping}
\item{row.names}{logical (or character vector), indicating whether (or
what) row names should be printed.}
\item{right}{logical, indicating whether or not strings should be
right-aligned. The default is right-alignment.}
\item{width}{amount of white spaces to keep between columns, must be at least 1}
\item{na}{value to print instead of NA}
\item{...}{optional arguments to \code{print} or \code{plot} methods.}
\item{print.keys}{print keys for \code{data.table}}
}
\description{
Print a data table or tibble. It prints: \cr- The \strong{first and last rows} like \code{data.table}s are printed by the \code{data.table} package,\cr- A \strong{header} and \strong{left aligned text} like \code{tibble}s are printed by the \code{tibble} package with info about grouped variables,\cr- \strong{Unchanged values} and \strong{support for row names} like \code{data.frame}s are printed by the \code{base} package.
}
\examples{
# still showing all values unchanged:
library(dplyr)
starwars
print(starwars, width = 3)
# compare the significance notation of this
starwars \%>\% select(birth_year)
# with this
tibble:::print.tbl_df(starwars \%>\% select(birth_year))
# supports info about groups (look at header)
starwars \%>\% group_by(homeworld, gender)
}

View File

@ -1,4 +1,4 @@
context("EUCAST.R") context("eucast.R")
test_that("EUCAST rules work", { test_that("EUCAST rules work", {
a <- data.frame(bactid = c("KLEPNE", # Klebsiella pneumoniae a <- data.frame(bactid = c("KLEPNE", # Klebsiella pneumoniae

View File

@ -0,0 +1,26 @@
context("freq.R")
test_that("frequency table works", {
expect_equal(nrow(freq(c(1, 1, 2, 2, 3, 3, 4, 4, 5, 5), toConsole = FALSE)), 5)
expect_equal(nrow(frequency_tbl(c(1, 1, 2, 2, 3, 3, 4, 4, 5, 5), toConsole = FALSE)), 5)
# date column of septic_patients should contain 1662 unique dates
expect_equal(nrow(freq(septic_patients$date, toConsole = FALSE)), 1662)
expect_equal(nrow(freq(septic_patients$date, toConsole = FALSE)),
length(unique(septic_patients$date)))
expect_output(freq(septic_patients$age))
expect_output(freq(septic_patients$date))
expect_output(freq(septic_patients$hospital_id))
library(dplyr)
expect_output(septic_patients %>% select(1:2) %>% freq())
expect_output(septic_patients %>% select(1:3) %>% freq())
expect_output(septic_patients %>% select(1:4) %>% freq())
expect_output(septic_patients %>% select(1:5) %>% freq())
expect_output(septic_patients %>% select(1:6) %>% freq())
expect_output(septic_patients %>% select(1:7) %>% freq())
expect_output(septic_patients %>% select(1:8) %>% freq())
expect_output(septic_patients %>% select(1:9) %>% freq())
})

View File

@ -0,0 +1,16 @@
context("mdro.R")
test_that("MDRO works", {
library(dplyr)
outcome <- MDRO(septic_patients, "nl", info = FALSE)
# check class
expect_equal(outcome %>% class(), c('ordered', 'factor'))
# septic_patients should have these finding using Dutch guidelines
expect_equal(outcome %>% freq(toConsole = FALSE) %>% pull(Count), c(1152, 824, 3, 21))
expect_equal(BRMO(septic_patients, info = FALSE), MDRO(septic_patients, "nl", info = FALSE))
})

View File

@ -10,5 +10,7 @@ test_that("`like` works", {
test_that("percentages works", { test_that("percentages works", {
expect_equal(percent(0.25), "25%") expect_equal(percent(0.25), "25%")
expect_equal(percent(0.5), "50%") expect_equal(percent(0.5), "50%")
expect_equal(percent(0.500, force_zero = TRUE), "50.0%")
expect_equal(percent(0.1234), "12.3%") expect_equal(percent(0.1234), "12.3%")
}) })

View File

@ -0,0 +1,8 @@
context("print.R")
test_that("tibble printing works", {
library(dplyr)
expect_output(print(starwars))
expect_output(print(septic_patients))
})