diff --git a/DESCRIPTION b/DESCRIPTION index 970d1917..fbe4ac27 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR -Version: 0.1.2 -Date: 2018-04-02 +Version: 0.2.0 +Date: 2018-04-18 Title: Antimicrobial Resistance Analysis Authors@R: c( person( @@ -28,9 +28,12 @@ Depends: R (>= 3.2.0) Imports: dplyr (>= 0.7.0), + data.table (>= 1.10.0), reshape2 (>= 1.4.0), xml2 (>= 1.0.0), - rvest (>= 0.3.2) + knitr (>= 1.0.0), + rvest (>= 0.3.2), + tibble Suggests: testthat (>= 1.0.2), covr (>= 3.0.1) diff --git a/NAMESPACE b/NAMESPACE index 17e409f2..3b36d900 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,11 +7,18 @@ S3method(barplot,mic) S3method(barplot,rsi) S3method(plot,mic) S3method(plot,rsi) +S3method(print,data.table) S3method(print,mic) S3method(print,rsi) +S3method(print,tbl) +S3method(print,tbl_df) S3method(summary,mic) S3method(summary,rsi) +export("%like%") +export(BRMO) export(EUCAST_rules) +export(MDRO) +export(MRGN) export(abname) export(anti_join_microorganisms) export(as.mic) @@ -20,6 +27,8 @@ export(atc_property) export(clipboard_export) export(clipboard_import) export(first_isolate) +export(freq) +export(frequency_tbl) export(full_join_microorganisms) export(guess_bactid) export(inner_join_microorganisms) @@ -41,8 +50,11 @@ exportMethods(barplot.mic) exportMethods(barplot.rsi) exportMethods(plot.mic) exportMethods(plot.rsi) +exportMethods(print.data.table) exportMethods(print.mic) exportMethods(print.rsi) +exportMethods(print.tbl) +exportMethods(print.tbl_df) exportMethods(summary.mic) exportMethods(summary.rsi) importFrom(dplyr,"%>%") @@ -52,15 +64,19 @@ importFrom(dplyr,arrange) importFrom(dplyr,arrange_at) importFrom(dplyr,as_tibble) importFrom(dplyr,between) +importFrom(dplyr,desc) importFrom(dplyr,filter) importFrom(dplyr,filter_at) importFrom(dplyr,group_by) importFrom(dplyr,group_by_at) +importFrom(dplyr,group_size) +importFrom(dplyr,group_vars) importFrom(dplyr,if_else) importFrom(dplyr,lag) importFrom(dplyr,left_join) importFrom(dplyr,mutate) importFrom(dplyr,n_distinct) +importFrom(dplyr,n_groups) importFrom(dplyr,progress_estimated) importFrom(dplyr,pull) importFrom(dplyr,row_number) @@ -69,6 +85,7 @@ importFrom(dplyr,slice) importFrom(dplyr,summarise) importFrom(dplyr,tibble) importFrom(dplyr,vars) +importFrom(grDevices,boxplot.stats) importFrom(graphics,axis) importFrom(graphics,barplot) importFrom(graphics,plot) @@ -76,6 +93,9 @@ importFrom(graphics,text) importFrom(reshape2,dcast) importFrom(rvest,html_nodes) importFrom(rvest,html_table) +importFrom(stats,fivenum) +importFrom(stats,quantile) +importFrom(stats,sd) importFrom(utils,object.size) importFrom(utils,packageDescription) importFrom(utils,read.delim) diff --git a/NEWS b/NEWS deleted file mode 100644 index f4452ba6..00000000 --- a/NEWS +++ /dev/null @@ -1,28 +0,0 @@ -## 0.1.2 -- Added full support for Windows, Linux and macOS -- New function `guess_bactid` to determine the ID of a microorganism based on genus/species or known abbreviations like MRSA -- New functions `clipboard_import` and `clipboard_export` as helper functions to quickly copy and paste from/to software like Excel and SPSS -- New algorithm to determine weighted isolates, can now be `"points"` or `"keyantibiotics"`, see `?first_isolate` -- Renamed dataset `ablist` to `antibiotics` -- Renamed dataset `bactlist` to `microorganisms` -- Added more microorganisms to `bactlist` -- Added analysis examples on help page of dataset `septic_patients` -- Added support for character vector in join functions -- Added warnings when applying a join results in more rows after than before the join -- Altered `%like%` to make it case insensitive -- For parameters of functions `first_isolate`, `EUCAST_rules` the column names are now case-insensitive -- Functions `as.rsi` and `as.mic` now add the package name and version as attribute -- Expanded README.md -- Added unit testing with the `testthat` package -- Added build tests for Linux and macOS using Travis CI (https://travis-ci.org/msberends/AMR) -- Added Line coverage checking using CodeCov (https://codecov.io/gh/msberends/AMR/tree/master/R) - -## 0.1.1 -- `EUCAST_rules` applies for amoxicillin even if ampicillin is missing -- Edited column names to comply with GLIMS, the laboratory information system -- Added more valid MIC values -- Renamed 'Daily Defined Dose' to 'Defined Daily Dose' -- Added barplots for `rsi` and `mic` classes - -## 0.1.0 -- First submission to CRAN. diff --git a/NEWS.md b/NEWS.md new file mode 100644 index 00000000..5ed07fc0 --- /dev/null +++ b/NEWS.md @@ -0,0 +1,37 @@ +## 0.2.0 +#### New +- Full support for Windows, Linux and macOS +- Function `guess_bactid` to determine the ID of a microorganism based on genus/species or known abbreviations like MRSA +- Functions `clipboard_import` and `clipboard_export` as helper functions to quickly copy and paste from/to software like Excel and SPSS +- Function `MDRO` to determine Multi Drug Resistant Organisms (MDRO) with support for country-specific guidelines. Suggest your own via https://github.com/msberends/AMR/issues/new. Functions `BRMO` and `MRGN` are wrappers for Dutch and German guidelines, respectively +- Function `freq` to create frequency tables, with additional info in a header +- New algorithm to determine weighted isolates, can now be `"points"` or `"keyantibiotics"`, see `?first_isolate` +- New print format for tibbles and data.tables + +#### Changed +- Renamed dataset `ablist` to `antibiotics` +- Renamed dataset `bactlist` to `microorganisms` +- Added more microorganisms to `bactlist` +- Added analysis examples on help page of dataset `septic_patients` +- Added support for character vector in `join` functions +- Added warnings when a join results in more rows after than before the join +- Altered `%like%` to make it case insensitive +- For parameters of functions `first_isolate` and `EUCAST_rules` the column names are now case-insensitive +- Functions `as.rsi` and `as.mic` now add the package name and version as attribute + +#### Other +- Expanded README.md with more examples +- Added ORC IDs of authors to DESCRIPTION file +- Added unit testing with the `testthat` package +- Added build tests for Linux and macOS using Travis CI (https://travis-ci.org/msberends/AMR) +- Added Line coverage checking using CodeCov (https://codecov.io/gh/msberends/AMR/tree/master/R) + +## 0.1.1 +- `EUCAST_rules` applies for amoxicillin even if ampicillin is missing +- Edited column names to comply with GLIMS, the laboratory information system +- Added more valid MIC values +- Renamed 'Daily Defined Dose' to 'Defined Daily Dose' +- Added barplots for `rsi` and `mic` classes + +## 0.1.0 +- First submission to CRAN. diff --git a/R/EUCAST.R b/R/eucast.R similarity index 100% rename from R/EUCAST.R rename to R/eucast.R diff --git a/R/first_isolates.R b/R/first_isolates.R index 862b9800..e4f83a59 100644 --- a/R/first_isolates.R +++ b/R/first_isolates.R @@ -439,45 +439,26 @@ key_antibiotics <- function(tbl, col.list <- c(amox, cfot, cfta, cftr, cfur, cipr, clar, clin, clox, doxy, gent, line, mero, peni, pita, rifa, teic, trsu, vanc) - col.list <- col.list[!is.na(col.list)] - col.list.bak <- col.list - # are they available as upper case or lower case then? - for (i in 1:length(col.list)) { - if (toupper(col.list[i]) %in% colnames(tbl)) { - col.list[i] <- toupper(col.list[i]) - } else if (tolower(col.list[i]) %in% colnames(tbl)) { - col.list[i] <- tolower(col.list[i]) - } else if (!col.list[i] %in% colnames(tbl)) { - col.list[i] <- NA - } - } - if (!all(col.list %in% colnames(tbl))) { - if (info == TRUE) { - warning('These columns do not exist and will be ignored: ', - col.list.bak[!(col.list %in% colnames(tbl))] %>% toString(), - immediate. = TRUE, - call. = FALSE) - } - } - amox <- col.list[1] - cfot <- col.list[2] - cfta <- col.list[3] - cftr <- col.list[4] - cfur <- col.list[5] - cipr <- col.list[6] - clar <- col.list[7] - clin <- col.list[8] - clox <- col.list[9] - doxy <- col.list[10] - gent <- col.list[11] - line <- col.list[12] - mero <- col.list[13] - peni <- col.list[14] - pita <- col.list[15] - rifa <- col.list[16] - teic <- col.list[17] - trsu <- col.list[18] - vanc <- col.list[19] + col.list <- check_available_columns(tbl = tbl, col.list = col.list, info = info) + amox <- col.list[amox] + cfot <- col.list[cfot] + cfta <- col.list[cfta] + cftr <- col.list[cftr] + cfur <- col.list[cfur] + cipr <- col.list[cipr] + clar <- col.list[clar] + clin <- col.list[clin] + clox <- col.list[clox] + doxy <- col.list[doxy] + gent <- col.list[gent] + line <- col.list[line] + mero <- col.list[mero] + peni <- col.list[peni] + pita <- col.list[pita] + rifa <- col.list[rifa] + teic <- col.list[teic] + trsu <- col.list[trsu] + vanc <- col.list[vanc] # join microorganisms tbl <- tbl %>% left_join_microorganisms(col_bactid) diff --git a/R/freq.R b/R/freq.R new file mode 100644 index 00000000..fb6d6a47 --- /dev/null +++ b/R/freq.R @@ -0,0 +1,361 @@ +# ==================================================================== # +# TITLE # +# Antimicrobial Resistance (AMR) Analysis # +# # +# AUTHORS # +# Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl) # +# # +# LICENCE # +# This program is free software; you can redistribute it and/or modify # +# it under the terms of the GNU General Public License version 2.0, # +# as published by the Free Software Foundation. # +# # +# This program is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# ==================================================================== # + +#' Frequency table +#' +#' Create a frequency table of a vector of data, a single column or a maximum of 9 columns of a data frame. Supports markdown for reports. +#' @param x data +#' @param sort.count Sort on count. Use \code{FALSE} to sort alphabetically on item. +#' @param nmax number of row to print. Use \code{nmax = 0} or \code{nmax = NA} to print all rows. +#' @param na.rm a logical value indicating whether NA values should be removed from the frequency table. The header will always print the amount of\code{NA}s. +#' @param markdown print table in markdown format (this forces \code{nmax = NA}) +#' @param toConsole Print table to the console. Use \code{FALSE} to assign the table to an object. +#' @param digits how many significant digits are to be used for numeric values (not for the items themselves, that depends on \code{\link{getOption}("digits")}) +#' @param sep a character string to separate the terms when selecting multiple columns +#' @details For numeric values, the next values will be calculated and shown into the header: +#' \itemize{ +#' \item{Mean, using \code{\link[base]{mean}}} +#' \item{Standard deviation, using \code{\link[stats]{sd}}} +#' \item{Five numbers of Tukey (min, Q1, median, Q3, max), using \code{\link[stats]{fivenum}}} +#' \item{Outliers (count and list), using \code{\link{boxplot.stats}}} +#' \item{Coefficient of variation (CV), the standard deviation divided by the mean} +#' \item{Coefficient of quartile variation (CQV, sometimes called coefficient of dispersion), calculated as \code{(Q3 - Q1) / (Q3 + Q1)} using \code{\link{quantile}} with \code{type = 6} as quantile algorithm to comply with SPSS standards} +#' } +#' @importFrom stats fivenum sd quantile +#' @importFrom grDevices boxplot.stats +#' @importFrom dplyr %>% select pull n_distinct group_by arrange desc mutate summarise +#' @keywords summary summarise frequency freq +#' @rdname freq +#' @export +#' @examples +#' library(dplyr) +#' +#' freq(septic_patients$hospital_id) +#' +#' septic_patients %>% +#' filter(hospital_id == "A") %>% +#' select(bactid) %>% +#' freq() +#' +#' # select multiple columns; they will be pasted together +#' septic_patients %>% +#' left_join_microorganisms %>% +#' filter(hospital_id == "A") %>% +#' select(genus, species) %>% +#' freq() +#' +#' # save frequency table to an object +#' years <- septic_patients %>% +#' mutate(year = format(date, "%Y")) %>% +#' select(year) %>% +#' freq(toConsole = FALSE) +freq <- function(x, + sort.count = TRUE, + nmax = 15, + na.rm = TRUE, + markdown = FALSE, + toConsole = TRUE, + digits = 2, + sep = " ") { + + mult.columns <- 0 + + if (NROW(x) == 0) { + cat('\nNo observations.\n') + return(invisible()) + } + + if (!is.null(ncol(x))) { + if (ncol(x) == 1 & any(class(x) == 'data.frame')) { + x <- x %>% pull(1) + } else if (ncol(x) < 10) { + + mult.columns <- ncol(x) + + colnames(x) <- LETTERS[1:ncol(x)] + if (ncol(x) == 2) { + x$total <- paste(x$A %>% as.character(), + x$B %>% as.character(), + sep = sep) + } else if (ncol(x) == 3) { + x$total <- paste(x$A %>% as.character(), + x$B %>% as.character(), + x$C %>% as.character(), + sep = sep) + } else if (ncol(x) == 4) { + x$total <- paste(x$A %>% as.character(), + x$B %>% as.character(), + x$C %>% as.character(), + x$D %>% as.character(), + sep = sep) + } else if (ncol(x) == 5) { + x$total <- paste(x$A %>% as.character(), + x$B %>% as.character(), + x$C %>% as.character(), + x$D %>% as.character(), + x$E %>% as.character(), + sep = sep) + } else if (ncol(x) == 6) { + x$total <- paste(x$A %>% as.character(), + x$B %>% as.character(), + x$C %>% as.character(), + x$D %>% as.character(), + x$E %>% as.character(), + x$F %>% as.character(), + sep = sep) + } else if (ncol(x) == 7) { + x$total <- paste(x$A %>% as.character(), + x$B %>% as.character(), + x$C %>% as.character(), + x$D %>% as.character(), + x$E %>% as.character(), + x$F %>% as.character(), + x$G %>% as.character(), + sep = sep) + } else if (ncol(x) == 8) { + x$total <- paste(x$A %>% as.character(), + x$B %>% as.character(), + x$C %>% as.character(), + x$D %>% as.character(), + x$E %>% as.character(), + x$F %>% as.character(), + x$G %>% as.character(), + x$H %>% as.character(), + sep = sep) + } else if (ncol(x) == 9) { + x$total <- paste(x$A %>% as.character(), + x$B %>% as.character(), + x$C %>% as.character(), + x$D %>% as.character(), + x$E %>% as.character(), + x$F %>% as.character(), + x$G %>% as.character(), + x$H %>% as.character(), + x$I %>% as.character(), + sep = sep) + } + + x <- x$total + + } else { + stop('A maximum of 9 columns can be analysed at the same time.', call. = FALSE) + } + } + if (markdown == TRUE & toConsole == FALSE) { + warning('`toConsole = FALSE` will be ignored when `markdown = TRUE`.') + } + + if (mult.columns > 1) { + NAs <- x[is.na(x) | x == trimws(strrep('NA ', mult.columns))] + } else { + NAs <- x[is.na(x)] + } + if (na.rm == TRUE) { + x <- x[!x %in% NAs] + } + + if (missing(sort.count) & any(class(x) %in% c('double', 'integer', 'numeric', 'raw', 'single', 'factor'))) { + # sort on item/level at default when x is numeric or a factor and sort.count is not set + sort.count <- FALSE + } + + header <- character(0) + + markdown_line <- '' + if (markdown == TRUE) { + markdown_line <- '\n' + } + x_align <- 'l' + + if (mult.columns > 0) { + header <- header %>% paste0(markdown_line, 'Columns: ', mult.columns) + } else { + header <- header %>% paste0(markdown_line, 'Class: ', class(x) %>% rev() %>% paste(collapse = " > ")) + } + + if (is.list(x) | is.matrix(x) | is.environment(x) | is.function(x)) { + cat(header, "\n") + stop('`freq()` does not support lists, matrices, environments or functions.', call. = FALSE) + } + + header <- header %>% paste0(markdown_line, '\nLength: ', (NAs %>% length() + x %>% length()) %>% format(), + ' (of which NA: ', NAs %>% length() %>% format(), + ' = ', (NAs %>% length() / (NAs %>% length() + x %>% length())) %>% percent(force_zero = TRUE), ')') + header <- header %>% paste0(markdown_line, '\nUnique: ', x %>% n_distinct() %>% format()) + + header.numbers.done <- FALSE + if (any(class(x) %in% c('double', 'integer', 'numeric', 'raw', 'single'))) { + # right align number + x_align <- 'r' + header <- header %>% paste0('\n') + header <- header %>% paste(markdown_line, '\nMean: ', x %>% base::mean(na.rm = TRUE) %>% format(digits = digits)) + header <- header %>% paste0(markdown_line, '\nStd. dev.: ', x %>% stats::sd(na.rm = TRUE) %>% format(digits = digits), + ' (CV: ', x %>% cv(na.rm = TRUE) %>% format(digits = digits), ')') + header <- header %>% paste0(markdown_line, '\nFive-Num: ', x %>% stats::fivenum(na.rm = TRUE) %>% format(digits = digits) %>% trimws() %>% paste(collapse = ' | '), + ' (CQV: ', x %>% cqv(na.rm = TRUE) %>% format(digits = digits), ')') + outlier_length <- length(boxplot.stats(x)$out) + header <- header %>% paste0(markdown_line, '\nOutliers: ', outlier_length) + if (outlier_length > 0) { + header <- header %>% paste0(' (unique: ', boxplot.stats(x)$out %>% unique() %>% length(), ')') + } + } + + formatdates <- "%e %B %Y" # = d mmmm yyyy + if (any(class(x) == 'hms')) { + x <- x %>% as.POSIXlt() + formatdates <- "%H:%M:%S" + } + if (any(class(x) %in% c('Date', 'POSIXct', 'POSIXlt'))) { + header <- header %>% paste0('\n') + mindatum <- x %>% min() + maxdatum <- x %>% max() + header <- header %>% paste0(markdown_line, '\nOldest: ', mindatum %>% format(formatdates) %>% trimws()) + header <- header %>% paste0(markdown_line, '\nNewest: ', maxdatum %>% format(formatdates) %>% trimws(), + ' (+', difftime(maxdatum, mindatum, units = 'auto') %>% as.double() %>% format(), ')') + } + if (any(class(x) == 'POSIXlt')) { + x <- x %>% format(formatdates) + } + + if (toConsole == TRUE) { + cat(header) + } + + if (all(is.na(x))) { + cat('\n\nNo observations.\n') + return(invisible()) + } + if (n_distinct(x) == length(x)) { + warning('All observations are unique.', call. = FALSE) + } + + if (nmax == 0 | is.na(nmax)) { + nmax <- length(x) + } + nmax.1 <- min(length(x), nmax + 1) + + # create table with counts and percentages + if (any(class(x) == 'factor')) { + df <- tibble::tibble(Item = x, + Fctlvl = x %>% as.integer()) %>% + group_by(Item, Fctlvl) + column_names <- c('Item', 'Count', 'Percent', 'Cum. Count', 'Cum. Percent', '(Factor Level)') + column_align <- c('l', 'r', 'r', 'r', 'r', 'r') + } else { + df <- tibble::tibble(Item = x) %>% + group_by(Item) + column_names <- c('Item', 'Count', 'Percent', 'Cum. Count', 'Cum. Percent') + column_align <- c(x_align, 'r', 'r', 'r', 'r') + } + df <- df %>% + summarise(Count = n(), + Percent = (n() / length(x)) %>% percent(force_zero = TRUE)) + + if (df$Item %>% paste(collapse = ',') %like% '\033') { + df <- df %>% + mutate(Item = Item %>% + # remove escape char + # see https://en.wikipedia.org/wiki/Escape_character#ASCII_escape_character + gsub('\033', ' ', ., fixed = TRUE)) + } + + # sort according to setting + if (sort.count == TRUE) { + df <- df %>% arrange(desc(Count)) + } else { + if (any(class(x) == 'factor')) { + df <- df %>% arrange(Fctlvl) + } else { + df <- df %>% arrange(Item) + } + } + + # add cumulative values + df$Cum <- cumsum(df$Count) + df$CumTot <- (df$Cum / sum(df$Count, na.rm = TRUE)) %>% percent(force_zero = TRUE) + df$Cum <- df$Cum %>% format() + + if (any(class(x) == 'factor')) { + # put factor last + df <- df %>% select(Item, Count, Percent, Cum, CumTot, Fctlvl) + } + + if (markdown == TRUE) { + tblformat <- 'markdown' + } else { + tblformat <- 'pandoc' + } + + if (toConsole == FALSE) { + # assign to object + df[, 3] <- df[, 2] / sum(df[, 2], na.rm = TRUE) + df[, 4] <- cumsum(df[, 2]) + df[, 5] <- df[, 4] / sum(df[, 2], na.rm = TRUE) + return(df) + + } else { + + # save old NA setting for kable + opt.old <- options()$knitr.kable.NA + options(knitr.kable.NA = "") + + Count.rest <- sum(df[nmax.1:nrow(df), 'Count'], na.rm = TRUE) + if (any(class(x) %in% c('double', 'integer', 'numeric', 'raw', 'single'))) { + df <- df %>% mutate(Item = format(Item)) + } + df <- df %>% mutate(Count = format(Count)) + + if (nrow(df) > nmax.1 & markdown == FALSE) { + df2 <- df[1:nmax,] + print( + knitr::kable(df2, + format = tblformat, + col.names = column_names, + align = column_align, + padding = 1) + ) + cat('... and ', + format(nrow(df) - nmax), + ' more ', + paste0('(n = ', + format(Count.rest), + '; ', + (Count.rest / length(x)) %>% percent(force_zero = TRUE), + ')'), + '. Use `nmax` to show more rows.\n\n', sep = '') + + } else { + print( + knitr::kable(df, + format = tblformat, + col.names = column_names, + align = column_align, + padding = 1) + ) + } + cat('\n') + + # reset old kable setting + options(knitr.kable.NA = opt.old) + return(invisible()) + } +} + +#' @rdname freq +#' @export +frequency_tbl <- freq diff --git a/R/globals.R b/R/globals.R index 2eb98d09..fe977831 100644 --- a/R/globals.R +++ b/R/globals.R @@ -16,16 +16,20 @@ # GNU General Public License for more details. # # ==================================================================== # -globalVariables(c('.', - 'abname', +globalVariables(c('abname', 'bactid', 'cnt', + 'Count', + 'Cum', + 'CumTot', 'date_lab', 'days_diff', + 'Fctlvl', 'first_isolate_row_index', 'fullname', 'genus', 'gramstain', + 'Item', 'key_ab', 'key_ab_lag', 'key_ab_other', @@ -34,6 +38,9 @@ globalVariables(c('.', 'n', 'other_pat_or_mo', 'patient_id', + 'Percent', + 'quantile', 'real_first_isolate', 'species', - 'y')) + 'y', + '.')) diff --git a/R/mdro.R b/R/mdro.R new file mode 100644 index 00000000..b2c3ae79 --- /dev/null +++ b/R/mdro.R @@ -0,0 +1,203 @@ +# ==================================================================== # +# TITLE # +# Antimicrobial Resistance (AMR) Analysis # +# # +# AUTHORS # +# Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl) # +# # +# LICENCE # +# This program is free software; you can redistribute it and/or modify # +# it under the terms of the GNU General Public License version 2.0, # +# as published by the Free Software Foundation. # +# # +# This program is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# ==================================================================== # + +#' Determine multidrug-resistant organisms (MDRO) +#' +#' Determine which isolates are multidrug-resistant organisms (MDRO) according to country-specific guidelines. +#' @param tbl table with antibiotic columns, like e.g. \code{amox} and \code{amcl} +#' @param country country to determine guidelines. Should be a code from the \href{https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2#Officially_assigned_code_elements}{list of ISO 3166-1 alpha-2 country codes}. Case-insensitive. Currently supported are \code{de} (Germany) and \code{nl} (the Netherlands). +#' @param col_bactid column name of the bacteria ID in \code{tbl} - values of this column should be present in \code{microorganisms$bactid}, see \code{\link{microorganisms}} +#' @param info print progress +#' @param aminoglycosides,quinolones,carbapenems character vector with column names of antibiotics +#' @param ceftazidime,piperacillin,trimethoprim_sulfa,penicillin,vancomycin column names of antibiotics +#' @param ... parameters that are passed on to \code{MDR} +#' @return Ordered factor with values \code{Positive}, \code{Unconfirmed}, \code{Negative}. +#' @rdname MDRO +#' @export +MDRO <- function(tbl, + country, + col_bactid = 'bactid', + info = TRUE, + aminoglycosides = c('gent', 'tobr', 'kana'), + quinolones = c('cipr', 'norf'), + carbapenems = c('imip', 'mero', 'erta'), + ceftazidime = 'cfta', + piperacillin = 'pita', + trimethoprim_sulfa = 'trsu', + penicillin = 'peni', + vancomycin = 'vanc') { + + # strip whitespaces + country <- trimws(country) + if (length(country) > 1) { + stop('`country` must be a length one character string.', call. = FALSE) + } + if (!country %like% '^[a-z]{2}$') { + stop('This is not a valid ISO 3166-1 alpha-2 country code: "', country, '". Please see ?MDRO.', call. = FALSE) + } + + # create list and make country code case-independent + guideline <- list(country = list(code = tolower(country))) + + # support per country + if (guideline$country$code == 'de') { + guideline$country$name <- 'Germany' + guideline$name <- '' + guideline$version <- '' + guideline$source <- '' + } else if (guideline$country$code == 'nl') { + guideline$country$name <- 'The Netherlands' + guideline$name <- 'WIP-Richtlijn BRMO' + guideline$version <- 'Revision of December 2017' + guideline$source <- 'https://www.rivm.nl/Documenten_en_publicaties/Professioneel_Praktisch/Richtlijnen/Infectieziekten/WIP_Richtlijnen/WIP_Richtlijnen/Ziekenhuizen/WIP_richtlijn_BRMO_Bijzonder_Resistente_Micro_Organismen_ZKH' + # add here more countries like this: + # } else if (country$code == 'AA') { + # country$name <- 'country name' + } else { + stop('This country code is currently unsupported: ', guideline$country$code, call. = FALSE) + } + + # Console colours + # source: http://www.tldp.org/HOWTO/Bash-Prompt-HOWTO/x329.html + ANSI_red <- "\033[31m" + ANSI_blue <- "\033[34m" + ANSI_reset <- "\033[0m" + + if (info == TRUE) { + cat("Determining Highly Resistant Microorganisms (MDRO), according to:\n", + "Guideline: ", ANSI_red, guideline$name, ", ", guideline$version, ANSI_reset, "\n", + "Country : ", ANSI_red, guideline$country$name, ANSI_reset, "\n", + "Source : ", ANSI_blue, guideline$source, ANSI_reset, "\n", + "\n", sep = "") + } + + # join microorganisms + tbl <- tbl %>% left_join_microorganisms(col_bactid) + + tbl$MDRO <- 1 + + if (guideline$country$code == 'nl') { + # BRMO; Bijzonder Resistente Micro-Organismen + aminoglycosides <- aminoglycosides[aminoglycosides %in% colnames(tbl)] + quinolones <- quinolones[quinolones %in% colnames(tbl)] + carbapenems <- carbapenems[carbapenems %in% colnames(tbl)] + if (!ceftazidime %in% colnames(tbl)) { ceftazidime <- NA } + if (!piperacillin %in% colnames(tbl)) { piperacillin <- NA } + if (!trimethoprim_sulfa %in% colnames(tbl)) { trimethoprim_sulfa <- NA } + if (!penicillin %in% colnames(tbl)) { penicillin <- NA } + if (!vancomycin %in% colnames(tbl)) { vancomycin <- NA } + + # Table 1 + tbl[which( + tbl$family == 'Enterobacteriaceae' + & rowSums(tbl[, aminoglycosides] == 'R', na.rm = TRUE) >= 1 + & rowSums(tbl[, quinolones] == 'R', na.rm = TRUE) >= 1 + ), 'MDRO'] <- 4 + tbl[which( + tbl$family == 'Enterobacteriaceae' + & rowSums(tbl[, carbapenems] == 'R', na.rm = TRUE) >= 1 + ), 'MDRO'] <- 3 + # rest is negative + tbl[which( + tbl$family == 'Enterobacteriaceae' + & tbl$MDRO == 1 + ), 'MDRO'] <- 2 + + # Table 2 + tbl[which( + tbl$genus == 'Acinetobacter' + & rowSums(tbl[, carbapenems] == 'R', na.rm = TRUE) >= 1 + ), 'MDRO'] <- 3 + tbl[which( + tbl$genus == 'Acinetobacter' + & rowSums(tbl[, aminoglycosides] == 'R', na.rm = TRUE) >= 1 + & rowSums(tbl[, quinolones] == 'R', na.rm = TRUE) >= 1 + ), 'MDRO'] <- 4 + # rest of Acinetobacter is negative + tbl[which( + tbl$genus == 'Acinetobacter' + & tbl$MDRO == 1 + ), 'MDRO'] <- 2 + + tbl[which( + tbl$fullname %like% 'Stenotrophomonas maltophilia' + & tbl[, trimethoprim_sulfa] == 'R' + ), 'MDRO'] <- 4 + # rest of Stenotrophomonas is negative + tbl[which( + tbl$fullname %like% 'Stenotrophomonas maltophilia' + & tbl$MDRO == 1 + ), 'MDRO'] <- 2 + + tbl[which( + tbl$fullname %like% 'Pseudomonas aeruginosa' + & sum(rowSums(tbl[, carbapenems] == 'R', na.rm = TRUE) >= 1, + rowSums(tbl[, aminoglycosides] == 'R', na.rm = TRUE) >= 1, + rowSums(tbl[, quinolones] == 'R', na.rm = TRUE) >= 1, + tbl[, ceftazidime] == 'R', + tbl[, piperacillin] == 'R') >= 3 + ), 'MDRO'] <- 4 + # rest of Pseudomonas is negative + tbl[which( + tbl$fullname %like% 'Pseudomonas aeruginosa' + & tbl$MDRO == 1 + ), 'MDRO'] <- 2 + + # Table 3 + tbl[which( + tbl$fullname %like% 'Streptococcus pneumoniae' + & tbl[, penicillin] == 'R' + ), 'MDRO'] <- 4 + tbl[which( + tbl$fullname %like% 'Streptococcus pneumoniae' + & tbl[, vancomycin] == 'R' + ), 'MDRO'] <- 4 + # rest of Streptococcus pneumoniae is negative + tbl[which( + tbl$fullname %like% 'Streptococcus pneumoniae' + & tbl$MDRO == 1 + ), 'MDRO'] <- 2 + + tbl[which( + tbl$fullname %like% 'Enterococcus faecium' + & rowSums(tbl[, c(penicillin, vancomycin)] == 'R', na.rm = TRUE) >= 1 + ), 'MDRO'] <- 4 + # rest of Enterococcus faecium is negative + tbl[which( + tbl$fullname %like% 'Enterococcus faecium' + & tbl$MDRO == 1 + ), 'MDRO'] <- 2 + } + + factor(x = tbl$MDRO, + levels = c(1:4), + labels = c('Unknown', 'Negative', 'Unconfirmed', 'Positive'), + ordered = TRUE) +} + +#' @rdname MDRO +#' @export +BRMO <- function(tbl, country = "nl", ...) { + MDRO(tbl = tbl, country = country, ...) +} + +#' @rdname MDRO +#' @export +MRGN <- function(tbl, country = "de", ...) { + MDRO(tbl = tbl, country = country, ...) +} diff --git a/R/misc.R b/R/misc.R index b9504c3c..d0588eb4 100644 --- a/R/misc.R +++ b/R/misc.R @@ -16,18 +16,98 @@ # GNU General Public License for more details. # # ==================================================================== # -# No export, no Rd -"%like%" <- function(vector, pattern) { - # Source: https://github.com/Rdatatable/data.table/blob/master/R/like.R - # But made it case insensitive - if (is.factor(vector)) { - as.integer(vector) %in% grep(pattern, levels(vector), ignore.case = TRUE) +#' Pattern Matching +#' +#' Convenience function to compare a vector with a pattern, like \code{\link[base]{grep}}. It always returns a \code{logical} vector and is always case-insensitive. +#' @inheritParams base::grep +#' @return A \code{logical} vector +#' @name like +#' @rdname like +#' @export +#' @source Inherited from the \href{https://github.com/Rdatatable/data.table/blob/master/R/like.R}{\code{like} function from the \code{data.table} package}, but made it case insensitive at default. +#' @examples +#' library(dplyr) +#' # get unique occurences of bacteria whose name start with 'Ent' +#' septic_patients %>% +#' left_join_microorganisms() %>% +#' filter(fullname %like% '^Ent') %>% +#' pull(fullname) %>% +#' unique() +"%like%" <- function(x, pattern) { + if (length(pattern) > 1) { + pattern <- pattern[1] + warning('only the first element of argument `pattern` used for `%like%`', call. = FALSE) + } + if (is.factor(x)) { + as.integer(x) %in% base::grep(pattern, levels(x), ignore.case = TRUE) } else { - grepl(pattern, vector, ignore.case = TRUE) + base::grepl(pattern, x, ignore.case = TRUE) } } # No export, no Rd -percent <- function(x, round = 1, ...) { - base::paste0(base::round(x * 100, digits = round), "%") +percent <- function(x, round = 1, force_zero = FALSE, ...) { + val <- base::round(x * 100, digits = round) + if (force_zero & any(val == as.integer(val))) { + val[val == as.integer(val)] <- paste0(val[val == as.integer(val)], ".", base::strrep(0, round)) + } + base::paste0(val, "%") +} + +check_available_columns <- function(tbl, col.list, info = TRUE) { + # check columns + col.list <- col.list[!is.na(col.list)] + names(col.list) <- col.list + col.list.bak <- col.list + # are they available as upper case or lower case then? + for (i in 1:length(col.list)) { + if (toupper(col.list[i]) %in% colnames(tbl)) { + col.list[i] <- toupper(col.list[i]) + } else if (tolower(col.list[i]) %in% colnames(tbl)) { + col.list[i] <- tolower(col.list[i]) + } else if (!col.list[i] %in% colnames(tbl)) { + col.list[i] <- NA + } + } + if (!all(col.list %in% colnames(tbl))) { + if (info == TRUE) { + warning('These columns do not exist and will be ignored: ', + col.list.bak[!(col.list %in% colnames(tbl))] %>% toString(), + immediate. = TRUE, + call. = FALSE) + } + } + col.list +} + +# Coefficient of variation (CV) +cv <- function(x, na.rm = TRUE) { + cv.x <- sd(x, na.rm = na.rm) / abs(mean(x, na.rm = na.rm)) + cv.x +} + +# Coefficient of dispersion, or coefficient of quartile variation (CQV). +# (Bonett et al., 2006: Confidence interval for a coefficient of quartile variation). +cqv <- function(x, na.rm = TRUE) { + cqv.x <- + (quantile(x, 0.75, na.rm = na.rm, type = 6) - quantile(x, 0.25, na.rm = na.rm, type = 6)) / + (quantile(x, 0.75, na.rm = na.rm, type = 6) + quantile(x, 0.25, na.rm = na.rm, type = 6)) + unname(cqv.x) +} + +# show bytes as kB/MB/GB +# size_humanreadable(123456) # 121 kB +# size_humanreadable(12345678) # 11.8 MB +size_humanreadable <- function(bytes, decimals = 1) { + bytes <- bytes %>% as.double() + # Adapted from: + # http://jeffreysambells.com/2012/10/25/human-readable-filesize-php + size <- c('B','kB','MB','GB','TB','PB','EB','ZB','YB') + factor <- floor((nchar(bytes) - 1) / 3) + # added slight improvement; no decimals for B and kB: + decimals <- rep(decimals, length(bytes)) + decimals[size[factor + 1] %in% c('B', 'kB')] <- 0 + + out <- paste(sprintf(paste0("%.", decimals, "f"), bytes / (1024 ^ factor)), size[factor + 1]) + out } diff --git a/R/print.R b/R/print.R new file mode 100644 index 00000000..377cb9ac --- /dev/null +++ b/R/print.R @@ -0,0 +1,349 @@ +# ==================================================================== # +# TITLE # +# Antimicrobial Resistance (AMR) Analysis # +# # +# AUTHORS # +# Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl) # +# # +# LICENCE # +# This program is free software; you can redistribute it and/or modify # +# it under the terms of the GNU General Public License version 2.0, # +# as published by the Free Software Foundation. # +# # +# This program is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# ==================================================================== # + +#' Printing Data Tables and Tibbles +#' +#' Print a data table or tibble. It prints: \cr- The \strong{first and last rows} like \code{data.table}s are printed by the \code{data.table} package,\cr- A \strong{header} and \strong{left aligned text} like \code{tibble}s are printed by the \code{tibble} package with info about grouped variables,\cr- \strong{Unchanged values} and \strong{support for row names} like \code{data.frame}s are printed by the \code{base} package. +#' @inheritParams base::print.data.frame +#' @param nmax amount of rows to print in total. When the total amount of rows exceeds this limit, the first and last \code{nmax / 2} rows will be printed. Use \code{nmax = NA} to print all rows. +#' @param header print header with information about data size and tibble grouping +#' @param print.keys print keys for \code{data.table} +#' @param na value to print instead of NA +#' @param width amount of white spaces to keep between columns, must be at least 1 +#' @rdname print +#' @name print +#' @importFrom dplyr %>% n_groups group_vars group_size filter pull select +#' @exportMethod print.tbl_df +#' @export +#' @examples +#' # still showing all values unchanged: +#' library(dplyr) +#' starwars +#' print(starwars, width = 3) +#' +#' # compare the significance notation of this +#' starwars %>% select(birth_year) +#' # with this +#' tibble:::print.tbl_df(starwars %>% select(birth_year)) +#' +#' # supports info about groups (look at header) +#' starwars %>% group_by(homeworld, gender) +print.tbl_df <- function(x, + nmax = 10, + header = TRUE, + row.names = TRUE, + right = FALSE, + width = 1, + na = "", + ...) { + prettyprint_df(x = x, + nmax = nmax, + header = header, + row.names = row.names, + print.keys = FALSE, + right = right, + width = width, + na = na, + ...) +} + +#' @rdname print +#' @exportMethod print.tbl +#' @export +print.tbl <- function(x, ...) { + prettyprint_df(x, ...) +} + +#' @rdname print +#' @exportMethod print.data.table +#' @export +print.data.table <- function(x, + nmax = 10, + header = TRUE, + row.names = TRUE, + print.keys = FALSE, + right = FALSE, + width = 1, + na = "", + ...) { + prettyprint_df(x = x, + nmax = nmax, + header = header, + row.names = row.names, + print.keys = print.keys, + right = right, + width = width, + na = na, + ...) +} + +printDT <- data.table:::print.data.table +prettyprint_df <- function(x, + nmax = 10, + header = TRUE, + row.names = TRUE, + print.keys = FALSE, + right = FALSE, + width = 1, + na = "", + ...) { + + ansi_reset <- "\u001B[0m" + ansi_black <- "\u001B[30m" + ansi_red <- "\u001B[31m" + ansi_green <- "\u001B[32m" + ansi_yellow <- "\u001B[33m" + ansi_blue <- "\u001B[34m" + ansi_purple <- "\u001B[35m" + ansi_cyan <- "\u001B[36m" + ansi_white <- "\u001B[37m" + ansi_gray <- "\u001B[38;5;246m" + + if (width < 1) { + stop('`width` must be at least 1.', call. = FALSE) + } + + if (is.na(nmax)) { + nmax <- NROW(x) + } + n <- nmax + if (n %% 2 == 1) { + # odd number; add 1 + n <- n + 1 + } + + width <- width - 1 + + if ('tbl_df' %in% class(x)) { + type <- 'tibble' + } else if ('data.table' %in% class(x)) { + type <- 'data.table' + } else { + type <- 'data.frame' + } + + if (header == TRUE) { + if (NCOL(x) == 1) { + vars <- 'variable' + } else { + vars <- 'variables' + } + + size <- object.size(x) %>% as.double() %>% size_humanreadable() + + cat(paste0("A ", type,": ", + format(NROW(x)), + " obs. of ", + format(NCOL(x)), + " ", vars, + ansi_gray, " (", size, ")\n", ansi_reset)) + if ('grouped_df' %in% class(x) & n_groups(x) > 0) { + cat(paste0("Grouped by ", + x %>% group_vars() %>% paste0(ansi_red, ., ansi_reset) %>% paste0(collapse = " and "), + ansi_gray, + " (", + x %>% n_groups(), + " groups with sizes between ", + x %>% group_size() %>% min(), + " and ", + x %>% group_size() %>% max(), + ")\n", + ansi_reset)) + } + if (!is.null(attributes(x)$qry)) { + cat(paste0(ansi_gray, "This data contains a query. Use qry() to view it.\n", ansi_reset)) + } + cat("\n") + } + + # data.table where keys should be printed + if (print.keys == TRUE) { + printDT(x, + class = header, + row.names = row.names, + print.keys = TRUE, + right = right, + ... + ) + return(invisible()) + } + + # tibbles give warning when setting column names + x <- x %>% base::as.data.frame(stringsAsFactors = FALSE) + + # extra space of 3 chars, right to row name or number + if (NROW(x) > 0) { + maxrowchars <- rownames(x) %>% nchar() %>% max() + 3 + rownames(x) <- paste0(rownames(x), strrep(" ", maxrowchars - nchar(rownames(x)))) + } else { + maxrowchars <- 0 + } + + x.bak <- x + + if (n + 1 < nrow(x)) { + # remove in between part, 1 extra for ~~~~ between first and last part + rows_list <- c(1:(n / 2 + 1), (nrow(x) - (n / 2) + 1):nrow(x)) + x <- x %>% filter(row_number() %in% rows_list) + rownames(x) <- rownames(x.bak)[rows_list] + # set inbetweener between parts + rownames(x)[n / 2 + 1] <- strrep("~", maxrowchars) + } + + if (header == TRUE) { + # add 1 row for classes + # class will be marked up per column + if (NROW(x.bak) > 0) { + rownames.x <- rownames(x) + x <- x %>% filter(row_number() == 1) %>% rbind(x, stringsAsFactors = FALSE) + rownames(x) <- c('*', rownames.x) + } + + # select 1st class per column and abbreviate + classes <- x.bak %>% + sapply(class) %>% + lapply( + function(c) { + # do print all POSIX classes like "POSct/t" + if ('POSIXct' %in% c) { + paste0('POS', + c %>% + gsub('POSIX', '', .) %>% + paste0(collapse = '/')) + } else { + c[[1]] + } + }) %>% + unlist() %>% + gsub("character", "chr", ., fixed = TRUE) %>% + gsub("complex", "cplx", ., fixed = TRUE) %>% + gsub("Date", "Date", ., fixed = TRUE) %>% + gsub("double", "dbl", ., fixed = TRUE) %>% + gsub("expression", "expr", ., fixed = TRUE) %>% + gsub("factor", "fct", ., fixed = TRUE) %>% + gsub("IDate", "IDat", ., fixed = TRUE) %>% + gsub("integer", "int", ., fixed = TRUE) %>% + gsub("integer64", "i64", ., fixed = TRUE) %>% + gsub("list", "list", ., fixed = TRUE) %>% + gsub("logical", "lgl", ., fixed = TRUE) %>% + gsub("numeric", "dbl", ., fixed = TRUE) %>% + gsub("ordered", "ord", ., fixed = TRUE) %>% + gsub("percent", "pct", ., fixed = TRUE) %>% + gsub("single", "sgl", ., fixed = TRUE) %>% + paste0("<", ., ">") + } + + # markup cols + for (i in 1:ncol(x)) { + if (all(!class(x[, i]) %in% class(x.bak[, i]))) { + class(x[, i]) <- class(x.bak[, i]) + } + try(x[, i] <- format(x %>% pull(i)), silent = TRUE) + + # replace NAs + if (nchar(na) < 2) { + # make as long as the text "NA" + na <- paste0(na, strrep(" ", 2 - nchar(na))) + } + try(x[, i] <- gsub("^NA$", na, trimws(x[, i], 'both')), silent = TRUE) + # place class into 1st row + if (header == TRUE) { + x[1, i] <- classes[i] + } + # dashes between two parts when exceeding nmax + maxvalchars <- max(colnames(x)[i] %>% nchar(), x[, i] %>% nchar() %>% max()) + if (n + 1 < nrow(x.bak)) { + x[n / 2 + if_else(header == TRUE, 2, 1), i] <- strrep("~", maxvalchars) + } + + # align according to `right` parameter, but only factors and text, but not MICs + if (any(x.bak %>% pull(i) %>% class() %in% c('factor', 'character')) + & !("mic" %in% (x.bak %>% pull(i) %>% class()))) { + vals <- x %>% pull(i) %>% trimws('both') + colname <- colnames(x)[i] %>% trimws('both') + if (right == FALSE) { + vals <- paste0(vals, strrep(" ", maxvalchars - nchar(vals))) + colname <- paste0(colname, strrep(" ", maxvalchars - nchar(colname))) + } else { + vals <- paste0(strrep(" ", maxvalchars - nchar(vals)), vals) + colname <- paste0(strrep(" ", maxvalchars - nchar(colname)), colname) + } + x[, i] <- vals + colnames(x)[i] <- colname + } + + # add left padding according to `width` parameter + # but not in 1st col when row names are off + if (row.names == TRUE | i > 1) { + x[, i] <- paste0(strrep(" ", width), x[, i]) + colnames(x)[i] <- paste0(strrep(" ", width), colnames(x)[i]) + } + + # strip columns that do not fit (3 chars as margin) + width_console <- options()$width + width_until_col <- x %>% + select(1:i) %>% + apply(1, paste, collapse = strrep(" ", width + 1)) %>% + nchar() %>% + max() + width_until_col_before <- x %>% + select(1:(max(i, 2) - 1)) %>% + apply(1, paste, collapse = strrep(" ", width + 1)) %>% + nchar() %>% + max() + extraspace <- maxrowchars + nchar(rownames(x)[length(rownames(x))]) + width_until_colnames <- colnames(x)[1:i] %>% paste0(collapse = strrep(" ", width + 1)) %>% nchar() + extraspace + width_until_colnames_before <- colnames(x)[1:(max(i, 2) - 1)] %>% paste0(collapse = strrep(" ", width + 1)) %>% nchar() + extraspace + + if (i > 1 & + (width_until_col > width_console + | width_until_colnames > width_console)) { + if (width_until_col_before > width_console + | width_until_colnames_before > width_console) { + x <- x[, 1:(i - 2)] + } else { + x <- x[, 1:(i - 1)] + } + break + } + } + + # empty table, row name of header should be "*" + if (NROW(x.bak) == 0) { + rownames(x) <- '* ' + } + + # and here it is... + suppressWarnings( + base::print.data.frame(x, row.names = row.names, ...) + ) + + # print rest of col names when they were stripped + if (ncol(x) < ncol(x.bak)) { + x.notshown <- x.bak %>% select((ncol(x) + 1):ncol(x.bak)) + if (ncol(x.notshown) == 1) { + cat('...and 1 more column: ') + } else { + cat('...and', ncol(x.notshown), 'more columns: ') + } + cat(x.notshown %>% + colnames() %>% + paste0(' ', ansi_gray, classes[(ncol(x) + 1):ncol(x.bak)], ansi_reset) %>% + paste0(collapse = ", "), '\n') + } +} diff --git a/README.md b/README.md index 942504ea..91125054 100644 --- a/README.md +++ b/README.md @@ -90,6 +90,104 @@ after # 5 PSEAER R R - - R ``` +### Frequency tables +Base R lacks a simple function to create frequency tables. We created such a function that works with almost all data types: `freq()` (or `frequency_tbl()`). +```r +## Factors sort on item by default: + +freq(septic_patients$hospital_id) +# Class: factor +# Length: 2000 (of which NA: 0 = 0.0%) +# Unique: 5 +# +# Item Count Percent Cum. Count Cum. Percent (Factor Level) +# ----- ------ -------- ----------- ------------- --------------- +# A 233 11.7% 233 11.7% 1 +# B 583 29.1% 816 40.8% 2 +# C 221 11.1% 1037 51.8% 3 +# D 650 32.5% 1687 84.4% 4 +# E 313 15.7% 2000 100.0% 5 + + +## This can be changed with the `sort.count` parameter: + +freq(septic_patients$hospital_id, sort.count = TRUE) +# Class: factor +# Length: 2000 (of which NA: 0 = 0.0%) +# Unique: 5 +# +# Item Count Percent Cum. Count Cum. Percent (Factor Level) +# ----- ------ -------- ----------- ------------- --------------- +# D 650 32.5% 650 32.5% 4 +# B 583 29.1% 1233 61.7% 2 +# E 313 15.7% 1546 77.3% 5 +# A 233 11.7% 1779 88.9% 1 +# C 221 11.1% 2000 100.0% 3 + + +## Other types, like numbers or dates, sort on count by default: + +> freq(septic_patients$date) +# Class: Date +# Length: 2000 (of which NA: 0 = 0.0%) +# Unique: 1662 +# +# Oldest: 2 January 2001 +# Newest: 18 October 2017 (+6133) +# +# Item Count Percent Cum. Count Cum. Percent +# ----------- ------ -------- ----------- ------------- +# 2008-12-24 5 0.2% 5 0.2% +# 2010-12-10 4 0.2% 9 0.4% +# 2011-03-03 4 0.2% 13 0.6% +# 2013-06-24 4 0.2% 17 0.8% +# 2017-09-01 4 0.2% 21 1.1% +# 2002-09-02 3 0.2% 24 1.2% +# 2003-10-14 3 0.2% 27 1.4% +# 2004-06-25 3 0.2% 30 1.5% +# 2004-06-27 3 0.2% 33 1.7% +# 2004-10-29 3 0.2% 36 1.8% +# 2005-09-27 3 0.2% 39 2.0% +# 2006-08-01 3 0.2% 42 2.1% +# 2006-10-10 3 0.2% 45 2.2% +# 2007-11-16 3 0.2% 48 2.4% +# 2008-03-09 3 0.2% 51 2.5% +# ... and 1647 more (n = 1949; 97.5%). Use `nmax` to show more rows. + + +## For numeric values, some extra descriptive statistics will be calculated: + +> freq(runif(n = 10, min = 1, max = 5)) +# Class: numeric +# Length: 10 (of which NA: 0 = 0.0%) +# Unique: 10 +# +# Mean: 3 +# Std. dev.: 0.93 (CV: 0.31) +# Five-Num: 1.1 | 2.3 | 3.1 | 3.8 | 4.0 (CQV: 0.25) +# Outliers: 0 +# +# Item Count Percent Cum. Count Cum. Percent +# --------- ------ -------- ----------- ------------- +# 1.132033 1 10.0% 1 10.0% +# 2.226903 1 10.0% 2 20.0% +# 2.280779 1 10.0% 3 30.0% +# 2.640898 1 10.0% 4 40.0% +# 2.913462 1 10.0% 5 50.0% +# 3.364201 1 10.0% 6 60.0% +# 3.771975 1 10.0% 7 70.0% +# 3.802861 1 10.0% 8 80.0% +# 3.803547 1 10.0% 9 90.0% +# 3.985691 1 10.0% 10 100.0% +# +# Warning message: +# All observations are unique. +``` +Learn more about this function with: +```r +?freq +``` + ### New classes This package contains two new S3 classes: `mic` for MIC values (e.g. from Vitek or Phoenix) and `rsi` for antimicrobial drug interpretations (i.e. S, I and R). Both are actually ordered factors under the hood (an MIC of `2` being higher than `<=1` but lower than `>=32`, and for class `rsi` factors are ordered as `S < I < R`). Both classes have extensions for existing generic functions like `print`, `summary` and `plot`. diff --git a/man/EUCAST.Rd b/man/EUCAST.Rd index 4315a55b..40c51d84 100644 --- a/man/EUCAST.Rd +++ b/man/EUCAST.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/EUCAST.R +% Please edit documentation in R/eucast.R \name{EUCAST_rules} \alias{EUCAST_rules} \alias{interpretive_reading} diff --git a/man/MDRO.Rd b/man/MDRO.Rd new file mode 100644 index 00000000..9b89c55d --- /dev/null +++ b/man/MDRO.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mdro.R +\name{MDRO} +\alias{MDRO} +\alias{BRMO} +\alias{MRGN} +\title{Determine multidrug-resistant organisms (MDRO)} +\usage{ +MDRO(tbl, country, col_bactid = "bactid", info = TRUE, + aminoglycosides = c("gent", "tobr", "kana"), quinolones = c("cipr", + "norf"), carbapenems = c("imip", "mero", "erta"), ceftazidime = "cfta", + piperacillin = "pita", trimethoprim_sulfa = "trsu", penicillin = "peni", + vancomycin = "vanc") + +BRMO(tbl, country = "nl", ...) + +MRGN(tbl, country = "de", ...) +} +\arguments{ +\item{tbl}{table with antibiotic columns, like e.g. \code{amox} and \code{amcl}} + +\item{country}{country to determine guidelines. Should be a code from the \href{https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2#Officially_assigned_code_elements}{list of ISO 3166-1 alpha-2 country codes}. Case-insensitive. Currently supported are \code{de} (Germany) and \code{nl} (the Netherlands).} + +\item{col_bactid}{column name of the bacteria ID in \code{tbl} - values of this column should be present in \code{microorganisms$bactid}, see \code{\link{microorganisms}}} + +\item{info}{print progress} + +\item{aminoglycosides, quinolones, carbapenems}{character vector with column names of antibiotics} + +\item{ceftazidime, piperacillin, trimethoprim_sulfa, penicillin, vancomycin}{column names of antibiotics} + +\item{...}{parameters that are passed on to \code{MDR}} +} +\value{ +Ordered factor with values \code{Positive}, \code{Unconfirmed}, \code{Negative}. +} +\description{ +Determine which isolates are multidrug-resistant organisms (MDRO) according to country-specific guidelines. +} diff --git a/man/freq.Rd b/man/freq.Rd new file mode 100644 index 00000000..ced3a297 --- /dev/null +++ b/man/freq.Rd @@ -0,0 +1,71 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/freq.R +\name{freq} +\alias{freq} +\alias{frequency_tbl} +\title{Frequency table} +\usage{ +freq(x, sort.count = TRUE, nmax = 15, na.rm = TRUE, markdown = FALSE, + toConsole = TRUE, digits = 2, sep = " ") + +frequency_tbl(x, sort.count = TRUE, nmax = 15, na.rm = TRUE, + markdown = FALSE, toConsole = TRUE, digits = 2, sep = " ") +} +\arguments{ +\item{x}{data} + +\item{sort.count}{Sort on count. Use \code{FALSE} to sort alphabetically on item.} + +\item{nmax}{number of row to print. Use \code{nmax = 0} or \code{nmax = NA} to print all rows.} + +\item{na.rm}{a logical value indicating whether NA values should be removed from the frequency table. The header will always print the amount of\code{NA}s.} + +\item{markdown}{print table in markdown format (this forces \code{nmax = NA})} + +\item{toConsole}{Print table to the console. Use \code{FALSE} to assign the table to an object.} + +\item{digits}{how many significant digits are to be used for numeric values (not for the items themselves, that depends on \code{\link{getOption}("digits")})} + +\item{sep}{a character string to separate the terms when selecting multiple columns} +} +\description{ +Create a frequency table of a vector of data, a single column or a maximum of 9 columns of a data frame. Supports markdown for reports. +} +\details{ +For numeric values, the next values will be calculated and shown into the header: +\itemize{ + \item{Mean, using \code{\link[base]{mean}}} + \item{Standard deviation, using \code{\link[stats]{sd}}} + \item{Five numbers of Tukey (min, Q1, median, Q3, max), using \code{\link[stats]{fivenum}}} + \item{Outliers (count and list), using \code{\link{boxplot.stats}}} + \item{Coefficient of variation (CV), the standard deviation divided by the mean} + \item{Coefficient of quartile variation (CQV, sometimes called coefficient of dispersion), calculated as \code{(Q3 - Q1) / (Q3 + Q1)} using \code{\link{quantile}} with \code{type = 6} as quantile algorithm to comply with SPSS standards} +} +} +\examples{ +library(dplyr) + +freq(septic_patients$hospital_id) + +septic_patients \%>\% + filter(hospital_id == "A") \%>\% + select(bactid) \%>\% + freq() + +# select multiple columns; they will be pasted together +septic_patients \%>\% + left_join_microorganisms \%>\% + filter(hospital_id == "A") \%>\% + select(genus, species) \%>\% + freq() + +# save frequency table to an object +years <- septic_patients \%>\% + mutate(year = format(date, "\%Y")) \%>\% + select(year) \%>\% + freq(toConsole = FALSE) +} +\keyword{freq} +\keyword{frequency} +\keyword{summarise} +\keyword{summary} diff --git a/man/like.Rd b/man/like.Rd new file mode 100644 index 00000000..8fdd388f --- /dev/null +++ b/man/like.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/misc.R +\name{like} +\alias{like} +\alias{\%like\%} +\title{Pattern Matching} +\source{ +Inherited from the \href{https://github.com/Rdatatable/data.table/blob/master/R/like.R}{\code{like} function from the \code{data.table} package}, but made it case insensitive at default. +} +\usage{ +x \%like\% pattern +} +\arguments{ +\item{x}{a character vector where matches are sought, or an + object which can be coerced by \code{as.character} to a character + vector. \link{Long vectors} are supported.} + +\item{pattern}{character string containing a \link{regular expression} + (or character string for \code{fixed = TRUE}) to be matched + in the given character vector. Coerced by + \code{\link{as.character}} to a character string if possible. If a + character vector of length 2 or more is supplied, the first element + is used with a warning. Missing values are allowed except for + \code{regexpr} and \code{gregexpr}.} +} +\value{ +A \code{logical} vector +} +\description{ +Convenience function to compare a vector with a pattern, like \code{\link[base]{grep}}. It always returns a \code{logical} vector and is always case-insensitive. +} +\examples{ +library(dplyr) +# get unique occurences of bacteria whose name start with 'Ent' +septic_patients \%>\% + left_join_microorganisms() \%>\% + filter(fullname \%like\% '^Ent') \%>\% + pull(fullname) \%>\% + unique() +} diff --git a/man/mo_property.Rd b/man/mo_property.Rd index 3f7e9e49..8bc92e46 100644 --- a/man/mo_property.Rd +++ b/man/mo_property.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/EUCAST.R +% Please edit documentation in R/eucast.R \name{mo_property} \alias{mo_property} \title{Poperties of a microorganism} diff --git a/man/print.Rd b/man/print.Rd new file mode 100644 index 00000000..a9f4ef01 --- /dev/null +++ b/man/print.Rd @@ -0,0 +1,55 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/print.R +\name{print} +\alias{print} +\alias{print.tbl_df} +\alias{print.tbl} +\alias{print.data.table} +\title{Printing Data Tables and Tibbles} +\usage{ +\method{print}{tbl_df}(x, nmax = 10, header = TRUE, row.names = TRUE, + right = FALSE, width = 1, na = "", ...) + +\method{print}{tbl}(x, ...) + +\method{print}{data.table}(x, nmax = 10, header = TRUE, row.names = TRUE, + print.keys = FALSE, right = FALSE, width = 1, na = "", ...) +} +\arguments{ +\item{x}{object of class \code{data.frame}.} + +\item{nmax}{amount of rows to print in total. When the total amount of rows exceeds this limit, the first and last \code{nmax / 2} rows will be printed. Use \code{nmax = NA} to print all rows.} + +\item{header}{print header with information about data size and tibble grouping} + +\item{row.names}{logical (or character vector), indicating whether (or + what) row names should be printed.} + +\item{right}{logical, indicating whether or not strings should be + right-aligned. The default is right-alignment.} + +\item{width}{amount of white spaces to keep between columns, must be at least 1} + +\item{na}{value to print instead of NA} + +\item{...}{optional arguments to \code{print} or \code{plot} methods.} + +\item{print.keys}{print keys for \code{data.table}} +} +\description{ +Print a data table or tibble. It prints: \cr- The \strong{first and last rows} like \code{data.table}s are printed by the \code{data.table} package,\cr- A \strong{header} and \strong{left aligned text} like \code{tibble}s are printed by the \code{tibble} package with info about grouped variables,\cr- \strong{Unchanged values} and \strong{support for row names} like \code{data.frame}s are printed by the \code{base} package. +} +\examples{ +# still showing all values unchanged: +library(dplyr) +starwars +print(starwars, width = 3) + +# compare the significance notation of this +starwars \%>\% select(birth_year) +# with this +tibble:::print.tbl_df(starwars \%>\% select(birth_year)) + +# supports info about groups (look at header) +starwars \%>\% group_by(homeworld, gender) +} diff --git a/tests/testthat/test-eucast.R b/tests/testthat/test-eucast.R index cd0211cf..c52b5a6c 100644 --- a/tests/testthat/test-eucast.R +++ b/tests/testthat/test-eucast.R @@ -1,4 +1,4 @@ -context("EUCAST.R") +context("eucast.R") test_that("EUCAST rules work", { a <- data.frame(bactid = c("KLEPNE", # Klebsiella pneumoniae diff --git a/tests/testthat/test-freq.R b/tests/testthat/test-freq.R new file mode 100644 index 00000000..5aa65cb9 --- /dev/null +++ b/tests/testthat/test-freq.R @@ -0,0 +1,26 @@ +context("freq.R") + +test_that("frequency table works", { + expect_equal(nrow(freq(c(1, 1, 2, 2, 3, 3, 4, 4, 5, 5), toConsole = FALSE)), 5) + expect_equal(nrow(frequency_tbl(c(1, 1, 2, 2, 3, 3, 4, 4, 5, 5), toConsole = FALSE)), 5) + + # date column of septic_patients should contain 1662 unique dates + expect_equal(nrow(freq(septic_patients$date, toConsole = FALSE)), 1662) + expect_equal(nrow(freq(septic_patients$date, toConsole = FALSE)), + length(unique(septic_patients$date))) + + expect_output(freq(septic_patients$age)) + expect_output(freq(septic_patients$date)) + expect_output(freq(septic_patients$hospital_id)) + + library(dplyr) + expect_output(septic_patients %>% select(1:2) %>% freq()) + expect_output(septic_patients %>% select(1:3) %>% freq()) + expect_output(septic_patients %>% select(1:4) %>% freq()) + expect_output(septic_patients %>% select(1:5) %>% freq()) + expect_output(septic_patients %>% select(1:6) %>% freq()) + expect_output(septic_patients %>% select(1:7) %>% freq()) + expect_output(septic_patients %>% select(1:8) %>% freq()) + expect_output(septic_patients %>% select(1:9) %>% freq()) +}) + diff --git a/tests/testthat/test-mdro.R b/tests/testthat/test-mdro.R new file mode 100644 index 00000000..5b6fce1a --- /dev/null +++ b/tests/testthat/test-mdro.R @@ -0,0 +1,16 @@ +context("mdro.R") + + +test_that("MDRO works", { + library(dplyr) + outcome <- MDRO(septic_patients, "nl", info = FALSE) + + # check class + expect_equal(outcome %>% class(), c('ordered', 'factor')) + + # septic_patients should have these finding using Dutch guidelines + expect_equal(outcome %>% freq(toConsole = FALSE) %>% pull(Count), c(1152, 824, 3, 21)) + + expect_equal(BRMO(septic_patients, info = FALSE), MDRO(septic_patients, "nl", info = FALSE)) + +}) diff --git a/tests/testthat/test-misc.R b/tests/testthat/test-misc.R index 21187ae5..d526ef27 100644 --- a/tests/testthat/test-misc.R +++ b/tests/testthat/test-misc.R @@ -10,5 +10,7 @@ test_that("`like` works", { test_that("percentages works", { expect_equal(percent(0.25), "25%") expect_equal(percent(0.5), "50%") + expect_equal(percent(0.500, force_zero = TRUE), "50.0%") expect_equal(percent(0.1234), "12.3%") }) + diff --git a/tests/testthat/test-print.R b/tests/testthat/test-print.R new file mode 100644 index 00000000..57d8d583 --- /dev/null +++ b/tests/testthat/test-print.R @@ -0,0 +1,8 @@ +context("print.R") + + +test_that("tibble printing works", { + library(dplyr) + expect_output(print(starwars)) + expect_output(print(septic_patients)) +})