From a8132922af37408048dd89b47c63d06dd73dfbe7 Mon Sep 17 00:00:00 2001 From: "Matthijs S. Berends" Date: Mon, 19 Nov 2018 13:00:22 +0100 Subject: [PATCH] unit test read.4d, unselecting freq cols --- DESCRIPTION | 4 +- NEWS.md | 8 +++- R/freq.R | 70 +++++++++++++++++++++++++++-------- R/read.4d.R | 2 +- man/freq.Rd | 27 +++++++++++++- tests/testthat/test-freq.R | 8 ++++ tests/testthat/test-read.4d.R | 30 +++++++++++++++ 7 files changed, 127 insertions(+), 22 deletions(-) create mode 100644 tests/testthat/test-read.4d.R diff --git a/DESCRIPTION b/DESCRIPTION index 28121a7a..db62ab0b 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR -Version: 0.4.0.9011 -Date: 2018-11-16 +Version: 0.4.0.9012 +Date: 2018-11-17 Title: Antimicrobial Resistance Analysis Authors@R: c( person( diff --git a/NEWS.md b/NEWS.md index eae56280..979f8240 100755 --- a/NEWS.md +++ b/NEWS.md @@ -35,6 +35,12 @@ group_by(hospital_id) %>% freq(gender) ``` + * Support for (un)selecting columns: + ```r + septic_patients %>% + freq(hospital_id) %>% + select(-count, -cum_count) # only get item, percent, cum_percent + ``` * Check for `hms::is.hms` * Now prints in markdown at default in non-interactive sessions * No longer adds the factor level column and sorts factors on count again @@ -43,7 +49,7 @@ * New parameter `header` to turn it off (default when `markdown = TRUE`) * New parameter `title` to replace the automatically set title * `first_isolate` now tries to find columns to use as input when parameters are left blank -* Improvement for MDRO algorithm (function `mdro`) +* Improvements for MDRO algorithm (function `mdro`) * Data set `septic_patients` is now a `data.frame`, not a tibble anymore * Removed diacritics from all authors (columns `microorganisms$ref` and `microorganisms.old$ref`) to comply with CRAN policy to only allow ASCII characters * Fix for `mo_property` not working properly diff --git a/R/freq.R b/R/freq.R index d754a3d9..a521b203 100755 --- a/R/freq.R +++ b/R/freq.R @@ -66,7 +66,7 @@ #' @keywords summary summarise frequency freq #' @rdname freq #' @name freq -#' @return A \code{data.frame} with an additional class \code{"frequency_tbl"} +#' @return A \code{data.frame} (with an additional class \code{"frequency_tbl"}) with five columns: \code{item}, \code{count}, \code{percent}, \code{cum_count} and \code{cum_percent}. #' @export #' @examples #' library(dplyr) @@ -79,55 +79,66 @@ #' septic_patients %>% freq("hospital_id") #' septic_patients %>% freq(hospital_id) #<- easiest to remember (tidyverse) #' +#' #' # you could also use `select` or `pull` to get your variables #' septic_patients %>% #' filter(hospital_id == "A") %>% #' select(mo) %>% #' freq() #' +#' #' # multiple selected variables will be pasted together #' septic_patients %>% #' left_join_microorganisms %>% #' filter(hospital_id == "A") %>% #' freq(genus, species) #' +#' #' # group a variable and analyse another #' septic_patients %>% #' group_by(hospital_id) %>% #' freq(gender) #' +#' #' # get top 10 bugs of hospital A as a vector #' septic_patients %>% #' filter(hospital_id == "A") %>% #' freq(mo) %>% #' top_freq(10) #' +#' #' # save frequency table to an object #' years <- septic_patients %>% #' mutate(year = format(date, "%Y")) %>% #' freq(year) #' +#' #' # show only the top 5 #' years %>% print(nmax = 5) #' +#' #' # save to an object with formatted percentages #' years <- format(years) #' +#' #' # print a histogram of numeric values #' septic_patients %>% #' freq(age) %>% #' hist() #' +#' #' # or print all points to a regular plot #' septic_patients %>% #' freq(age) %>% #' plot() #' +#' #' # transform to a data.frame or tibble #' septic_patients %>% #' freq(age) %>% #' as.data.frame() #' +#' #' # or transform (back) to a vector #' septic_patients %>% #' freq(age) %>% @@ -139,11 +150,23 @@ #' sort(), #' sort(septic_patients$age)) # TRUE #' -#' # it also supports `table` objects: +#' +#' # it also supports `table` objects #' table(septic_patients$gender, #' septic_patients$age) %>% #' freq(sep = " **sep** ") #' +#' +#' # only get selected columns +#' septic_patients %>% +#' freq(hospital_id) %>% +#' select(item, percent) +#' +#' septic_patients %>% +#' freq(hospital_id) %>% +#' select(-count, -cum_count) +#' +#' #' # check differences between frequency tables #' diff(freq(septic_patients$trim), #' freq(septic_patients$trsu)) @@ -569,6 +592,7 @@ print.frequency_tbl <- function(x, nmax = getOption("max.print.freq", default = } title <- paste(title, group_var) } + title <- paste("Frequency table of", trimws(title)) } else { title <- opt$title } @@ -592,12 +616,6 @@ print.frequency_tbl <- function(x, nmax = getOption("max.print.freq", default = opt$header <- header } - if (trimws(title) == "") { - title <- "Frequency table" - } else { - title <- paste("Frequency table of", trimws(title)) - } - # bold title if (opt$tbl_format == "pandoc") { title <- bold(title) @@ -620,10 +638,6 @@ print.frequency_tbl <- function(x, nmax = getOption("max.print.freq", default = return(invisible()) } - if (all(x$count == 1)) { - warning('All observations are unique.', call. = FALSE) - } - # save old NA setting for kable opt.old <- options()$knitr.kable.NA if (is.null(opt$na)) { @@ -668,10 +682,34 @@ print.frequency_tbl <- function(x, nmax = getOption("max.print.freq", default = if (any(class(x$item) %in% c('double', 'integer', 'numeric', 'raw', 'single'))) { x$item <- format(x$item) } - x$count <- format(x$count) - x$percent <- percent(x$percent, force_zero = TRUE) - x$cum_count <- format(x$cum_count) - x$cum_percent <- percent(x$cum_percent, force_zero = TRUE) + if ("item" %in% colnames(x)) { + x$item <- format(x$item) + } else { + opt$column_names <- opt$column_names[!opt$column_names == "Item"] + } + if ("count" %in% colnames(x)) { + if (all(x$count == 1)) { + warning('All observations are unique.', call. = FALSE) + } + x$count <- format(x$count) + } else { + opt$column_names <- opt$column_names[!opt$column_names == "Count"] + } + if ("percent" %in% colnames(x)) { + x$percent <- percent(x$percent, force_zero = TRUE) + } else { + opt$column_names <- opt$column_names[!opt$column_names == "Percent"] + } + if ("cum_count" %in% colnames(x)) { + x$cum_count <- format(x$cum_count) + } else { + opt$column_names <- opt$column_names[!opt$column_names == "Cum. Count"] + } + if ("cum_percent" %in% colnames(x)) { + x$cum_percent <- percent(x$cum_percent, force_zero = TRUE) + } else { + opt$column_names <- opt$column_names[!opt$column_names == "Cum. Percent"] + } if (opt$tbl_format == "markdown") { cat("\n") diff --git a/R/read.4d.R b/R/read.4d.R index 34e75626..ae1538de 100644 --- a/R/read.4d.R +++ b/R/read.4d.R @@ -41,7 +41,7 @@ read.4D <- function(file, encoding = "UTF-8") { if (info == TRUE) { - message("Importing data... ", appendLF = FALSE) + message("Importing ", file, "... ", appendLF = FALSE) } data_4D <- utils::read.table(file = file, row.names = row.names, diff --git a/man/freq.Rd b/man/freq.Rd index cd7b0d67..ec293872 100755 --- a/man/freq.Rd +++ b/man/freq.Rd @@ -54,7 +54,7 @@ top_freq(f, n) \item{n}{number of top \emph{n} items to return, use -n for the bottom \emph{n} items. It will include more than \code{n} rows if there are ties.} } \value{ -A \code{data.frame} with an additional class \code{"frequency_tbl"} +A \code{data.frame} (with an additional class \code{"frequency_tbl"}) with five columns: \code{item}, \code{count}, \code{percent}, \code{cum_count} and \code{cum_percent}. } \description{ Create a frequency table of a vector with items or a data frame. Supports quasiquotation and markdown for reports. \code{top_freq} can be used to get the top/bottom \emph{n} items of a frequency table, with counts as names. @@ -95,55 +95,66 @@ septic_patients[, "hospital_id"] \%>\% freq() septic_patients \%>\% freq("hospital_id") septic_patients \%>\% freq(hospital_id) #<- easiest to remember (tidyverse) + # you could also use `select` or `pull` to get your variables septic_patients \%>\% filter(hospital_id == "A") \%>\% select(mo) \%>\% freq() + # multiple selected variables will be pasted together septic_patients \%>\% left_join_microorganisms \%>\% filter(hospital_id == "A") \%>\% freq(genus, species) + # group a variable and analyse another septic_patients \%>\% group_by(hospital_id) \%>\% freq(gender) + # get top 10 bugs of hospital A as a vector septic_patients \%>\% filter(hospital_id == "A") \%>\% freq(mo) \%>\% top_freq(10) + # save frequency table to an object years <- septic_patients \%>\% mutate(year = format(date, "\%Y")) \%>\% freq(year) + # show only the top 5 years \%>\% print(nmax = 5) + # save to an object with formatted percentages years <- format(years) + # print a histogram of numeric values septic_patients \%>\% freq(age) \%>\% hist() + # or print all points to a regular plot septic_patients \%>\% freq(age) \%>\% plot() + # transform to a data.frame or tibble septic_patients \%>\% freq(age) \%>\% as.data.frame() + # or transform (back) to a vector septic_patients \%>\% freq(age) \%>\% @@ -155,11 +166,23 @@ identical(septic_patients \%>\% sort(), sort(septic_patients$age)) # TRUE -# it also supports `table` objects: + +# it also supports `table` objects table(septic_patients$gender, septic_patients$age) \%>\% freq(sep = " **sep** ") + +# only get selected columns +septic_patients \%>\% + freq(hospital_id) \%>\% + select(item, percent) + +septic_patients \%>\% + freq(hospital_id) \%>\% + select(-count, -cum_count) + + # check differences between frequency tables diff(freq(septic_patients$trim), freq(septic_patients$trsu)) diff --git a/tests/testthat/test-freq.R b/tests/testthat/test-freq.R index 2dba7dc7..fa59b4e4 100755 --- a/tests/testthat/test-freq.R +++ b/tests/testthat/test-freq.R @@ -11,6 +11,8 @@ test_that("frequency table works", { expect_equal(nrow(freq(septic_patients$date)), length(unique(septic_patients$date))) + expect_output(print(septic_patients %>% freq(age))) + expect_output(print(septic_patients %>% freq(age, nmax = 5))) expect_output(print(septic_patients %>% freq(age, nmax = Inf))) expect_output(print(freq(septic_patients$age, nmax = Inf))) expect_output(print(freq(septic_patients$age, nmax = NA))) @@ -123,6 +125,12 @@ test_that("frequency table works", { expect_error(septic_patients %>% freq(peni, oxac, clox, amox, amcl, ampi, pita, czol, cfep, cfur)) + # (un)select columns + expect_equal(septic_patients %>% freq(hospital_id) %>% select(item) %>% ncol(), + 1) + expect_equal(septic_patients %>% freq(hospital_id) %>% select(-item) %>% ncol(), + 4) + # run diff expect_output(print( diff(freq(septic_patients$amcl), diff --git a/tests/testthat/test-read.4d.R b/tests/testthat/test-read.4d.R new file mode 100644 index 00000000..9b99b1e5 --- /dev/null +++ b/tests/testthat/test-read.4d.R @@ -0,0 +1,30 @@ +context("read.4d.R") + +test_that("read 4D works", { + + library(dplyr) + test1 <- data.frame(Patientnr = "ABC", + MV = "M", + Monsternr = "0123", + Afnamedat = "10-11-12", + Bepaling = "bk", + Afd. = "ABC", + Spec = "ABC", + Matbijz. = "ABC", + Mat = "ABC", + Mocode = "esccol", + PENI = "R", + stringsAsFactors = FALSE) + tf <- tempfile() + write.table(test1, file = tf, quote = F, sep = "\t") + + x <- read.4D(tf, skip = 0) + unlink(tf) + + expect_equal(ncol(x), 11) + expect_equal(class(x$date_received), "Date") + expect_equal(class(x$mo), "mo") + expect_equal(as.character(x$mo), "B_ESCHR_COL") + expect_equal(is.rsi(x$peni), TRUE) + +})