From a8132922af37408048dd89b47c63d06dd73dfbe7 Mon Sep 17 00:00:00 2001
From: "Matthijs S. Berends" <m.s.berends@umcg.nl>
Date: Mon, 19 Nov 2018 13:00:22 +0100
Subject: [PATCH] unit test read.4d, unselecting freq cols

---
 DESCRIPTION                   |  4 +-
 NEWS.md                       |  8 +++-
 R/freq.R                      | 70 +++++++++++++++++++++++++++--------
 R/read.4d.R                   |  2 +-
 man/freq.Rd                   | 27 +++++++++++++-
 tests/testthat/test-freq.R    |  8 ++++
 tests/testthat/test-read.4d.R | 30 +++++++++++++++
 7 files changed, 127 insertions(+), 22 deletions(-)
 create mode 100644 tests/testthat/test-read.4d.R

diff --git a/DESCRIPTION b/DESCRIPTION
index 28121a7a8..db62ab0ba 100755
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: AMR
-Version: 0.4.0.9011
-Date: 2018-11-16
+Version: 0.4.0.9012
+Date: 2018-11-17
 Title: Antimicrobial Resistance Analysis
 Authors@R: c(
     person(
diff --git a/NEWS.md b/NEWS.md
index eae562805..979f82408 100755
--- a/NEWS.md
+++ b/NEWS.md
@@ -35,6 +35,12 @@
       group_by(hospital_id) %>% 
       freq(gender)
     ```
+  * Support for (un)selecting columns:
+    ```r
+    septic_patients %>% 
+      freq(hospital_id) %>% 
+      select(-count, -cum_count) # only get item, percent, cum_percent
+    ```
   * Check for `hms::is.hms`
   * Now prints in markdown at default in non-interactive sessions
   * No longer adds the factor level column and sorts factors on count again
@@ -43,7 +49,7 @@
   * New parameter `header` to turn it off (default when `markdown = TRUE`)
   * New parameter `title` to replace the automatically set title
 * `first_isolate` now tries to find columns to use as input when parameters are left blank
-* Improvement for MDRO algorithm (function `mdro`)
+* Improvements for MDRO algorithm (function `mdro`)
 * Data set `septic_patients` is now a `data.frame`, not a tibble anymore
 * Removed diacritics from all authors (columns `microorganisms$ref` and `microorganisms.old$ref`) to comply with CRAN policy to only allow ASCII characters
 * Fix for `mo_property` not working properly
diff --git a/R/freq.R b/R/freq.R
index d754a3d9b..a521b203a 100755
--- a/R/freq.R
+++ b/R/freq.R
@@ -66,7 +66,7 @@
 #' @keywords summary summarise frequency freq
 #' @rdname freq
 #' @name freq
-#' @return A \code{data.frame} with an additional class \code{"frequency_tbl"}
+#' @return A \code{data.frame} (with an additional class \code{"frequency_tbl"}) with five columns: \code{item}, \code{count}, \code{percent}, \code{cum_count} and \code{cum_percent}.
 #' @export
 #' @examples
 #' library(dplyr)
@@ -79,55 +79,66 @@
 #' septic_patients %>% freq("hospital_id")
 #' septic_patients %>% freq(hospital_id)  #<- easiest to remember (tidyverse)
 #'
+#'
 #' # you could also use `select` or `pull` to get your variables
 #' septic_patients %>%
 #'   filter(hospital_id == "A") %>%
 #'   select(mo) %>%
 #'   freq()
 #'
+#'
 #' # multiple selected variables will be pasted together
 #' septic_patients %>%
 #'   left_join_microorganisms %>%
 #'   filter(hospital_id == "A") %>%
 #'   freq(genus, species)
 #'
+#'
 #' # group a variable and analyse another
 #' septic_patients %>%
 #'   group_by(hospital_id) %>%
 #'   freq(gender)
 #'
+#'
 #' # get top 10 bugs of hospital A as a vector
 #' septic_patients %>%
 #'   filter(hospital_id == "A") %>%
 #'   freq(mo) %>%
 #'   top_freq(10)
 #'
+#'
 #' # save frequency table to an object
 #' years <- septic_patients %>%
 #'   mutate(year = format(date, "%Y")) %>%
 #'   freq(year)
 #'
+#'
 #' # show only the top 5
 #' years %>% print(nmax = 5)
 #'
+#'
 #' # save to an object with formatted percentages
 #' years <- format(years)
 #'
+#'
 #' # print a histogram of numeric values
 #' septic_patients %>%
 #'   freq(age) %>%
 #'   hist()
 #'
+#'
 #' # or print all points to a regular plot
 #' septic_patients %>%
 #'   freq(age) %>%
 #'   plot()
 #'
+#'
 #' # transform to a data.frame or tibble
 #' septic_patients %>%
 #'   freq(age) %>%
 #'   as.data.frame()
 #'
+#'
 #' # or transform (back) to a vector
 #' septic_patients %>%
 #'   freq(age) %>%
@@ -139,11 +150,23 @@
 #'             sort(),
 #'           sort(septic_patients$age)) # TRUE
 #'
-#' # it also supports `table` objects:
+#'
+#' # it also supports `table` objects
 #' table(septic_patients$gender,
 #'       septic_patients$age) %>%
 #'   freq(sep = " **sep** ")
 #'
+#'
+#' # only get selected columns
+#' septic_patients %>%
+#'   freq(hospital_id) %>%
+#'   select(item, percent)
+#'
+#' septic_patients %>%
+#'   freq(hospital_id) %>%
+#'   select(-count, -cum_count)
+#'
+#'
 #' # check differences between frequency tables
 #' diff(freq(septic_patients$trim),
 #'      freq(septic_patients$trsu))
@@ -569,6 +592,7 @@ print.frequency_tbl <- function(x, nmax = getOption("max.print.freq", default =
       }
       title <- paste(title, group_var)
     }
+    title <- paste("Frequency table of", trimws(title))
   } else {
     title <- opt$title
   }
@@ -592,12 +616,6 @@ print.frequency_tbl <- function(x, nmax = getOption("max.print.freq", default =
     opt$header <- header
   }
 
-  if (trimws(title) == "") {
-    title <- "Frequency table"
-  } else {
-    title <- paste("Frequency table of", trimws(title))
-  }
-
   # bold title
   if (opt$tbl_format == "pandoc") {
     title <- bold(title)
@@ -620,10 +638,6 @@ print.frequency_tbl <- function(x, nmax = getOption("max.print.freq", default =
     return(invisible())
   }
 
-  if (all(x$count == 1)) {
-    warning('All observations are unique.', call. = FALSE)
-  }
-
   # save old NA setting for kable
   opt.old <- options()$knitr.kable.NA
   if (is.null(opt$na)) {
@@ -668,10 +682,34 @@ print.frequency_tbl <- function(x, nmax = getOption("max.print.freq", default =
   if (any(class(x$item) %in% c('double', 'integer', 'numeric', 'raw', 'single'))) {
     x$item <- format(x$item)
   }
-  x$count <- format(x$count)
-  x$percent <- percent(x$percent, force_zero = TRUE)
-  x$cum_count <- format(x$cum_count)
-  x$cum_percent <- percent(x$cum_percent, force_zero = TRUE)
+  if ("item" %in% colnames(x)) {
+    x$item <- format(x$item)
+  } else {
+    opt$column_names <- opt$column_names[!opt$column_names == "Item"]
+  }
+  if ("count" %in% colnames(x)) {
+    if (all(x$count == 1)) {
+      warning('All observations are unique.', call. = FALSE)
+    }
+    x$count <- format(x$count)
+  } else {
+    opt$column_names <- opt$column_names[!opt$column_names == "Count"]
+  }
+  if ("percent" %in% colnames(x)) {
+    x$percent <- percent(x$percent, force_zero = TRUE)
+  } else {
+    opt$column_names <- opt$column_names[!opt$column_names == "Percent"]
+  }
+  if ("cum_count" %in% colnames(x)) {
+    x$cum_count <- format(x$cum_count)
+  } else {
+    opt$column_names <- opt$column_names[!opt$column_names == "Cum. Count"]
+  }
+  if ("cum_percent" %in% colnames(x)) {
+    x$cum_percent <- percent(x$cum_percent, force_zero = TRUE)
+  } else {
+    opt$column_names <- opt$column_names[!opt$column_names == "Cum. Percent"]
+  }
 
   if (opt$tbl_format == "markdown") {
     cat("\n")
diff --git a/R/read.4d.R b/R/read.4d.R
index 34e756268..ae1538de6 100644
--- a/R/read.4d.R
+++ b/R/read.4d.R
@@ -41,7 +41,7 @@ read.4D <- function(file,
                     encoding = "UTF-8") {
 
   if (info == TRUE) {
-    message("Importing data... ", appendLF = FALSE)
+    message("Importing ", file, "... ", appendLF = FALSE)
   }
   data_4D <- utils::read.table(file = file,
                                row.names = row.names,
diff --git a/man/freq.Rd b/man/freq.Rd
index cd7b0d671..ec2938729 100755
--- a/man/freq.Rd
+++ b/man/freq.Rd
@@ -54,7 +54,7 @@ top_freq(f, n)
 \item{n}{number of top \emph{n} items to return, use -n for the bottom \emph{n} items. It will include more than \code{n} rows if there are ties.}
 }
 \value{
-A \code{data.frame} with an additional class \code{"frequency_tbl"}
+A \code{data.frame} (with an additional class \code{"frequency_tbl"}) with five columns: \code{item}, \code{count}, \code{percent}, \code{cum_count} and \code{cum_percent}.
 }
 \description{
 Create a frequency table of a vector with items or a data frame. Supports quasiquotation and markdown for reports. \code{top_freq} can be used to get the top/bottom \emph{n} items of a frequency table, with counts as names.
@@ -95,55 +95,66 @@ septic_patients[, "hospital_id"] \%>\% freq()
 septic_patients \%>\% freq("hospital_id")
 septic_patients \%>\% freq(hospital_id)  #<- easiest to remember (tidyverse)
 
+
 # you could also use `select` or `pull` to get your variables
 septic_patients \%>\%
   filter(hospital_id == "A") \%>\%
   select(mo) \%>\%
   freq()
 
+
 # multiple selected variables will be pasted together
 septic_patients \%>\%
   left_join_microorganisms \%>\%
   filter(hospital_id == "A") \%>\%
   freq(genus, species)
 
+
 # group a variable and analyse another
 septic_patients \%>\%
   group_by(hospital_id) \%>\%
   freq(gender)
 
+
 # get top 10 bugs of hospital A as a vector
 septic_patients \%>\%
   filter(hospital_id == "A") \%>\%
   freq(mo) \%>\%
   top_freq(10)
 
+
 # save frequency table to an object
 years <- septic_patients \%>\%
   mutate(year = format(date, "\%Y")) \%>\%
   freq(year)
 
+
 # show only the top 5
 years \%>\% print(nmax = 5)
 
+
 # save to an object with formatted percentages
 years <- format(years)
 
+
 # print a histogram of numeric values
 septic_patients \%>\%
   freq(age) \%>\%
   hist()
 
+
 # or print all points to a regular plot
 septic_patients \%>\%
   freq(age) \%>\%
   plot()
 
+
 # transform to a data.frame or tibble
 septic_patients \%>\%
   freq(age) \%>\%
   as.data.frame()
 
+
 # or transform (back) to a vector
 septic_patients \%>\%
   freq(age) \%>\%
@@ -155,11 +166,23 @@ identical(septic_patients \%>\%
             sort(),
           sort(septic_patients$age)) # TRUE
 
-# it also supports `table` objects:
+
+# it also supports `table` objects
 table(septic_patients$gender,
       septic_patients$age) \%>\%
   freq(sep = " **sep** ")
 
+
+# only get selected columns
+septic_patients \%>\%
+  freq(hospital_id) \%>\%
+  select(item, percent)
+
+septic_patients \%>\%
+  freq(hospital_id) \%>\%
+  select(-count, -cum_count)
+
+
 # check differences between frequency tables
 diff(freq(septic_patients$trim),
      freq(septic_patients$trsu))
diff --git a/tests/testthat/test-freq.R b/tests/testthat/test-freq.R
index 2dba7dc7d..fa59b4e4f 100755
--- a/tests/testthat/test-freq.R
+++ b/tests/testthat/test-freq.R
@@ -11,6 +11,8 @@ test_that("frequency table works", {
   expect_equal(nrow(freq(septic_patients$date)),
                length(unique(septic_patients$date)))
 
+  expect_output(print(septic_patients %>% freq(age)))
+  expect_output(print(septic_patients %>% freq(age, nmax = 5)))
   expect_output(print(septic_patients %>% freq(age, nmax = Inf)))
   expect_output(print(freq(septic_patients$age, nmax = Inf)))
   expect_output(print(freq(septic_patients$age, nmax = NA)))
@@ -123,6 +125,12 @@ test_that("frequency table works", {
   expect_error(septic_patients %>% freq(peni, oxac, clox, amox, amcl,
                                         ampi, pita, czol, cfep, cfur))
 
+  # (un)select columns
+  expect_equal(septic_patients %>% freq(hospital_id) %>% select(item) %>% ncol(),
+               1)
+  expect_equal(septic_patients %>% freq(hospital_id) %>% select(-item) %>% ncol(),
+               4)
+
   # run diff
   expect_output(print(
     diff(freq(septic_patients$amcl),
diff --git a/tests/testthat/test-read.4d.R b/tests/testthat/test-read.4d.R
new file mode 100644
index 000000000..9b99b1e5f
--- /dev/null
+++ b/tests/testthat/test-read.4d.R
@@ -0,0 +1,30 @@
+context("read.4d.R")
+
+test_that("read 4D works", {
+
+  library(dplyr)
+  test1 <- data.frame(Patientnr = "ABC",
+                      MV = "M",
+                      Monsternr = "0123",
+                      Afnamedat = "10-11-12",
+                      Bepaling = "bk",
+                      Afd. = "ABC",
+                      Spec = "ABC",
+                      Matbijz. = "ABC",
+                      Mat = "ABC",
+                      Mocode = "esccol",
+                      PENI = "R",
+                      stringsAsFactors = FALSE)
+  tf <- tempfile()
+  write.table(test1, file = tf, quote = F, sep = "\t")
+
+  x <- read.4D(tf, skip = 0)
+  unlink(tf)
+
+  expect_equal(ncol(x), 11)
+  expect_equal(class(x$date_received), "Date")
+  expect_equal(class(x$mo), "mo")
+  expect_equal(as.character(x$mo), "B_ESCHR_COL")
+  expect_equal(is.rsi(x$peni), TRUE)
+
+})