From 4a027f3c34d9a0efa389342564d190eddfc7c9fc Mon Sep 17 00:00:00 2001
From: "Matthijs S. Berends" <m.s.berends@umcg.nl>
Date: Tue, 19 Jun 2018 15:20:14 +0200
Subject: [PATCH] extra unit tests, add row.names to freq

---
 NEWS.md                            |  9 ++++++---
 R/atc.R                            |  2 +-
 R/freq.R                           | 21 ++++++++++++++-------
 man/freq.Rd                        | 10 ++++++----
 tests/testthat/test-atc.R          | 24 ++++++++++++++++++++----
 tests/testthat/test-guess_bactid.R |  9 +++++++++
 vignettes/freq.R                   | 11 ++++++++---
 vignettes/freq.Rmd                 | 16 +++++++++++++---
 8 files changed, 77 insertions(+), 25 deletions(-)
 mode change 100755 => 100644 vignettes/freq.R

diff --git a/NEWS.md b/NEWS.md
index 2d487ebc8..5925d9a4f 100755
--- a/NEWS.md
+++ b/NEWS.md
@@ -4,9 +4,12 @@
 * Added possibility to globally set the default for the amount of items to print in frequency tables (`freq` function), with `options(max.print.freq = n)`
 * Renamed `toConsole` parameter of `freq` function to `as.data.frame`
 * Small translational improvements to the `septic_patients` dataset
-* Combined MIC/RSI values will now be coerced by the `rsi` and `mic` functions: `as.rsi("<=0.002; S")` will return `S` and `as.mic("<=0.002; S")` will return `<=0.002`
-* It is now possible to coerce MIC values when there's a space between the operator and the value, i.e. `as.mic("<= 0.002")` now works
-* Added `"groups"` option for `atc_property(..., property)`. It will return a vector of the ATC hierarchy as defined by the [WHO](https://www.whocc.no/atc/structure_and_principles/). The new function `atc_groups` is a convenient wrapper around this. * Build-in host check for `atc_property` as it requires the host set by `url` to be responsive
+* Combined MIC/RSI values will now be coerced by the `rsi` and `mic` functions:
+  * `as.rsi("<=0.002; S")` will return `S`
+  * `as.mic("<=0.002; S")` will return `<=0.002`
+* Now possible to coerce MIC values with a space between operator and value, i.e. `as.mic("<= 0.002")` now works
+* Added `"groups"` option for `atc_property(..., property)`. It will return a vector of the ATC hierarchy as defined by the [WHO](https://www.whocc.no/atc/structure_and_principles/). The new function `atc_groups` is a convenient wrapper around this.
+* Build-in host check for `atc_property` as it requires the host set by `url` to be responsive
 * Improved `first_isolate` algorithm to exclude isolates where bacteria ID or genus is unavailable
 * Fix for warning *hybrid evaluation forced for row_number* ([`924b62`](https://github.com/tidyverse/dplyr/commit/924b62)) from the `dplyr` package v0.7.5 and above
 * Support for 1 or 2 columns as input for `guess_bactid`
diff --git a/R/atc.R b/R/atc.R
index a0737dce3..264086f89 100755
--- a/R/atc.R
+++ b/R/atc.R
@@ -263,7 +263,7 @@ abname <- function(abcode, from = c("guess", "atc", "molis", "umcg"), to = 'offi
   if (!from %in% colnames(antibiotics) |
       !to %in% colnames(antibiotics)) {
     stop(paste0('Invalid `from` or `to`. Choose one of ',
-                colnames(antibiotics) %>% paste(collapse = ","), '.'), call. = FALSE)
+                colnames(antibiotics) %>% paste(collapse = ", "), '.'), call. = FALSE)
   }
 
   abcode <- as.character(abcode)
diff --git a/R/freq.R b/R/freq.R
index 49705d841..77e15f472 100755
--- a/R/freq.R
+++ b/R/freq.R
@@ -23,6 +23,7 @@
 #' @param sort.count sort on count. Use \code{FALSE} to sort alphabetically on item.
 #' @param nmax number of row to print. The default, \code{15}, uses \code{\link{getOption}("max.print.freq")}. Use \code{nmax = 0} or \code{nmax = NA} to print all rows.
 #' @param na.rm a logical value indicating whether NA values should be removed from the frequency table. The header will always print the amount of \code{NA}s.
+#' @param row.names a logical value indicating whether row indices should be printed as \code{1:nrow(x)}
 #' @param markdown print table in markdown format (this forces \code{nmax = NA})
 #' @param as.data.frame return frequency table without header as a \code{data.frame} (e.g. to assign the table to an object)
 #' @param digits how many significant digits are to be used for numeric values (not for the items themselves, that depends on \code{\link{getOption}("digits")})
@@ -68,6 +69,7 @@ freq <- function(x,
                  sort.count = TRUE,
                  nmax = getOption("max.print.freq"),
                  na.rm = TRUE,
+                 row.names = TRUE,
                  markdown = FALSE,
                  as.data.frame = FALSE,
                  digits = 2,
@@ -222,11 +224,13 @@ freq <- function(x,
   }
   if (any(class(x) %in% c('Date', 'POSIXct', 'POSIXlt'))) {
     header <- header %>% paste0('\n')
-    mindatum <- x %>% min()
-    maxdatum <- x %>% max()
-    header <- header %>% paste0(markdown_line, '\nOldest:    ', mindatum %>% format(formatdates) %>% trimws())
-    header <- header %>% paste0(markdown_line, '\nNewest:    ', maxdatum %>% format(formatdates) %>% trimws(),
-                                ' (+', difftime(maxdatum, mindatum, units = 'auto') %>% as.double() %>% format(), ')')
+    mindate <- x %>% min(na.rm = TRUE)
+    maxdate <- x %>% max(na.rm = TRUE)
+    mediandate <- x %>% median(na.rm = TRUE)
+    header <- header %>% paste0(markdown_line, '\nOldest:    ', mindate %>% format(formatdates) %>% trimws())
+    header <- header %>% paste0(markdown_line, '\nNewest:    ', maxdate %>% format(formatdates) %>% trimws(),
+                                ' (+', difftime(maxdate, mindate, units = 'auto') %>% as.double() %>% format(), ')')
+    header <- header %>% paste0(markdown_line, '\nMedian:    ', mediandate %>% format(formatdates) %>% trimws())
   }
   if (any(class(x) == 'POSIXlt')) {
     x <- x %>% format(formatdates)
@@ -266,8 +270,9 @@ freq <- function(x,
   } else {
     df <- tibble::tibble(Item = x) %>%
       group_by(Item)
-    column_names <- column_names[1:5] # strip factor lvl
-    column_names_df <- column_names_df[1:5] # strip factor lvl
+    # strip factor lvl from col names
+    column_names <- column_names[1:length(column_names) - 1]
+    column_names_df <- column_names_df[1:length(column_names_df) - 1]
     column_align <- c(x_align, 'r', 'r', 'r', 'r')
   }
   df <- df %>%
@@ -333,6 +338,7 @@ freq <- function(x,
     print(
       knitr::kable(df2,
                    format = tblformat,
+                   row.names = row.names,
                    col.names = column_names,
                    align = column_align,
                    padding = 1)
@@ -354,6 +360,7 @@ freq <- function(x,
     print(
       knitr::kable(df,
                    format = tblformat,
+                   row.names = row.names,
                    col.names = column_names,
                    align = column_align,
                    padding = 1)
diff --git a/man/freq.Rd b/man/freq.Rd
index 51f9f7481..4126b5561 100755
--- a/man/freq.Rd
+++ b/man/freq.Rd
@@ -6,12 +6,12 @@
 \title{Frequency table}
 \usage{
 freq(x, sort.count = TRUE, nmax = getOption("max.print.freq"),
-  na.rm = TRUE, markdown = FALSE, as.data.frame = FALSE, digits = 2,
-  sep = " ")
+  na.rm = TRUE, row.names = TRUE, markdown = FALSE,
+  as.data.frame = FALSE, digits = 2, sep = " ")
 
 frequency_tbl(x, sort.count = TRUE, nmax = getOption("max.print.freq"),
-  na.rm = TRUE, markdown = FALSE, as.data.frame = FALSE, digits = 2,
-  sep = " ")
+  na.rm = TRUE, row.names = TRUE, markdown = FALSE,
+  as.data.frame = FALSE, digits = 2, sep = " ")
 }
 \arguments{
 \item{x}{data}
@@ -22,6 +22,8 @@ frequency_tbl(x, sort.count = TRUE, nmax = getOption("max.print.freq"),
 
 \item{na.rm}{a logical value indicating whether NA values should be removed from the frequency table. The header will always print the amount of \code{NA}s.}
 
+\item{row.names}{a logical value indicating whether row indices should be printed as \code{1:nrow(x)}}
+
 \item{markdown}{print table in markdown format (this forces \code{nmax = NA})}
 
 \item{as.data.frame}{return frequency table without header as a \code{data.frame} (e.g. to assign the table to an object)}
diff --git a/tests/testthat/test-atc.R b/tests/testthat/test-atc.R
index 81da027f1..2df635771 100755
--- a/tests/testthat/test-atc.R
+++ b/tests/testthat/test-atc.R
@@ -2,10 +2,19 @@ context("atc.R")
 
 test_that("atc_property works", {
   expect_equal(tolower(atc_property("J01CA04", property = "Name")), "amoxicillin")
-  expect_equivalent(atc_property("J01CA04", "DDD"), 1)
-  expect_equal(length(atc_property("J01CA04", property = "Groups")), 4)
+  expect_equal(atc_property("J01CA04", property = "unit"), "g")
+
+  expect_equal(atc_property("J01CA04", property = "DDD"),
+               atc_ddd("J01CA04"))
+
+  expect_identical(atc_property("J01CA04", property = "Groups"),
+                   atc_groups("J01CA04"))
+
+  expect_warning(atc_property("ABCDEFG", property = "DDD"))
+
   expect_error(atc_property("J01CA04", property = c(1:5)))
-  expect_error(atc_property("J01CA04", administration = c(1:5)))
+  expect_error(atc_property("J01CA04", property = "test"))
+  expect_error(atc_property("J01CA04", property = "test", administration = c(1:5)))
 })
 
 test_that("abname works", {
@@ -13,8 +22,15 @@ test_that("abname works", {
   expect_equal(abname(c("AMOX", "GENT")), c("Amoxicillin", "Gentamicin"))
   expect_equal(abname(c("AMOX+GENT")), "Amoxicillin + gentamicin")
   expect_equal(abname("AMOX", from = 'umcg'), "Amoxicillin")
-  expect_equal(abname("amox", from = 'molis'), "Amoxicillin")
+  expect_equal(abname("amox", from = 'molis', tolower = TRUE), "amoxicillin")
   expect_equal(abname("J01CA04", from = 'atc'), "Amoxicillin")
+  expect_equal(abname("AMOX", to = 'atc'), "J01CA04")
+  expect_equal(abname("AMOX en GENT"), "Amoxicillin + gentamicin")
+  expect_error(abname("AMOX", to = c(1:3)))
+  expect_error(abname("AMOX", to = "test"))
+  expect_warning(abname("TEST
+       "))
+  expect_warning(abname("AMOX or GENT"))
 })
 
 test_that("guess_atc works", {
diff --git a/tests/testthat/test-guess_bactid.R b/tests/testthat/test-guess_bactid.R
index 8bb7b0106..17d03d112 100644
--- a/tests/testthat/test-guess_bactid.R
+++ b/tests/testthat/test-guess_bactid.R
@@ -6,9 +6,18 @@ test_that("guess_bactid works", {
     c("ESCCOL", "HAEINF"))
 
   expect_equal(guess_bactid("Escherichia coli"), "ESCCOL")
+  expect_equal(guess_bactid("P. aer"), "PSEAER") # not Pasteurella aerogenes
 
   expect_equal(guess_bactid("Negative rods"), "GNR")
 
+  expect_equal(guess_bactid("MRSE"), "STAEPI")
+  expect_equal(guess_bactid("VRE"), "ENC")
+  expect_equal(guess_bactid("MRPA"), "PSEAER")
+  expect_equal(guess_bactid("PISP"), "STCPNE")
+  expect_equal(guess_bactid("PRSP"), "STCPNE")
+  expect_equal(guess_bactid("VISP"), "STCPNE")
+  expect_equal(guess_bactid("VRSP"), "STCPNE")
+
   expect_identical(
     guess_bactid(c("stau",
                    "STAU",
diff --git a/vignettes/freq.R b/vignettes/freq.R
old mode 100755
new mode 100644
index 84ddea088..74701955c
--- a/vignettes/freq.R
+++ b/vignettes/freq.R
@@ -7,13 +7,13 @@ library(dplyr)
 library(AMR)
 
 ## ---- echo = TRUE, results = 'hide'--------------------------------------
-# # just using base R
+# just using base R
 freq(septic_patients$sex)
 
-# # using base R to select the variable and pass it on with a pipe
+# using base R to select the variable and pass it on with a pipe
 septic_patients$sex %>% freq()
 
-# # do it all with pipes, using the `select` function of the dplyr package
+# do it all with pipes, using the `select` function of the dplyr package
 septic_patients %>%
   select(sex) %>%
   freq()
@@ -69,6 +69,11 @@ septic_patients %>%
   select(amox) %>% 
   freq(na.rm = FALSE)
 
+## ---- echo = TRUE--------------------------------------------------------
+septic_patients %>%
+  select(hospital_id) %>% 
+  freq(row.names = FALSE)
+
 ## ---- echo = TRUE--------------------------------------------------------
 septic_patients %>%
   select(hospital_id) %>% 
diff --git a/vignettes/freq.Rmd b/vignettes/freq.Rmd
index f8c2a14c7..7d625bc48 100755
--- a/vignettes/freq.Rmd
+++ b/vignettes/freq.Rmd
@@ -27,13 +27,13 @@ Frequency tables (or frequency distributions) are summaries of the distribution
 
 To only show and quickly review the content of one variable, you can just select this variable in various ways. Let's say we want to get the frequencies of the `sex` variable of the `septic_patients` dataset:
 ```{r, echo = TRUE, results = 'hide'}
-# # just using base R
+# just using base R
 freq(septic_patients$sex)
 
-# # using base R to select the variable and pass it on with a pipe
+# using base R to select the variable and pass it on with a pipe
 septic_patients$sex %>% freq()
 
-# # do it all with pipes, using the `select` function of the dplyr package
+# do it all with pipes, using the `select` function of the dplyr package
 septic_patients %>%
   select(sex) %>%
   freq()
@@ -143,12 +143,22 @@ septic_patients %>%
 
 ### Parameter `na.rm`
 With the `na.rm` parameter (defaults to `TRUE`, but they will always be shown into the header), you can include `NA` values in the frequency table:
+
 ```{r, echo = TRUE}
 septic_patients %>%
   select(amox) %>% 
   freq(na.rm = FALSE)
 ```
 
+### Parameter `row.names`
+The default frequency tables shows row indices. To remove them, use `row.names = FALSE`:
+
+```{r, echo = TRUE}
+septic_patients %>%
+  select(hospital_id) %>% 
+  freq(row.names = FALSE)
+```
+
 ### Parameter `markdown`
 The `markdown` parameter can be used in reports created with R Markdown. This will always print all rows: