extra unit tests, add row.names to freq

2026-02-24 05:09:14 +01:00 · 2018-06-19 15:20:14 +02:00
parent bdc860e29c
commit 4a027f3c34
8 changed files with 77 additions and 25 deletions
--- a/NEWS.md
+++ b/NEWS.md
@@ -4,9 +4,12 @@
 * Added possibility to globally set the default for the amount of items to print in frequency tables (`freq` function), with `options(max.print.freq = n)`
 * Renamed `toConsole` parameter of `freq` function to `as.data.frame`
 * Small translational improvements to the `septic_patients` dataset
-* Combined MIC/RSI values will now be coerced by the `rsi` and `mic` functions: `as.rsi("<=0.002; S")` will return `S` and `as.mic("<=0.002; S")` will return `<=0.002`
+* Combined MIC/RSI values will now be coerced by the `rsi` and `mic` functions:
-* It is now possible to coerce MIC values when there's a space between the operator and the value, i.e. `as.mic("<= 0.002")` now works
+  * `as.rsi("<=0.002; S")` will return `S`
-* Added `"groups"` option for `atc_property(..., property)`. It will return a vector of the ATC hierarchy as defined by the [WHO](https://www.whocc.no/atc/structure_and_principles/). The new function `atc_groups` is a convenient wrapper around this. * Build-in host check for `atc_property` as it requires the host set by `url` to be responsive
+  * `as.mic("<=0.002; S")` will return `<=0.002`
 * Now possible to coerce MIC values with a space between operator and value, i.e. `as.mic("<= 0.002")` now works
 * Added `"groups"` option for `atc_property(..., property)`. It will return a vector of the ATC hierarchy as defined by the [WHO](https://www.whocc.no/atc/structure_and_principles/). The new function `atc_groups` is a convenient wrapper around this.
 * Build-in host check for `atc_property` as it requires the host set by `url` to be responsive
 * Improved `first_isolate` algorithm to exclude isolates where bacteria ID or genus is unavailable
 * Fix for warning *hybrid evaluation forced for row_number* ([`924b62`](https://github.com/tidyverse/dplyr/commit/924b62)) from the `dplyr` package v0.7.5 and above
 * Support for 1 or 2 columns as input for `guess_bactid`
--- a/R/atc.R
+++ b/R/atc.R
@@ -263,7 +263,7 @@ abname <- function(abcode, from = c("guess", "atc", "molis", "umcg"), to = 'offi
  if (!from %in% colnames(antibiotics) |
      !to %in% colnames(antibiotics)) {
    stop(paste0('Invalid `from` or `to`. Choose one of ',
-                colnames(antibiotics) %>% paste(collapse = ","), '.'), call. = FALSE)
+                colnames(antibiotics) %>% paste(collapse = ", "), '.'), call. = FALSE)
  }
  abcode <- as.character(abcode)
--- a/R/freq.R
+++ b/R/freq.R
@@ -23,6 +23,7 @@
 #' @param sort.count sort on count. Use \code{FALSE} to sort alphabetically on item.
 #' @param nmax number of row to print. The default, \code{15}, uses \code{\link{getOption}("max.print.freq")}. Use \code{nmax = 0} or \code{nmax = NA} to print all rows.
 #' @param na.rm a logical value indicating whether NA values should be removed from the frequency table. The header will always print the amount of \code{NA}s.
 #' @param row.names a logical value indicating whether row indices should be printed as \code{1:nrow(x)}
 #' @param markdown print table in markdown format (this forces \code{nmax = NA})
 #' @param as.data.frame return frequency table without header as a \code{data.frame} (e.g. to assign the table to an object)
 #' @param digits how many significant digits are to be used for numeric values (not for the items themselves, that depends on \code{\link{getOption}("digits")})
@@ -68,6 +69,7 @@ freq <- function(x,
                 sort.count = TRUE,
                 nmax = getOption("max.print.freq"),
                 na.rm = TRUE,
                 row.names = TRUE,
                 markdown = FALSE,
                 as.data.frame = FALSE,
                 digits = 2,
@@ -222,11 +224,13 @@ freq <- function(x,
  }
  if (any(class(x) %in% c('Date', 'POSIXct', 'POSIXlt'))) {
    header <- header %>% paste0('\n')
-    mindatum <- x %>% min()
+    mindate <- x %>% min(na.rm = TRUE)
-    maxdatum <- x %>% max()
+    maxdate <- x %>% max(na.rm = TRUE)
-    header <- header %>% paste0(markdown_line, '\nOldest:    ', mindatum %>% format(formatdates) %>% trimws())
+    mediandate <- x %>% median(na.rm = TRUE)
-    header <- header %>% paste0(markdown_line, '\nNewest:    ', maxdatum %>% format(formatdates) %>% trimws(),
+    header <- header %>% paste0(markdown_line, '\nOldest:    ', mindate %>% format(formatdates) %>% trimws())
-                                ' (+', difftime(maxdatum, mindatum, units = 'auto') %>% as.double() %>% format(), ')')
+    header <- header %>% paste0(markdown_line, '\nNewest:    ', maxdate %>% format(formatdates) %>% trimws(),
                                ' (+', difftime(maxdate, mindate, units = 'auto') %>% as.double() %>% format(), ')')
    header <- header %>% paste0(markdown_line, '\nMedian:    ', mediandate %>% format(formatdates) %>% trimws())
  }
  if (any(class(x) == 'POSIXlt')) {
    x <- x %>% format(formatdates)
@@ -266,8 +270,9 @@ freq <- function(x,
  } else {
    df <- tibble::tibble(Item = x) %>%
      group_by(Item)
-    column_names <- column_names[1:5] # strip factor lvl
+    # strip factor lvl from col names
-    column_names_df <- column_names_df[1:5] # strip factor lvl
+    column_names <- column_names[1:length(column_names) - 1]
    column_names_df <- column_names_df[1:length(column_names_df) - 1]
    column_align <- c(x_align, 'r', 'r', 'r', 'r')
  }
  df <- df %>%
@@ -333,6 +338,7 @@ freq <- function(x,
    print(
      knitr::kable(df2,
                   format = tblformat,
                   row.names = row.names,
                   col.names = column_names,
                   align = column_align,
                   padding = 1)
@@ -354,6 +360,7 @@ freq <- function(x,
    print(
      knitr::kable(df,
                   format = tblformat,
                   row.names = row.names,
                   col.names = column_names,
                   align = column_align,
                   padding = 1)
--- a/man/freq.Rd
+++ b/man/freq.Rd
@@ -6,12 +6,12 @@
 \title{Frequency table}
 \usage{
 freq(x, sort.count = TRUE, nmax = getOption("max.print.freq"),
-  na.rm = TRUE, markdown = FALSE, as.data.frame = FALSE, digits = 2,
+  na.rm = TRUE, row.names = TRUE, markdown = FALSE,
-  sep = " ")
+  as.data.frame = FALSE, digits = 2, sep = " ")
 frequency_tbl(x, sort.count = TRUE, nmax = getOption("max.print.freq"),
-  na.rm = TRUE, markdown = FALSE, as.data.frame = FALSE, digits = 2,
+  na.rm = TRUE, row.names = TRUE, markdown = FALSE,
-  sep = " ")
+  as.data.frame = FALSE, digits = 2, sep = " ")
 }
 \arguments{
 \item{x}{data}
@@ -22,6 +22,8 @@ frequency_tbl(x, sort.count = TRUE, nmax = getOption("max.print.freq"),
 \item{na.rm}{a logical value indicating whether NA values should be removed from the frequency table. The header will always print the amount of \code{NA}s.}
 \item{row.names}{a logical value indicating whether row indices should be printed as \code{1:nrow(x)}}
 \item{markdown}{print table in markdown format (this forces \code{nmax = NA})}
 \item{as.data.frame}{return frequency table without header as a \code{data.frame} (e.g. to assign the table to an object)}
--- a/tests/testthat/test-atc.R
+++ b/tests/testthat/test-atc.R
@@ -2,10 +2,19 @@ context("atc.R")
 test_that("atc_property works", {
  expect_equal(tolower(atc_property("J01CA04", property = "Name")), "amoxicillin")
-  expect_equivalent(atc_property("J01CA04", "DDD"), 1)
+  expect_equal(atc_property("J01CA04", property = "unit"), "g")
-  expect_equal(length(atc_property("J01CA04", property = "Groups")), 4)
+
  expect_equal(atc_property("J01CA04", property = "DDD"),
               atc_ddd("J01CA04"))
  expect_identical(atc_property("J01CA04", property = "Groups"),
                   atc_groups("J01CA04"))
  expect_warning(atc_property("ABCDEFG", property = "DDD"))
  expect_error(atc_property("J01CA04", property = c(1:5)))
-  expect_error(atc_property("J01CA04", administration = c(1:5)))
+  expect_error(atc_property("J01CA04", property = "test"))
  expect_error(atc_property("J01CA04", property = "test", administration = c(1:5)))
 })
 test_that("abname works", {
@@ -13,8 +22,15 @@ test_that("abname works", {
  expect_equal(abname(c("AMOX", "GENT")), c("Amoxicillin", "Gentamicin"))
  expect_equal(abname(c("AMOX+GENT")), "Amoxicillin + gentamicin")
  expect_equal(abname("AMOX", from = 'umcg'), "Amoxicillin")
-  expect_equal(abname("amox", from = 'molis'), "Amoxicillin")
+  expect_equal(abname("amox", from = 'molis', tolower = TRUE), "amoxicillin")
  expect_equal(abname("J01CA04", from = 'atc'), "Amoxicillin")
  expect_equal(abname("AMOX", to = 'atc'), "J01CA04")
  expect_equal(abname("AMOX en GENT"), "Amoxicillin + gentamicin")
  expect_error(abname("AMOX", to = c(1:3)))
  expect_error(abname("AMOX", to = "test"))
  expect_warning(abname("TEST
       "))
  expect_warning(abname("AMOX or GENT"))
 })
 test_that("guess_atc works", {
--- a/tests/testthat/test-guess_bactid.R
+++ b/tests/testthat/test-guess_bactid.R
@@ -6,9 +6,18 @@ test_that("guess_bactid works", {
    c("ESCCOL", "HAEINF"))
  expect_equal(guess_bactid("Escherichia coli"), "ESCCOL")
  expect_equal(guess_bactid("P. aer"), "PSEAER") # not Pasteurella aerogenes
  expect_equal(guess_bactid("Negative rods"), "GNR")
  expect_equal(guess_bactid("MRSE"), "STAEPI")
  expect_equal(guess_bactid("VRE"), "ENC")
  expect_equal(guess_bactid("MRPA"), "PSEAER")
  expect_equal(guess_bactid("PISP"), "STCPNE")
  expect_equal(guess_bactid("PRSP"), "STCPNE")
  expect_equal(guess_bactid("VISP"), "STCPNE")
  expect_equal(guess_bactid("VRSP"), "STCPNE")
  expect_identical(
    guess_bactid(c("stau",
                   "STAU",
--- a/vignettes/freq.R
+++ b/vignettes/freq.R
@@ -7,13 +7,13 @@ library(dplyr)
 library(AMR)
 ## ---- echo = TRUE, results = 'hide'--------------------------------------
-# # just using base R
+# just using base R
 freq(septic_patients$sex)
-# # using base R to select the variable and pass it on with a pipe
+# using base R to select the variable and pass it on with a pipe
 septic_patients$sex %>% freq()
-# # do it all with pipes, using the `select` function of the dplyr package
+# do it all with pipes, using the `select` function of the dplyr package
 septic_patients %>%
  select(sex) %>%
  freq()
@@ -69,6 +69,11 @@ septic_patients %>%
  select(amox) %>% 
  freq(na.rm = FALSE)
 ## ---- echo = TRUE--------------------------------------------------------
 septic_patients %>%
  select(hospital_id) %>% 
  freq(row.names = FALSE)
 ## ---- echo = TRUE--------------------------------------------------------
 septic_patients %>%
  select(hospital_id) %>% 
--- a/vignettes/freq.Rmd
+++ b/vignettes/freq.Rmd
@@ -27,13 +27,13 @@ Frequency tables (or frequency distributions) are summaries of the distribution
 To only show and quickly review the content of one variable, you can just select this variable in various ways. Let's say we want to get the frequencies of the `sex` variable of the `septic_patients` dataset:
 ```{r, echo = TRUE, results = 'hide'}
-# # just using base R
+# just using base R
 freq(septic_patients$sex)
-# # using base R to select the variable and pass it on with a pipe
+# using base R to select the variable and pass it on with a pipe
 septic_patients$sex %>% freq()
-# # do it all with pipes, using the `select` function of the dplyr package
+# do it all with pipes, using the `select` function of the dplyr package
 septic_patients %>%
  select(sex) %>%
  freq()
@@ -143,12 +143,22 @@ septic_patients %>%
 ### Parameter `na.rm`
 With the `na.rm` parameter (defaults to `TRUE`, but they will always be shown into the header), you can include `NA` values in the frequency table:
 ```{r, echo = TRUE}
 septic_patients %>%
  select(amox) %>% 
  freq(na.rm = FALSE)
 ```
 ### Parameter `row.names`
 The default frequency tables shows row indices. To remove them, use `row.names = FALSE`:
 ```{r, echo = TRUE}
 septic_patients %>%
  select(hospital_id) %>% 
  freq(row.names = FALSE)
 ```
 ### Parameter `markdown`
 The `markdown` parameter can be used in reports created with R Markdown. This will always print all rows: