extra unit tests, add row.names to freq

This commit is contained in:
dr. M.S. (Matthijs) Berends 2018-06-19 15:20:14 +02:00
parent bdc860e29c
commit 4a027f3c34
8 changed files with 77 additions and 25 deletions

View File

@ -4,9 +4,12 @@
* Added possibility to globally set the default for the amount of items to print in frequency tables (`freq` function), with `options(max.print.freq = n)`
* Renamed `toConsole` parameter of `freq` function to `as.data.frame`
* Small translational improvements to the `septic_patients` dataset
* Combined MIC/RSI values will now be coerced by the `rsi` and `mic` functions: `as.rsi("<=0.002; S")` will return `S` and `as.mic("<=0.002; S")` will return `<=0.002`
* It is now possible to coerce MIC values when there's a space between the operator and the value, i.e. `as.mic("<= 0.002")` now works
* Added `"groups"` option for `atc_property(..., property)`. It will return a vector of the ATC hierarchy as defined by the [WHO](https://www.whocc.no/atc/structure_and_principles/). The new function `atc_groups` is a convenient wrapper around this. * Build-in host check for `atc_property` as it requires the host set by `url` to be responsive
* Combined MIC/RSI values will now be coerced by the `rsi` and `mic` functions:
* `as.rsi("<=0.002; S")` will return `S`
* `as.mic("<=0.002; S")` will return `<=0.002`
* Now possible to coerce MIC values with a space between operator and value, i.e. `as.mic("<= 0.002")` now works
* Added `"groups"` option for `atc_property(..., property)`. It will return a vector of the ATC hierarchy as defined by the [WHO](https://www.whocc.no/atc/structure_and_principles/). The new function `atc_groups` is a convenient wrapper around this.
* Build-in host check for `atc_property` as it requires the host set by `url` to be responsive
* Improved `first_isolate` algorithm to exclude isolates where bacteria ID or genus is unavailable
* Fix for warning *hybrid evaluation forced for row_number* ([`924b62`](https://github.com/tidyverse/dplyr/commit/924b62)) from the `dplyr` package v0.7.5 and above
* Support for 1 or 2 columns as input for `guess_bactid`

View File

@ -263,7 +263,7 @@ abname <- function(abcode, from = c("guess", "atc", "molis", "umcg"), to = 'offi
if (!from %in% colnames(antibiotics) |
!to %in% colnames(antibiotics)) {
stop(paste0('Invalid `from` or `to`. Choose one of ',
colnames(antibiotics) %>% paste(collapse = ","), '.'), call. = FALSE)
colnames(antibiotics) %>% paste(collapse = ", "), '.'), call. = FALSE)
}
abcode <- as.character(abcode)

View File

@ -23,6 +23,7 @@
#' @param sort.count sort on count. Use \code{FALSE} to sort alphabetically on item.
#' @param nmax number of row to print. The default, \code{15}, uses \code{\link{getOption}("max.print.freq")}. Use \code{nmax = 0} or \code{nmax = NA} to print all rows.
#' @param na.rm a logical value indicating whether NA values should be removed from the frequency table. The header will always print the amount of \code{NA}s.
#' @param row.names a logical value indicating whether row indices should be printed as \code{1:nrow(x)}
#' @param markdown print table in markdown format (this forces \code{nmax = NA})
#' @param as.data.frame return frequency table without header as a \code{data.frame} (e.g. to assign the table to an object)
#' @param digits how many significant digits are to be used for numeric values (not for the items themselves, that depends on \code{\link{getOption}("digits")})
@ -68,6 +69,7 @@ freq <- function(x,
sort.count = TRUE,
nmax = getOption("max.print.freq"),
na.rm = TRUE,
row.names = TRUE,
markdown = FALSE,
as.data.frame = FALSE,
digits = 2,
@ -222,11 +224,13 @@ freq <- function(x,
}
if (any(class(x) %in% c('Date', 'POSIXct', 'POSIXlt'))) {
header <- header %>% paste0('\n')
mindatum <- x %>% min()
maxdatum <- x %>% max()
header <- header %>% paste0(markdown_line, '\nOldest: ', mindatum %>% format(formatdates) %>% trimws())
header <- header %>% paste0(markdown_line, '\nNewest: ', maxdatum %>% format(formatdates) %>% trimws(),
' (+', difftime(maxdatum, mindatum, units = 'auto') %>% as.double() %>% format(), ')')
mindate <- x %>% min(na.rm = TRUE)
maxdate <- x %>% max(na.rm = TRUE)
mediandate <- x %>% median(na.rm = TRUE)
header <- header %>% paste0(markdown_line, '\nOldest: ', mindate %>% format(formatdates) %>% trimws())
header <- header %>% paste0(markdown_line, '\nNewest: ', maxdate %>% format(formatdates) %>% trimws(),
' (+', difftime(maxdate, mindate, units = 'auto') %>% as.double() %>% format(), ')')
header <- header %>% paste0(markdown_line, '\nMedian: ', mediandate %>% format(formatdates) %>% trimws())
}
if (any(class(x) == 'POSIXlt')) {
x <- x %>% format(formatdates)
@ -266,8 +270,9 @@ freq <- function(x,
} else {
df <- tibble::tibble(Item = x) %>%
group_by(Item)
column_names <- column_names[1:5] # strip factor lvl
column_names_df <- column_names_df[1:5] # strip factor lvl
# strip factor lvl from col names
column_names <- column_names[1:length(column_names) - 1]
column_names_df <- column_names_df[1:length(column_names_df) - 1]
column_align <- c(x_align, 'r', 'r', 'r', 'r')
}
df <- df %>%
@ -333,6 +338,7 @@ freq <- function(x,
print(
knitr::kable(df2,
format = tblformat,
row.names = row.names,
col.names = column_names,
align = column_align,
padding = 1)
@ -354,6 +360,7 @@ freq <- function(x,
print(
knitr::kable(df,
format = tblformat,
row.names = row.names,
col.names = column_names,
align = column_align,
padding = 1)

View File

@ -6,12 +6,12 @@
\title{Frequency table}
\usage{
freq(x, sort.count = TRUE, nmax = getOption("max.print.freq"),
na.rm = TRUE, markdown = FALSE, as.data.frame = FALSE, digits = 2,
sep = " ")
na.rm = TRUE, row.names = TRUE, markdown = FALSE,
as.data.frame = FALSE, digits = 2, sep = " ")
frequency_tbl(x, sort.count = TRUE, nmax = getOption("max.print.freq"),
na.rm = TRUE, markdown = FALSE, as.data.frame = FALSE, digits = 2,
sep = " ")
na.rm = TRUE, row.names = TRUE, markdown = FALSE,
as.data.frame = FALSE, digits = 2, sep = " ")
}
\arguments{
\item{x}{data}
@ -22,6 +22,8 @@ frequency_tbl(x, sort.count = TRUE, nmax = getOption("max.print.freq"),
\item{na.rm}{a logical value indicating whether NA values should be removed from the frequency table. The header will always print the amount of \code{NA}s.}
\item{row.names}{a logical value indicating whether row indices should be printed as \code{1:nrow(x)}}
\item{markdown}{print table in markdown format (this forces \code{nmax = NA})}
\item{as.data.frame}{return frequency table without header as a \code{data.frame} (e.g. to assign the table to an object)}

View File

@ -2,10 +2,19 @@ context("atc.R")
test_that("atc_property works", {
expect_equal(tolower(atc_property("J01CA04", property = "Name")), "amoxicillin")
expect_equivalent(atc_property("J01CA04", "DDD"), 1)
expect_equal(length(atc_property("J01CA04", property = "Groups")), 4)
expect_equal(atc_property("J01CA04", property = "unit"), "g")
expect_equal(atc_property("J01CA04", property = "DDD"),
atc_ddd("J01CA04"))
expect_identical(atc_property("J01CA04", property = "Groups"),
atc_groups("J01CA04"))
expect_warning(atc_property("ABCDEFG", property = "DDD"))
expect_error(atc_property("J01CA04", property = c(1:5)))
expect_error(atc_property("J01CA04", administration = c(1:5)))
expect_error(atc_property("J01CA04", property = "test"))
expect_error(atc_property("J01CA04", property = "test", administration = c(1:5)))
})
test_that("abname works", {
@ -13,8 +22,15 @@ test_that("abname works", {
expect_equal(abname(c("AMOX", "GENT")), c("Amoxicillin", "Gentamicin"))
expect_equal(abname(c("AMOX+GENT")), "Amoxicillin + gentamicin")
expect_equal(abname("AMOX", from = 'umcg'), "Amoxicillin")
expect_equal(abname("amox", from = 'molis'), "Amoxicillin")
expect_equal(abname("amox", from = 'molis', tolower = TRUE), "amoxicillin")
expect_equal(abname("J01CA04", from = 'atc'), "Amoxicillin")
expect_equal(abname("AMOX", to = 'atc'), "J01CA04")
expect_equal(abname("AMOX en GENT"), "Amoxicillin + gentamicin")
expect_error(abname("AMOX", to = c(1:3)))
expect_error(abname("AMOX", to = "test"))
expect_warning(abname("TEST
"))
expect_warning(abname("AMOX or GENT"))
})
test_that("guess_atc works", {

View File

@ -6,9 +6,18 @@ test_that("guess_bactid works", {
c("ESCCOL", "HAEINF"))
expect_equal(guess_bactid("Escherichia coli"), "ESCCOL")
expect_equal(guess_bactid("P. aer"), "PSEAER") # not Pasteurella aerogenes
expect_equal(guess_bactid("Negative rods"), "GNR")
expect_equal(guess_bactid("MRSE"), "STAEPI")
expect_equal(guess_bactid("VRE"), "ENC")
expect_equal(guess_bactid("MRPA"), "PSEAER")
expect_equal(guess_bactid("PISP"), "STCPNE")
expect_equal(guess_bactid("PRSP"), "STCPNE")
expect_equal(guess_bactid("VISP"), "STCPNE")
expect_equal(guess_bactid("VRSP"), "STCPNE")
expect_identical(
guess_bactid(c("stau",
"STAU",

11
vignettes/freq.R Executable file → Normal file
View File

@ -7,13 +7,13 @@ library(dplyr)
library(AMR)
## ---- echo = TRUE, results = 'hide'--------------------------------------
# # just using base R
# just using base R
freq(septic_patients$sex)
# # using base R to select the variable and pass it on with a pipe
# using base R to select the variable and pass it on with a pipe
septic_patients$sex %>% freq()
# # do it all with pipes, using the `select` function of the dplyr package
# do it all with pipes, using the `select` function of the dplyr package
septic_patients %>%
select(sex) %>%
freq()
@ -69,6 +69,11 @@ septic_patients %>%
select(amox) %>%
freq(na.rm = FALSE)
## ---- echo = TRUE--------------------------------------------------------
septic_patients %>%
select(hospital_id) %>%
freq(row.names = FALSE)
## ---- echo = TRUE--------------------------------------------------------
septic_patients %>%
select(hospital_id) %>%

View File

@ -27,13 +27,13 @@ Frequency tables (or frequency distributions) are summaries of the distribution
To only show and quickly review the content of one variable, you can just select this variable in various ways. Let's say we want to get the frequencies of the `sex` variable of the `septic_patients` dataset:
```{r, echo = TRUE, results = 'hide'}
# # just using base R
# just using base R
freq(septic_patients$sex)
# # using base R to select the variable and pass it on with a pipe
# using base R to select the variable and pass it on with a pipe
septic_patients$sex %>% freq()
# # do it all with pipes, using the `select` function of the dplyr package
# do it all with pipes, using the `select` function of the dplyr package
septic_patients %>%
select(sex) %>%
freq()
@ -143,12 +143,22 @@ septic_patients %>%
### Parameter `na.rm`
With the `na.rm` parameter (defaults to `TRUE`, but they will always be shown into the header), you can include `NA` values in the frequency table:
```{r, echo = TRUE}
septic_patients %>%
select(amox) %>%
freq(na.rm = FALSE)
```
### Parameter `row.names`
The default frequency tables shows row indices. To remove them, use `row.names = FALSE`:
```{r, echo = TRUE}
septic_patients %>%
select(hospital_id) %>%
freq(row.names = FALSE)
```
### Parameter `markdown`
The `markdown` parameter can be used in reports created with R Markdown. This will always print all rows: