From 4a027f3c34d9a0efa389342564d190eddfc7c9fc Mon Sep 17 00:00:00 2001 From: "Matthijs S. Berends" Date: Tue, 19 Jun 2018 15:20:14 +0200 Subject: [PATCH] extra unit tests, add row.names to freq --- NEWS.md | 9 ++++++--- R/atc.R | 2 +- R/freq.R | 21 ++++++++++++++------- man/freq.Rd | 10 ++++++---- tests/testthat/test-atc.R | 24 ++++++++++++++++++++---- tests/testthat/test-guess_bactid.R | 9 +++++++++ vignettes/freq.R | 11 ++++++++--- vignettes/freq.Rmd | 16 +++++++++++++--- 8 files changed, 77 insertions(+), 25 deletions(-) mode change 100755 => 100644 vignettes/freq.R diff --git a/NEWS.md b/NEWS.md index 2d487ebc..5925d9a4 100755 --- a/NEWS.md +++ b/NEWS.md @@ -4,9 +4,12 @@ * Added possibility to globally set the default for the amount of items to print in frequency tables (`freq` function), with `options(max.print.freq = n)` * Renamed `toConsole` parameter of `freq` function to `as.data.frame` * Small translational improvements to the `septic_patients` dataset -* Combined MIC/RSI values will now be coerced by the `rsi` and `mic` functions: `as.rsi("<=0.002; S")` will return `S` and `as.mic("<=0.002; S")` will return `<=0.002` -* It is now possible to coerce MIC values when there's a space between the operator and the value, i.e. `as.mic("<= 0.002")` now works -* Added `"groups"` option for `atc_property(..., property)`. It will return a vector of the ATC hierarchy as defined by the [WHO](https://www.whocc.no/atc/structure_and_principles/). The new function `atc_groups` is a convenient wrapper around this. * Build-in host check for `atc_property` as it requires the host set by `url` to be responsive +* Combined MIC/RSI values will now be coerced by the `rsi` and `mic` functions: + * `as.rsi("<=0.002; S")` will return `S` + * `as.mic("<=0.002; S")` will return `<=0.002` +* Now possible to coerce MIC values with a space between operator and value, i.e. `as.mic("<= 0.002")` now works +* Added `"groups"` option for `atc_property(..., property)`. It will return a vector of the ATC hierarchy as defined by the [WHO](https://www.whocc.no/atc/structure_and_principles/). The new function `atc_groups` is a convenient wrapper around this. +* Build-in host check for `atc_property` as it requires the host set by `url` to be responsive * Improved `first_isolate` algorithm to exclude isolates where bacteria ID or genus is unavailable * Fix for warning *hybrid evaluation forced for row_number* ([`924b62`](https://github.com/tidyverse/dplyr/commit/924b62)) from the `dplyr` package v0.7.5 and above * Support for 1 or 2 columns as input for `guess_bactid` diff --git a/R/atc.R b/R/atc.R index a0737dce..264086f8 100755 --- a/R/atc.R +++ b/R/atc.R @@ -263,7 +263,7 @@ abname <- function(abcode, from = c("guess", "atc", "molis", "umcg"), to = 'offi if (!from %in% colnames(antibiotics) | !to %in% colnames(antibiotics)) { stop(paste0('Invalid `from` or `to`. Choose one of ', - colnames(antibiotics) %>% paste(collapse = ","), '.'), call. = FALSE) + colnames(antibiotics) %>% paste(collapse = ", "), '.'), call. = FALSE) } abcode <- as.character(abcode) diff --git a/R/freq.R b/R/freq.R index 49705d84..77e15f47 100755 --- a/R/freq.R +++ b/R/freq.R @@ -23,6 +23,7 @@ #' @param sort.count sort on count. Use \code{FALSE} to sort alphabetically on item. #' @param nmax number of row to print. The default, \code{15}, uses \code{\link{getOption}("max.print.freq")}. Use \code{nmax = 0} or \code{nmax = NA} to print all rows. #' @param na.rm a logical value indicating whether NA values should be removed from the frequency table. The header will always print the amount of \code{NA}s. +#' @param row.names a logical value indicating whether row indices should be printed as \code{1:nrow(x)} #' @param markdown print table in markdown format (this forces \code{nmax = NA}) #' @param as.data.frame return frequency table without header as a \code{data.frame} (e.g. to assign the table to an object) #' @param digits how many significant digits are to be used for numeric values (not for the items themselves, that depends on \code{\link{getOption}("digits")}) @@ -68,6 +69,7 @@ freq <- function(x, sort.count = TRUE, nmax = getOption("max.print.freq"), na.rm = TRUE, + row.names = TRUE, markdown = FALSE, as.data.frame = FALSE, digits = 2, @@ -222,11 +224,13 @@ freq <- function(x, } if (any(class(x) %in% c('Date', 'POSIXct', 'POSIXlt'))) { header <- header %>% paste0('\n') - mindatum <- x %>% min() - maxdatum <- x %>% max() - header <- header %>% paste0(markdown_line, '\nOldest: ', mindatum %>% format(formatdates) %>% trimws()) - header <- header %>% paste0(markdown_line, '\nNewest: ', maxdatum %>% format(formatdates) %>% trimws(), - ' (+', difftime(maxdatum, mindatum, units = 'auto') %>% as.double() %>% format(), ')') + mindate <- x %>% min(na.rm = TRUE) + maxdate <- x %>% max(na.rm = TRUE) + mediandate <- x %>% median(na.rm = TRUE) + header <- header %>% paste0(markdown_line, '\nOldest: ', mindate %>% format(formatdates) %>% trimws()) + header <- header %>% paste0(markdown_line, '\nNewest: ', maxdate %>% format(formatdates) %>% trimws(), + ' (+', difftime(maxdate, mindate, units = 'auto') %>% as.double() %>% format(), ')') + header <- header %>% paste0(markdown_line, '\nMedian: ', mediandate %>% format(formatdates) %>% trimws()) } if (any(class(x) == 'POSIXlt')) { x <- x %>% format(formatdates) @@ -266,8 +270,9 @@ freq <- function(x, } else { df <- tibble::tibble(Item = x) %>% group_by(Item) - column_names <- column_names[1:5] # strip factor lvl - column_names_df <- column_names_df[1:5] # strip factor lvl + # strip factor lvl from col names + column_names <- column_names[1:length(column_names) - 1] + column_names_df <- column_names_df[1:length(column_names_df) - 1] column_align <- c(x_align, 'r', 'r', 'r', 'r') } df <- df %>% @@ -333,6 +338,7 @@ freq <- function(x, print( knitr::kable(df2, format = tblformat, + row.names = row.names, col.names = column_names, align = column_align, padding = 1) @@ -354,6 +360,7 @@ freq <- function(x, print( knitr::kable(df, format = tblformat, + row.names = row.names, col.names = column_names, align = column_align, padding = 1) diff --git a/man/freq.Rd b/man/freq.Rd index 51f9f748..4126b556 100755 --- a/man/freq.Rd +++ b/man/freq.Rd @@ -6,12 +6,12 @@ \title{Frequency table} \usage{ freq(x, sort.count = TRUE, nmax = getOption("max.print.freq"), - na.rm = TRUE, markdown = FALSE, as.data.frame = FALSE, digits = 2, - sep = " ") + na.rm = TRUE, row.names = TRUE, markdown = FALSE, + as.data.frame = FALSE, digits = 2, sep = " ") frequency_tbl(x, sort.count = TRUE, nmax = getOption("max.print.freq"), - na.rm = TRUE, markdown = FALSE, as.data.frame = FALSE, digits = 2, - sep = " ") + na.rm = TRUE, row.names = TRUE, markdown = FALSE, + as.data.frame = FALSE, digits = 2, sep = " ") } \arguments{ \item{x}{data} @@ -22,6 +22,8 @@ frequency_tbl(x, sort.count = TRUE, nmax = getOption("max.print.freq"), \item{na.rm}{a logical value indicating whether NA values should be removed from the frequency table. The header will always print the amount of \code{NA}s.} +\item{row.names}{a logical value indicating whether row indices should be printed as \code{1:nrow(x)}} + \item{markdown}{print table in markdown format (this forces \code{nmax = NA})} \item{as.data.frame}{return frequency table without header as a \code{data.frame} (e.g. to assign the table to an object)} diff --git a/tests/testthat/test-atc.R b/tests/testthat/test-atc.R index 81da027f..2df63577 100755 --- a/tests/testthat/test-atc.R +++ b/tests/testthat/test-atc.R @@ -2,10 +2,19 @@ context("atc.R") test_that("atc_property works", { expect_equal(tolower(atc_property("J01CA04", property = "Name")), "amoxicillin") - expect_equivalent(atc_property("J01CA04", "DDD"), 1) - expect_equal(length(atc_property("J01CA04", property = "Groups")), 4) + expect_equal(atc_property("J01CA04", property = "unit"), "g") + + expect_equal(atc_property("J01CA04", property = "DDD"), + atc_ddd("J01CA04")) + + expect_identical(atc_property("J01CA04", property = "Groups"), + atc_groups("J01CA04")) + + expect_warning(atc_property("ABCDEFG", property = "DDD")) + expect_error(atc_property("J01CA04", property = c(1:5))) - expect_error(atc_property("J01CA04", administration = c(1:5))) + expect_error(atc_property("J01CA04", property = "test")) + expect_error(atc_property("J01CA04", property = "test", administration = c(1:5))) }) test_that("abname works", { @@ -13,8 +22,15 @@ test_that("abname works", { expect_equal(abname(c("AMOX", "GENT")), c("Amoxicillin", "Gentamicin")) expect_equal(abname(c("AMOX+GENT")), "Amoxicillin + gentamicin") expect_equal(abname("AMOX", from = 'umcg'), "Amoxicillin") - expect_equal(abname("amox", from = 'molis'), "Amoxicillin") + expect_equal(abname("amox", from = 'molis', tolower = TRUE), "amoxicillin") expect_equal(abname("J01CA04", from = 'atc'), "Amoxicillin") + expect_equal(abname("AMOX", to = 'atc'), "J01CA04") + expect_equal(abname("AMOX en GENT"), "Amoxicillin + gentamicin") + expect_error(abname("AMOX", to = c(1:3))) + expect_error(abname("AMOX", to = "test")) + expect_warning(abname("TEST + ")) + expect_warning(abname("AMOX or GENT")) }) test_that("guess_atc works", { diff --git a/tests/testthat/test-guess_bactid.R b/tests/testthat/test-guess_bactid.R index 8bb7b010..17d03d11 100644 --- a/tests/testthat/test-guess_bactid.R +++ b/tests/testthat/test-guess_bactid.R @@ -6,9 +6,18 @@ test_that("guess_bactid works", { c("ESCCOL", "HAEINF")) expect_equal(guess_bactid("Escherichia coli"), "ESCCOL") + expect_equal(guess_bactid("P. aer"), "PSEAER") # not Pasteurella aerogenes expect_equal(guess_bactid("Negative rods"), "GNR") + expect_equal(guess_bactid("MRSE"), "STAEPI") + expect_equal(guess_bactid("VRE"), "ENC") + expect_equal(guess_bactid("MRPA"), "PSEAER") + expect_equal(guess_bactid("PISP"), "STCPNE") + expect_equal(guess_bactid("PRSP"), "STCPNE") + expect_equal(guess_bactid("VISP"), "STCPNE") + expect_equal(guess_bactid("VRSP"), "STCPNE") + expect_identical( guess_bactid(c("stau", "STAU", diff --git a/vignettes/freq.R b/vignettes/freq.R old mode 100755 new mode 100644 index 84ddea08..74701955 --- a/vignettes/freq.R +++ b/vignettes/freq.R @@ -7,13 +7,13 @@ library(dplyr) library(AMR) ## ---- echo = TRUE, results = 'hide'-------------------------------------- -# # just using base R +# just using base R freq(septic_patients$sex) -# # using base R to select the variable and pass it on with a pipe +# using base R to select the variable and pass it on with a pipe septic_patients$sex %>% freq() -# # do it all with pipes, using the `select` function of the dplyr package +# do it all with pipes, using the `select` function of the dplyr package septic_patients %>% select(sex) %>% freq() @@ -69,6 +69,11 @@ septic_patients %>% select(amox) %>% freq(na.rm = FALSE) +## ---- echo = TRUE-------------------------------------------------------- +septic_patients %>% + select(hospital_id) %>% + freq(row.names = FALSE) + ## ---- echo = TRUE-------------------------------------------------------- septic_patients %>% select(hospital_id) %>% diff --git a/vignettes/freq.Rmd b/vignettes/freq.Rmd index f8c2a14c..7d625bc4 100755 --- a/vignettes/freq.Rmd +++ b/vignettes/freq.Rmd @@ -27,13 +27,13 @@ Frequency tables (or frequency distributions) are summaries of the distribution To only show and quickly review the content of one variable, you can just select this variable in various ways. Let's say we want to get the frequencies of the `sex` variable of the `septic_patients` dataset: ```{r, echo = TRUE, results = 'hide'} -# # just using base R +# just using base R freq(septic_patients$sex) -# # using base R to select the variable and pass it on with a pipe +# using base R to select the variable and pass it on with a pipe septic_patients$sex %>% freq() -# # do it all with pipes, using the `select` function of the dplyr package +# do it all with pipes, using the `select` function of the dplyr package septic_patients %>% select(sex) %>% freq() @@ -143,12 +143,22 @@ septic_patients %>% ### Parameter `na.rm` With the `na.rm` parameter (defaults to `TRUE`, but they will always be shown into the header), you can include `NA` values in the frequency table: + ```{r, echo = TRUE} septic_patients %>% select(amox) %>% freq(na.rm = FALSE) ``` +### Parameter `row.names` +The default frequency tables shows row indices. To remove them, use `row.names = FALSE`: + +```{r, echo = TRUE} +septic_patients %>% + select(hospital_id) %>% + freq(row.names = FALSE) +``` + ### Parameter `markdown` The `markdown` parameter can be used in reports created with R Markdown. This will always print all rows: