1
0
mirror of https://github.com/msberends/AMR.git synced 2024-12-26 08:06:12 +01:00

unit test read.4d, unselecting freq cols

This commit is contained in:
dr. M.S. (Matthijs) Berends 2018-11-19 13:00:22 +01:00
parent 83c7da85ab
commit a8132922af
7 changed files with 127 additions and 22 deletions

View File

@ -1,6 +1,6 @@
Package: AMR Package: AMR
Version: 0.4.0.9011 Version: 0.4.0.9012
Date: 2018-11-16 Date: 2018-11-17
Title: Antimicrobial Resistance Analysis Title: Antimicrobial Resistance Analysis
Authors@R: c( Authors@R: c(
person( person(

View File

@ -35,6 +35,12 @@
group_by(hospital_id) %>% group_by(hospital_id) %>%
freq(gender) freq(gender)
``` ```
* Support for (un)selecting columns:
```r
septic_patients %>%
freq(hospital_id) %>%
select(-count, -cum_count) # only get item, percent, cum_percent
```
* Check for `hms::is.hms` * Check for `hms::is.hms`
* Now prints in markdown at default in non-interactive sessions * Now prints in markdown at default in non-interactive sessions
* No longer adds the factor level column and sorts factors on count again * No longer adds the factor level column and sorts factors on count again
@ -43,7 +49,7 @@
* New parameter `header` to turn it off (default when `markdown = TRUE`) * New parameter `header` to turn it off (default when `markdown = TRUE`)
* New parameter `title` to replace the automatically set title * New parameter `title` to replace the automatically set title
* `first_isolate` now tries to find columns to use as input when parameters are left blank * `first_isolate` now tries to find columns to use as input when parameters are left blank
* Improvement for MDRO algorithm (function `mdro`) * Improvements for MDRO algorithm (function `mdro`)
* Data set `septic_patients` is now a `data.frame`, not a tibble anymore * Data set `septic_patients` is now a `data.frame`, not a tibble anymore
* Removed diacritics from all authors (columns `microorganisms$ref` and `microorganisms.old$ref`) to comply with CRAN policy to only allow ASCII characters * Removed diacritics from all authors (columns `microorganisms$ref` and `microorganisms.old$ref`) to comply with CRAN policy to only allow ASCII characters
* Fix for `mo_property` not working properly * Fix for `mo_property` not working properly

View File

@ -66,7 +66,7 @@
#' @keywords summary summarise frequency freq #' @keywords summary summarise frequency freq
#' @rdname freq #' @rdname freq
#' @name freq #' @name freq
#' @return A \code{data.frame} with an additional class \code{"frequency_tbl"} #' @return A \code{data.frame} (with an additional class \code{"frequency_tbl"}) with five columns: \code{item}, \code{count}, \code{percent}, \code{cum_count} and \code{cum_percent}.
#' @export #' @export
#' @examples #' @examples
#' library(dplyr) #' library(dplyr)
@ -79,55 +79,66 @@
#' septic_patients %>% freq("hospital_id") #' septic_patients %>% freq("hospital_id")
#' septic_patients %>% freq(hospital_id) #<- easiest to remember (tidyverse) #' septic_patients %>% freq(hospital_id) #<- easiest to remember (tidyverse)
#' #'
#'
#' # you could also use `select` or `pull` to get your variables #' # you could also use `select` or `pull` to get your variables
#' septic_patients %>% #' septic_patients %>%
#' filter(hospital_id == "A") %>% #' filter(hospital_id == "A") %>%
#' select(mo) %>% #' select(mo) %>%
#' freq() #' freq()
#' #'
#'
#' # multiple selected variables will be pasted together #' # multiple selected variables will be pasted together
#' septic_patients %>% #' septic_patients %>%
#' left_join_microorganisms %>% #' left_join_microorganisms %>%
#' filter(hospital_id == "A") %>% #' filter(hospital_id == "A") %>%
#' freq(genus, species) #' freq(genus, species)
#' #'
#'
#' # group a variable and analyse another #' # group a variable and analyse another
#' septic_patients %>% #' septic_patients %>%
#' group_by(hospital_id) %>% #' group_by(hospital_id) %>%
#' freq(gender) #' freq(gender)
#' #'
#'
#' # get top 10 bugs of hospital A as a vector #' # get top 10 bugs of hospital A as a vector
#' septic_patients %>% #' septic_patients %>%
#' filter(hospital_id == "A") %>% #' filter(hospital_id == "A") %>%
#' freq(mo) %>% #' freq(mo) %>%
#' top_freq(10) #' top_freq(10)
#' #'
#'
#' # save frequency table to an object #' # save frequency table to an object
#' years <- septic_patients %>% #' years <- septic_patients %>%
#' mutate(year = format(date, "%Y")) %>% #' mutate(year = format(date, "%Y")) %>%
#' freq(year) #' freq(year)
#' #'
#'
#' # show only the top 5 #' # show only the top 5
#' years %>% print(nmax = 5) #' years %>% print(nmax = 5)
#' #'
#'
#' # save to an object with formatted percentages #' # save to an object with formatted percentages
#' years <- format(years) #' years <- format(years)
#' #'
#'
#' # print a histogram of numeric values #' # print a histogram of numeric values
#' septic_patients %>% #' septic_patients %>%
#' freq(age) %>% #' freq(age) %>%
#' hist() #' hist()
#' #'
#'
#' # or print all points to a regular plot #' # or print all points to a regular plot
#' septic_patients %>% #' septic_patients %>%
#' freq(age) %>% #' freq(age) %>%
#' plot() #' plot()
#' #'
#'
#' # transform to a data.frame or tibble #' # transform to a data.frame or tibble
#' septic_patients %>% #' septic_patients %>%
#' freq(age) %>% #' freq(age) %>%
#' as.data.frame() #' as.data.frame()
#' #'
#'
#' # or transform (back) to a vector #' # or transform (back) to a vector
#' septic_patients %>% #' septic_patients %>%
#' freq(age) %>% #' freq(age) %>%
@ -139,11 +150,23 @@
#' sort(), #' sort(),
#' sort(septic_patients$age)) # TRUE #' sort(septic_patients$age)) # TRUE
#' #'
#' # it also supports `table` objects: #'
#' # it also supports `table` objects
#' table(septic_patients$gender, #' table(septic_patients$gender,
#' septic_patients$age) %>% #' septic_patients$age) %>%
#' freq(sep = " **sep** ") #' freq(sep = " **sep** ")
#' #'
#'
#' # only get selected columns
#' septic_patients %>%
#' freq(hospital_id) %>%
#' select(item, percent)
#'
#' septic_patients %>%
#' freq(hospital_id) %>%
#' select(-count, -cum_count)
#'
#'
#' # check differences between frequency tables #' # check differences between frequency tables
#' diff(freq(septic_patients$trim), #' diff(freq(septic_patients$trim),
#' freq(septic_patients$trsu)) #' freq(septic_patients$trsu))
@ -569,6 +592,7 @@ print.frequency_tbl <- function(x, nmax = getOption("max.print.freq", default =
} }
title <- paste(title, group_var) title <- paste(title, group_var)
} }
title <- paste("Frequency table of", trimws(title))
} else { } else {
title <- opt$title title <- opt$title
} }
@ -592,12 +616,6 @@ print.frequency_tbl <- function(x, nmax = getOption("max.print.freq", default =
opt$header <- header opt$header <- header
} }
if (trimws(title) == "") {
title <- "Frequency table"
} else {
title <- paste("Frequency table of", trimws(title))
}
# bold title # bold title
if (opt$tbl_format == "pandoc") { if (opt$tbl_format == "pandoc") {
title <- bold(title) title <- bold(title)
@ -620,10 +638,6 @@ print.frequency_tbl <- function(x, nmax = getOption("max.print.freq", default =
return(invisible()) return(invisible())
} }
if (all(x$count == 1)) {
warning('All observations are unique.', call. = FALSE)
}
# save old NA setting for kable # save old NA setting for kable
opt.old <- options()$knitr.kable.NA opt.old <- options()$knitr.kable.NA
if (is.null(opt$na)) { if (is.null(opt$na)) {
@ -668,10 +682,34 @@ print.frequency_tbl <- function(x, nmax = getOption("max.print.freq", default =
if (any(class(x$item) %in% c('double', 'integer', 'numeric', 'raw', 'single'))) { if (any(class(x$item) %in% c('double', 'integer', 'numeric', 'raw', 'single'))) {
x$item <- format(x$item) x$item <- format(x$item)
} }
if ("item" %in% colnames(x)) {
x$item <- format(x$item)
} else {
opt$column_names <- opt$column_names[!opt$column_names == "Item"]
}
if ("count" %in% colnames(x)) {
if (all(x$count == 1)) {
warning('All observations are unique.', call. = FALSE)
}
x$count <- format(x$count) x$count <- format(x$count)
} else {
opt$column_names <- opt$column_names[!opt$column_names == "Count"]
}
if ("percent" %in% colnames(x)) {
x$percent <- percent(x$percent, force_zero = TRUE) x$percent <- percent(x$percent, force_zero = TRUE)
} else {
opt$column_names <- opt$column_names[!opt$column_names == "Percent"]
}
if ("cum_count" %in% colnames(x)) {
x$cum_count <- format(x$cum_count) x$cum_count <- format(x$cum_count)
} else {
opt$column_names <- opt$column_names[!opt$column_names == "Cum. Count"]
}
if ("cum_percent" %in% colnames(x)) {
x$cum_percent <- percent(x$cum_percent, force_zero = TRUE) x$cum_percent <- percent(x$cum_percent, force_zero = TRUE)
} else {
opt$column_names <- opt$column_names[!opt$column_names == "Cum. Percent"]
}
if (opt$tbl_format == "markdown") { if (opt$tbl_format == "markdown") {
cat("\n") cat("\n")

View File

@ -41,7 +41,7 @@ read.4D <- function(file,
encoding = "UTF-8") { encoding = "UTF-8") {
if (info == TRUE) { if (info == TRUE) {
message("Importing data... ", appendLF = FALSE) message("Importing ", file, "... ", appendLF = FALSE)
} }
data_4D <- utils::read.table(file = file, data_4D <- utils::read.table(file = file,
row.names = row.names, row.names = row.names,

View File

@ -54,7 +54,7 @@ top_freq(f, n)
\item{n}{number of top \emph{n} items to return, use -n for the bottom \emph{n} items. It will include more than \code{n} rows if there are ties.} \item{n}{number of top \emph{n} items to return, use -n for the bottom \emph{n} items. It will include more than \code{n} rows if there are ties.}
} }
\value{ \value{
A \code{data.frame} with an additional class \code{"frequency_tbl"} A \code{data.frame} (with an additional class \code{"frequency_tbl"}) with five columns: \code{item}, \code{count}, \code{percent}, \code{cum_count} and \code{cum_percent}.
} }
\description{ \description{
Create a frequency table of a vector with items or a data frame. Supports quasiquotation and markdown for reports. \code{top_freq} can be used to get the top/bottom \emph{n} items of a frequency table, with counts as names. Create a frequency table of a vector with items or a data frame. Supports quasiquotation and markdown for reports. \code{top_freq} can be used to get the top/bottom \emph{n} items of a frequency table, with counts as names.
@ -95,55 +95,66 @@ septic_patients[, "hospital_id"] \%>\% freq()
septic_patients \%>\% freq("hospital_id") septic_patients \%>\% freq("hospital_id")
septic_patients \%>\% freq(hospital_id) #<- easiest to remember (tidyverse) septic_patients \%>\% freq(hospital_id) #<- easiest to remember (tidyverse)
# you could also use `select` or `pull` to get your variables # you could also use `select` or `pull` to get your variables
septic_patients \%>\% septic_patients \%>\%
filter(hospital_id == "A") \%>\% filter(hospital_id == "A") \%>\%
select(mo) \%>\% select(mo) \%>\%
freq() freq()
# multiple selected variables will be pasted together # multiple selected variables will be pasted together
septic_patients \%>\% septic_patients \%>\%
left_join_microorganisms \%>\% left_join_microorganisms \%>\%
filter(hospital_id == "A") \%>\% filter(hospital_id == "A") \%>\%
freq(genus, species) freq(genus, species)
# group a variable and analyse another # group a variable and analyse another
septic_patients \%>\% septic_patients \%>\%
group_by(hospital_id) \%>\% group_by(hospital_id) \%>\%
freq(gender) freq(gender)
# get top 10 bugs of hospital A as a vector # get top 10 bugs of hospital A as a vector
septic_patients \%>\% septic_patients \%>\%
filter(hospital_id == "A") \%>\% filter(hospital_id == "A") \%>\%
freq(mo) \%>\% freq(mo) \%>\%
top_freq(10) top_freq(10)
# save frequency table to an object # save frequency table to an object
years <- septic_patients \%>\% years <- septic_patients \%>\%
mutate(year = format(date, "\%Y")) \%>\% mutate(year = format(date, "\%Y")) \%>\%
freq(year) freq(year)
# show only the top 5 # show only the top 5
years \%>\% print(nmax = 5) years \%>\% print(nmax = 5)
# save to an object with formatted percentages # save to an object with formatted percentages
years <- format(years) years <- format(years)
# print a histogram of numeric values # print a histogram of numeric values
septic_patients \%>\% septic_patients \%>\%
freq(age) \%>\% freq(age) \%>\%
hist() hist()
# or print all points to a regular plot # or print all points to a regular plot
septic_patients \%>\% septic_patients \%>\%
freq(age) \%>\% freq(age) \%>\%
plot() plot()
# transform to a data.frame or tibble # transform to a data.frame or tibble
septic_patients \%>\% septic_patients \%>\%
freq(age) \%>\% freq(age) \%>\%
as.data.frame() as.data.frame()
# or transform (back) to a vector # or transform (back) to a vector
septic_patients \%>\% septic_patients \%>\%
freq(age) \%>\% freq(age) \%>\%
@ -155,11 +166,23 @@ identical(septic_patients \%>\%
sort(), sort(),
sort(septic_patients$age)) # TRUE sort(septic_patients$age)) # TRUE
# it also supports `table` objects:
# it also supports `table` objects
table(septic_patients$gender, table(septic_patients$gender,
septic_patients$age) \%>\% septic_patients$age) \%>\%
freq(sep = " **sep** ") freq(sep = " **sep** ")
# only get selected columns
septic_patients \%>\%
freq(hospital_id) \%>\%
select(item, percent)
septic_patients \%>\%
freq(hospital_id) \%>\%
select(-count, -cum_count)
# check differences between frequency tables # check differences between frequency tables
diff(freq(septic_patients$trim), diff(freq(septic_patients$trim),
freq(septic_patients$trsu)) freq(septic_patients$trsu))

View File

@ -11,6 +11,8 @@ test_that("frequency table works", {
expect_equal(nrow(freq(septic_patients$date)), expect_equal(nrow(freq(septic_patients$date)),
length(unique(septic_patients$date))) length(unique(septic_patients$date)))
expect_output(print(septic_patients %>% freq(age)))
expect_output(print(septic_patients %>% freq(age, nmax = 5)))
expect_output(print(septic_patients %>% freq(age, nmax = Inf))) expect_output(print(septic_patients %>% freq(age, nmax = Inf)))
expect_output(print(freq(septic_patients$age, nmax = Inf))) expect_output(print(freq(septic_patients$age, nmax = Inf)))
expect_output(print(freq(septic_patients$age, nmax = NA))) expect_output(print(freq(septic_patients$age, nmax = NA)))
@ -123,6 +125,12 @@ test_that("frequency table works", {
expect_error(septic_patients %>% freq(peni, oxac, clox, amox, amcl, expect_error(septic_patients %>% freq(peni, oxac, clox, amox, amcl,
ampi, pita, czol, cfep, cfur)) ampi, pita, czol, cfep, cfur))
# (un)select columns
expect_equal(septic_patients %>% freq(hospital_id) %>% select(item) %>% ncol(),
1)
expect_equal(septic_patients %>% freq(hospital_id) %>% select(-item) %>% ncol(),
4)
# run diff # run diff
expect_output(print( expect_output(print(
diff(freq(septic_patients$amcl), diff(freq(septic_patients$amcl),

View File

@ -0,0 +1,30 @@
context("read.4d.R")
test_that("read 4D works", {
library(dplyr)
test1 <- data.frame(Patientnr = "ABC",
MV = "M",
Monsternr = "0123",
Afnamedat = "10-11-12",
Bepaling = "bk",
Afd. = "ABC",
Spec = "ABC",
Matbijz. = "ABC",
Mat = "ABC",
Mocode = "esccol",
PENI = "R",
stringsAsFactors = FALSE)
tf <- tempfile()
write.table(test1, file = tf, quote = F, sep = "\t")
x <- read.4D(tf, skip = 0)
unlink(tf)
expect_equal(ncol(x), 11)
expect_equal(class(x$date_received), "Date")
expect_equal(class(x$mo), "mo")
expect_equal(as.character(x$mo), "B_ESCHR_COL")
expect_equal(is.rsi(x$peni), TRUE)
})