mirror of
https://github.com/msberends/AMR.git
synced 2024-12-26 08:06:12 +01:00
unit test read.4d, unselecting freq cols
This commit is contained in:
parent
83c7da85ab
commit
a8132922af
@ -1,6 +1,6 @@
|
|||||||
Package: AMR
|
Package: AMR
|
||||||
Version: 0.4.0.9011
|
Version: 0.4.0.9012
|
||||||
Date: 2018-11-16
|
Date: 2018-11-17
|
||||||
Title: Antimicrobial Resistance Analysis
|
Title: Antimicrobial Resistance Analysis
|
||||||
Authors@R: c(
|
Authors@R: c(
|
||||||
person(
|
person(
|
||||||
|
8
NEWS.md
8
NEWS.md
@ -35,6 +35,12 @@
|
|||||||
group_by(hospital_id) %>%
|
group_by(hospital_id) %>%
|
||||||
freq(gender)
|
freq(gender)
|
||||||
```
|
```
|
||||||
|
* Support for (un)selecting columns:
|
||||||
|
```r
|
||||||
|
septic_patients %>%
|
||||||
|
freq(hospital_id) %>%
|
||||||
|
select(-count, -cum_count) # only get item, percent, cum_percent
|
||||||
|
```
|
||||||
* Check for `hms::is.hms`
|
* Check for `hms::is.hms`
|
||||||
* Now prints in markdown at default in non-interactive sessions
|
* Now prints in markdown at default in non-interactive sessions
|
||||||
* No longer adds the factor level column and sorts factors on count again
|
* No longer adds the factor level column and sorts factors on count again
|
||||||
@ -43,7 +49,7 @@
|
|||||||
* New parameter `header` to turn it off (default when `markdown = TRUE`)
|
* New parameter `header` to turn it off (default when `markdown = TRUE`)
|
||||||
* New parameter `title` to replace the automatically set title
|
* New parameter `title` to replace the automatically set title
|
||||||
* `first_isolate` now tries to find columns to use as input when parameters are left blank
|
* `first_isolate` now tries to find columns to use as input when parameters are left blank
|
||||||
* Improvement for MDRO algorithm (function `mdro`)
|
* Improvements for MDRO algorithm (function `mdro`)
|
||||||
* Data set `septic_patients` is now a `data.frame`, not a tibble anymore
|
* Data set `septic_patients` is now a `data.frame`, not a tibble anymore
|
||||||
* Removed diacritics from all authors (columns `microorganisms$ref` and `microorganisms.old$ref`) to comply with CRAN policy to only allow ASCII characters
|
* Removed diacritics from all authors (columns `microorganisms$ref` and `microorganisms.old$ref`) to comply with CRAN policy to only allow ASCII characters
|
||||||
* Fix for `mo_property` not working properly
|
* Fix for `mo_property` not working properly
|
||||||
|
62
R/freq.R
62
R/freq.R
@ -66,7 +66,7 @@
|
|||||||
#' @keywords summary summarise frequency freq
|
#' @keywords summary summarise frequency freq
|
||||||
#' @rdname freq
|
#' @rdname freq
|
||||||
#' @name freq
|
#' @name freq
|
||||||
#' @return A \code{data.frame} with an additional class \code{"frequency_tbl"}
|
#' @return A \code{data.frame} (with an additional class \code{"frequency_tbl"}) with five columns: \code{item}, \code{count}, \code{percent}, \code{cum_count} and \code{cum_percent}.
|
||||||
#' @export
|
#' @export
|
||||||
#' @examples
|
#' @examples
|
||||||
#' library(dplyr)
|
#' library(dplyr)
|
||||||
@ -79,55 +79,66 @@
|
|||||||
#' septic_patients %>% freq("hospital_id")
|
#' septic_patients %>% freq("hospital_id")
|
||||||
#' septic_patients %>% freq(hospital_id) #<- easiest to remember (tidyverse)
|
#' septic_patients %>% freq(hospital_id) #<- easiest to remember (tidyverse)
|
||||||
#'
|
#'
|
||||||
|
#'
|
||||||
#' # you could also use `select` or `pull` to get your variables
|
#' # you could also use `select` or `pull` to get your variables
|
||||||
#' septic_patients %>%
|
#' septic_patients %>%
|
||||||
#' filter(hospital_id == "A") %>%
|
#' filter(hospital_id == "A") %>%
|
||||||
#' select(mo) %>%
|
#' select(mo) %>%
|
||||||
#' freq()
|
#' freq()
|
||||||
#'
|
#'
|
||||||
|
#'
|
||||||
#' # multiple selected variables will be pasted together
|
#' # multiple selected variables will be pasted together
|
||||||
#' septic_patients %>%
|
#' septic_patients %>%
|
||||||
#' left_join_microorganisms %>%
|
#' left_join_microorganisms %>%
|
||||||
#' filter(hospital_id == "A") %>%
|
#' filter(hospital_id == "A") %>%
|
||||||
#' freq(genus, species)
|
#' freq(genus, species)
|
||||||
#'
|
#'
|
||||||
|
#'
|
||||||
#' # group a variable and analyse another
|
#' # group a variable and analyse another
|
||||||
#' septic_patients %>%
|
#' septic_patients %>%
|
||||||
#' group_by(hospital_id) %>%
|
#' group_by(hospital_id) %>%
|
||||||
#' freq(gender)
|
#' freq(gender)
|
||||||
#'
|
#'
|
||||||
|
#'
|
||||||
#' # get top 10 bugs of hospital A as a vector
|
#' # get top 10 bugs of hospital A as a vector
|
||||||
#' septic_patients %>%
|
#' septic_patients %>%
|
||||||
#' filter(hospital_id == "A") %>%
|
#' filter(hospital_id == "A") %>%
|
||||||
#' freq(mo) %>%
|
#' freq(mo) %>%
|
||||||
#' top_freq(10)
|
#' top_freq(10)
|
||||||
#'
|
#'
|
||||||
|
#'
|
||||||
#' # save frequency table to an object
|
#' # save frequency table to an object
|
||||||
#' years <- septic_patients %>%
|
#' years <- septic_patients %>%
|
||||||
#' mutate(year = format(date, "%Y")) %>%
|
#' mutate(year = format(date, "%Y")) %>%
|
||||||
#' freq(year)
|
#' freq(year)
|
||||||
#'
|
#'
|
||||||
|
#'
|
||||||
#' # show only the top 5
|
#' # show only the top 5
|
||||||
#' years %>% print(nmax = 5)
|
#' years %>% print(nmax = 5)
|
||||||
#'
|
#'
|
||||||
|
#'
|
||||||
#' # save to an object with formatted percentages
|
#' # save to an object with formatted percentages
|
||||||
#' years <- format(years)
|
#' years <- format(years)
|
||||||
#'
|
#'
|
||||||
|
#'
|
||||||
#' # print a histogram of numeric values
|
#' # print a histogram of numeric values
|
||||||
#' septic_patients %>%
|
#' septic_patients %>%
|
||||||
#' freq(age) %>%
|
#' freq(age) %>%
|
||||||
#' hist()
|
#' hist()
|
||||||
#'
|
#'
|
||||||
|
#'
|
||||||
#' # or print all points to a regular plot
|
#' # or print all points to a regular plot
|
||||||
#' septic_patients %>%
|
#' septic_patients %>%
|
||||||
#' freq(age) %>%
|
#' freq(age) %>%
|
||||||
#' plot()
|
#' plot()
|
||||||
#'
|
#'
|
||||||
|
#'
|
||||||
#' # transform to a data.frame or tibble
|
#' # transform to a data.frame or tibble
|
||||||
#' septic_patients %>%
|
#' septic_patients %>%
|
||||||
#' freq(age) %>%
|
#' freq(age) %>%
|
||||||
#' as.data.frame()
|
#' as.data.frame()
|
||||||
#'
|
#'
|
||||||
|
#'
|
||||||
#' # or transform (back) to a vector
|
#' # or transform (back) to a vector
|
||||||
#' septic_patients %>%
|
#' septic_patients %>%
|
||||||
#' freq(age) %>%
|
#' freq(age) %>%
|
||||||
@ -139,11 +150,23 @@
|
|||||||
#' sort(),
|
#' sort(),
|
||||||
#' sort(septic_patients$age)) # TRUE
|
#' sort(septic_patients$age)) # TRUE
|
||||||
#'
|
#'
|
||||||
#' # it also supports `table` objects:
|
#'
|
||||||
|
#' # it also supports `table` objects
|
||||||
#' table(septic_patients$gender,
|
#' table(septic_patients$gender,
|
||||||
#' septic_patients$age) %>%
|
#' septic_patients$age) %>%
|
||||||
#' freq(sep = " **sep** ")
|
#' freq(sep = " **sep** ")
|
||||||
#'
|
#'
|
||||||
|
#'
|
||||||
|
#' # only get selected columns
|
||||||
|
#' septic_patients %>%
|
||||||
|
#' freq(hospital_id) %>%
|
||||||
|
#' select(item, percent)
|
||||||
|
#'
|
||||||
|
#' septic_patients %>%
|
||||||
|
#' freq(hospital_id) %>%
|
||||||
|
#' select(-count, -cum_count)
|
||||||
|
#'
|
||||||
|
#'
|
||||||
#' # check differences between frequency tables
|
#' # check differences between frequency tables
|
||||||
#' diff(freq(septic_patients$trim),
|
#' diff(freq(septic_patients$trim),
|
||||||
#' freq(septic_patients$trsu))
|
#' freq(septic_patients$trsu))
|
||||||
@ -569,6 +592,7 @@ print.frequency_tbl <- function(x, nmax = getOption("max.print.freq", default =
|
|||||||
}
|
}
|
||||||
title <- paste(title, group_var)
|
title <- paste(title, group_var)
|
||||||
}
|
}
|
||||||
|
title <- paste("Frequency table of", trimws(title))
|
||||||
} else {
|
} else {
|
||||||
title <- opt$title
|
title <- opt$title
|
||||||
}
|
}
|
||||||
@ -592,12 +616,6 @@ print.frequency_tbl <- function(x, nmax = getOption("max.print.freq", default =
|
|||||||
opt$header <- header
|
opt$header <- header
|
||||||
}
|
}
|
||||||
|
|
||||||
if (trimws(title) == "") {
|
|
||||||
title <- "Frequency table"
|
|
||||||
} else {
|
|
||||||
title <- paste("Frequency table of", trimws(title))
|
|
||||||
}
|
|
||||||
|
|
||||||
# bold title
|
# bold title
|
||||||
if (opt$tbl_format == "pandoc") {
|
if (opt$tbl_format == "pandoc") {
|
||||||
title <- bold(title)
|
title <- bold(title)
|
||||||
@ -620,10 +638,6 @@ print.frequency_tbl <- function(x, nmax = getOption("max.print.freq", default =
|
|||||||
return(invisible())
|
return(invisible())
|
||||||
}
|
}
|
||||||
|
|
||||||
if (all(x$count == 1)) {
|
|
||||||
warning('All observations are unique.', call. = FALSE)
|
|
||||||
}
|
|
||||||
|
|
||||||
# save old NA setting for kable
|
# save old NA setting for kable
|
||||||
opt.old <- options()$knitr.kable.NA
|
opt.old <- options()$knitr.kable.NA
|
||||||
if (is.null(opt$na)) {
|
if (is.null(opt$na)) {
|
||||||
@ -668,10 +682,34 @@ print.frequency_tbl <- function(x, nmax = getOption("max.print.freq", default =
|
|||||||
if (any(class(x$item) %in% c('double', 'integer', 'numeric', 'raw', 'single'))) {
|
if (any(class(x$item) %in% c('double', 'integer', 'numeric', 'raw', 'single'))) {
|
||||||
x$item <- format(x$item)
|
x$item <- format(x$item)
|
||||||
}
|
}
|
||||||
|
if ("item" %in% colnames(x)) {
|
||||||
|
x$item <- format(x$item)
|
||||||
|
} else {
|
||||||
|
opt$column_names <- opt$column_names[!opt$column_names == "Item"]
|
||||||
|
}
|
||||||
|
if ("count" %in% colnames(x)) {
|
||||||
|
if (all(x$count == 1)) {
|
||||||
|
warning('All observations are unique.', call. = FALSE)
|
||||||
|
}
|
||||||
x$count <- format(x$count)
|
x$count <- format(x$count)
|
||||||
|
} else {
|
||||||
|
opt$column_names <- opt$column_names[!opt$column_names == "Count"]
|
||||||
|
}
|
||||||
|
if ("percent" %in% colnames(x)) {
|
||||||
x$percent <- percent(x$percent, force_zero = TRUE)
|
x$percent <- percent(x$percent, force_zero = TRUE)
|
||||||
|
} else {
|
||||||
|
opt$column_names <- opt$column_names[!opt$column_names == "Percent"]
|
||||||
|
}
|
||||||
|
if ("cum_count" %in% colnames(x)) {
|
||||||
x$cum_count <- format(x$cum_count)
|
x$cum_count <- format(x$cum_count)
|
||||||
|
} else {
|
||||||
|
opt$column_names <- opt$column_names[!opt$column_names == "Cum. Count"]
|
||||||
|
}
|
||||||
|
if ("cum_percent" %in% colnames(x)) {
|
||||||
x$cum_percent <- percent(x$cum_percent, force_zero = TRUE)
|
x$cum_percent <- percent(x$cum_percent, force_zero = TRUE)
|
||||||
|
} else {
|
||||||
|
opt$column_names <- opt$column_names[!opt$column_names == "Cum. Percent"]
|
||||||
|
}
|
||||||
|
|
||||||
if (opt$tbl_format == "markdown") {
|
if (opt$tbl_format == "markdown") {
|
||||||
cat("\n")
|
cat("\n")
|
||||||
|
@ -41,7 +41,7 @@ read.4D <- function(file,
|
|||||||
encoding = "UTF-8") {
|
encoding = "UTF-8") {
|
||||||
|
|
||||||
if (info == TRUE) {
|
if (info == TRUE) {
|
||||||
message("Importing data... ", appendLF = FALSE)
|
message("Importing ", file, "... ", appendLF = FALSE)
|
||||||
}
|
}
|
||||||
data_4D <- utils::read.table(file = file,
|
data_4D <- utils::read.table(file = file,
|
||||||
row.names = row.names,
|
row.names = row.names,
|
||||||
|
27
man/freq.Rd
27
man/freq.Rd
@ -54,7 +54,7 @@ top_freq(f, n)
|
|||||||
\item{n}{number of top \emph{n} items to return, use -n for the bottom \emph{n} items. It will include more than \code{n} rows if there are ties.}
|
\item{n}{number of top \emph{n} items to return, use -n for the bottom \emph{n} items. It will include more than \code{n} rows if there are ties.}
|
||||||
}
|
}
|
||||||
\value{
|
\value{
|
||||||
A \code{data.frame} with an additional class \code{"frequency_tbl"}
|
A \code{data.frame} (with an additional class \code{"frequency_tbl"}) with five columns: \code{item}, \code{count}, \code{percent}, \code{cum_count} and \code{cum_percent}.
|
||||||
}
|
}
|
||||||
\description{
|
\description{
|
||||||
Create a frequency table of a vector with items or a data frame. Supports quasiquotation and markdown for reports. \code{top_freq} can be used to get the top/bottom \emph{n} items of a frequency table, with counts as names.
|
Create a frequency table of a vector with items or a data frame. Supports quasiquotation and markdown for reports. \code{top_freq} can be used to get the top/bottom \emph{n} items of a frequency table, with counts as names.
|
||||||
@ -95,55 +95,66 @@ septic_patients[, "hospital_id"] \%>\% freq()
|
|||||||
septic_patients \%>\% freq("hospital_id")
|
septic_patients \%>\% freq("hospital_id")
|
||||||
septic_patients \%>\% freq(hospital_id) #<- easiest to remember (tidyverse)
|
septic_patients \%>\% freq(hospital_id) #<- easiest to remember (tidyverse)
|
||||||
|
|
||||||
|
|
||||||
# you could also use `select` or `pull` to get your variables
|
# you could also use `select` or `pull` to get your variables
|
||||||
septic_patients \%>\%
|
septic_patients \%>\%
|
||||||
filter(hospital_id == "A") \%>\%
|
filter(hospital_id == "A") \%>\%
|
||||||
select(mo) \%>\%
|
select(mo) \%>\%
|
||||||
freq()
|
freq()
|
||||||
|
|
||||||
|
|
||||||
# multiple selected variables will be pasted together
|
# multiple selected variables will be pasted together
|
||||||
septic_patients \%>\%
|
septic_patients \%>\%
|
||||||
left_join_microorganisms \%>\%
|
left_join_microorganisms \%>\%
|
||||||
filter(hospital_id == "A") \%>\%
|
filter(hospital_id == "A") \%>\%
|
||||||
freq(genus, species)
|
freq(genus, species)
|
||||||
|
|
||||||
|
|
||||||
# group a variable and analyse another
|
# group a variable and analyse another
|
||||||
septic_patients \%>\%
|
septic_patients \%>\%
|
||||||
group_by(hospital_id) \%>\%
|
group_by(hospital_id) \%>\%
|
||||||
freq(gender)
|
freq(gender)
|
||||||
|
|
||||||
|
|
||||||
# get top 10 bugs of hospital A as a vector
|
# get top 10 bugs of hospital A as a vector
|
||||||
septic_patients \%>\%
|
septic_patients \%>\%
|
||||||
filter(hospital_id == "A") \%>\%
|
filter(hospital_id == "A") \%>\%
|
||||||
freq(mo) \%>\%
|
freq(mo) \%>\%
|
||||||
top_freq(10)
|
top_freq(10)
|
||||||
|
|
||||||
|
|
||||||
# save frequency table to an object
|
# save frequency table to an object
|
||||||
years <- septic_patients \%>\%
|
years <- septic_patients \%>\%
|
||||||
mutate(year = format(date, "\%Y")) \%>\%
|
mutate(year = format(date, "\%Y")) \%>\%
|
||||||
freq(year)
|
freq(year)
|
||||||
|
|
||||||
|
|
||||||
# show only the top 5
|
# show only the top 5
|
||||||
years \%>\% print(nmax = 5)
|
years \%>\% print(nmax = 5)
|
||||||
|
|
||||||
|
|
||||||
# save to an object with formatted percentages
|
# save to an object with formatted percentages
|
||||||
years <- format(years)
|
years <- format(years)
|
||||||
|
|
||||||
|
|
||||||
# print a histogram of numeric values
|
# print a histogram of numeric values
|
||||||
septic_patients \%>\%
|
septic_patients \%>\%
|
||||||
freq(age) \%>\%
|
freq(age) \%>\%
|
||||||
hist()
|
hist()
|
||||||
|
|
||||||
|
|
||||||
# or print all points to a regular plot
|
# or print all points to a regular plot
|
||||||
septic_patients \%>\%
|
septic_patients \%>\%
|
||||||
freq(age) \%>\%
|
freq(age) \%>\%
|
||||||
plot()
|
plot()
|
||||||
|
|
||||||
|
|
||||||
# transform to a data.frame or tibble
|
# transform to a data.frame or tibble
|
||||||
septic_patients \%>\%
|
septic_patients \%>\%
|
||||||
freq(age) \%>\%
|
freq(age) \%>\%
|
||||||
as.data.frame()
|
as.data.frame()
|
||||||
|
|
||||||
|
|
||||||
# or transform (back) to a vector
|
# or transform (back) to a vector
|
||||||
septic_patients \%>\%
|
septic_patients \%>\%
|
||||||
freq(age) \%>\%
|
freq(age) \%>\%
|
||||||
@ -155,11 +166,23 @@ identical(septic_patients \%>\%
|
|||||||
sort(),
|
sort(),
|
||||||
sort(septic_patients$age)) # TRUE
|
sort(septic_patients$age)) # TRUE
|
||||||
|
|
||||||
# it also supports `table` objects:
|
|
||||||
|
# it also supports `table` objects
|
||||||
table(septic_patients$gender,
|
table(septic_patients$gender,
|
||||||
septic_patients$age) \%>\%
|
septic_patients$age) \%>\%
|
||||||
freq(sep = " **sep** ")
|
freq(sep = " **sep** ")
|
||||||
|
|
||||||
|
|
||||||
|
# only get selected columns
|
||||||
|
septic_patients \%>\%
|
||||||
|
freq(hospital_id) \%>\%
|
||||||
|
select(item, percent)
|
||||||
|
|
||||||
|
septic_patients \%>\%
|
||||||
|
freq(hospital_id) \%>\%
|
||||||
|
select(-count, -cum_count)
|
||||||
|
|
||||||
|
|
||||||
# check differences between frequency tables
|
# check differences between frequency tables
|
||||||
diff(freq(septic_patients$trim),
|
diff(freq(septic_patients$trim),
|
||||||
freq(septic_patients$trsu))
|
freq(septic_patients$trsu))
|
||||||
|
@ -11,6 +11,8 @@ test_that("frequency table works", {
|
|||||||
expect_equal(nrow(freq(septic_patients$date)),
|
expect_equal(nrow(freq(septic_patients$date)),
|
||||||
length(unique(septic_patients$date)))
|
length(unique(septic_patients$date)))
|
||||||
|
|
||||||
|
expect_output(print(septic_patients %>% freq(age)))
|
||||||
|
expect_output(print(septic_patients %>% freq(age, nmax = 5)))
|
||||||
expect_output(print(septic_patients %>% freq(age, nmax = Inf)))
|
expect_output(print(septic_patients %>% freq(age, nmax = Inf)))
|
||||||
expect_output(print(freq(septic_patients$age, nmax = Inf)))
|
expect_output(print(freq(septic_patients$age, nmax = Inf)))
|
||||||
expect_output(print(freq(septic_patients$age, nmax = NA)))
|
expect_output(print(freq(septic_patients$age, nmax = NA)))
|
||||||
@ -123,6 +125,12 @@ test_that("frequency table works", {
|
|||||||
expect_error(septic_patients %>% freq(peni, oxac, clox, amox, amcl,
|
expect_error(septic_patients %>% freq(peni, oxac, clox, amox, amcl,
|
||||||
ampi, pita, czol, cfep, cfur))
|
ampi, pita, czol, cfep, cfur))
|
||||||
|
|
||||||
|
# (un)select columns
|
||||||
|
expect_equal(septic_patients %>% freq(hospital_id) %>% select(item) %>% ncol(),
|
||||||
|
1)
|
||||||
|
expect_equal(septic_patients %>% freq(hospital_id) %>% select(-item) %>% ncol(),
|
||||||
|
4)
|
||||||
|
|
||||||
# run diff
|
# run diff
|
||||||
expect_output(print(
|
expect_output(print(
|
||||||
diff(freq(septic_patients$amcl),
|
diff(freq(septic_patients$amcl),
|
||||||
|
30
tests/testthat/test-read.4d.R
Normal file
30
tests/testthat/test-read.4d.R
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
context("read.4d.R")
|
||||||
|
|
||||||
|
test_that("read 4D works", {
|
||||||
|
|
||||||
|
library(dplyr)
|
||||||
|
test1 <- data.frame(Patientnr = "ABC",
|
||||||
|
MV = "M",
|
||||||
|
Monsternr = "0123",
|
||||||
|
Afnamedat = "10-11-12",
|
||||||
|
Bepaling = "bk",
|
||||||
|
Afd. = "ABC",
|
||||||
|
Spec = "ABC",
|
||||||
|
Matbijz. = "ABC",
|
||||||
|
Mat = "ABC",
|
||||||
|
Mocode = "esccol",
|
||||||
|
PENI = "R",
|
||||||
|
stringsAsFactors = FALSE)
|
||||||
|
tf <- tempfile()
|
||||||
|
write.table(test1, file = tf, quote = F, sep = "\t")
|
||||||
|
|
||||||
|
x <- read.4D(tf, skip = 0)
|
||||||
|
unlink(tf)
|
||||||
|
|
||||||
|
expect_equal(ncol(x), 11)
|
||||||
|
expect_equal(class(x$date_received), "Date")
|
||||||
|
expect_equal(class(x$mo), "mo")
|
||||||
|
expect_equal(as.character(x$mo), "B_ESCHR_COL")
|
||||||
|
expect_equal(is.rsi(x$peni), TRUE)
|
||||||
|
|
||||||
|
})
|
Loading…
Reference in New Issue
Block a user