mirror of
https://github.com/msberends/AMR.git
synced 2024-12-26 05:26:13 +01:00
freq: support for table
This commit is contained in:
parent
18c91786bf
commit
fc30d3fb13
@ -1,3 +1,4 @@
|
|||||||
^.*\.Rproj$
|
^.*\.Rproj$
|
||||||
^\.Rproj\.user$
|
^\.Rproj\.user$
|
||||||
.travis.yml
|
.travis.yml
|
||||||
|
.zenodo.json
|
||||||
|
@ -43,7 +43,8 @@ Suggests:
|
|||||||
testthat (>= 1.0.2),
|
testthat (>= 1.0.2),
|
||||||
covr (>= 3.0.1),
|
covr (>= 3.0.1),
|
||||||
rmarkdown,
|
rmarkdown,
|
||||||
rstudioapi
|
rstudioapi,
|
||||||
|
tidyr
|
||||||
VignetteBuilder: knitr
|
VignetteBuilder: knitr
|
||||||
URL: https://github.com/msberends/AMR
|
URL: https://github.com/msberends/AMR
|
||||||
BugReports: https://github.com/msberends/AMR/issues
|
BugReports: https://github.com/msberends/AMR/issues
|
||||||
|
@ -4,6 +4,8 @@ S3method(as.data.frame,frequency_tbl)
|
|||||||
S3method(as.double,mic)
|
S3method(as.double,mic)
|
||||||
S3method(as.integer,mic)
|
S3method(as.integer,mic)
|
||||||
S3method(as.numeric,mic)
|
S3method(as.numeric,mic)
|
||||||
|
S3method(as.vector,frequency_tbl)
|
||||||
|
S3method(as_tibble,frequency_tbl)
|
||||||
S3method(barplot,mic)
|
S3method(barplot,mic)
|
||||||
S3method(barplot,rsi)
|
S3method(barplot,rsi)
|
||||||
S3method(hist,frequency_tbl)
|
S3method(hist,frequency_tbl)
|
||||||
@ -69,6 +71,8 @@ exportMethods(as.data.frame.frequency_tbl)
|
|||||||
exportMethods(as.double.mic)
|
exportMethods(as.double.mic)
|
||||||
exportMethods(as.integer.mic)
|
exportMethods(as.integer.mic)
|
||||||
exportMethods(as.numeric.mic)
|
exportMethods(as.numeric.mic)
|
||||||
|
exportMethods(as.vector.frequency_tbl)
|
||||||
|
exportMethods(as_tibble.frequency_tbl)
|
||||||
exportMethods(barplot.mic)
|
exportMethods(barplot.mic)
|
||||||
exportMethods(barplot.rsi)
|
exportMethods(barplot.rsi)
|
||||||
exportMethods(hist.frequency_tbl)
|
exportMethods(hist.frequency_tbl)
|
||||||
@ -147,6 +151,7 @@ importFrom(stats,sd)
|
|||||||
importFrom(tibble,tibble)
|
importFrom(tibble,tibble)
|
||||||
importFrom(utils,View)
|
importFrom(utils,View)
|
||||||
importFrom(utils,browseVignettes)
|
importFrom(utils,browseVignettes)
|
||||||
|
importFrom(utils,installed.packages)
|
||||||
importFrom(utils,object.size)
|
importFrom(utils,object.size)
|
||||||
importFrom(utils,packageDescription)
|
importFrom(utils,packageDescription)
|
||||||
importFrom(utils,read.delim)
|
importFrom(utils,read.delim)
|
||||||
|
2
NEWS.md
2
NEWS.md
@ -4,7 +4,9 @@
|
|||||||
* For convenience, descriptive statistical functions `kurtosis` and `skewness` that are lacking in base R - they are generic functions and have support for vectors, data.frames and matrices
|
* For convenience, descriptive statistical functions `kurtosis` and `skewness` that are lacking in base R - they are generic functions and have support for vectors, data.frames and matrices
|
||||||
* New for frequency tables (function `freq`):
|
* New for frequency tables (function `freq`):
|
||||||
* A vignette to explain its usage
|
* A vignette to explain its usage
|
||||||
|
* Support for `table` to use as input: `freq(table(x, y))`
|
||||||
* Support for existing functions `hist` and `plot` to use a frequency table as input: `hist(freq(df$age))`
|
* Support for existing functions `hist` and `plot` to use a frequency table as input: `hist(freq(df$age))`
|
||||||
|
* Support for `as.vector`, `as.data.frame` and `as_tibble`
|
||||||
* Support for quasiquotation: `freq(mydata, mycolumn)` is the same as `mydata %>% freq(mycolumn)`
|
* Support for quasiquotation: `freq(mydata, mycolumn)` is the same as `mydata %>% freq(mycolumn)`
|
||||||
* Function `top_freq` to return the top/bottom *n* items as a vector
|
* Function `top_freq` to return the top/bottom *n* items as a vector
|
||||||
* Header of frequency tables now also shows Mean Absolute Deviation (MAD) and Interquartile Range (IQR)
|
* Header of frequency tables now also shows Mean Absolute Deviation (MAD) and Interquartile Range (IQR)
|
||||||
|
100
R/freq.R
100
R/freq.R
@ -19,8 +19,8 @@
|
|||||||
#' Frequency table
|
#' Frequency table
|
||||||
#'
|
#'
|
||||||
#' Create a frequency table of a vector with items or a data frame. Supports quasiquotation and markdown for reports. \code{top_freq} can be used to get the top/bottom \emph{n} items of a frequency table, with counts as names.
|
#' Create a frequency table of a vector with items or a data frame. Supports quasiquotation and markdown for reports. \code{top_freq} can be used to get the top/bottom \emph{n} items of a frequency table, with counts as names.
|
||||||
#' @param x vector with items, or a \code{data.frame}
|
#' @param x vector of any class or a \code{\link{data.frame}}, \code{\link{tibble}} or \code{\link{table}}
|
||||||
#' @param ... up to nine different columns of \code{x} to calculate frequencies from, see Examples
|
#' @param ... up to nine different columns of \code{x} when \code{x} is a \code{data.frame} or \code{tibble}, to calculate frequencies from - see Examples
|
||||||
#' @param sort.count sort on count, i.e. frequencies. This will be \code{TRUE} by default for everything except factors.
|
#' @param sort.count sort on count, i.e. frequencies. This will be \code{TRUE} by default for everything except factors.
|
||||||
#' @param nmax number of rows to print. The default, \code{15}, uses \code{\link{getOption}("max.print.freq")}. Use \code{nmax = 0}, \code{nmax = Inf}, \code{nmax = NULL} or \code{nmax = NA} to print all rows.
|
#' @param nmax number of rows to print. The default, \code{15}, uses \code{\link{getOption}("max.print.freq")}. Use \code{nmax = 0}, \code{nmax = Inf}, \code{nmax = NULL} or \code{nmax = NA} to print all rows.
|
||||||
#' @param na.rm a logical value indicating whether \code{NA} values should be removed from the frequency table. The header will always print the amount of \code{NA}s.
|
#' @param na.rm a logical value indicating whether \code{NA} values should be removed from the frequency table. The header will always print the amount of \code{NA}s.
|
||||||
@ -56,7 +56,7 @@
|
|||||||
#' @importFrom stats fivenum sd mad
|
#' @importFrom stats fivenum sd mad
|
||||||
#' @importFrom grDevices boxplot.stats
|
#' @importFrom grDevices boxplot.stats
|
||||||
#' @importFrom dplyr %>% select pull n_distinct group_by arrange desc mutate summarise n_distinct
|
#' @importFrom dplyr %>% select pull n_distinct group_by arrange desc mutate summarise n_distinct
|
||||||
#' @importFrom utils browseVignettes
|
#' @importFrom utils browseVignettes installed.packages
|
||||||
#' @importFrom tibble tibble
|
#' @importFrom tibble tibble
|
||||||
#' @keywords summary summarise frequency freq
|
#' @keywords summary summarise frequency freq
|
||||||
#' @rdname freq
|
#' @rdname freq
|
||||||
@ -72,20 +72,15 @@
|
|||||||
#' septic_patients$hospital_id %>% freq()
|
#' septic_patients$hospital_id %>% freq()
|
||||||
#' septic_patients[, "hospital_id"] %>% freq()
|
#' septic_patients[, "hospital_id"] %>% freq()
|
||||||
#' septic_patients %>% freq("hospital_id")
|
#' septic_patients %>% freq("hospital_id")
|
||||||
#' septic_patients %>% freq(hospital_id) # <- easiest to remember when used to tidyverse
|
#' septic_patients %>% freq(hospital_id) #<- easiest to remember when you're used to tidyverse
|
||||||
#'
|
#'
|
||||||
#' # you could use `select`...
|
#' # you could also use `select` or `pull` to get your variables
|
||||||
#' septic_patients %>%
|
#' septic_patients %>%
|
||||||
#' filter(hospital_id == "A") %>%
|
#' filter(hospital_id == "A") %>%
|
||||||
#' select(bactid) %>%
|
#' select(bactid) %>%
|
||||||
#' freq()
|
#' freq()
|
||||||
#'
|
#'
|
||||||
#' # ... or you use `freq` to select it immediately
|
#' # multiple selected variables will be pasted together
|
||||||
#' septic_patients %>%
|
|
||||||
#' filter(hospital_id == "A") %>%
|
|
||||||
#' freq(bactid)
|
|
||||||
#'
|
|
||||||
#' # select multiple columns; they will be pasted together
|
|
||||||
#' septic_patients %>%
|
#' septic_patients %>%
|
||||||
#' left_join_microorganisms %>%
|
#' left_join_microorganisms %>%
|
||||||
#' filter(hospital_id == "A") %>%
|
#' filter(hospital_id == "A") %>%
|
||||||
@ -102,13 +97,40 @@
|
|||||||
#' mutate(year = format(date, "%Y")) %>%
|
#' mutate(year = format(date, "%Y")) %>%
|
||||||
#' freq(year)
|
#' freq(year)
|
||||||
#'
|
#'
|
||||||
#' # print only top 5
|
#' # show only the top 5
|
||||||
#' years %>% print(nmax = 5)
|
#' years %>% print(nmax = 5)
|
||||||
#'
|
#'
|
||||||
#' # transform to plain data.frame
|
#' # print a histogram of numeric values
|
||||||
|
#' septic_patients %>%
|
||||||
|
#' freq(age) %>%
|
||||||
|
#' hist() # prettier: ggplot(septic_patients, aes(age)) + geom_histogram()
|
||||||
|
#'
|
||||||
|
#' # or print all points to a regular plot
|
||||||
|
#' septic_patients %>%
|
||||||
|
#' freq(age) %>%
|
||||||
|
#' plot()
|
||||||
|
#'
|
||||||
|
#' # transform to a data.frame or tibble
|
||||||
#' septic_patients %>%
|
#' septic_patients %>%
|
||||||
#' freq(age) %>%
|
#' freq(age) %>%
|
||||||
#' as.data.frame()
|
#' as.data.frame()
|
||||||
|
#'
|
||||||
|
#' # or transform (back) to a vector
|
||||||
|
#' septic_patients %>%
|
||||||
|
#' freq(age) %>%
|
||||||
|
#' as.vector()
|
||||||
|
#'
|
||||||
|
#' identical(septic_patients %>%
|
||||||
|
#' freq(age) %>%
|
||||||
|
#' as.vector() %>%
|
||||||
|
#' sort(),
|
||||||
|
#' sort(septic_patients$age)
|
||||||
|
#' ) # TRUE
|
||||||
|
#'
|
||||||
|
#' # also supports table:
|
||||||
|
#' table(septic_patients$sex,
|
||||||
|
#' septic_patients$age) %>%
|
||||||
|
#' freq()
|
||||||
frequency_tbl <- function(x,
|
frequency_tbl <- function(x,
|
||||||
...,
|
...,
|
||||||
sort.count = TRUE,
|
sort.count = TRUE,
|
||||||
@ -138,6 +160,24 @@ frequency_tbl <- function(x,
|
|||||||
} else {
|
} else {
|
||||||
cols <- NULL
|
cols <- NULL
|
||||||
}
|
}
|
||||||
|
} else if (any(class(x) == 'table')) {
|
||||||
|
if (!"tidyr" %in% rownames(installed.packages())) {
|
||||||
|
stop('transformation from `table` to frequency table requires the tidyr package.', call. = FALSE)
|
||||||
|
}
|
||||||
|
values <- x %>%
|
||||||
|
as.data.frame(stringsAsFactors = FALSE) %>%
|
||||||
|
# delete last variable: these are frequencies
|
||||||
|
select(-ncol(.)) %>%
|
||||||
|
# paste all other columns:
|
||||||
|
tidyr::unite(sep = sep) %>%
|
||||||
|
.[, 1]
|
||||||
|
counts <- x %>%
|
||||||
|
as.data.frame(stringsAsFactors = FALSE) %>%
|
||||||
|
# get last variable: these are frequencies
|
||||||
|
pull(ncol(.))
|
||||||
|
x <- rep(values, counts)
|
||||||
|
x.name <- NULL
|
||||||
|
cols <- NULL
|
||||||
} else {
|
} else {
|
||||||
x.name <- NULL
|
x.name <- NULL
|
||||||
cols <- NULL
|
cols <- NULL
|
||||||
@ -523,41 +563,47 @@ as.data.frame.frequency_tbl <- function(x, ...) {
|
|||||||
as.data.frame.data.frame(x, ...)
|
as.data.frame.data.frame(x, ...)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#' @noRd
|
||||||
|
#' @exportMethod as_tibble.frequency_tbl
|
||||||
|
#' @export
|
||||||
|
#' @importFrom dplyr as_tibble
|
||||||
|
as_tibble.frequency_tbl <- function(x, validate = TRUE, ..., rownames = NA) {
|
||||||
|
attr(x, 'package') <- NULL
|
||||||
|
attr(x, 'package.version') <- NULL
|
||||||
|
attr(x, 'opt') <- NULL
|
||||||
|
as_tibble(x = as.data.frame(x), validate = validate, ..., rownames = rownames)
|
||||||
|
}
|
||||||
|
|
||||||
#' @noRd
|
#' @noRd
|
||||||
#' @exportMethod hist.frequency_tbl
|
#' @exportMethod hist.frequency_tbl
|
||||||
#' @export
|
#' @export
|
||||||
#' @importFrom dplyr %>% pull
|
|
||||||
#' @importFrom graphics hist
|
#' @importFrom graphics hist
|
||||||
hist.frequency_tbl <- function(x, ...) {
|
hist.frequency_tbl <- function(x, ...) {
|
||||||
|
|
||||||
opt <- attr(x, 'opt')
|
opt <- attr(x, 'opt')
|
||||||
|
|
||||||
if (!is.null(opt$vars)) {
|
if (!is.null(opt$vars)) {
|
||||||
title <- opt$vars
|
title <- opt$vars
|
||||||
} else {
|
} else {
|
||||||
title <- ""
|
title <- ""
|
||||||
}
|
}
|
||||||
|
hist(as.vector(x), main = paste("Histogram of", title), xlab = title, ...)
|
||||||
items <- x %>% pull(item)
|
|
||||||
counts <- x %>% pull(count)
|
|
||||||
vect <- rep(items, counts)
|
|
||||||
hist(vect, main = paste("Histogram of", title), xlab = title, ...)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#' @noRd
|
#' @noRd
|
||||||
#' @exportMethod plot.frequency_tbl
|
#' @exportMethod plot.frequency_tbl
|
||||||
#' @export
|
#' @export
|
||||||
#' @importFrom dplyr %>% pull
|
|
||||||
plot.frequency_tbl <- function(x, y, ...) {
|
plot.frequency_tbl <- function(x, y, ...) {
|
||||||
opt <- attr(x, 'opt')
|
opt <- attr(x, 'opt')
|
||||||
|
|
||||||
if (!is.null(opt$vars)) {
|
if (!is.null(opt$vars)) {
|
||||||
title <- opt$vars
|
title <- opt$vars
|
||||||
} else {
|
} else {
|
||||||
title <- ""
|
title <- ""
|
||||||
}
|
}
|
||||||
|
plot(x = x$item, y = x$count, ylab = "Count", xlab = title, ...)
|
||||||
items <- x %>% pull(item)
|
}
|
||||||
counts <- x %>% pull(count)
|
|
||||||
plot(x = items, y = counts, ylab = "Count", xlab = title, ...)
|
#' @noRd
|
||||||
|
#' @exportMethod as.vector.frequency_tbl
|
||||||
|
#' @export
|
||||||
|
as.vector.frequency_tbl <- function(x, mode = "any") {
|
||||||
|
as.vector(rep(x$item, x$count), mode = mode)
|
||||||
}
|
}
|
||||||
|
@ -22,6 +22,7 @@ globalVariables(c('abname',
|
|||||||
'bactid',
|
'bactid',
|
||||||
'cnt',
|
'cnt',
|
||||||
'count',
|
'count',
|
||||||
|
'counts',
|
||||||
'cum_count',
|
'cum_count',
|
||||||
'cum_percent',
|
'cum_percent',
|
||||||
'date_lab',
|
'date_lab',
|
||||||
@ -50,6 +51,7 @@ globalVariables(c('abname',
|
|||||||
'septic_patients',
|
'septic_patients',
|
||||||
'species',
|
'species',
|
||||||
'umcg',
|
'umcg',
|
||||||
|
'values',
|
||||||
'View',
|
'View',
|
||||||
'y',
|
'y',
|
||||||
'.'))
|
'.'))
|
||||||
|
118
README.md
118
README.md
@ -47,9 +47,12 @@ With the `MDRO` function (abbreviation of Multi Drug Resistant Organisms), you c
|
|||||||
This package is available on CRAN and also here on GitHub.
|
This package is available on CRAN and also here on GitHub.
|
||||||
|
|
||||||
### From CRAN (recommended)
|
### From CRAN (recommended)
|
||||||
|
Latest released version on CRAN:
|
||||||
|
|
||||||
[![CRAN_Badge](https://img.shields.io/cran/v/AMR.svg?label=CRAN&colorB=3679BC)](http://cran.r-project.org/package=AMR)
|
[![CRAN_Badge](https://img.shields.io/cran/v/AMR.svg?label=CRAN&colorB=3679BC)](http://cran.r-project.org/package=AMR)
|
||||||
|
|
||||||
Downloads via RStudio CRAN server (downloads by all other CRAN mirrors not measured):
|
Downloads via RStudio CRAN server (downloads by all other CRAN mirrors **not** measured, including the official https://cran.r-project.org):
|
||||||
|
|
||||||
[![CRAN_Downloads](https://cranlogs.r-pkg.org/badges/grand-total/AMR)](http://cran.r-project.org/package=AMR)
|
[![CRAN_Downloads](https://cranlogs.r-pkg.org/badges/grand-total/AMR)](http://cran.r-project.org/package=AMR)
|
||||||
[![CRAN_Downloads](https://cranlogs.r-pkg.org/badges/AMR)](https://cranlogs.r-pkg.org/downloads/daily/last-month/AMR)
|
[![CRAN_Downloads](https://cranlogs.r-pkg.org/badges/AMR)](https://cranlogs.r-pkg.org/downloads/daily/last-month/AMR)
|
||||||
|
|
||||||
@ -122,80 +125,91 @@ after
|
|||||||
```
|
```
|
||||||
|
|
||||||
### Frequency tables
|
### Frequency tables
|
||||||
Base R lacks a simple function to create frequency tables. We created such a function that works with almost all data types: `freq` (or `frequency_tbl`).
|
Base R lacks a simple function to create frequency tables. We created such a function that works with almost all data types: `freq` (or `frequency_tbl`). It can be used in two ways:
|
||||||
```r
|
```r
|
||||||
## Factors sort on item by default:
|
# Like base R:
|
||||||
|
freq(mydata$myvariable)
|
||||||
|
|
||||||
freq(septic_patients$hospital_id)
|
# And like tidyverse:
|
||||||
|
mydata %>% freq(myvariable)
|
||||||
|
```
|
||||||
|
|
||||||
|
Factors sort on item by default:
|
||||||
|
```r
|
||||||
|
septic_patients %>% freq(hospital_id)
|
||||||
|
# Frequency table of `hospital_id`
|
||||||
# Class: factor
|
# Class: factor
|
||||||
# Length: 2000 (of which NA: 0 = 0.0%)
|
# Length: 2000 (of which NA: 0 = 0.0%)
|
||||||
# Unique: 5
|
# Unique: 5
|
||||||
#
|
#
|
||||||
# Item Count Percent Cum. Count Cum. Percent (Factor Level)
|
# Item Count Percent Cum. Count Cum. Percent (Factor Level)
|
||||||
# ----- ------ -------- ----------- ------------- ---------------
|
# --- ----- ------ -------- ----------- ------------- ---------------
|
||||||
# A 233 11.7% 233 11.7% 1
|
# 1 A 233 11.7% 233 11.7% 1
|
||||||
# B 583 29.1% 816 40.8% 2
|
# 2 B 583 29.1% 816 40.8% 2
|
||||||
# C 221 11.1% 1037 51.8% 3
|
# 3 C 221 11.1% 1037 51.8% 3
|
||||||
# D 650 32.5% 1687 84.4% 4
|
# 4 D 650 32.5% 1687 84.4% 4
|
||||||
# E 313 15.7% 2000 100.0% 5
|
# 5 E 313 15.7% 2000 100.0% 5
|
||||||
|
```
|
||||||
|
|
||||||
|
This can be changed with the `sort.count` parameter:
|
||||||
## This can be changed with the `sort.count` parameter:
|
```r
|
||||||
|
septic_patients %>% freq(hospital_id, sort.count = TRUE)
|
||||||
freq(septic_patients$hospital_id, sort.count = TRUE)
|
# Frequency table of `hospital_id`
|
||||||
# Class: factor
|
# Class: factor
|
||||||
# Length: 2000 (of which NA: 0 = 0.0%)
|
# Length: 2000 (of which NA: 0 = 0.0%)
|
||||||
# Unique: 5
|
# Unique: 5
|
||||||
#
|
#
|
||||||
# Item Count Percent Cum. Count Cum. Percent (Factor Level)
|
# Item Count Percent Cum. Count Cum. Percent (Factor Level)
|
||||||
# ----- ------ -------- ----------- ------------- ---------------
|
# --- ----- ------ -------- ----------- ------------- ---------------
|
||||||
# D 650 32.5% 650 32.5% 4
|
# 1 D 650 32.5% 650 32.5% 4
|
||||||
# B 583 29.1% 1233 61.7% 2
|
# 2 B 583 29.1% 1233 61.7% 2
|
||||||
# E 313 15.7% 1546 77.3% 5
|
# 3 E 313 15.7% 1546 77.3% 5
|
||||||
# A 233 11.7% 1779 88.9% 1
|
# 4 A 233 11.7% 1779 88.9% 1
|
||||||
# C 221 11.1% 2000 100.0% 3
|
# 5 C 221 11.1% 2000 100.0% 3
|
||||||
|
```
|
||||||
|
|
||||||
|
All other types, like numbers, characters and dates, sort on count by default:
|
||||||
## Other types, like numbers or dates, sort on count by default:
|
```r
|
||||||
|
septic_patients %>% freq(date)
|
||||||
> freq(septic_patients$date)
|
# Frequency table of `date`
|
||||||
# Class: Date
|
# Class: Date
|
||||||
# Length: 2000 (of which NA: 0 = 0.0%)
|
# Length: 2000 (of which NA: 0 = 0.0%)
|
||||||
# Unique: 1662
|
# Unique: 1662
|
||||||
#
|
#
|
||||||
# Oldest: 2 January 2001
|
# Oldest: 2 January 2001
|
||||||
# Newest: 18 October 2017 (+6133)
|
# Newest: 18 October 2017 (+6133)
|
||||||
|
# Median: 6 December 2009 (~53%)
|
||||||
#
|
#
|
||||||
# Item Count Percent Cum. Count Cum. Percent
|
# Item Count Percent Cum. Count Cum. Percent
|
||||||
# ----------- ------ -------- ----------- -------------
|
# --- ----------- ------ -------- ----------- -------------
|
||||||
# 2008-12-24 5 0.2% 5 0.2%
|
# 1 2008-12-24 5 0.2% 5 0.2%
|
||||||
# 2010-12-10 4 0.2% 9 0.4%
|
# 2 2010-12-10 4 0.2% 9 0.4%
|
||||||
# 2011-03-03 4 0.2% 13 0.6%
|
# 3 2011-03-03 4 0.2% 13 0.6%
|
||||||
# 2013-06-24 4 0.2% 17 0.8%
|
# 4 2013-06-24 4 0.2% 17 0.8%
|
||||||
# 2017-09-01 4 0.2% 21 1.1%
|
# 5 2017-09-01 4 0.2% 21 1.1%
|
||||||
# 2002-09-02 3 0.2% 24 1.2%
|
# 6 2002-09-02 3 0.2% 24 1.2%
|
||||||
# 2003-10-14 3 0.2% 27 1.4%
|
# 7 2003-10-14 3 0.2% 27 1.4%
|
||||||
# 2004-06-25 3 0.2% 30 1.5%
|
# 8 2004-06-25 3 0.2% 30 1.5%
|
||||||
# 2004-06-27 3 0.2% 33 1.7%
|
# 9 2004-06-27 3 0.2% 33 1.7%
|
||||||
# 2004-10-29 3 0.2% 36 1.8%
|
# 10 2004-10-29 3 0.2% 36 1.8%
|
||||||
# 2005-09-27 3 0.2% 39 2.0%
|
# 11 2005-09-27 3 0.2% 39 2.0%
|
||||||
# 2006-08-01 3 0.2% 42 2.1%
|
# 12 2006-08-01 3 0.2% 42 2.1%
|
||||||
# 2006-10-10 3 0.2% 45 2.2%
|
# 13 2006-10-10 3 0.2% 45 2.2%
|
||||||
# 2007-11-16 3 0.2% 48 2.4%
|
# 14 2007-11-16 3 0.2% 48 2.4%
|
||||||
# 2008-03-09 3 0.2% 51 2.5%
|
# 15 2008-03-09 3 0.2% 51 2.5%
|
||||||
# ... and 1647 more (n = 1949; 97.5%). Use `nmax` to show more rows.
|
# [ reached getOption("max.print.freq") -- omitted 1647 entries, n = 1949 (97.5%) ]
|
||||||
|
```
|
||||||
|
For numeric values, some extra descriptive statistics will be calculated:
|
||||||
## For numeric values, some extra descriptive statistics will be calculated:
|
```r
|
||||||
|
freq(runif(n = 10, min = 1, max = 5))
|
||||||
> freq(runif(n = 10, min = 1, max = 5))
|
# Frequency table
|
||||||
# Class: numeric
|
# Class: numeric
|
||||||
# Length: 10 (of which NA: 0 = 0.0%)
|
# Length: 10 (of which NA: 0 = 0.0%)
|
||||||
# Unique: 10
|
# Unique: 10
|
||||||
#
|
#
|
||||||
# Mean: 3
|
# Mean: 2.9
|
||||||
# Std. dev.: 0.93 (CV: 0.31)
|
# Std. dev.: 1.3 (CV: 0.43, MAD: 1.5)
|
||||||
# Five-Num: 1.1 | 2.3 | 3.1 | 3.8 | 4.0 (CQV: 0.25)
|
# Five-Num: 1.5 | 1.7 | 2.6 | 4.0 | 4.7 (IQR: 2.3, CQV: 0.4)
|
||||||
# Outliers: 0
|
# Outliers: 0
|
||||||
#
|
#
|
||||||
# Item Count Percent Cum. Count Cum. Percent
|
# Item Count Percent Cum. Count Cum. Percent
|
||||||
|
46
man/freq.Rd
46
man/freq.Rd
@ -21,9 +21,9 @@ top_freq(f, n)
|
|||||||
15), ...)
|
15), ...)
|
||||||
}
|
}
|
||||||
\arguments{
|
\arguments{
|
||||||
\item{x}{vector with items, or a \code{data.frame}}
|
\item{x}{vector of any class or a \code{\link{data.frame}}, \code{\link{tibble}} or \code{\link{table}}}
|
||||||
|
|
||||||
\item{...}{up to nine different columns of \code{x} to calculate frequencies from, see Examples}
|
\item{...}{up to nine different columns of \code{x} when \code{x} is a \code{data.frame} or \code{tibble}, to calculate frequencies from - see Examples}
|
||||||
|
|
||||||
\item{sort.count}{sort on count, i.e. frequencies. This will be \code{TRUE} by default for everything except factors.}
|
\item{sort.count}{sort on count, i.e. frequencies. This will be \code{TRUE} by default for everything except factors.}
|
||||||
|
|
||||||
@ -83,20 +83,15 @@ freq(septic_patients[, "hospital_id"])
|
|||||||
septic_patients$hospital_id \%>\% freq()
|
septic_patients$hospital_id \%>\% freq()
|
||||||
septic_patients[, "hospital_id"] \%>\% freq()
|
septic_patients[, "hospital_id"] \%>\% freq()
|
||||||
septic_patients \%>\% freq("hospital_id")
|
septic_patients \%>\% freq("hospital_id")
|
||||||
septic_patients \%>\% freq(hospital_id) # <- easiest to remember when used to tidyverse
|
septic_patients \%>\% freq(hospital_id) #<- easiest to remember when you're used to tidyverse
|
||||||
|
|
||||||
# you could use `select`...
|
# you could also use `select` or `pull` to get your variables
|
||||||
septic_patients \%>\%
|
septic_patients \%>\%
|
||||||
filter(hospital_id == "A") \%>\%
|
filter(hospital_id == "A") \%>\%
|
||||||
select(bactid) \%>\%
|
select(bactid) \%>\%
|
||||||
freq()
|
freq()
|
||||||
|
|
||||||
# ... or you use `freq` to select it immediately
|
# multiple selected variables will be pasted together
|
||||||
septic_patients \%>\%
|
|
||||||
filter(hospital_id == "A") \%>\%
|
|
||||||
freq(bactid)
|
|
||||||
|
|
||||||
# select multiple columns; they will be pasted together
|
|
||||||
septic_patients \%>\%
|
septic_patients \%>\%
|
||||||
left_join_microorganisms \%>\%
|
left_join_microorganisms \%>\%
|
||||||
filter(hospital_id == "A") \%>\%
|
filter(hospital_id == "A") \%>\%
|
||||||
@ -113,13 +108,40 @@ years <- septic_patients \%>\%
|
|||||||
mutate(year = format(date, "\%Y")) \%>\%
|
mutate(year = format(date, "\%Y")) \%>\%
|
||||||
freq(year)
|
freq(year)
|
||||||
|
|
||||||
# print only top 5
|
# show only the top 5
|
||||||
years \%>\% print(nmax = 5)
|
years \%>\% print(nmax = 5)
|
||||||
|
|
||||||
# transform to plain data.frame
|
# print a histogram of numeric values
|
||||||
|
septic_patients \%>\%
|
||||||
|
freq(age) \%>\%
|
||||||
|
hist() # prettier: ggplot(septic_patients, aes(age)) + geom_histogram()
|
||||||
|
|
||||||
|
# or print all points to a regular plot
|
||||||
|
septic_patients \%>\%
|
||||||
|
freq(age) \%>\%
|
||||||
|
plot()
|
||||||
|
|
||||||
|
# transform to a data.frame or tibble
|
||||||
septic_patients \%>\%
|
septic_patients \%>\%
|
||||||
freq(age) \%>\%
|
freq(age) \%>\%
|
||||||
as.data.frame()
|
as.data.frame()
|
||||||
|
|
||||||
|
# or transform (back) to a vector
|
||||||
|
septic_patients \%>\%
|
||||||
|
freq(age) \%>\%
|
||||||
|
as.vector()
|
||||||
|
|
||||||
|
identical(septic_patients \%>\%
|
||||||
|
freq(age) \%>\%
|
||||||
|
as.vector() \%>\%
|
||||||
|
sort(),
|
||||||
|
sort(septic_patients$age)
|
||||||
|
) # TRUE
|
||||||
|
|
||||||
|
# also supports table:
|
||||||
|
table(septic_patients$sex,
|
||||||
|
septic_patients$age) \%>\%
|
||||||
|
freq()
|
||||||
}
|
}
|
||||||
\keyword{freq}
|
\keyword{freq}
|
||||||
\keyword{frequency}
|
\keyword{frequency}
|
||||||
|
@ -9,12 +9,16 @@ test_that("frequency table works", {
|
|||||||
expect_equal(nrow(freq(septic_patients$date)),
|
expect_equal(nrow(freq(septic_patients$date)),
|
||||||
length(unique(septic_patients$date)))
|
length(unique(septic_patients$date)))
|
||||||
|
|
||||||
# int
|
# character
|
||||||
|
expect_output(print(freq(septic_patients$bactid)))
|
||||||
|
# integer
|
||||||
expect_output(print(freq(septic_patients$age)))
|
expect_output(print(freq(septic_patients$age)))
|
||||||
# date
|
# date
|
||||||
expect_output(print(freq(septic_patients$date)))
|
expect_output(print(freq(septic_patients$date)))
|
||||||
# factor
|
# factor
|
||||||
expect_output(print(freq(septic_patients$hospital_id)))
|
expect_output(print(freq(septic_patients$hospital_id)))
|
||||||
|
# table
|
||||||
|
expect_output(print(freq(table(septic_patients$sex, septic_patients$age))))
|
||||||
|
|
||||||
library(dplyr)
|
library(dplyr)
|
||||||
expect_output(septic_patients %>% select(1:2) %>% freq() %>% print())
|
expect_output(septic_patients %>% select(1:2) %>% freq() %>% print())
|
||||||
@ -53,5 +57,14 @@ test_that("frequency table works", {
|
|||||||
plot(freq(septic_patients, age))
|
plot(freq(septic_patients, age))
|
||||||
hist(freq(septic_patients, age))
|
hist(freq(septic_patients, age))
|
||||||
|
|
||||||
|
# check vector
|
||||||
|
expect_identical(septic_patients %>%
|
||||||
|
freq(age) %>%
|
||||||
|
as.vector() %>%
|
||||||
|
sort(),
|
||||||
|
septic_patients %>%
|
||||||
|
pull(age) %>%
|
||||||
|
sort())
|
||||||
|
|
||||||
})
|
})
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user