2018-04-18 12:24:54 +02:00
# ==================================================================== #
# TITLE #
# Antimicrobial Resistance (AMR) Analysis #
# #
# AUTHORS #
# Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl) #
# #
# LICENCE #
2018-12-16 22:45:12 +01:00
# This package is free software; you can redistribute it and/or modify #
2018-04-18 12:24:54 +02:00
# it under the terms of the GNU General Public License version 2.0, #
# as published by the Free Software Foundation. #
# #
2018-12-16 22:45:12 +01:00
# This R package is distributed in the hope that it will be useful, #
2018-04-18 12:24:54 +02:00
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
2018-12-16 22:45:12 +01:00
# GNU General Public License version 2.0 for more details. #
2018-04-18 12:24:54 +02:00
# ==================================================================== #
#' Frequency table
#'
2018-07-01 21:40:37 +02:00
#' Create a frequency table of a vector with items or a data frame. Supports quasiquotation and markdown for reports. \code{top_freq} can be used to get the top/bottom \emph{n} items of a frequency table, with counts as names.
2018-11-06 16:41:59 +01:00
#' @param x vector of any class or a \code{\link{data.frame}}, \code{\link{tibble}} (may contain a grouping variable) or \code{\link{table}}
2018-07-09 14:02:58 +02:00
#' @param ... up to nine different columns of \code{x} when \code{x} is a \code{data.frame} or \code{tibble}, to calculate frequencies from - see Examples
2018-11-06 16:41:59 +01:00
#' @param sort.count sort on count, i.e. frequencies. This will be \code{TRUE} at default for everything except when using grouping variables.
2018-07-03 11:30:40 +02:00
#' @param nmax number of row to print. The default, \code{15}, uses \code{\link{getOption}("max.print.freq")}. Use \code{nmax = 0}, \code{nmax = Inf}, \code{nmax = NULL} or \code{nmax = NA} to print all rows.
2018-10-23 09:42:26 +02:00
#' @param na.rm a logical value indicating whether \code{NA} values should be removed from the frequency table. The header (if set) will always print the amount of \code{NA}s.
2018-06-19 15:20:14 +02:00
#' @param row.names a logical value indicating whether row indices should be printed as \code{1:nrow(x)}
2018-10-22 12:32:59 +02:00
#' @param markdown a logical value indicating whether the frequency table should be printed in markdown format. This will print all rows and is default behaviour in non-interactive R sessions (like when knitting RMarkdown files).
#' @param digits how many significant digits are to be used for numeric values in the header (not for the items themselves, that depends on \code{\link{getOption}("digits")})
2018-09-10 15:45:25 +02:00
#' @param quote a logical value indicating whether or not strings should be printed with surrounding quotes
2018-10-19 21:52:08 +02:00
#' @param header a logical value indicating whether an informative header should be printed
2018-10-31 12:10:49 +01:00
#' @param title text to show above frequency table, at default to tries to coerce from the variables passed to \code{x}
2018-10-23 09:42:26 +02:00
#' @param na a character string to should be used to show empty (\code{NA}) values (only useful when \code{na.rm = FALSE})
2018-12-22 22:39:34 +01:00
#' @param droplevels a logical value indicating whether in factors empty levels should be dropped
2018-04-18 12:24:54 +02:00
#' @param sep a character string to separate the terms when selecting multiple columns
2018-12-10 10:13:40 +01:00
#' @inheritParams base::format
2018-07-01 21:40:37 +02:00
#' @param f a frequency table
2018-06-20 14:47:37 +02:00
#' @param n number of top \emph{n} items to return, use -n for the bottom \emph{n} items. It will include more than \code{n} rows if there are ties.
2018-07-03 11:30:40 +02:00
#' @details Frequency tables (or frequency distributions) are summaries of the distribution of values in a sample. With the `freq` function, you can create univariate frequency tables. Multiple variables will be pasted into one variable, so it forces a univariate distribution. This package also has a vignette available to explain the use of this function further, run \code{browseVignettes("AMR")} to read it.
2018-06-20 14:47:37 +02:00
#'
2018-10-22 12:32:59 +02:00
#' For numeric values of any class, these additional values will all be calculated with \code{na.rm = TRUE} and shown into the header:
2018-04-18 12:24:54 +02:00
#' \itemize{
#' \item{Mean, using \code{\link[base]{mean}}}
2018-07-03 11:30:40 +02:00
#' \item{Standard Deviation, using \code{\link[stats]{sd}}}
#' \item{Coefficient of Variation (CV), the standard deviation divided by the mean}
#' \item{Mean Absolute Deviation (MAD), using \code{\link[stats]{mad}}}
#' \item{Tukey Five-Number Summaries (minimum, Q1, median, Q3, maximum), using \code{\link[stats]{fivenum}}}
#' \item{Interquartile Range (IQR) calculated as \code{Q3 - Q1} using the Tukey Five-Number Summaries, i.e. \strong{not} using the \code{\link[stats]{quantile}} function}
#' \item{Coefficient of Quartile Variation (CQV, sometimes called coefficient of dispersion), calculated as \code{(Q3 - Q1) / (Q3 + Q1)} using the Tukey Five-Number Summaries}
#' \item{Outliers (total count and unique count), using \code{\link[grDevices]{boxplot.stats}}}
2018-04-18 12:24:54 +02:00
#' }
2018-06-20 14:47:37 +02:00
#'
2018-10-22 12:32:59 +02:00
#' For dates and times of any class, these additional values will be calculated with \code{na.rm = TRUE} and shown into the header:
2018-06-20 14:47:37 +02:00
#' \itemize{
2018-07-08 22:14:55 +02:00
#' \item{Oldest, using \code{\link{min}}}
#' \item{Newest, using \code{\link{max}}, with difference between newest and oldest}
2018-06-20 14:47:37 +02:00
#' \item{Median, using \code{\link[stats]{median}}, with percentage since oldest}
#' }
#'
2018-12-22 22:39:34 +01:00
#' In factors, all factor levels that are not existing in the input data will be dropped.
2018-07-03 11:30:40 +02:00
#'
2018-06-20 14:47:37 +02:00
#' The function \code{top_freq} uses \code{\link[dplyr]{top_n}} internally and will include more than \code{n} rows if there are ties.
2018-07-03 11:30:40 +02:00
#' @importFrom stats fivenum sd mad
2018-04-18 12:24:54 +02:00
#' @importFrom grDevices boxplot.stats
2018-12-22 22:39:34 +01:00
#' @importFrom dplyr %>% arrange arrange_at desc filter_at funs group_by mutate mutate_at n n_distinct pull select summarise tibble ungroup vars all_vars
2018-11-06 16:41:59 +01:00
#' @importFrom utils browseVignettes
2018-10-09 13:53:33 +02:00
#' @importFrom hms is.hms
2018-11-02 10:27:57 +01:00
#' @importFrom crayon red green silver
2018-04-18 12:24:54 +02:00
#' @keywords summary summarise frequency freq
#' @rdname freq
2018-07-01 21:40:37 +02:00
#' @name freq
2018-11-19 13:00:22 +01:00
#' @return A \code{data.frame} (with an additional class \code{"frequency_tbl"}) with five columns: \code{item}, \code{count}, \code{percent}, \code{cum_count} and \code{cum_percent}.
2018-04-18 12:24:54 +02:00
#' @export
#' @examples
#' library(dplyr)
#'
2018-07-01 21:40:37 +02:00
#' # this all gives the same result:
2018-04-18 12:24:54 +02:00
#' freq(septic_patients$hospital_id)
2018-07-01 21:40:37 +02:00
#' freq(septic_patients[, "hospital_id"])
#' septic_patients$hospital_id %>% freq()
#' septic_patients[, "hospital_id"] %>% freq()
#' septic_patients %>% freq("hospital_id")
2018-11-06 16:41:59 +01:00
#' septic_patients %>% freq(hospital_id) #<- easiest to remember (tidyverse)
2018-04-18 12:24:54 +02:00
#'
2018-11-19 13:00:22 +01:00
#'
2018-07-09 14:02:58 +02:00
#' # you could also use `select` or `pull` to get your variables
2018-04-18 12:24:54 +02:00
#' septic_patients %>%
#' filter(hospital_id == "A") %>%
2018-08-31 13:36:19 +02:00
#' select(mo) %>%
2018-04-18 12:24:54 +02:00
#' freq()
#'
2018-11-19 13:00:22 +01:00
#'
2018-07-09 14:02:58 +02:00
#' # multiple selected variables will be pasted together
2018-04-18 12:24:54 +02:00
#' septic_patients %>%
#' left_join_microorganisms %>%
#' filter(hospital_id == "A") %>%
2018-07-01 21:40:37 +02:00
#' freq(genus, species)
2018-04-18 12:24:54 +02:00
#'
2018-11-19 13:00:22 +01:00
#'
2018-11-06 16:41:59 +01:00
#' # group a variable and analyse another
#' septic_patients %>%
#' group_by(hospital_id) %>%
#' freq(gender)
#'
2018-11-19 13:00:22 +01:00
#'
2018-07-03 11:30:40 +02:00
#' # get top 10 bugs of hospital A as a vector
#' septic_patients %>%
#' filter(hospital_id == "A") %>%
2018-08-31 13:36:19 +02:00
#' freq(mo) %>%
2018-07-03 11:30:40 +02:00
#' top_freq(10)
#'
2018-11-19 13:00:22 +01:00
#'
2018-04-18 12:24:54 +02:00
#' # save frequency table to an object
#' years <- septic_patients %>%
#' mutate(year = format(date, "%Y")) %>%
2018-07-01 21:40:37 +02:00
#' freq(year)
2018-06-20 14:47:37 +02:00
#'
2018-11-19 13:00:22 +01:00
#'
2018-07-09 14:02:58 +02:00
#' # show only the top 5
2018-07-03 11:30:40 +02:00
#' years %>% print(nmax = 5)
#'
2018-11-19 13:00:22 +01:00
#'
2018-07-16 16:41:48 +02:00
#' # save to an object with formatted percentages
#' years <- format(years)
#'
2018-11-19 13:00:22 +01:00
#'
2018-07-09 14:02:58 +02:00
#' # print a histogram of numeric values
#' septic_patients %>%
#' freq(age) %>%
2018-10-01 11:39:43 +02:00
#' hist()
2018-07-09 14:02:58 +02:00
#'
2018-11-19 13:00:22 +01:00
#'
2018-07-09 14:02:58 +02:00
#' # or print all points to a regular plot
#' septic_patients %>%
#' freq(age) %>%
#' plot()
#'
2018-11-19 13:00:22 +01:00
#'
2018-07-09 14:02:58 +02:00
#' # transform to a data.frame or tibble
2018-06-20 14:47:37 +02:00
#' septic_patients %>%
2018-07-03 11:30:40 +02:00
#' freq(age) %>%
#' as.data.frame()
2018-07-09 14:02:58 +02:00
#'
2018-11-19 13:00:22 +01:00
#'
2018-07-09 14:02:58 +02:00
#' # or transform (back) to a vector
#' septic_patients %>%
#' freq(age) %>%
#' as.vector()
#'
#' identical(septic_patients %>%
#' freq(age) %>%
#' as.vector() %>%
#' sort(),
2018-07-16 16:41:48 +02:00
#' sort(septic_patients$age)) # TRUE
2018-07-09 14:02:58 +02:00
#'
2018-11-19 13:00:22 +01:00
#'
#' # it also supports `table` objects
2018-09-29 21:54:32 +02:00
#' table(septic_patients$gender,
2018-07-09 14:02:58 +02:00
#' septic_patients$age) %>%
2018-07-16 16:41:48 +02:00
#' freq(sep = " **sep** ")
#'
2018-11-19 13:00:22 +01:00
#'
#' # only get selected columns
#' septic_patients %>%
#' freq(hospital_id) %>%
#' select(item, percent)
#'
#' septic_patients %>%
#' freq(hospital_id) %>%
#' select(-count, -cum_count)
#'
#'
2018-10-01 11:39:43 +02:00
#' # check differences between frequency tables
#' diff(freq(septic_patients$trim),
#' freq(septic_patients$trsu))
2018-07-01 21:40:37 +02:00
frequency_tbl <- function ( x ,
... ,
sort.count = TRUE ,
nmax = getOption ( " max.print.freq" ) ,
na.rm = TRUE ,
row.names = TRUE ,
2018-10-22 12:32:59 +02:00
markdown = ! interactive ( ) ,
2018-07-01 21:40:37 +02:00
digits = 2 ,
2018-09-10 15:45:25 +02:00
quote = FALSE ,
2018-10-19 21:52:08 +02:00
header = ! markdown ,
2018-10-31 12:10:49 +01:00
title = NULL ,
2018-10-23 09:42:26 +02:00
na = " <NA>" ,
2018-12-22 22:39:34 +01:00
droplevels = TRUE ,
2018-12-10 10:13:40 +01:00
sep = " " ,
decimal.mark = getOption ( " OutDec" ) ,
big.mark = ifelse ( decimal.mark != " ," , " ," , " ." ) ) {
2018-07-01 21:40:37 +02:00
2018-07-10 12:27:07 +02:00
mult.columns <- 0
2018-11-06 16:41:59 +01:00
x.group = character ( 0 )
df <- NULL
2018-12-22 22:39:34 +01:00
# x_haslevels <- !is.null(levels(x))
2018-08-24 11:08:20 +02:00
x.name <- NULL
cols <- NULL
2018-12-22 22:39:34 +01:00
if ( any ( class ( x ) == " list" ) ) {
2018-08-24 11:08:20 +02:00
cols <- names ( x )
x <- as.data.frame ( x , stringsAsFactors = FALSE )
x.name <- " a list"
2018-12-22 22:39:34 +01:00
} else if ( any ( class ( x ) == " matrix" ) ) {
2018-08-24 11:08:20 +02:00
x <- as.data.frame ( x , stringsAsFactors = FALSE )
x.name <- " a matrix"
cols <- colnames ( x )
2018-12-22 22:39:34 +01:00
if ( all ( cols %like% " V[0-9]" ) ) {
2018-08-24 11:08:20 +02:00
cols <- NULL
}
}
2018-12-22 22:39:34 +01:00
if ( any ( class ( x ) == " data.frame" ) ) {
2018-11-06 16:41:59 +01:00
x.group <- group_vars ( x )
if ( length ( x.group ) > 1 ) {
x.group <- x.group [1L ]
warning ( " freq supports one grouping variable, only `" , x.group , " ` will be kept." , call. = FALSE )
}
2018-08-24 11:08:20 +02:00
if ( is.null ( x.name ) ) {
x.name <- deparse ( substitute ( x ) )
}
2018-07-01 21:40:37 +02:00
if ( x.name == " ." ) {
x.name <- NULL
}
2018-07-01 22:23:34 +02:00
dots <- base :: eval ( base :: substitute ( base :: alist ( ... ) ) )
2018-07-01 21:40:37 +02:00
ndots <- length ( dots )
2018-08-24 11:08:20 +02:00
if ( ndots < 10 ) {
2018-07-01 21:40:37 +02:00
cols <- as.character ( dots )
2018-07-01 22:23:34 +02:00
if ( ! all ( cols %in% colnames ( x ) ) ) {
2018-12-22 22:39:34 +01:00
stop ( " one or more columns not found: `" , paste ( cols , collapse = " `, `" ) , " `" , call. = FALSE )
2018-07-01 22:23:34 +02:00
}
2018-11-06 16:41:59 +01:00
if ( length ( x.group ) > 0 ) {
x.group_cols <- c ( x.group , cols )
2018-12-22 22:39:34 +01:00
# if (droplevels == TRUE) {
# x <- x %>% mutate_at(vars(x.group_cols), droplevels)
# }
suppressWarnings (
df <- x %>%
group_by_at ( vars ( x.group_cols ) ) %>%
summarise ( count = n ( ) )
)
2018-11-08 16:10:03 +01:00
if ( na.rm == TRUE ) {
df <- df %>% filter_at ( vars ( cols ) , all_vars ( ! is.na ( .) ) )
}
2018-11-06 16:41:59 +01:00
if ( ! missing ( sort.count ) ) {
if ( sort.count == TRUE ) {
df <- df %>% arrange_at ( c ( x.group , " count" ) , desc )
}
}
df <- df %>%
mutate ( cum_count = cumsum ( count ) )
df.topleft <- df [1 , 1 ]
df <- df %>%
ungroup ( ) %>%
# do not repeat group labels
mutate_at ( vars ( x.group ) , funs ( ifelse ( lag ( .) == ., " " , .) ) )
df [1 , 1 ] <- df.topleft
colnames ( df ) [1 : 2 ] <- c ( " group" , " item" )
2018-12-22 22:39:34 +01:00
if ( ! is.null ( levels ( df $ item ) ) & droplevels == TRUE ) {
# is factor
df <- df %>% filter ( count != 0 )
}
2018-11-06 16:41:59 +01:00
}
2018-08-24 11:08:20 +02:00
if ( length ( cols ) > 0 ) {
x <- x [ , cols ]
}
2018-07-01 21:40:37 +02:00
} else if ( ndots >= 10 ) {
2018-12-22 22:39:34 +01:00
stop ( " A maximum of 9 columns can be analysed at the same time." , call. = FALSE )
2018-07-01 21:40:37 +02:00
} else {
cols <- NULL
}
2018-12-22 22:39:34 +01:00
} else if ( any ( class ( x ) == " table" ) ) {
2018-10-22 12:32:59 +02:00
x <- as.data.frame ( x , stringsAsFactors = FALSE )
# now this DF contains 3 columns: the 2 vars and a Freq column
# paste the first 2 cols and repeat them Freq times:
x <- rep ( x = do.call ( paste , c ( x [colnames ( x ) [1 : 2 ] ] , sep = sep ) ) ,
2018-10-23 09:42:26 +02:00
times = x $ Freq )
2018-07-10 12:27:07 +02:00
x.name <- " a `table` object"
2018-07-09 14:02:58 +02:00
cols <- NULL
2018-10-22 12:32:59 +02:00
#mult.columns <- 2
2018-07-01 21:40:37 +02:00
} else {
x.name <- NULL
cols <- NULL
}
2018-04-18 12:24:54 +02:00
if ( ! is.null ( ncol ( x ) ) ) {
2018-12-22 22:39:34 +01:00
if ( ncol ( x ) == 1 & any ( class ( x ) == " data.frame" ) ) {
2018-04-18 12:24:54 +02:00
x <- x %>% pull ( 1 )
} else if ( ncol ( x ) < 10 ) {
mult.columns <- ncol ( x )
2018-10-22 12:32:59 +02:00
x <- do.call ( paste , c ( x [colnames ( x ) ] , sep = sep ) )
2018-04-18 12:24:54 +02:00
} else {
2018-12-22 22:39:34 +01:00
stop ( " A maximum of 9 columns can be analysed at the same time." , call. = FALSE )
2018-04-18 12:24:54 +02:00
}
}
if ( mult.columns > 1 ) {
2018-12-22 22:39:34 +01:00
NAs <- x [is.na ( x ) | x == trimws ( strrep ( " NA " , mult.columns ) ) ]
2018-04-18 12:24:54 +02:00
} else {
NAs <- x [is.na ( x ) ]
}
2018-07-23 14:14:03 +02:00
2018-04-18 12:24:54 +02:00
if ( na.rm == TRUE ) {
2018-07-23 14:14:03 +02:00
x_class <- class ( x )
2018-04-18 12:24:54 +02:00
x <- x [ ! x %in% NAs ]
2018-07-23 14:14:03 +02:00
class ( x ) <- x_class
2018-04-18 12:24:54 +02:00
}
2018-10-19 21:52:08 +02:00
header_txt <- character ( 0 )
2018-04-18 12:24:54 +02:00
2018-12-22 22:39:34 +01:00
markdown_line <- " "
2018-04-18 12:24:54 +02:00
if ( markdown == TRUE ) {
2018-12-22 22:39:34 +01:00
markdown_line <- " \n"
2018-04-18 12:24:54 +02:00
}
2018-12-22 22:39:34 +01:00
x_align <- " l"
2018-04-18 12:24:54 +02:00
if ( mult.columns > 0 ) {
2018-12-22 22:39:34 +01:00
header_txt <- header_txt %>% paste0 ( markdown_line , " Columns: " , mult.columns )
2018-04-18 12:24:54 +02:00
} else {
2018-12-22 22:39:34 +01:00
header_txt <- header_txt %>% paste0 ( markdown_line , " Class: " , class ( x ) %>% rev ( ) %>% paste ( collapse = " > " ) )
2018-09-17 09:42:09 +02:00
if ( ! mode ( x ) %in% class ( x ) ) {
2018-10-19 21:52:08 +02:00
header_txt <- header_txt %>% paste0 ( silver ( paste0 ( " (" , mode ( x ) , " )" ) ) )
2018-10-18 12:10:10 +02:00
}
}
2018-12-14 10:08:51 +01:00
if ( ( length ( NAs ) + length ( x ) > 0 ) > 0 ) {
2018-12-22 22:39:34 +01:00
na_txt <- paste0 ( NAs %>% length ( ) %>% format ( decimal.mark = decimal.mark , big.mark = big.mark ) , " = " ,
2018-12-10 10:13:40 +01:00
( NAs %>% length ( ) / ( NAs %>% length ( ) + x %>% length ( ) ) ) %>% percent ( force_zero = TRUE , round = digits , decimal.mark = decimal.mark ) %>%
2018-12-22 22:39:34 +01:00
sub ( " NaN" , " 0" , ., fixed = TRUE ) )
2018-11-24 20:25:09 +01:00
if ( ! na_txt %like% " ^0 =" ) {
na_txt <- red ( na_txt )
} else {
na_txt <- green ( na_txt )
}
2018-12-22 22:39:34 +01:00
na_txt <- paste0 ( " (of which NA: " , na_txt , " )" )
2018-11-02 10:27:57 +01:00
} else {
2018-11-24 20:25:09 +01:00
na_txt <- " "
2018-04-18 12:24:54 +02:00
}
2018-11-24 20:25:09 +01:00
2018-12-22 22:39:34 +01:00
if ( ! is.null ( levels ( x ) ) ) {
n_levels <- x %>% levels ( ) %>% length ( )
n_levels_empty <- n_levels - x %>% droplevels ( ) %>% levels ( ) %>% length ( )
n_levels_list <- levels ( x )
if ( n_levels > 5 ) {
n_levels_list <- c ( n_levels_list [1 : 5 ] , " ..." )
}
if ( is.ordered ( x ) ) {
n_levels_list <- paste0 ( levels ( x ) , collapse = " < " )
} else {
n_levels_list <- paste0 ( levels ( x ) , collapse = " , " )
}
header_txt <- header_txt %>% paste0 ( markdown_line , " \nLevels: " , n_levels_list )
# drop levels of non-existing factor values,
# since dplyr >= 0.8.0 does not do this anymore in group_by
if ( droplevels == TRUE ) {
x <- droplevels ( x )
}
}
header_txt <- header_txt %>% paste0 ( markdown_line , " \nLength: " , ( NAs %>% length ( ) + x %>% length ( ) ) %>% format ( decimal.mark = decimal.mark , big.mark = big.mark ) ,
" " , na_txt )
header_txt <- header_txt %>% paste0 ( markdown_line , " \nUnique: " , x %>% n_distinct ( ) %>% format ( decimal.mark = decimal.mark , big.mark = big.mark ) )
2018-04-18 12:24:54 +02:00
2018-09-17 09:42:09 +02:00
if ( NROW ( x ) > 0 & any ( class ( x ) == " character" ) ) {
2018-12-22 22:39:34 +01:00
header_txt <- header_txt %>% paste0 ( " \n" )
header_txt <- header_txt %>% paste0 ( markdown_line , " \nShortest: " , x %>% base :: nchar ( ) %>% base :: min ( na.rm = TRUE ) %>% format ( decimal.mark = decimal.mark , big.mark = big.mark ) )
header_txt <- header_txt %>% paste0 ( markdown_line , " \nLongest: " , x %>% base :: nchar ( ) %>% base :: max ( na.rm = TRUE ) %>% format ( decimal.mark = decimal.mark , big.mark = big.mark ) )
2018-12-10 10:13:40 +01:00
}
if ( NROW ( x ) > 0 & any ( class ( x ) == " mo" ) ) {
2018-12-22 22:39:34 +01:00
header_txt <- header_txt %>% paste0 ( " \n" )
header_txt <- header_txt %>% paste0 ( markdown_line , " \nFamilies: " , x %>% mo_family ( ) %>% n_distinct ( ) %>% format ( decimal.mark = decimal.mark , big.mark = big.mark ) )
header_txt <- header_txt %>% paste0 ( markdown_line , " \nGenera: " , x %>% mo_genus ( ) %>% n_distinct ( ) %>% format ( decimal.mark = decimal.mark , big.mark = big.mark ) )
header_txt <- header_txt %>% paste0 ( markdown_line , " \nSpecies: " , x %>% mo_species ( ) %>% n_distinct ( ) %>% format ( decimal.mark = decimal.mark , big.mark = big.mark ) )
2018-09-17 09:42:09 +02:00
}
2018-11-30 12:05:59 +01:00
if ( NROW ( x ) > 0 & any ( class ( x ) == " difftime" ) & ! is.hms ( x ) ) {
2018-12-22 22:39:34 +01:00
header_txt <- header_txt %>% paste0 ( " \n" )
header_txt <- header_txt %>% paste ( markdown_line , " \nUnits: " , attributes ( x ) $ units )
2018-10-12 16:35:18 +02:00
x <- as.double ( x )
2018-10-19 21:52:08 +02:00
# after this, the numeric header_txt continues
2018-10-12 16:35:18 +02:00
}
2018-12-22 22:39:34 +01:00
if ( NROW ( x ) > 0 & any ( class ( x ) %in% c ( " double" , " integer" , " numeric" , " raw" , " single" ) ) ) {
2018-04-18 12:24:54 +02:00
# right align number
2018-07-03 11:30:40 +02:00
Tukey_five <- stats :: fivenum ( x , na.rm = TRUE )
2018-12-22 22:39:34 +01:00
x_align <- " r"
header_txt <- header_txt %>% paste0 ( " \n" )
header_txt <- header_txt %>% paste ( markdown_line , " \nMean: " , x %>% base :: mean ( na.rm = TRUE ) %>% format ( digits = digits , decimal.mark = decimal.mark , big.mark = big.mark ) )
header_txt <- header_txt %>% paste0 ( markdown_line , " \nStd. dev.: " , x %>% stats :: sd ( na.rm = TRUE ) %>% format ( digits = digits , decimal.mark = decimal.mark , big.mark = big.mark ) ,
" (CV: " , x %>% cv ( na.rm = TRUE ) %>% format ( digits = digits , decimal.mark = decimal.mark , big.mark = big.mark ) ,
" , MAD: " , x %>% stats :: mad ( na.rm = TRUE ) %>% format ( digits = digits , decimal.mark = decimal.mark , big.mark = big.mark ) , " )" )
header_txt <- header_txt %>% paste0 ( markdown_line , " \nFive-Num: " , Tukey_five %>% format ( digits = digits , decimal.mark = decimal.mark , big.mark = big.mark ) %>% trimws ( ) %>% paste ( collapse = " | " ) ,
" (IQR: " , ( Tukey_five [4 ] - Tukey_five [2 ] ) %>% format ( digits = digits , decimal.mark = decimal.mark , big.mark = big.mark ) ,
" , CQV: " , x %>% cqv ( na.rm = TRUE ) %>% format ( digits = digits , decimal.mark = decimal.mark , big.mark = big.mark ) , " )" )
2018-04-18 12:24:54 +02:00
outlier_length <- length ( boxplot.stats ( x ) $ out )
2018-12-22 22:39:34 +01:00
header_txt <- header_txt %>% paste0 ( markdown_line , " \nOutliers: " , outlier_length )
2018-04-18 12:24:54 +02:00
if ( outlier_length > 0 ) {
2018-12-22 22:39:34 +01:00
header_txt <- header_txt %>% paste0 ( " (unique count: " , boxplot.stats ( x ) $ out %>% n_distinct ( ) , " )" )
2018-04-18 12:24:54 +02:00
}
}
2018-08-03 09:59:39 +02:00
if ( NROW ( x ) > 0 & any ( class ( x ) == " rsi" ) ) {
2018-12-22 22:39:34 +01:00
header_txt <- header_txt %>% paste0 ( " \n" )
2018-10-23 09:42:26 +02:00
cnt_S <- sum ( x == " S" , na.rm = TRUE )
2018-11-02 10:27:57 +01:00
cnt_IR <- sum ( x %in% c ( " I" , " R" ) , na.rm = TRUE )
2018-12-22 22:39:34 +01:00
header_txt <- header_txt %>% paste ( markdown_line , " \n%IR: " ,
2018-12-10 10:13:40 +01:00
( cnt_IR / sum ( ! is.na ( x ) , na.rm = TRUE ) ) %>% percent ( force_zero = TRUE , round = digits , decimal.mark = decimal.mark ) ,
2018-12-22 22:39:34 +01:00
paste0 ( " (ratio S : IR = 1.0 : " , ( cnt_IR / cnt_S ) %>% format ( digits = 1 , nsmall = 1 , decimal.mark = decimal.mark , big.mark = big.mark ) , " )" ) )
2018-11-02 10:27:57 +01:00
if ( NROW ( x ) < 30 ) {
2018-12-22 22:39:34 +01:00
header_txt <- header_txt %>% paste ( markdown_line , red ( " \nToo few isolates for reliable resistance interpretation." ) )
2018-11-02 10:27:57 +01:00
}
2018-08-01 22:37:28 +02:00
}
2018-04-18 12:24:54 +02:00
formatdates <- " %e %B %Y" # = d mmmm yyyy
2018-10-09 13:53:33 +02:00
if ( is.hms ( x ) ) {
2018-04-18 12:24:54 +02:00
x <- x %>% as.POSIXlt ( )
formatdates <- " %H:%M:%S"
}
2018-12-22 22:39:34 +01:00
if ( NROW ( x ) > 0 & any ( class ( x ) %in% c ( " Date" , " POSIXct" , " POSIXlt" ) ) ) {
header_txt <- header_txt %>% paste0 ( " \n" )
2018-06-19 15:20:14 +02:00
mindate <- x %>% min ( na.rm = TRUE )
maxdate <- x %>% max ( na.rm = TRUE )
2018-12-22 22:39:34 +01:00
maxdate_days <- difftime ( maxdate , mindate , units = " auto" ) %>% as.double ( )
2018-06-19 15:20:14 +02:00
mediandate <- x %>% median ( na.rm = TRUE )
2018-12-22 22:39:34 +01:00
median_days <- difftime ( mediandate , mindate , units = " auto" ) %>% as.double ( )
2018-06-20 14:47:37 +02:00
2018-08-23 21:27:15 +02:00
if ( formatdates == " %H:%M:%S" ) {
# hms
2018-12-22 22:39:34 +01:00
header_txt <- header_txt %>% paste0 ( markdown_line , " \nEarliest: " , mindate %>% format ( formatdates ) %>% trimws ( ) )
header_txt <- header_txt %>% paste0 ( markdown_line , " \nLatest: " , maxdate %>% format ( formatdates ) %>% trimws ( ) ,
" (+" , difftime ( maxdate , mindate , units = " mins" ) %>% as.double ( ) %>% format ( digits = digits , decimal.mark = decimal.mark , big.mark = big.mark ) , " min.)" )
2018-08-23 21:27:15 +02:00
} else {
# other date formats
2018-12-22 22:39:34 +01:00
header_txt <- header_txt %>% paste0 ( markdown_line , " \nOldest: " , mindate %>% format ( formatdates ) %>% trimws ( ) )
header_txt <- header_txt %>% paste0 ( markdown_line , " \nNewest: " , maxdate %>% format ( formatdates ) %>% trimws ( ) ,
" (+" , difftime ( maxdate , mindate , units = " auto" ) %>% as.double ( ) %>% format ( digits = digits , decimal.mark = decimal.mark , big.mark = big.mark ) , " )" )
2018-08-23 21:27:15 +02:00
}
2018-12-22 22:39:34 +01:00
header_txt <- header_txt %>% paste0 ( markdown_line , " \nMedian: " , mediandate %>% format ( formatdates ) %>% trimws ( ) ,
" (~" , percent ( median_days / maxdate_days , round = 0 , decimal.mark = decimal.mark ) , " )" )
2018-04-18 12:24:54 +02:00
}
2018-12-22 22:39:34 +01:00
if ( any ( class ( x ) == " POSIXlt" ) ) {
2018-04-18 12:24:54 +02:00
x <- x %>% format ( formatdates )
}
2018-05-09 11:44:46 +02:00
nmax.set <- ! missing ( nmax )
2018-07-01 21:40:37 +02:00
if ( ! nmax.set & is.null ( nmax ) & is.null ( base :: getOption ( " max.print.freq" , default = NULL ) ) ) {
2018-05-09 11:44:46 +02:00
# default for max print setting
nmax <- 15
2018-07-01 21:40:37 +02:00
} else if ( is.null ( nmax ) ) {
nmax <- length ( x )
2018-05-09 11:44:46 +02:00
}
2018-07-03 11:30:40 +02:00
if ( nmax %in% c ( 0 , Inf , NA , NULL ) ) {
2018-04-18 12:24:54 +02:00
nmax <- length ( x )
}
2018-12-22 22:39:34 +01:00
column_names <- c ( " Item" , " Count" , " Percent" , " Cum. Count" , " Cum. Percent" )
column_names_df <- c ( " item" , " count" , " percent" , " cum_count" , " cum_percent" )
column_align <- c ( x_align , " r" , " r" , " r" , " r" )
2018-10-23 09:42:26 +02:00
2018-11-06 16:41:59 +01:00
if ( is.null ( df ) ) {
# create table with counts and percentages
df <- tibble ( item = x ) %>%
group_by ( item ) %>%
summarise ( count = n ( ) )
2018-10-23 09:42:26 +02:00
2018-11-06 16:41:59 +01:00
# sort according to setting
if ( sort.count == TRUE ) {
df <- df %>% arrange ( desc ( count ) , item )
} else {
df <- df %>% arrange ( item )
}
} else {
column_names <- c ( " Group" , column_names )
column_names_df <- c ( " group" , column_names_df )
column_align <- c ( " l" , column_align )
}
2018-04-18 12:24:54 +02:00
2018-12-22 22:39:34 +01:00
if ( df $ item %>% paste ( collapse = " ," ) %like% " \033" ) {
2018-08-24 14:18:38 +02:00
# remove escape char
# see https://en.wikipedia.org/wiki/Escape_character#ASCII_escape_character
2018-12-22 22:39:34 +01:00
df <- df %>% mutate ( item = item %>% gsub ( " \033" , " " , ., fixed = TRUE ) )
2018-04-18 12:24:54 +02:00
}
2018-09-10 15:45:25 +02:00
if ( quote == TRUE ) {
df $ item <- paste0 ( ' "' , df $ item , ' "' )
2018-11-06 16:41:59 +01:00
if ( length ( x.group ) != 0 ) {
df $ group <- paste0 ( ' "' , df $ group , ' "' )
}
2018-09-10 15:45:25 +02:00
}
2018-07-01 21:40:37 +02:00
df <- as.data.frame ( df , stringsAsFactors = FALSE )
df $ percent <- df $ count / base :: sum ( df $ count , na.rm = TRUE )
2018-11-06 16:41:59 +01:00
if ( length ( x.group ) == 0 ) {
df $ cum_count <- base :: cumsum ( df $ count )
}
2018-07-01 21:40:37 +02:00
df $ cum_percent <- df $ cum_count / base :: sum ( df $ count , na.rm = TRUE )
2018-11-06 16:41:59 +01:00
if ( length ( x.group ) != 0 ) {
# sort columns
df <- df [ , column_names_df ]
}
2018-04-18 12:24:54 +02:00
2018-07-01 21:40:37 +02:00
if ( markdown == TRUE ) {
2018-12-22 22:39:34 +01:00
tbl_format <- " markdown"
2018-05-09 11:44:46 +02:00
} else {
2018-12-22 22:39:34 +01:00
tbl_format <- " pandoc"
2018-04-18 12:24:54 +02:00
}
2018-05-09 11:44:46 +02:00
2018-10-31 12:10:49 +01:00
if ( ! is.null ( title ) ) {
2018-11-06 16:41:59 +01:00
title <- trimws ( gsub ( " ^Frequency table of" , " " , title [1L ] , ignore.case = TRUE ) )
2018-10-31 12:10:49 +01:00
}
2018-10-23 09:42:26 +02:00
structure ( .Data = df ,
2018-12-22 22:39:34 +01:00
class = c ( " frequency_tbl" , class ( df ) ) ,
2018-11-06 16:41:59 +01:00
opt = list ( title = title ,
data = x.name ,
2018-10-23 09:42:26 +02:00
vars = cols ,
2018-11-06 16:41:59 +01:00
group_var = x.group ,
2018-10-23 09:42:26 +02:00
header = header ,
header_txt = header_txt ,
row_names = row.names ,
column_names = column_names ,
column_align = column_align ,
2018-12-10 10:13:40 +01:00
decimal.mark = decimal.mark ,
big.mark = big.mark ,
2018-10-23 09:42:26 +02:00
tbl_format = tbl_format ,
na = na ,
nmax = nmax ,
nmax.set = nmax.set ) )
2018-04-18 12:24:54 +02:00
}
#' @rdname freq
#' @export
2018-07-01 21:40:37 +02:00
freq <- frequency_tbl
2018-06-20 14:47:37 +02:00
#' @rdname freq
#' @export
#' @importFrom dplyr top_n pull
top_freq <- function ( f , n ) {
2018-12-22 22:39:34 +01:00
if ( ! " frequency_tbl" %in% class ( f ) ) {
stop ( " top_freq can only be applied to frequency tables" , call. = FALSE )
2018-06-20 14:47:37 +02:00
}
if ( ! is.numeric ( n ) | length ( n ) != 1L ) {
2018-12-22 22:39:34 +01:00
stop ( " For top_freq, `nmax` must be a number of length 1" , call. = FALSE )
2018-06-20 14:47:37 +02:00
}
top <- f %>% top_n ( n , count )
vect <- top %>% pull ( item )
names ( vect ) <- top %>% pull ( count )
if ( length ( vect ) > abs ( n ) ) {
message ( " top_freq: selecting " , length ( vect ) , " items instead of " , abs ( n ) , " , because of ties" )
}
vect
}
2018-10-01 11:39:43 +02:00
#' @noRd
2018-09-29 21:54:32 +02:00
#' @exportMethod diff.frequency_tbl
#' @importFrom dplyr %>% full_join mutate
#' @export
diff.frequency_tbl <- function ( x , y , ... ) {
# check classes
if ( ! " frequency_tbl" %in% class ( x )
| ! " frequency_tbl" %in% class ( y ) ) {
stop ( " Both x and y must be a frequency table." )
}
2018-10-01 14:44:40 +02:00
cat ( " Differences between frequency tables" )
if ( identical ( x , y ) ) {
cat ( " \n\nNo differences found.\n" )
return ( invisible ( ) )
}
2018-09-29 21:54:32 +02:00
x.attr <- attributes ( x ) $ opt
# only keep item and count
x <- x [ , 1 : 2 ]
y <- y [ , 1 : 2 ]
x <- x %>%
full_join ( y ,
by = colnames ( x ) [1 ] ,
suffix = c ( " .x" , " .y" ) ) %>%
mutate (
diff = case_when (
is.na ( count.y ) ~ - count.x ,
is.na ( count.x ) ~ count.y ,
TRUE ~ count.y - count.x ) ) %>%
mutate (
diff.percent = percent (
diff / count.x ,
2018-10-01 11:39:43 +02:00
force_zero = TRUE ) ) %>%
2018-12-22 22:39:34 +01:00
mutate ( diff = ifelse ( diff %like% " ^-" ,
2018-10-01 11:39:43 +02:00
diff ,
paste0 ( " +" , diff ) ) ,
2018-12-22 22:39:34 +01:00
diff.percent = ifelse ( diff.percent %like% " ^-" ,
2018-10-01 11:39:43 +02:00
diff.percent ,
paste0 ( " +" , diff.percent ) ) )
2018-09-29 21:54:32 +02:00
print (
knitr :: kable ( x ,
format = x.attr $ tbl_format ,
col.names = c ( " Item" , " Count #1" , " Count #2" , " Difference" , " Diff. percent" ) ,
2018-10-01 14:44:40 +02:00
align = paste0 ( x.attr $ column_align [1 ] , " rrrr" ) ,
2018-09-29 21:54:32 +02:00
padding = 1 )
)
}
2018-07-03 11:30:40 +02:00
#' @rdname freq
2018-07-01 21:40:37 +02:00
#' @exportMethod print.frequency_tbl
#' @importFrom knitr kable
#' @importFrom dplyr n_distinct
2018-10-22 12:32:59 +02:00
#' @importFrom crayon bold silver
2018-07-01 21:40:37 +02:00
#' @export
2018-11-16 21:57:55 +01:00
print.frequency_tbl <- function ( x , nmax = getOption ( " max.print.freq" , default = 15 ) ,
2018-12-10 10:13:40 +01:00
markdown = ! interactive ( ) , header = ! markdown ,
decimal.mark = getOption ( " OutDec" ) ,
big.mark = ifelse ( decimal.mark != " ," , " ," , " ." ) ,
... ) {
2018-07-01 21:40:37 +02:00
2018-12-22 22:39:34 +01:00
opt <- attr ( x , " opt" )
2018-07-01 21:40:37 +02:00
2018-08-24 11:08:20 +02:00
if ( length ( opt $ vars ) == 0 ) {
opt $ vars <- NULL
}
2018-11-06 16:41:59 +01:00
if ( is.null ( opt $ title ) ) {
if ( ! is.null ( opt $ data ) & ! is.null ( opt $ vars ) ) {
title <- paste0 ( " `" , paste0 ( opt $ vars , collapse = " ` and `" ) , " ` from " , opt $ data )
} else if ( ! is.null ( opt $ data ) & is.null ( opt $ vars ) ) {
title <- opt $ data
} else if ( is.null ( opt $ data ) & ! is.null ( opt $ vars ) ) {
title <- paste0 ( " `" , paste0 ( opt $ vars , collapse = " ` and `" ) , " `" )
} else {
title <- " "
}
if ( title != " " & length ( opt $ group_var ) != 0 ) {
group_var <- paste0 ( " (grouped by `" , opt $ group_var , " `)" )
if ( opt $ tbl_format == " pandoc" ) {
group_var <- silver ( group_var )
}
title <- paste ( title , group_var )
}
2018-11-24 20:25:09 +01:00
title <- trimws ( title )
if ( title == " " ) {
title <- " Frequency table"
} else {
title <- paste ( " Frequency table of" , trimws ( title ) )
}
2018-07-01 21:40:37 +02:00
} else {
2018-11-06 16:41:59 +01:00
title <- opt $ title
2018-07-01 21:40:37 +02:00
}
2018-07-03 11:30:40 +02:00
if ( ! missing ( nmax ) ) {
opt $ nmax <- nmax
opt $ nmax.set <- TRUE
}
2018-12-10 10:13:40 +01:00
if ( ! missing ( decimal.mark ) ) {
opt $ decimal.mark <- decimal.mark
}
if ( ! missing ( big.mark ) ) {
opt $ big.mark <- big.mark
}
2018-08-23 21:27:15 +02:00
dots <- list ( ... )
if ( " markdown" %in% names ( dots ) ) {
if ( dots $ markdown == TRUE ) {
opt $ tbl_format <- " markdown"
} else {
opt $ tbl_format <- " pandoc"
}
}
2018-11-16 21:57:55 +01:00
if ( ! missing ( markdown ) ) {
opt $ tbl_format <- " markdown"
}
if ( ! missing ( header ) ) {
opt $ header <- header
}
2018-07-03 11:30:40 +02:00
2018-10-22 13:06:54 +02:00
# bold title
2018-10-18 12:10:10 +02:00
if ( opt $ tbl_format == " pandoc" ) {
2018-10-22 13:06:54 +02:00
title <- bold ( title )
} else if ( opt $ tbl_format == " markdown" ) {
2018-11-02 10:27:57 +01:00
title <- paste0 ( " \n**" , title , " **" )
2018-10-18 12:10:10 +02:00
}
2018-10-19 21:52:08 +02:00
if ( opt $ header == TRUE ) {
cat ( title , " \n" )
if ( ! is.null ( opt $ header_txt ) ) {
cat ( opt $ header_txt )
}
2018-10-22 12:32:59 +02:00
} else if ( opt $ tbl_format == " markdown" ) {
# do print title as caption in markdown
cat ( " \n" , title , sep = " " )
2018-07-01 21:40:37 +02:00
}
if ( NROW ( x ) == 0 ) {
2018-12-22 22:39:34 +01:00
cat ( " \n\nNo observations.\n" )
2018-07-01 21:40:37 +02:00
return ( invisible ( ) )
}
# save old NA setting for kable
opt.old <- options ( ) $ knitr.kable.NA
2018-10-23 09:42:26 +02:00
if ( is.null ( opt $ na ) ) {
opt $ na <- " <NA>"
}
options ( knitr.kable.NA = opt $ na )
2018-07-01 21:40:37 +02:00
if ( nrow ( x ) > opt $ nmax & opt $ tbl_format != " markdown" ) {
x.rows <- nrow ( x )
2018-12-22 22:39:34 +01:00
x.unprinted <- base :: sum ( x [ ( opt $ nmax + 1 ) : nrow ( x ) , " count" ] , na.rm = TRUE )
2018-07-01 21:40:37 +02:00
x.printed <- base :: sum ( x $ count ) - x.unprinted
2018-07-03 11:30:40 +02:00
if ( opt $ nmax.set == TRUE ) {
nmax <- opt $ nmax
} else {
nmax <- getOption ( " max.print.freq" , default = 15 )
}
x <- x [1 : nmax , ]
2018-07-01 21:40:37 +02:00
if ( opt $ nmax.set == TRUE ) {
2018-12-22 22:39:34 +01:00
footer <- paste ( " [ reached `nmax = " , opt $ nmax , " `" , sep = " " )
2018-07-01 21:40:37 +02:00
} else {
footer <- ' [ reached getOption("max.print.freq")'
}
footer <- paste ( footer ,
2018-12-22 22:39:34 +01:00
" -- omitted " ,
2018-12-10 10:13:40 +01:00
format ( x.rows - opt $ nmax , big.mark = opt $ big.mark ) ,
2018-12-22 22:39:34 +01:00
" entries, n = " ,
2018-12-10 10:13:40 +01:00
format ( x.unprinted , big.mark = opt $ big.mark ) ,
2018-12-22 22:39:34 +01:00
" (" ,
2018-12-10 10:13:40 +01:00
( x.unprinted / ( x.unprinted + x.printed ) ) %>% percent ( force_zero = TRUE , decimal.mark = opt $ decimal.mark ) ,
2018-12-22 22:39:34 +01:00
" ) ]\n" , sep = " " )
2018-10-22 12:32:59 +02:00
if ( opt $ tbl_format == " pandoc" ) {
footer <- silver ( footer ) # only silver in regular printing
}
2018-07-01 21:40:37 +02:00
} else {
footer <- NULL
}
2018-11-19 13:00:22 +01:00
if ( " item" %in% colnames ( x ) ) {
2018-12-22 22:39:34 +01:00
if ( any ( class ( x $ item ) %in% c ( " double" , " integer" , " numeric" , " raw" , " single" ) ) ) {
2018-12-10 10:13:40 +01:00
x $ item <- format ( x $ item , decimal.mark = opt $ decimal.mark , big.mark = opt $ big.mark )
2018-11-19 13:06:07 +01:00
}
2018-11-19 13:00:22 +01:00
} else {
opt $ column_names <- opt $ column_names [ ! opt $ column_names == " Item" ]
}
if ( " count" %in% colnames ( x ) ) {
if ( all ( x $ count == 1 ) ) {
2018-12-22 22:39:34 +01:00
warning ( " All observations are unique." , call. = FALSE )
2018-11-19 13:00:22 +01:00
}
2018-12-10 10:13:40 +01:00
x $ count <- format ( x $ count , decimal.mark = opt $ decimal.mark , big.mark = opt $ big.mark )
2018-11-19 13:00:22 +01:00
} else {
opt $ column_names <- opt $ column_names [ ! opt $ column_names == " Count" ]
}
if ( " percent" %in% colnames ( x ) ) {
2018-12-10 10:13:40 +01:00
x $ percent <- percent ( x $ percent , force_zero = TRUE , decimal.mark = opt $ decimal.mark )
2018-11-19 13:00:22 +01:00
} else {
opt $ column_names <- opt $ column_names [ ! opt $ column_names == " Percent" ]
}
if ( " cum_count" %in% colnames ( x ) ) {
2018-12-10 10:13:40 +01:00
x $ cum_count <- format ( x $ cum_count , decimal.mark = opt $ decimal.mark , big.mark = opt $ big.mark )
2018-11-19 13:00:22 +01:00
} else {
opt $ column_names <- opt $ column_names [ ! opt $ column_names == " Cum. Count" ]
}
if ( " cum_percent" %in% colnames ( x ) ) {
2018-12-10 10:13:40 +01:00
x $ cum_percent <- percent ( x $ cum_percent , force_zero = TRUE , decimal.mark = opt $ decimal.mark )
2018-11-19 13:00:22 +01:00
} else {
opt $ column_names <- opt $ column_names [ ! opt $ column_names == " Cum. Percent" ]
}
2018-07-01 21:40:37 +02:00
2018-10-23 09:42:26 +02:00
if ( opt $ tbl_format == " markdown" ) {
2018-11-02 10:27:57 +01:00
cat ( " \n" )
2018-10-23 09:42:26 +02:00
}
2018-07-01 21:40:37 +02:00
print (
knitr :: kable ( x ,
format = opt $ tbl_format ,
row.names = opt $ row_names ,
col.names = opt $ column_names ,
align = opt $ column_align ,
padding = 1 )
)
if ( ! is.null ( footer ) ) {
cat ( footer )
}
2018-10-23 09:42:26 +02:00
if ( opt $ tbl_format == " markdown" ) {
cat ( " \n\n" )
} else {
2018-12-22 22:39:34 +01:00
cat ( " \n" )
2018-10-23 09:42:26 +02:00
}
2018-07-01 21:40:37 +02:00
# reset old kable setting
options ( knitr.kable.NA = opt.old )
return ( invisible ( ) )
}
2018-06-20 14:47:37 +02:00
2018-07-03 11:30:40 +02:00
#' @noRd
#' @exportMethod as.data.frame.frequency_tbl
#' @export
as.data.frame.frequency_tbl <- function ( x , ... ) {
2018-12-22 22:39:34 +01:00
attr ( x , " package" ) <- NULL
attr ( x , " opt" ) <- NULL
2018-07-03 11:30:40 +02:00
as.data.frame.data.frame ( x , ... )
}
2018-07-08 22:14:55 +02:00
2018-07-09 14:02:58 +02:00
#' @noRd
#' @exportMethod as_tibble.frequency_tbl
#' @export
#' @importFrom dplyr as_tibble
as_tibble.frequency_tbl <- function ( x , validate = TRUE , ... , rownames = NA ) {
2018-12-22 22:39:34 +01:00
attr ( x , " package" ) <- NULL
attr ( x , " opt" ) <- NULL
2018-07-09 14:02:58 +02:00
as_tibble ( x = as.data.frame ( x ) , validate = validate , ... , rownames = rownames )
}
2018-07-08 22:14:55 +02:00
#' @noRd
#' @exportMethod hist.frequency_tbl
#' @export
#' @importFrom graphics hist
2018-12-22 22:39:34 +01:00
hist.frequency_tbl <- function ( x , breaks = " Sturges" , main = NULL , xlab = NULL , ... ) {
opt <- attr ( x , " opt" )
2018-11-01 17:06:08 +01:00
if ( ! class ( x $ item ) %in% c ( " numeric" , " double" , " integer" , " Date" ) ) {
2018-12-22 22:39:34 +01:00
stop ( " `x` must be numeric or Date." , call. = FALSE )
2018-11-01 17:06:08 +01:00
}
2018-07-08 22:14:55 +02:00
if ( ! is.null ( opt $ vars ) ) {
title <- opt $ vars
2018-11-01 17:06:08 +01:00
} else if ( ! is.null ( opt $ data ) ) {
title <- opt $ data
2018-07-08 22:14:55 +02:00
} else {
2018-11-01 17:06:08 +01:00
title <- " frequency table"
}
if ( class ( x $ item ) == " Date" ) {
x <- as.Date ( as.vector ( x ) , origin = " 1970-01-01" )
} else {
x <- as.vector ( x )
}
if ( is.null ( main ) ) {
main <- paste ( " Histogram of" , title )
2018-07-08 22:14:55 +02:00
}
2018-12-22 22:39:34 +01:00
if ( is.null ( xlab ) ) {
xlab <- title
}
hist ( x , main = main , xlab = xlab , breaks = breaks , ... )
2018-07-08 22:14:55 +02:00
}
#' @noRd
#' @exportMethod plot.frequency_tbl
#' @export
plot.frequency_tbl <- function ( x , y , ... ) {
2018-12-22 22:39:34 +01:00
opt <- attr ( x , " opt" )
2018-07-08 22:14:55 +02:00
if ( ! is.null ( opt $ vars ) ) {
title <- opt $ vars
} else {
title <- " "
}
2018-07-09 14:02:58 +02:00
plot ( x = x $ item , y = x $ count , ylab = " Count" , xlab = title , ... )
}
2018-07-08 22:14:55 +02:00
2018-07-09 14:02:58 +02:00
#' @noRd
#' @exportMethod as.vector.frequency_tbl
#' @export
as.vector.frequency_tbl <- function ( x , mode = " any" ) {
as.vector ( rep ( x $ item , x $ count ) , mode = mode )
2018-07-08 22:14:55 +02:00
}
2018-07-16 16:41:48 +02:00
#' @noRd
#' @exportMethod format.frequency_tbl
#' @export
format.frequency_tbl <- function ( x , digits = 1 , ... ) {
2018-12-22 22:39:34 +01:00
opt <- attr ( x , " opt" )
2018-07-16 16:41:48 +02:00
if ( opt $ nmax.set == TRUE ) {
nmax <- opt $ nmax
} else {
nmax <- getOption ( " max.print.freq" , default = 15 )
}
x <- x [1 : nmax , ]
x $ percent <- percent ( x $ percent , round = digits , force_zero = TRUE )
x $ cum_percent <- percent ( x $ cum_percent , round = digits , force_zero = TRUE )
base :: format.data.frame ( x , ... )
}