mirror of
https://github.com/msberends/AMR.git
synced 2025-07-09 01:22:25 +02:00
breaks param, tidyr dep change, freq markdown
This commit is contained in:
23
R/abname.R
23
R/abname.R
@ -133,18 +133,19 @@ abname <- function(abcode,
|
||||
abcode[i] <- abx[which(abx[,from] == abcode[i]),] %>% pull(to) %>% .[1]
|
||||
}
|
||||
|
||||
# when nothing found, try first chars of official name
|
||||
# if (is.na(abcode[i])) {
|
||||
# abcode[i] <- antibiotics %>%
|
||||
# filter(official %like% paste0('^', abcode.bak[i])) %>%
|
||||
# pull(to) %>%
|
||||
# .[1]
|
||||
# next
|
||||
# }
|
||||
|
||||
if (is.na(abcode[i]) | length(abcode[i] == 0)) {
|
||||
abcode[i] <- abcode.bak[i]
|
||||
warning('Code "', abcode.bak[i], '" not found in antibiotics list.', call. = FALSE)
|
||||
# try as.atc
|
||||
try(suppressWarnings(
|
||||
abcode[i] <- as.atc(abcode[i])
|
||||
), silent = TRUE)
|
||||
if (is.na(abcode[i])) {
|
||||
# still not found
|
||||
abcode[i] <- abcode.bak[i]
|
||||
warning('Code "', abcode.bak[i], '" not found in antibiotics list.', call. = FALSE)
|
||||
} else {
|
||||
# fill in the found ATC code
|
||||
abcode[i] <- abname(abcode[i], from = "atc", to = to)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
8
R/atc.R
8
R/atc.R
@ -37,6 +37,7 @@
|
||||
#' as.atc("J01FA01")
|
||||
#' as.atc("Erythromycin")
|
||||
#' as.atc("eryt")
|
||||
#' as.atc(" eryt 123")
|
||||
#' as.atc("ERYT")
|
||||
#' as.atc("ERY")
|
||||
#' as.atc("Erythrocin") # Trade name
|
||||
@ -50,6 +51,10 @@
|
||||
as.atc <- function(x) {
|
||||
|
||||
x.new <- rep(NA_character_, length(x))
|
||||
x <- trimws(x, which = "both")
|
||||
# keep only a-z when it's not an ATC code
|
||||
x[!x %like% "[A-Z][0-9]{2}[A-Z]{2}[0-9]{2}"] <- gsub("[^a-zA-Z]+", "", x[!x %like% "[A-Z][0-9]{2}[A-Z]{2}[0-9]{2}"])
|
||||
|
||||
x.bak <- x
|
||||
x <- unique(x[!is.na(x)])
|
||||
failures <- character(0)
|
||||
@ -64,7 +69,7 @@ as.atc <- function(x) {
|
||||
x.new[is.na(x.new) & x.bak == x[i]] <- found[1L]
|
||||
}
|
||||
|
||||
# try ATC in code form, even if it does not exist in the antibiotics data set YET
|
||||
# try ATC in ATC code form, even if it does not exist in the antibiotics data set YET
|
||||
if (length(found) == 0 & x[i] %like% '[A-Z][0-9][0-9][A-Z][A-Z][0-9][0-9]') {
|
||||
warning("ATC code ", x[i], " is not yet in the `antibiotics` data set.")
|
||||
fail <- FALSE
|
||||
@ -134,7 +139,6 @@ as.atc <- function(x) {
|
||||
call. = FALSE)
|
||||
}
|
||||
class(x.new) <- "atc"
|
||||
attr(x.new, 'package') <- 'AMR'
|
||||
x.new
|
||||
}
|
||||
|
||||
|
111
R/freq.R
111
R/freq.R
@ -23,10 +23,10 @@
|
||||
#' @param ... up to nine different columns of \code{x} when \code{x} is a \code{data.frame} or \code{tibble}, to calculate frequencies from - see Examples
|
||||
#' @param sort.count sort on count, i.e. frequencies. This will be \code{TRUE} at default for everything except for factors.
|
||||
#' @param nmax number of row to print. The default, \code{15}, uses \code{\link{getOption}("max.print.freq")}. Use \code{nmax = 0}, \code{nmax = Inf}, \code{nmax = NULL} or \code{nmax = NA} to print all rows.
|
||||
#' @param na.rm a logical value indicating whether \code{NA} values should be removed from the frequency table. The header_txt will always print the amount of \code{NA}s.
|
||||
#' @param na.rm a logical value indicating whether \code{NA} values should be removed from the frequency table. The header will always print the amount of \code{NA}s.
|
||||
#' @param row.names a logical value indicating whether row indices should be printed as \code{1:nrow(x)}
|
||||
#' @param markdown print table in markdown format (this forces \code{nmax = NA})
|
||||
#' @param digits how many significant digits are to be used for numeric values in the header_txt (not for the items themselves, that depends on \code{\link{getOption}("digits")})
|
||||
#' @param markdown a logical value indicating whether the frequency table should be printed in markdown format. This will print all rows and is default behaviour in non-interactive R sessions (like when knitting RMarkdown files).
|
||||
#' @param digits how many significant digits are to be used for numeric values in the header (not for the items themselves, that depends on \code{\link{getOption}("digits")})
|
||||
#' @param quote a logical value indicating whether or not strings should be printed with surrounding quotes
|
||||
#' @param header a logical value indicating whether an informative header should be printed
|
||||
#' @param sep a character string to separate the terms when selecting multiple columns
|
||||
@ -34,7 +34,7 @@
|
||||
#' @param n number of top \emph{n} items to return, use -n for the bottom \emph{n} items. It will include more than \code{n} rows if there are ties.
|
||||
#' @details Frequency tables (or frequency distributions) are summaries of the distribution of values in a sample. With the `freq` function, you can create univariate frequency tables. Multiple variables will be pasted into one variable, so it forces a univariate distribution. This package also has a vignette available to explain the use of this function further, run \code{browseVignettes("AMR")} to read it.
|
||||
#'
|
||||
#' For numeric values of any class, these additional values will all be calculated with \code{na.rm = TRUE} and shown into the header_txt:
|
||||
#' For numeric values of any class, these additional values will all be calculated with \code{na.rm = TRUE} and shown into the header:
|
||||
#' \itemize{
|
||||
#' \item{Mean, using \code{\link[base]{mean}}}
|
||||
#' \item{Standard Deviation, using \code{\link[stats]{sd}}}
|
||||
@ -46,7 +46,7 @@
|
||||
#' \item{Outliers (total count and unique count), using \code{\link[grDevices]{boxplot.stats}}}
|
||||
#' }
|
||||
#'
|
||||
#' For dates and times of any class, these additional values will be calculated with \code{na.rm = TRUE} and shown into the header_txt:
|
||||
#' For dates and times of any class, these additional values will be calculated with \code{na.rm = TRUE} and shown into the header:
|
||||
#' \itemize{
|
||||
#' \item{Oldest, using \code{\link{min}}}
|
||||
#' \item{Newest, using \code{\link{max}}, with difference between newest and oldest}
|
||||
@ -140,21 +140,13 @@
|
||||
#' # check differences between frequency tables
|
||||
#' diff(freq(septic_patients$trim),
|
||||
#' freq(septic_patients$trsu))
|
||||
#'
|
||||
#' \dontrun{
|
||||
#' # send frequency table to clipboard (e.g. for pasting in Excel)
|
||||
#' septic_patients %>%
|
||||
#' freq(age) %>%
|
||||
#' format() %>% # this will format the percentages
|
||||
#' clipboard_export()
|
||||
#' }
|
||||
frequency_tbl <- function(x,
|
||||
...,
|
||||
sort.count = TRUE,
|
||||
nmax = getOption("max.print.freq"),
|
||||
na.rm = TRUE,
|
||||
row.names = TRUE,
|
||||
markdown = FALSE,
|
||||
markdown = !interactive(),
|
||||
digits = 2,
|
||||
quote = FALSE,
|
||||
header = !markdown,
|
||||
@ -201,17 +193,14 @@ frequency_tbl <- function(x,
|
||||
cols <- NULL
|
||||
}
|
||||
} else if (any(class(x) == 'table')) {
|
||||
if (!"tidyr" %in% rownames(installed.packages())) {
|
||||
stop('transformation from `table` to frequency table requires the tidyr package.', call. = FALSE)
|
||||
}
|
||||
x <- x %>%
|
||||
as.data.frame(stringsAsFactors = FALSE) %>%
|
||||
# paste first two columns
|
||||
tidyr::unite(col = "Pasted", 1:2, sep = sep, remove = TRUE)
|
||||
x <- rep(x %>% pull(Pasted), x %>% pull(Freq))
|
||||
x <- as.data.frame(x, stringsAsFactors = FALSE)
|
||||
# now this DF contains 3 columns: the 2 vars and a Freq column
|
||||
# paste the first 2 cols and repeat them Freq times:
|
||||
x <- rep(x = do.call(paste, c(x[colnames(x)[1:2]], sep = sep)),
|
||||
times = x$Freq)
|
||||
x.name <- "a `table` object"
|
||||
cols <- NULL
|
||||
mult.columns <- 2
|
||||
#mult.columns <- 2
|
||||
} else {
|
||||
x.name <- NULL
|
||||
cols <- NULL
|
||||
@ -221,74 +210,8 @@ frequency_tbl <- function(x,
|
||||
if (ncol(x) == 1 & any(class(x) == 'data.frame')) {
|
||||
x <- x %>% pull(1)
|
||||
} else if (ncol(x) < 10) {
|
||||
|
||||
mult.columns <- ncol(x)
|
||||
|
||||
colnames(x) <- LETTERS[1:ncol(x)]
|
||||
if (ncol(x) == 2) {
|
||||
x$total <- paste(x$A %>% as.character(),
|
||||
x$B %>% as.character(),
|
||||
sep = sep)
|
||||
} else if (ncol(x) == 3) {
|
||||
x$total <- paste(x$A %>% as.character(),
|
||||
x$B %>% as.character(),
|
||||
x$C %>% as.character(),
|
||||
sep = sep)
|
||||
} else if (ncol(x) == 4) {
|
||||
x$total <- paste(x$A %>% as.character(),
|
||||
x$B %>% as.character(),
|
||||
x$C %>% as.character(),
|
||||
x$D %>% as.character(),
|
||||
sep = sep)
|
||||
} else if (ncol(x) == 5) {
|
||||
x$total <- paste(x$A %>% as.character(),
|
||||
x$B %>% as.character(),
|
||||
x$C %>% as.character(),
|
||||
x$D %>% as.character(),
|
||||
x$E %>% as.character(),
|
||||
sep = sep)
|
||||
} else if (ncol(x) == 6) {
|
||||
x$total <- paste(x$A %>% as.character(),
|
||||
x$B %>% as.character(),
|
||||
x$C %>% as.character(),
|
||||
x$D %>% as.character(),
|
||||
x$E %>% as.character(),
|
||||
x$F %>% as.character(),
|
||||
sep = sep)
|
||||
} else if (ncol(x) == 7) {
|
||||
x$total <- paste(x$A %>% as.character(),
|
||||
x$B %>% as.character(),
|
||||
x$C %>% as.character(),
|
||||
x$D %>% as.character(),
|
||||
x$E %>% as.character(),
|
||||
x$F %>% as.character(),
|
||||
x$G %>% as.character(),
|
||||
sep = sep)
|
||||
} else if (ncol(x) == 8) {
|
||||
x$total <- paste(x$A %>% as.character(),
|
||||
x$B %>% as.character(),
|
||||
x$C %>% as.character(),
|
||||
x$D %>% as.character(),
|
||||
x$E %>% as.character(),
|
||||
x$F %>% as.character(),
|
||||
x$G %>% as.character(),
|
||||
x$H %>% as.character(),
|
||||
sep = sep)
|
||||
} else if (ncol(x) == 9) {
|
||||
x$total <- paste(x$A %>% as.character(),
|
||||
x$B %>% as.character(),
|
||||
x$C %>% as.character(),
|
||||
x$D %>% as.character(),
|
||||
x$E %>% as.character(),
|
||||
x$F %>% as.character(),
|
||||
x$G %>% as.character(),
|
||||
x$H %>% as.character(),
|
||||
x$I %>% as.character(),
|
||||
sep = sep)
|
||||
}
|
||||
|
||||
x <- x$total
|
||||
|
||||
x <- do.call(paste, c(x[colnames(x)], sep = sep))
|
||||
} else {
|
||||
stop('A maximum of 9 columns can be analysed at the same time.', call. = FALSE)
|
||||
}
|
||||
@ -585,7 +508,7 @@ diff.frequency_tbl <- function(x, y, ...) {
|
||||
#' @exportMethod print.frequency_tbl
|
||||
#' @importFrom knitr kable
|
||||
#' @importFrom dplyr n_distinct
|
||||
#' @importFrom crayon bold
|
||||
#' @importFrom crayon bold silver
|
||||
#' @export
|
||||
print.frequency_tbl <- function(x, nmax = getOption("max.print.freq", default = 15), ...) {
|
||||
|
||||
@ -629,6 +552,9 @@ print.frequency_tbl <- function(x, nmax = getOption("max.print.freq", default =
|
||||
if (!is.null(opt$header_txt)) {
|
||||
cat(opt$header_txt)
|
||||
}
|
||||
} else if (opt$tbl_format == "markdown") {
|
||||
# do print title as caption in markdown
|
||||
cat("\n", title, sep = "")
|
||||
}
|
||||
|
||||
if (NROW(x) == 0) {
|
||||
@ -671,6 +597,9 @@ print.frequency_tbl <- function(x, nmax = getOption("max.print.freq", default =
|
||||
' (',
|
||||
(x.unprinted / (x.unprinted + x.printed)) %>% percent(force_zero = TRUE),
|
||||
') ]\n', sep = '')
|
||||
if (opt$tbl_format == "pandoc") {
|
||||
footer <- silver(footer) # only silver in regular printing
|
||||
}
|
||||
} else {
|
||||
footer <- NULL
|
||||
}
|
||||
|
@ -23,6 +23,7 @@
|
||||
#' @param position position adjustment of bars, either \code{"fill"} (default when \code{fun} is \code{\link{count_df}}), \code{"stack"} (default when \code{fun} is \code{\link{portion_df}}) or \code{"dodge"}
|
||||
#' @param x variable to show on x axis, either \code{"Antibiotic"} (default) or \code{"Interpretation"} or a grouping variable
|
||||
#' @param fill variable to categorise using the plots legend, either \code{"Antibiotic"} (default) or \code{"Interpretation"} or a grouping variable
|
||||
#' @param breaks numeric vector of positions
|
||||
#' @param facet variable to split plots by, either \code{"Interpretation"} (default) or \code{"Antibiotic"} or a grouping variable
|
||||
#' @param translate_ab a column name of the \code{\link{antibiotics}} data set to translate the antibiotic abbreviations into, using \code{\link{abname}}. Default behaviour is to translate to official names according to the WHO. Use \code{translate_ab = FALSE} to disable translation.
|
||||
#' @param fun function to transform \code{data}, either \code{\link{count_df}} (default) or \code{\link{portion_df}}
|
||||
@ -136,6 +137,7 @@ ggplot_rsi <- function(data,
|
||||
fill = "Interpretation",
|
||||
# params = list(),
|
||||
facet = NULL,
|
||||
breaks = seq(0, 1, 0.1),
|
||||
translate_ab = "official",
|
||||
fun = count_df,
|
||||
nrow = NULL,
|
||||
@ -189,7 +191,7 @@ ggplot_rsi <- function(data,
|
||||
if (fun_name == "portion_df"
|
||||
| (fun_name == "count_df" & position == "fill")) {
|
||||
# portions, so use y scale with percentage
|
||||
p <- p + scale_y_percent()
|
||||
p <- p + scale_y_percent(breaks = breaks)
|
||||
}
|
||||
|
||||
if (fun_name == "count_df" & datalabels == TRUE) {
|
||||
@ -281,9 +283,9 @@ facet_rsi <- function(facet = c("Interpretation", "Antibiotic"), nrow = NULL) {
|
||||
|
||||
#' @rdname ggplot_rsi
|
||||
#' @export
|
||||
scale_y_percent <- function() {
|
||||
ggplot2::scale_y_continuous(breaks = seq(0, 1, 0.1),
|
||||
labels = percent(seq(0, 1, 0.1)))
|
||||
scale_y_percent <- function(breaks = seq(0, 1, 0.1)) {
|
||||
ggplot2::scale_y_continuous(breaks = breaks,
|
||||
labels = percent(breaks))
|
||||
}
|
||||
|
||||
#' @rdname ggplot_rsi
|
||||
|
Reference in New Issue
Block a user