# ==================================================================== #
# TITLE                                                                #
# AMR: An R Package for Working with Antimicrobial Resistance Data     #
#                                                                      #
# SOURCE                                                               #
# https://github.com/msberends/AMR                                     #
#                                                                      #
# CITE AS                                                              #
# Berends MS, Luz CF, Friedrich AW, Sinha BNM, Albers CJ, Glasner C    #
# (2022). AMR: An R Package for Working with Antimicrobial Resistance  #
# Data. Journal of Statistical Software, 104(3), 1-31.                 #
# doi:10.18637/jss.v104.i03                                            #
#                                                                      #
# Developed at the University of Groningen and the University Medical  #
# Center Groningen in The Netherlands, in collaboration with many      #
# colleagues from around the world, see our website.                   #
#                                                                      #
# This R package is free software; you can freely use and distribute   #
# it for both personal and commercial purposes under the terms of the  #
# GNU General Public License version 2.0 (GNU GPL-2), as published by  #
# the Free Software Foundation.                                        #
# We created this package for both routine data analysis and academic  #
# research and it was publicly released in the hope that it will be    #
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY.              #
#                                                                      #
# Visit our website for the full manual and a complete tutorial about  #
# how to conduct AMR data analysis: https://msberends.github.io/AMR/   #
# ==================================================================== #

#' Check Availability of Columns
#'
#' Easy check for data availability of all columns in a data set. This makes it easy to get an idea of which antimicrobial combinations can be used for calculation with e.g. [susceptibility()] and [resistance()].
#' @param tbl a [data.frame] or [list]
#' @param width number of characters to present the visual availability - the default is filling the width of the console
#' @details The function returns a [data.frame] with the columns `"count"`, `"available"` and `"visual_availabilty"`, which describe the non-missing values of each column of `tbl`. If a resistance value could be determined for at least one column of `tbl`, the columns `"resistant"` and `"visual_resistance"` are added as well. The values in those columns are calculated with [resistance()].
#' @return [data.frame] with column names of `tbl` as row names
#' @export
#' @examples
#' availability(example_isolates)
#' \donttest{
#' if (require("dplyr")) {
#'   example_isolates %>%
#'     filter(mo == as.mo("Escherichia coli")) %>%
#'     select_if(is.sir) %>%
#'     availability()
#' }
#' }
availability <- function(tbl, width = NULL) {
  meet_criteria(tbl, allow_class = "data.frame")
  meet_criteria(
    width,
    allow_class = c("numeric", "integer"),
    has_length = 1,
    allow_NULL = TRUE,
    is_positive = TRUE,
    is_finite = TRUE
  )

  tbl <- as.data.frame(tbl, stringsAsFactors = FALSE)

  # share of non-missing values per column (NaN for a column of length zero)
  x <- vapply(FUN.VALUE = double(1), tbl, function(col) {
    1 - sum(is.na(col)) / length(col)
  })
  # number of non-missing values per column
  n <- vapply(FUN.VALUE = double(1), tbl, function(col) sum(!is.na(col)))
  # resistance per column; NA for every column that is.sir() does not accept
  R <- vapply(FUN.VALUE = double(1), tbl, function(col) {
    if (isTRUE(is.sir(col))) resistance(col, minimum = 0) else NA_real_
  })

  has_R <- !is.na(R)
  no_R <- !any(has_R)

  # character() is pre-filled with "", so columns without a value stay empty
  R_print <- character(length(R))
  R_print[has_R] <- percentage(R[has_R])

  auto_width <- is.null(width)
  if (auto_width) {
    reserved <- max(nchar(colnames(tbl)), 0L) + # row names; 0L guards a table without columns
      8 + # count col
      10 + # available % column
      10 + # resistant % column
      5 # extra margin
    # the remaining console width is shared by the two bars
    width <- (getOption("width", 100) - reserved) / 2
  }

  if (no_R) {
    # no resistance columns will be returned, so the single bar gets their space too
    width <- width * 2 + 10
  }

  if (auto_width) {
    # a narrow console or long column names can push the computed width below
    # zero, which strrep() rejects with an error; always draw at least one cell
    width <- max(width, 1)
  }

  # Draws one "|###---|" bar per element; `filled` is the number of "#" cells.
  # Rounding to 2 digits before strrep() truncates to integer removes
  # floating-point fuzz (e.g. 0.29 * 100 evaluating to 28.999...), and missing
  # values are drawn as an empty bar instead of "|NANA|".
  draw_bar <- function(filled) {
    filled[is.na(filled)] <- 0
    hashes <- strrep("#", round(filled, digits = 2))
    paste0("|", hashes, strrep("-", width - nchar(hashes)), "|")
  }

  vis_resistance <- draw_bar(width * R)
  vis_resistance[!has_R] <- ""

  vis_availability <- draw_bar(round(x, digits = 2) * width)

  df <- data.frame(
    count = n,
    available = percentage(x),
    # NOTE: the misspelling in this column name is kept on purpose, since it is
    # part of the returned structure that existing callers may rely on
    visual_availabilty = vis_availability,
    resistant = R_print,
    visual_resistance = vis_resistance,
    stringsAsFactors = FALSE
  )

  if (no_R) {
    df[, c("count", "available", "visual_availabilty"), drop = FALSE]
  } else {
    df
  }
}