AMR/R/disk.R

203 lines
6.3 KiB
R
Raw Normal View History

2019-05-10 16:44:59 +02:00
# ==================================================================== #
# TITLE #
# Antimicrobial Resistance (AMR) Analysis #
# #
# SOURCE #
2020-07-08 14:48:06 +02:00
# https://github.com/msberends/AMR #
2019-05-10 16:44:59 +02:00
# #
# LICENCE #
# (c) 2018-2020 Berends MS, Luz CF et al. #
2019-05-10 16:44:59 +02:00
# #
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
# the Free Software Foundation. #
# #
# We created this package for both routine data analysis and academic #
# research and it was publicly released in the hope that it will be #
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
2020-07-08 14:48:06 +02:00
# Visit our website for more info: https://msberends.github.io/AMR. #
2019-05-10 16:44:59 +02:00
# ==================================================================== #
#' Transform input to disk diffusion diameters
2019-05-10 16:44:59 +02:00
#'
#' This transforms a vector to a new class [`disk`], which is a disk diffusion growth zone size (around an antibiotic disk) in millimetres between 6 and 50.
#' @inheritSection lifecycle Stable lifecycle
2019-05-10 16:44:59 +02:00
#' @rdname as.disk
#' @param x vector
#' @param na.rm a logical indicating whether missing values should be removed
#' @details Interpret disk values as RSI values with [as.rsi()]. It supports guidelines from EUCAST and CLSI.
#' @return An [integer] with additional class [`disk`]
2019-11-06 14:43:23 +01:00
#' @aliases disk
2019-05-10 16:44:59 +02:00
#' @export
#' @seealso [as.rsi()]
2019-05-10 16:44:59 +02:00
#' @inheritSection AMR Read more on our website!
#' @examples
2020-05-16 21:40:50 +02:00
#' \dontrun{
2020-02-17 14:38:01 +01:00
#' # transform existing disk zones to the `disk` class
2020-02-17 15:07:25 +01:00
#' library(dplyr)
2020-02-17 14:38:01 +01:00
#' df <- data.frame(microorganism = "E. coli",
#' AMP = 20,
#' CIP = 14,
#' GEN = 18,
#' TOB = 16)
2020-02-17 15:07:25 +01:00
#' df <- df %>% mutate_at(vars(AMP:TOB), as.disk)
2020-02-17 14:38:01 +01:00
#' df
#'
#' # interpret disk values, see ?as.rsi
#' as.rsi(x = as.disk(18),
#' mo = "Strep pneu", # `mo` will be coerced with as.mo()
#' ab = "ampicillin", # and `ab` with as.ab()
2019-05-10 16:44:59 +02:00
#' guideline = "EUCAST")
2020-02-17 14:38:01 +01:00
#'
#' as.rsi(df)
2020-05-16 21:40:50 +02:00
#' }
2019-05-10 16:44:59 +02:00
as.disk <- function(x, na.rm = FALSE) {
2020-05-19 12:08:49 +02:00
if (!is.disk(x)) {
x <- x %pm>% unlist()
2019-05-10 16:44:59 +02:00
if (na.rm == TRUE) {
x <- x[!is.na(x)]
}
x.bak <- x
2020-07-13 09:17:24 +02:00
2019-05-10 16:44:59 +02:00
na_before <- length(x[is.na(x)])
2020-07-13 09:17:24 +02:00
# heavily based on the function from our cleaner package:
clean_double2 <- function(x, remove = "[^0-9.,-]", fixed = FALSE) {
x <- gsub(",", ".", x)
# remove ending dot/comma
x <- gsub("[,.]$", "", x)
# only keep last dot/comma
reverse <- function(x) sapply(lapply(strsplit(x, NULL), rev), paste, collapse = "")
x <- sub("{{dot}}", ".",
gsub(".", "",
reverse(sub(".", "}}tod{{",
reverse(x),
fixed = TRUE)),
fixed = TRUE),
fixed = TRUE)
x_clean <- gsub(remove, "", x, ignore.case = TRUE, fixed = fixed)
# remove everything that is not a number or dot
as.numeric(gsub("[^0-9.]+", "", x_clean))
}
# round up and make it an integer
x <- as.integer(ceiling(clean_double2(x)))
2020-07-13 09:17:24 +02:00
2019-12-21 10:56:06 +01:00
# disks can never be less than 6 mm (size of smallest disk) or more than 50 mm
x[x < 6 | x > 50] <- NA_integer_
2019-05-10 16:44:59 +02:00
na_after <- length(x[is.na(x)])
2020-07-13 09:17:24 +02:00
2019-05-10 16:44:59 +02:00
if (na_before != na_after) {
list_missing <- x.bak[is.na(x) & !is.na(x.bak)] %pm>%
unique() %pm>%
2019-05-10 16:44:59 +02:00
sort()
2019-10-11 17:21:02 +02:00
list_missing <- paste0('"', list_missing, '"', collapse = ", ")
warning(na_after - na_before, " results truncated (",
2019-05-10 16:44:59 +02:00
round(((na_after - na_before) / length(x)) * 100),
2019-10-11 17:21:02 +02:00
"%) that were invalid disk zones: ",
2019-05-10 16:44:59 +02:00
list_missing, call. = FALSE)
}
}
2020-05-19 12:08:49 +02:00
structure(as.integer(x),
class = c("disk", "integer"))
2019-05-10 16:44:59 +02:00
}
2020-02-20 13:19:23 +01:00
all_valid_disks <- function(x) {
x_disk <- suppressWarnings(as.disk(x[!is.na(x)]))
!any(is.na(x_disk)) & !all(is.na(x))
}
2019-05-10 16:44:59 +02:00
#' @rdname as.disk
#' @export
is.disk <- function(x) {
2020-02-10 14:18:15 +01:00
inherits(x, "disk")
2019-05-10 16:44:59 +02:00
}
# will be exported using s3_register() in R/zzz.R
2020-08-26 11:33:54 +02:00
pillar_shaft.disk <- function(x, ...) {
out <- trimws(format(x))
out[is.na(x)] <- font_na(NA)
create_pillar_column(out, align = "right", width = 2)
2020-08-26 11:33:54 +02:00
}
# will be exported using s3_register() in R/zzz.R
2020-08-26 11:33:54 +02:00
type_sum.disk <- function(x, ...) {
"disk"
}
2020-05-28 16:48:55 +02:00
#' @method print disk
2019-05-10 16:44:59 +02:00
#' @export
#' @noRd
print.disk <- function(x, ...) {
2020-05-27 16:37:49 +02:00
cat("Class <disk>\n")
2019-05-10 16:44:59 +02:00
print(as.integer(x), quote = FALSE)
}
2020-05-28 16:48:55 +02:00
#' @method [ disk
2020-03-14 14:05:43 +01:00
#' @export
#' @noRd
"[.disk" <- function(x, ...) {
y <- NextMethod()
attributes(y) <- attributes(x)
y
}
2020-05-28 16:48:55 +02:00
#' @method [[ disk
2020-03-14 14:05:43 +01:00
#' @export
#' @noRd
"[[.disk" <- function(x, ...) {
y <- NextMethod()
attributes(y) <- attributes(x)
y
}
2020-05-28 16:48:55 +02:00
#' @method [<- disk
2020-03-14 14:05:43 +01:00
#' @export
#' @noRd
"[<-.disk" <- function(i, j, ..., value) {
2020-04-13 21:09:56 +02:00
value <- as.disk(value)
2020-03-14 14:05:43 +01:00
y <- NextMethod()
attributes(y) <- attributes(i)
y
}
2020-05-28 16:48:55 +02:00
#' @method [[<- disk
2020-03-14 14:05:43 +01:00
#' @export
#' @noRd
"[[<-.disk" <- function(i, j, ..., value) {
2020-04-13 21:09:56 +02:00
value <- as.disk(value)
2020-03-14 14:05:43 +01:00
y <- NextMethod()
attributes(y) <- attributes(i)
y
}
2020-05-28 16:48:55 +02:00
#' @method c disk
2020-03-14 14:05:43 +01:00
#' @export
#' @noRd
c.disk <- function(x, ...) {
y <- NextMethod()
2020-04-13 21:09:56 +02:00
y <- as.disk(y)
2020-03-14 14:05:43 +01:00
attributes(y) <- attributes(x)
y
}
#' @method unique disk
#' @export
#' @noRd
unique.disk <- function(x, incomparables = FALSE, ...) {
y <- NextMethod()
attributes(y) <- attributes(x)
y
}
2020-09-28 01:08:55 +02:00
# will be exported using s3_register() in R/zzz.R
get_skimmers.disk <- function(column) {
sfl <- import_fn("sfl", "skimr", error_on_fail = FALSE)
inline_hist <- import_fn("inline_hist", "skimr", error_on_fail = FALSE)
sfl(
skim_type = "disk",
2020-09-28 11:00:59 +02:00
min = ~min(as.double(.), na.rm = TRUE),
max = ~max(as.double(.), na.rm = TRUE),
2020-09-28 01:08:55 +02:00
median = ~stats::median(as.double(.), na.rm = TRUE),
2020-09-28 11:00:59 +02:00
n_unique = ~pm_n_distinct(., na.rm = TRUE),
2020-09-28 01:08:55 +02:00
hist = ~inline_hist(stats::na.omit(as.double(.)))
)
}