AMR/R/mic.R

511 lines
13 KiB
R
Raw Normal View History

2018-02-21 11:52:31 +01:00
# ==================================================================== #
# TITLE #
# Antimicrobial Resistance (AMR) Data Analysis for R #
2018-02-21 11:52:31 +01:00
# #
2019-01-02 23:24:07 +01:00
# SOURCE #
2020-07-08 14:48:06 +02:00
# https://github.com/msberends/AMR #
2018-02-21 11:52:31 +01:00
# #
# LICENCE #
2020-12-27 00:30:28 +01:00
# (c) 2018-2021 Berends MS, Luz CF et al. #
2020-10-08 11:16:03 +02:00
# Developed at the University of Groningen, the Netherlands, in #
# collaboration with non-profit organisations Certe Medical #
# Diagnostics & Advice, and University Medical Center Groningen. #
2018-02-21 11:52:31 +01:00
# #
2019-01-02 23:24:07 +01:00
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
# the Free Software Foundation. #
# We created this package for both routine data analysis and academic #
# research and it was publicly released in the hope that it will be #
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
2020-10-08 11:16:03 +02:00
# #
# Visit our website for the full manual and a complete tutorial about #
# how to conduct AMR data analysis: https://msberends.github.io/AMR/ #
2018-02-21 11:52:31 +01:00
# ==================================================================== #
#' Transform Input to Minimum Inhibitory Concentrations (MIC)
#'
#' This transforms a vector to a new class [`mic`], which is an ordered [factor] with valid minimum inhibitory concentrations (MIC) as levels. Invalid MIC values will be translated as `NA` with a warning.
#' @inheritSection lifecycle Stable Lifecycle
#' @rdname as.mic
#' @param x vector
#' @param na.rm a logical indicating whether missing values should be removed
#' @details To interpret MIC values as RSI values, use [as.rsi()] on MIC values. It supports guidelines from EUCAST and CLSI.
#' @return Ordered [factor] with additional class [`mic`]
#' @aliases mic
#' @export
#' @seealso [as.rsi()]
#' @inheritSection AMR Read more on Our Website!
#' @examples
#' mic_data <- as.mic(c(">=32", "1.0", "1", "1.00", 8, "<=0.128", "8", "16", "16"))
#' is.mic(mic_data)
#'
#' # this can also coerce combined MIC/RSI values:
#' as.mic("<=0.002; S") # will return <=0.002
#'
#' # mathematical processing treats MICs as numeric values
#' fivenum(mic_data)
#' quantile(mic_data)
#' all(mic_data < 512)
#'
#' # interpret MIC values
#' as.rsi(x = as.mic(2),
#'        mo = as.mo("S. pneumoniae"),
#'        ab = "AMX",
#'        guideline = "EUCAST")
#' as.rsi(x = as.mic(4),
#'        mo = as.mo("S. pneumoniae"),
#'        ab = "AMX",
#'        guideline = "EUCAST")
#'
#' # plot MIC values, see ?plot
#' plot(mic_data)
#' plot(mic_data, mo = "E. coli", ab = "cipro")
as.mic <- function(x, na.rm = FALSE) {
  meet_criteria(x, allow_class = c("mic", "character", "numeric", "integer"), allow_NA = TRUE)
  meet_criteria(na.rm, allow_class = "logical", has_length = 1)

  # input that already has the class is handed back as is
  if (is.mic(x)) {
    return(x)
  }

  cleaned <- unlist(x)
  if (na.rm == TRUE) {
    cleaned <- cleaned[!is.na(cleaned)]
  }
  # untouched copy, needed later to report which inputs were rejected
  raw_input <- cleaned

  # --- textual clean-up; the order of these steps matters ---
  # decimal comma becomes a period
  cleaned <- gsub(",", ".", cleaned, fixed = TRUE)
  # Unicode "less/greater than or equal" signs become their ASCII forms
  cleaned <- gsub("\u2264", "<=", cleaned, fixed = TRUE)
  cleaned <- gsub("\u2265", ">=", cleaned, fixed = TRUE)
  # drop spaces between operator and number ("<= 0.002" -> "<=0.002")
  cleaned <- gsub("(<|=|>) +", "\\1", cleaned)
  # reversed operators: "=<" -> "<=" and "=>" -> ">="
  cleaned <- gsub("=<", "<=", cleaned, fixed = TRUE)
  cleaned <- gsub("=>", ">=", cleaned, fixed = TRUE)
  # a dot without a leading digit gets a leading zero
  cleaned <- gsub("([^0-9]|^)[.]", "\\10.", cleaned)
  # two dots in one value, e.g. "<=0.2560.512": keep only the last number
  cleaned <- gsub(".*[.].*[.]", "0.", cleaned)
  # strip a trailing ".0"
  cleaned <- gsub("[.]+0$", "", cleaned)
  # strip everything that follows the last digit
  cleaned <- gsub("[^0-9]+$", "", cleaned)
  # at most one zero in front of the dot
  cleaned <- gsub("0+[.]", "0.", cleaned)
  # a leading "00" in a value without a dot is read as "0.0"
  lacks_dot <- !cleaned %like% "[.]"
  cleaned[lacks_dot] <- gsub("^00", "0.0", cleaned[lacks_dot])
  # strip trailing zeroes
  cleaned <- gsub("([.].?)0+$", "\\1", cleaned)
  cleaned <- gsub("(.*[.])0+$", "\\10", cleaned)
  # strip trailing zeroes once more, only for values that contain a dot
  contains_dot <- cleaned %like% "[.]"
  cleaned[contains_dot] <- gsub("0+$", "", cleaned[contains_dot])
  # a value must not end in a dot
  cleaned <- gsub("[.]$", "", cleaned)
  # make sure we hold character data, then trim whitespace
  cleaned <- as.character(cleaned)
  cleaned <- trimws(cleaned)
  # inputs that were not empty but ended up empty are flagged so they are
  # counted as invalid (and warned about) below
  cleaned[raw_input != "" & cleaned == ""] <- "invalid"

  # --- allowed MIC values; these become the factor levels ---
  ops <- c("<", "<=", "", ">=", ">")
  # prefix every value with every operator, ordered by value first and by
  # operator second
  with_operators <- function(values) {
    c(t(vapply(ops,
               function(op) paste0(op, values),
               FUN.VALUE = character(length(values)))))
  }
  lvls <- c(
    with_operators(paste0("0.00", 1:9)),
    unique(with_operators(sort(as.double(paste0("0.0",
                                                sort(c(1:99, 125, 128, 256, 512, 625))))))),
    unique(with_operators(sort(as.double(paste0("0.",
                                                c(1:99, 125, 128, 256, 512)))))),
    with_operators(sort(c(1:9, 1.5))),
    with_operators(c(10:98)[9:98 %% 2 == TRUE]),
    with_operators(sort(c(2 ^ c(7:10), 80 * c(2:12))))
  )

  # --- everything that is not an allowed level becomes NA ---
  na_before <- cleaned[is.na(cleaned) | cleaned == ""] %pm>% length()
  cleaned[!cleaned %in% lvls] <- NA
  na_after <- cleaned[is.na(cleaned) | cleaned == ""] %pm>% length()

  if (na_before != na_after) {
    # original spellings of the values that were rejected
    list_missing <- raw_input[is.na(cleaned) & !is.na(raw_input) & raw_input != ""] %pm>%
      unique() %pm>%
      sort() %pm>%
      vector_and(quotes = TRUE)
    n_invalid <- na_after - na_before
    warning_(n_invalid, " results truncated (",
             round((n_invalid / length(cleaned)) * 100),
             "%) that were invalid MICs: ",
             list_missing, call = FALSE)
  }

  set_clean_class(factor(cleaned, levels = lvls, ordered = TRUE),
                  new_class = c("mic", "ordered", "factor"))
}
2020-02-20 13:19:23 +01:00
all_valid_mics <- function(x) {
  # Returns TRUE only when `x` has a supported class, holds at least one
  # non-missing value, and every non-missing value survives as.mic().
  supported <- c("mic", "character", "factor", "numeric", "integer")
  if (!inherits(x, supported)) {
    return(FALSE)
  }
  # coerce the non-missing part; an error during coercion counts as invalid
  converted <- tryCatch(suppressWarnings(as.mic(x[!is.na(x)])),
                        error = function(e) NA)
  !(any(is.na(converted)) || all(is.na(x)))
}
2018-02-21 11:52:31 +01:00
#' @rdname as.mic
#' @export
is.mic <- function(x) {
  # class test only; the values themselves are not inspected
  inherits(x, what = "mic")
}
2020-05-28 16:48:55 +02:00
#' @method as.double mic
#' @export
#' @noRd
as.double.mic <- function(x, ...) {
  # drop any run of <, = or > from the labels, then parse what is left
  labels <- as.character(x)
  without_operator <- gsub("[<=>]+", "", labels)
  as.double(without_operator)
}
2020-05-28 16:48:55 +02:00
#' @method as.integer mic
#' @export
#' @noRd
as.integer.mic <- function(x, ...) {
  # drop any run of <, = or > from the labels, then parse as integer
  labels <- as.character(x)
  without_operator <- gsub("[<=>]+", "", labels)
  as.integer(without_operator)
}
2020-05-28 16:48:55 +02:00
#' @method as.numeric mic
#' @export
#' @noRd
as.numeric.mic <- function(x, ...) {
  # drop any run of <, = or > from the labels, then parse as numeric
  labels <- as.character(x)
  without_operator <- gsub("[<=>]+", "", labels)
  as.numeric(without_operator)
}
2020-05-28 16:48:55 +02:00
#' @method droplevels mic
#' @export
#' @noRd
droplevels.mic <- function(x, exclude = if (any(is.na(levels(x)))) NULL else NA, as.mic = TRUE, ...) {
  # let the factor method remove the unused levels
  trimmed <- droplevels.factor(x, exclude = exclude, ...)
  # by default the result is labelled as <mic> again; with `as.mic = FALSE`
  # the class set by the factor method is kept
  if (as.mic == TRUE) {
    class(trimmed) <- c("mic", "ordered", "factor")
  }
  trimmed
}
# will be exported using s3_register() in R/zzz.R
pillar_shaft.mic <- function(x, ...) {
  # split every value into its numeric part and its operator part
  values <- as.double(x)
  signs <- gsub("[^<=>]+", "", as.character(x))
  # unstyled text; also used to determine the column width
  plain <- trimws(paste0(signs, trimws(format(values))))
  styled <- plain
  # missing values get the NA styling
  styled[is.na(x)] <- font_na(NA)
  # operator characters are passed through font_silver()
  styled <- gsub("(<|=|>)", font_silver("\\1"), styled)
  create_pillar_column(styled, align = "right", width = max(nchar(plain)))
}
# will be exported using s3_register() in R/zzz.R
type_sum.mic <- function(x, ...) {
  # constant type label, independent of the values in `x`
  type_label <- "mic"
  type_label
}
2020-05-28 16:48:55 +02:00
#' @method print mic
#' @export
#' @noRd
print.mic <- function(x, ...) {
  # header line first, then the values as unquoted text
  header <- "Class <mic>\n"
  cat(header)
  labels <- as.character(x)
  print(labels, quote = FALSE)
}
2020-05-28 16:48:55 +02:00
#' @method summary mic
#' @export
#' @noRd
summary.mic <- function(object, ...) {
  # work on the non-missing values only, but remember how many were missing
  present <- object[!is.na(object)]
  n_present <- length(present)
  n_missing <- length(object) - n_present
  ordered_values <- sort(present)
  # all entries end up as character because of the "Class" element
  out <- c("Class" = "mic",
           "<NA>" = n_missing,
           "Min." = as.character(ordered_values[1]),
           "Max." = as.character(ordered_values[n_present]))
  class(out) <- c("summaryDefault", "table")
  out
}
2020-05-28 16:48:55 +02:00
#' @method [ mic
#' @export
#' @noRd
"[.mic" <- function(x, ...) {
  # subset with the next method, then copy all attributes of `x`
  # (levels, class, ...) onto the result
  subset <- NextMethod()
  attributes(subset) <- attributes(x)
  subset
}
2020-05-28 16:48:55 +02:00
#' @method [[ mic
#' @export
#' @noRd
"[[.mic" <- function(x, ...) {
  # extract with the next method, then copy all attributes of `x`
  # (levels, class, ...) onto the result
  element <- NextMethod()
  attributes(element) <- attributes(x)
  element
}
2020-05-28 16:48:55 +02:00
#' @method [<- mic
#' @export
#' @noRd
"[<-.mic" <- function(i, j, ..., value) {
  # coerce the replacement with as.mic() before assigning
  value <- as.mic(value)
  updated <- NextMethod()
  # copy the attributes of the object being modified onto the result
  attributes(updated) <- attributes(i)
  updated
}
2020-05-28 16:48:55 +02:00
#' @method [[<- mic
#' @export
#' @noRd
"[[<-.mic" <- function(i, j, ..., value) {
  # coerce the replacement with as.mic() before assigning
  value <- as.mic(value)
  updated <- NextMethod()
  # copy the attributes of the object being modified onto the result
  attributes(updated) <- attributes(i)
  updated
}
2020-05-28 16:48:55 +02:00
#' @method c mic
#' @export
#' @noRd
c.mic <- function(x, ...) {
  # turn every argument into text, glue the pieces together and let
  # as.mic() rebuild the combined vector
  others <- unlist(lapply(list(...), as.character))
  first <- as.character(x)
  as.mic(c(first, others))
}
#' @method unique mic
#' @export
#' @noRd
unique.mic <- function(x, incomparables = FALSE, ...) {
  # de-duplicate with the next method, then copy all attributes of `x`
  # (levels, class, ...) onto the result
  distinct <- NextMethod()
  attributes(distinct) <- attributes(x)
  distinct
}
2020-09-28 01:08:55 +02:00
2021-03-05 15:36:39 +01:00
#' @method range mic
#' @export
#' @noRd
range.mic <- function(..., na.rm = FALSE) {
  # combine and sort all inputs
  ordered_values <- sort(c(...))
  if (na.rm == TRUE) {
    ordered_values <- ordered_values[!is.na(ordered_values)]
  }
  # first and last element of the sorted vector, returned as doubles
  lowest <- as.character(ordered_values[1])
  highest <- as.character(ordered_values[length(ordered_values)])
  as.double(as.mic(c(lowest, highest)))
}
#' @method min mic
#' @export
#' @noRd
min.mic <- function(..., na.rm = FALSE) {
  # combine and sort all inputs
  ordered_values <- sort(c(...))
  if (na.rm == TRUE) {
    ordered_values <- ordered_values[!is.na(ordered_values)]
  }
  # first element of the sorted vector, returned as a double
  lowest <- as.character(ordered_values[1])
  as.double(as.mic(lowest))
}
#' @method max mic
#' @export
#' @noRd
max.mic <- function(..., na.rm = FALSE) {
  # combine and sort all inputs
  ordered_values <- sort(c(...))
  if (na.rm == TRUE) {
    ordered_values <- ordered_values[!is.na(ordered_values)]
  }
  # last element of the sorted vector, returned as a double
  highest <- as.character(ordered_values[length(ordered_values)])
  as.double(as.mic(highest))
}
#' @method sum mic
#' @export
#' @noRd
sum.mic <- function(..., na.rm = FALSE) {
  # combine and sort all inputs, optionally dropping missing values
  ordered_values <- sort(c(...))
  if (na.rm == TRUE) {
    ordered_values <- ordered_values[!is.na(ordered_values)]
  }
  # add up the numeric values
  numeric_values <- as.double(ordered_values)
  sum(numeric_values)
}
#' @method all mic
#' @export
#' @noRd
all.mic <- function(..., na.rm = FALSE) {
  # combine and sort all inputs, optionally dropping missing values
  ordered_values <- sort(c(...))
  if (na.rm == TRUE) {
    ordered_values <- ordered_values[!is.na(ordered_values)]
  }
  # all() is applied to the numeric values, which it coerces to logical
  numeric_values <- as.double(ordered_values)
  all(numeric_values)
}
#' @method any mic
#' @export
#' @noRd
any.mic <- function(..., na.rm = FALSE) {
  # combine and sort all inputs, optionally dropping missing values
  ordered_values <- sort(c(...))
  if (na.rm == TRUE) {
    ordered_values <- ordered_values[!is.na(ordered_values)]
  }
  # any() is applied to the numeric values, which it coerces to logical
  numeric_values <- as.double(ordered_values)
  any(numeric_values)
}
#' @method mean mic
#' @export
#' @noRd
mean.mic <- function(x, na.rm = FALSE, ...) {
  # average of the numeric values
  numeric_values <- as.double(x)
  mean(numeric_values, na.rm = na.rm, ...)
}
#' @method median mic
#' @export
#' @noRd
median.mic <- function(x, na.rm = FALSE, ...) {
  # median of the numeric values
  numeric_values <- as.double(x)
  median(numeric_values, na.rm = na.rm, ...)
}
#' @method quantile mic
#' @export
#' @noRd
quantile.mic <- function(x, probs = seq(0, 1, 0.25), na.rm = FALSE,
                         names = TRUE, type = 7, ...) {
  # Quantiles of the numeric values.
  # `probs` has to be forwarded under its real name: the previous spelling
  # (`props = props`) never passed the requested probabilities on and
  # referred to an object that does not exist.
  quantile(as.double(x), probs = probs, na.rm = na.rm, names = names, type = type, ...)
}
#' @method floor mic
#' @export
#' @noRd
floor.mic <- function(x) {
  # round the numeric values down
  numeric_values <- as.double(x)
  floor(numeric_values)
}
#' @method ceiling mic
#' @export
#' @noRd
ceiling.mic <- function(x) {
  # round the numeric values up
  numeric_values <- as.double(x)
  ceiling(numeric_values)
}
#' @method + mic
#' @export
#' @noRd
`+.mic` <- function(e1, e2) {
  # Unary plus (`+x`) reaches this method without `e2`; return the numeric
  # values instead of failing on the missing argument.
  if (missing(e2)) {
    return(as.double(e1))
  }
  # binary case: add the numeric values of both operands
  as.double(e1) + as.double(e2)
}
#' @method - mic
#' @export
#' @noRd
`-.mic` <- function(e1, e2) {
  # Unary minus (`-x`) reaches this method without `e2`; negate the numeric
  # values instead of failing on the missing argument.
  if (missing(e2)) {
    return(-as.double(e1))
  }
  # binary case: subtract the numeric values of both operands
  as.double(e1) - as.double(e2)
}
#' @method * mic
#' @export
#' @noRd
`*.mic` <- function(e1, e2) {
  # multiply the numeric values of both operands
  lhs <- as.double(e1)
  rhs <- as.double(e2)
  lhs * rhs
}
#' @method / mic
#' @export
#' @noRd
`/.mic` <- function(e1, e2) {
  # divide the numeric values of both operands
  lhs <- as.double(e1)
  rhs <- as.double(e2)
  lhs / rhs
}
#' @method ^ mic
#' @export
#' @noRd
`^.mic` <- function(e1, e2) {
  # raise the numeric values of `e1` to the numeric values of `e2`
  base_values <- as.double(e1)
  exponents <- as.double(e2)
  base_values ^ exponents
}
#' @method %% mic
#' @export
#' @noRd
`%%.mic` <- function(e1, e2) {
  # remainder of dividing the numeric values of both operands
  lhs <- as.double(e1)
  rhs <- as.double(e2)
  lhs %% rhs
}
#' @method %/% mic
#' @export
#' @noRd
`%/%.mic` <- function(e1, e2) {
  # integer division of the numeric values of both operands
  lhs <- as.double(e1)
  rhs <- as.double(e2)
  lhs %/% rhs
}
#' @method == mic
#' @export
#' @noRd
`==.mic` <- function(e1, e2) {
  # compare the numeric values; operator prefixes play no role
  lhs <- as.double(e1)
  rhs <- as.double(e2)
  lhs == rhs
}
#' @method != mic
#' @export
#' @noRd
`!=.mic` <- function(e1, e2) {
  # compare the numeric values; operator prefixes play no role
  lhs <- as.double(e1)
  rhs <- as.double(e2)
  lhs != rhs
}
#' @method < mic
#' @export
#' @noRd
`<.mic` <- function(e1, e2) {
  # compare the numeric values; operator prefixes play no role
  lhs <- as.double(e1)
  rhs <- as.double(e2)
  lhs < rhs
}
#' @method <= mic
#' @export
#' @noRd
`<=.mic` <- function(e1, e2) {
  # compare the numeric values; operator prefixes play no role
  lhs <- as.double(e1)
  rhs <- as.double(e2)
  lhs <= rhs
}
#' @method >= mic
#' @export
#' @noRd
`>=.mic` <- function(e1, e2) {
  # compare the numeric values; operator prefixes play no role
  lhs <- as.double(e1)
  rhs <- as.double(e2)
  lhs >= rhs
}
#' @method > mic
#' @export
#' @noRd
`>.mic` <- function(e1, e2) {
  # compare the numeric values; operator prefixes play no role
  lhs <- as.double(e1)
  rhs <- as.double(e2)
  lhs > rhs
}
#' @method sort mic
#' @export
#' @noRd
sort.mic <- function(x, decreasing = FALSE, ...) {
  # order by the numeric values; for descending order the values are negated
  # before ordering
  numeric_values <- as.double(x)
  positions <- if (decreasing == TRUE) {
    order(-numeric_values)
  } else {
    order(numeric_values)
  }
  x[positions]
}
#' @method hist mic
#' @export
#' @noRd
hist.mic <- function(x, ...) {
  # Histogram of the numeric values of `x`; further arguments are passed on
  # to hist(). A warning pointing to `plot()` / `ggplot()` is raised first.
  warning_("Use `plot()` or `ggplot()` for plotting MIC values", call = FALSE)
  hist(as.double(x), ...)
}
2020-09-28 01:08:55 +02:00
# will be exported using s3_register() in R/zzz.R
get_skimmers.mic <- function(column) {
  # Summary functions for <mic> columns, handed to skimr::sfl().
  # Every formula drops missing values itself via stats::na.omit().
  skimr::sfl(
    skim_type = "mic",
    # first and last element after sorting, shown as text
    min = ~as.character(sort(stats::na.omit(.))[1]),
    max = ~as.character(sort(stats::na.omit(.))[length(stats::na.omit(.))]),
    # first value whose numeric form equals the numeric median
    median = ~as.character(stats::na.omit(.)[as.double(stats::na.omit(.)) == median(as.double(stats::na.omit(.)))])[1],
    n_unique = ~pm_n_distinct(., na.rm = TRUE),
    # inline histogram of the log2-transformed numeric values
    hist_log2 = ~skimr::inline_hist(log2(as.double(stats::na.omit(.))))
  )
}