mirror of
https://github.com/msberends/AMR.git
synced 2025-07-08 08:32:04 +02:00
con WHONET, filter ab class
This commit is contained in:
268
R/filter_ab_class.R
Normal file
268
R/filter_ab_class.R
Normal file
@ -0,0 +1,268 @@
|
||||
# ==================================================================== #
|
||||
# TITLE #
|
||||
# Antimicrobial Resistance (AMR) Analysis #
|
||||
# #
|
||||
# SOURCE #
|
||||
# https://gitlab.com/msberends/AMR #
|
||||
# #
|
||||
# LICENCE #
|
||||
# (c) 2019 Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl) #
|
||||
# #
|
||||
# This R package is free software; you can freely use and distribute #
|
||||
# it for both personal and commercial purposes under the terms of the #
|
||||
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
|
||||
# the Free Software Foundation. #
|
||||
# #
|
||||
# This R package was created for academic research and was publicly #
|
||||
# released in the hope that it will be useful, but it comes WITHOUT #
|
||||
# ANY WARRANTY OR LIABILITY. #
|
||||
# Visit our website for more info: https://msberends.gitlab.io/AMR. #
|
||||
# ==================================================================== #
|
||||
|
||||
#' Filter on antibiotic class
|
||||
#'
|
||||
#' Filter on specific antibiotic variables based on their class (ATC groups).
|
||||
#' @param tbl a data set
|
||||
#' @param ab_class an antimicrobial class, like \code{"carbapenems"}
|
||||
#' @param result an antibiotic result: S, I or R (or a combination of more of them)
|
||||
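#' @param scope which of the matching antibiotic variables must have the given \code{result}: \code{"any"} (default) keeps rows where at least one of them does, \code{"all"} keeps rows where every one of them does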
|
||||
#' @param ... parameters passed on to \code{\link[dplyr]{filter_at}}
|
||||
#' @details The \code{\link{antibiotics}} data set will be searched for \code{ab_class} in the columns \code{atc_group1} and \code{atc_group2} (case-insensitive). Next, \code{tbl} will be checked for column names that match any of the abbreviations, codes or official names found in the \code{antibiotics} data set.
|
||||
#' @rdname filter_ab_class
|
||||
#' @importFrom dplyr filter_at %>% select vars any_vars all_vars
|
||||
#' @importFrom crayon bold blue
|
||||
#' @export
|
||||
#' @examples
|
||||
#' library(dplyr)
|
||||
#'
|
||||
#' # filter on isolates that have any result for any aminoglycoside
|
||||
#' septic_patients %>% filter_aminoglycosides()
|
||||
#'
|
||||
#' # this is essentially the same as:
|
||||
#' septic_patients %>%
|
||||
#' filter_at(.vars = vars(c("gent", "tobr", "amik", "kana")),
|
||||
#' .vars_predicate = any_vars(. %in% c("S", "I", "R")))
|
||||
#'
|
||||
#'
|
||||
#' # filter on isolates that show resistance to ANY aminoglycoside
|
||||
#' septic_patients %>% filter_aminoglycosides("R")
|
||||
#'
|
||||
#' # filter on isolates that show resistance to ALL aminoglycosides
|
||||
#' septic_patients %>% filter_aminoglycosides("R", "all")
|
||||
#'
|
||||
#' # filter on isolates that show resistance to
|
||||
#' # any aminoglycoside and any fluoroquinolone
|
||||
#' septic_patients %>%
|
||||
#' filter_aminoglycosides("R", "any") %>%
|
||||
#' filter_fluoroquinolones("R", "any")
|
||||
filter_ab_class <- function(tbl,
                            ab_class,
                            result = NULL,
                            scope = "any",
                            ...) {
  # Keep the rows of `tbl` in which the antibiotic columns belonging to
  # `ab_class` hold one of the values in `result`, for any or for all of
  # those columns depending on `scope`. Extra arguments in `...` are handed
  # to dplyr::filter_at(). When `tbl` has no column of the requested class,
  # a warning is raised and `tbl` is returned unchanged.

  # Only the first element of `scope` is used.
  scope <- scope[1L]

  # No `result` given: accept every interpretation (S, I and R).
  if (is.null(result)) {
    result <- c("S", "I", "R")
  }

  if (!all(result %in% c("S", "I", "R"))) {
    stop("`result` must be one or more of: S, I, R", call. = FALSE)
  }
  # A NULL `scope` stays zero-length after `scope[1L]`; it would satisfy an
  # `all(scope %in% ...)` test vacuously and then break `if (scope == "any")`
  # further down, so the length is checked explicitly.
  if (length(scope) != 1L || !scope %in% c("any", "all")) {
    stop("`scope` must be one of: any, all", call. = FALSE)
  }

  # Columns of `tbl` whose (case-insensitive) name is a known identifier of
  # an antibiotic in the requested class.
  vars_df <- colnames(tbl)[tolower(colnames(tbl)) %in% tolower(ab_class_vars(ab_class))]
  # Human-readable label of the matched group(s), used in the messages below.
  atc_groups <- ab_class_atcgroups(ab_class)

  if (length(vars_df) > 0) {
    if (length(result) == 1) {
      operator <- " is "
    } else {
      operator <- " is one of "
    }
    if (scope == "any") {
      scope_txt <- " or "
      scope_fn <- any_vars
    } else {
      scope_txt <- " and "
      scope_fn <- all_vars
    }
    # Tell the user which columns and which results are being filtered on.
    message(blue(paste0("Filtering on ", atc_groups, ": ", scope, " of ",
                        paste(bold(vars_df), collapse = scope_txt), operator, toString(result))))
    tbl %>%
      filter_at(.vars = vars(vars_df),
                .vars_predicate = scope_fn(. %in% result),
                ...)
  } else {
    warning(paste0("no antibiotics of class ", atc_groups, " found, leaving data unchanged"), call. = FALSE)
    tbl
  }
}
|
||||
|
||||
#' @rdname filter_ab_class
#' @export
filter_aminoglycosides <- function(tbl, result = NULL, scope = "any", ...) {
  # Thin wrapper: filter_ab_class() with the class fixed to "aminoglycoside".
  filter_ab_class(
    ab_class = "aminoglycoside",
    tbl = tbl,
    result = result,
    scope = scope,
    ...
  )
}
|
||||
|
||||
#' @rdname filter_ab_class
#' @export
filter_carbapenems <- function(tbl, result = NULL, scope = "any", ...) {
  # Thin wrapper: filter_ab_class() with the class fixed to "carbapenem".
  filter_ab_class(
    ab_class = "carbapenem",
    tbl = tbl,
    result = result,
    scope = scope,
    ...
  )
}
|
||||
|
||||
#' @rdname filter_ab_class
#' @export
filter_cephalosporins <- function(tbl, result = NULL, scope = "any", ...) {
  # Thin wrapper: filter_ab_class() with the class fixed to "cephalosporin".
  filter_ab_class(
    ab_class = "cephalosporin",
    tbl = tbl,
    result = result,
    scope = scope,
    ...
  )
}
|
||||
|
||||
#' @rdname filter_ab_class
#' @export
filter_1st_cephalosporins <- function(tbl, result = NULL, scope = "any", ...) {
  # Thin wrapper: filter_ab_class() with the class fixed to
  # "first-generation cephalosporin".
  filter_ab_class(
    ab_class = "first-generation cephalosporin",
    tbl = tbl,
    result = result,
    scope = scope,
    ...
  )
}
|
||||
|
||||
#' @rdname filter_ab_class
#' @export
filter_2nd_cephalosporins <- function(tbl, result = NULL, scope = "any", ...) {
  # Thin wrapper: filter_ab_class() with the class fixed to
  # "second-generation cephalosporin".
  filter_ab_class(
    ab_class = "second-generation cephalosporin",
    tbl = tbl,
    result = result,
    scope = scope,
    ...
  )
}
|
||||
|
||||
#' @rdname filter_ab_class
#' @export
filter_3rd_cephalosporins <- function(tbl, result = NULL, scope = "any", ...) {
  # Thin wrapper: filter_ab_class() with the class fixed to
  # "third-generation cephalosporin".
  filter_ab_class(
    ab_class = "third-generation cephalosporin",
    tbl = tbl,
    result = result,
    scope = scope,
    ...
  )
}
|
||||
|
||||
#' @rdname filter_ab_class
#' @export
filter_4th_cephalosporins <- function(tbl, result = NULL, scope = "any", ...) {
  # Thin wrapper: filter_ab_class() with the class fixed to
  # "fourth-generation cephalosporin".
  filter_ab_class(
    ab_class = "fourth-generation cephalosporin",
    tbl = tbl,
    result = result,
    scope = scope,
    ...
  )
}
|
||||
|
||||
#' @rdname filter_ab_class
#' @export
filter_fluoroquinolones <- function(tbl, result = NULL, scope = "any", ...) {
  # Thin wrapper: filter_ab_class() with the class fixed to "fluoroquinolone".
  filter_ab_class(
    ab_class = "fluoroquinolone",
    tbl = tbl,
    result = result,
    scope = scope,
    ...
  )
}
|
||||
|
||||
#' @rdname filter_ab_class
#' @export
filter_glycopeptides <- function(tbl, result = NULL, scope = "any", ...) {
  # Thin wrapper: filter_ab_class() with the class fixed to "glycopeptide".
  filter_ab_class(
    ab_class = "glycopeptide",
    tbl = tbl,
    result = result,
    scope = scope,
    ...
  )
}
|
||||
|
||||
#' @rdname filter_ab_class
#' @export
filter_macrolides <- function(tbl, result = NULL, scope = "any", ...) {
  # Thin wrapper: filter_ab_class() with the class fixed to "macrolide".
  filter_ab_class(
    ab_class = "macrolide",
    tbl = tbl,
    result = result,
    scope = scope,
    ...
  )
}
|
||||
|
||||
#' @rdname filter_ab_class
#' @export
filter_tetracyclines <- function(tbl, result = NULL, scope = "any", ...) {
  # Thin wrapper: filter_ab_class() with the class fixed to "tetracycline".
  filter_ab_class(
    ab_class = "tetracycline",
    tbl = tbl,
    result = result,
    scope = scope,
    ...
  )
}
|
||||
|
||||
#' @importFrom dplyr %>% filter_at any_vars select vars
ab_class_vars <- function(ab_class) {
  # Collect every identifier (columns `atc` through `trade_name` of the
  # antibiotics data set) of the antibiotics whose `atc_group1` or
  # `atc_group2` matches `ab_class`. Returns a character vector of unique
  # identifiers; cells that hold several values separated by "|" are split
  # into their individual values.
  ab_values <- AMR::antibiotics %>%
    filter_at(vars(c("atc_group1", "atc_group2")), any_vars(. %like% ab_class)) %>%
    select(atc:trade_name) %>%
    as.matrix() %>%
    as.character()

  # Missing cells must be dropped BEFORE pasting: paste() would turn them
  # into the literal string "NA", which could then never be recognised as
  # missing and would be returned as if it were a valid identifier.
  ab_vars <- ab_values[!is.na(ab_values)] %>%
    paste(collapse = "|") %>%
    strsplit("|", fixed = TRUE) %>%
    unlist() %>%
    unique()

  # Empty tokens (from empty cells or doubled separators) identify nothing.
  ab_vars[!is.na(ab_vars) & nzchar(ab_vars)]
}
|
||||
|
||||
#' @importFrom dplyr %>% filter pull
ab_class_atcgroups <- function(ab_class) {
  # Rows of the antibiotics data set whose `atc` value is among the
  # identifiers that ab_class_vars() returns for this class.
  class_rows <- filter(AMR::antibiotics, atc %in% ab_class_vars(ab_class))
  # Distinct values of the `atc_group2` column, lower-cased ...
  group_names <- tolower(unique(pull(class_rows, "atc_group2")))
  # ... and joined into a single "/"-separated label.
  paste(group_names, collapse = "/")
}
|
51
R/mo.R
51
R/mo.R
@ -174,14 +174,26 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE,
|
||||
# check onLoad() in R/zzz.R: data tables are created there.
|
||||
}
|
||||
|
||||
if (deparse(substitute(reference_df)) == "get_mo_source()"
|
||||
if (mo_source_isvalid(reference_df)
|
||||
& isFALSE(Becker)
|
||||
& isFALSE(Lancefield)
|
||||
& !is.null(reference_df)
|
||||
& all(x %in% reference_df[,1])) {
|
||||
& all(x %in% reference_df[,1][[1]])) {
|
||||
|
||||
# has valid own reference_df
|
||||
# (data.table not faster here)
|
||||
reference_df <- reference_df %>% filter(!is.na(mo))
|
||||
# keep only first two columns, second must be mo
|
||||
if (colnames(reference_df)[1] == "mo") {
|
||||
reference_df <- reference_df[, c(2, 1)]
|
||||
} else {
|
||||
reference_df <- reference_df[, c(1, 2)]
|
||||
}
|
||||
colnames(reference_df)[1] <- "x"
|
||||
# remove factors, just keep characters
|
||||
suppressWarnings(
|
||||
reference_df[] <- lapply(reference_df, as.character)
|
||||
)
|
||||
suppressWarnings(
|
||||
y <- data.frame(x = x, stringsAsFactors = FALSE) %>%
|
||||
left_join(reference_df, by = "x") %>%
|
||||
@ -277,8 +289,12 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
|
||||
# only check the uniques, which is way faster
|
||||
x <- unique(x)
|
||||
# remove empty values (to later fill them in again with NAs)
|
||||
# ("xxx" is WHONET code for 'no growth')
|
||||
x <- x[!is.na(x) & !is.null(x) & !identical(x, "") & !identical(x, "xxx")]
|
||||
# ("xxx" is WHONET code for 'no growth' and "con" is WHONET code for 'contamination')
|
||||
x <- x[!is.na(x)
|
||||
& !is.null(x)
|
||||
& !identical(x, "")
|
||||
& !identical(x, "xxx")
|
||||
& !identical(x, "con")]
|
||||
|
||||
# conversion of old MO codes from v0.5.0 (ITIS) to later versions (Catalogue of Life)
|
||||
if (any(x %like% "^[BFP]_[A-Z]{3,7}") & !all(x %in% microorganisms$mo)) {
|
||||
@ -292,14 +308,18 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
|
||||
|
||||
# defined df to check for
|
||||
if (!is.null(reference_df)) {
|
||||
if (!is.data.frame(reference_df) | NCOL(reference_df) < 2) {
|
||||
stop('`reference_df` must be a data.frame with at least two columns.', call. = FALSE)
|
||||
}
|
||||
if (!"mo" %in% colnames(reference_df)) {
|
||||
if (!mo_source_isvalid(reference_df)) {
|
||||
stop("`reference_df` must contain a column `mo` with values from the 'microorganisms' data set.", call. = FALSE)
|
||||
}
|
||||
reference_df <- reference_df %>% filter(!is.na(mo))
|
||||
# # remove factors, just keep characters
|
||||
# keep only first two columns, second must be mo
|
||||
if (colnames(reference_df)[1] == "mo") {
|
||||
reference_df <- reference_df[, c(2, 1)]
|
||||
} else {
|
||||
reference_df <- reference_df[, c(1, 2)]
|
||||
}
|
||||
colnames(reference_df)[1] <- "x"
|
||||
# remove factors, just keep characters
|
||||
suppressWarnings(
|
||||
reference_df[] <- lapply(reference_df, as.character)
|
||||
)
|
||||
@ -314,8 +334,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
|
||||
return(rep(NA_character_, length(x_input)))
|
||||
}
|
||||
|
||||
} else if (all(x %in% reference_df[, 1])
|
||||
& all(reference_df[, "mo"] %in% AMR::microorganisms$mo)) {
|
||||
} else if (all(x %in% reference_df[, 1][[1]])) {
|
||||
# all in reference df
|
||||
colnames(reference_df)[1] <- "x"
|
||||
suppressWarnings(
|
||||
@ -420,12 +439,12 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
|
||||
next
|
||||
}
|
||||
|
||||
if (any(x_trimmed[i] %in% c(NA, ""))) {
|
||||
if (any(x_trimmed[i] %in% c(NA, "", "xxx", "con"))) {
|
||||
x[i] <- NA_character_
|
||||
next
|
||||
}
|
||||
|
||||
if (tolower(x_trimmed[i]) %in% c("xxx", "other", "none", "unknown")) {
|
||||
if (tolower(x_trimmed[i]) %in% c("other", "none", "unknown")) {
|
||||
# empty and nonsense values, ignore without warning
|
||||
x[i] <- microorganismsDT[mo == "UNKNOWN", ..property][[1]]
|
||||
next
|
||||
@ -959,7 +978,11 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
|
||||
# Wrap up ----------------------------------------------------------------
|
||||
|
||||
# comply to x, which is also unique and without empty values
|
||||
x_input_unique_nonempty <- unique(x_input[!is.na(x_input) & !is.null(x_input) & !identical(x_input, "") & !identical(x_input, "xxx")])
|
||||
x_input_unique_nonempty <- unique(x_input[!is.na(x_input)
|
||||
& !is.null(x_input)
|
||||
& !identical(x_input, "")
|
||||
& !identical(x_input, "xxx")
|
||||
& !identical(x_input, "con")])
|
||||
|
||||
# left join the found results to the original input values (x_input)
|
||||
df_found <- data.frame(input = as.character(x_input_unique_nonempty),
|
||||
|
@ -117,22 +117,6 @@ set_mo_source <- function(path) {
|
||||
stop("File not found: ", path)
|
||||
}
|
||||
|
||||
is_valid <- function(df) {
|
||||
valid <- TRUE
|
||||
if (!is.data.frame(df)) {
|
||||
valid <- FALSE
|
||||
} else if (!"mo" %in% colnames(df)) {
|
||||
valid <- FALSE
|
||||
} else if (all(as.data.frame(df)[, 1] == "")) {
|
||||
valid <- FALSE
|
||||
} else if (!all(df$mo %in% c("", AMR::microorganisms$mo))) {
|
||||
valid <- FALSE
|
||||
} else if (NCOL(df) < 2) {
|
||||
valid <- FALSE
|
||||
}
|
||||
valid
|
||||
}
|
||||
|
||||
if (path %like% '[.]rds$') {
|
||||
df <- readRDS(path)
|
||||
|
||||
@ -151,13 +135,13 @@ set_mo_source <- function(path) {
|
||||
try(
|
||||
df <- utils::read.table(header = TRUE, sep = ",", stringsAsFactors = FALSE),
|
||||
silent = TRUE)
|
||||
if (!is_valid(df)) {
|
||||
if (!mo_source_isvalid(df)) {
|
||||
# try tab
|
||||
try(
|
||||
df <- utils::read.table(header = TRUE, sep = "\t", stringsAsFactors = FALSE),
|
||||
silent = TRUE)
|
||||
}
|
||||
if (!is_valid(df)) {
|
||||
if (!mo_source_isvalid(df)) {
|
||||
# try pipe
|
||||
try(
|
||||
df <- utils::read.table(header = TRUE, sep = "|", stringsAsFactors = FALSE),
|
||||
@ -165,10 +149,12 @@ set_mo_source <- function(path) {
|
||||
}
|
||||
}
|
||||
|
||||
if (!is_valid(df)) {
|
||||
if (!mo_source_isvalid(df)) {
|
||||
stop("File must contain a column with self-defined values and a reference column `mo` with valid values from the `microorganisms` data set.")
|
||||
}
|
||||
|
||||
df <- df %>% filter(!is.na(mo))
|
||||
|
||||
# keep only first two columns, second must be mo
|
||||
if (colnames(df)[1] == "mo") {
|
||||
df <- df[, c(2, 1)]
|
||||
@ -213,3 +199,22 @@ get_mo_source <- function() {
|
||||
|
||||
readRDS("~/.mo_source.rds")
|
||||
}
|
||||
|
||||
mo_source_isvalid <- function(x) {
  # Decide whether `x` can be used as a microorganism reference source.
  # Returns TRUE when the argument was written literally as
  # `get_mo_source()`, when `x` is NULL, when `x` is identical to the value
  # of get_mo_source(), or when `x` is a data.frame with a column `mo` whose
  # values are all either "" or codes present in AMR::microorganisms$mo.
  # Returns FALSE otherwise.

  # deparse() may return several strings for a long argument expression;
  # identical() keeps the condition a single TRUE/FALSE in that case, where
  # `==` would hand a longer logical vector to `if`.
  if (identical(deparse(substitute(x)), "get_mo_source()")) {
    return(TRUE)
  }
  # Checked before the source lookup so a NULL input needs no call to
  # get_mo_source().
  if (is.null(x)) {
    return(TRUE)
  }
  if (identical(x, get_mo_source())) {
    return(TRUE)
  }
  if (!is.data.frame(x)) {
    return(FALSE)
  }
  if (!"mo" %in% colnames(x)) {
    return(FALSE)
  }
  all(x$mo %in% c("", AMR::microorganisms$mo))
}
|
||||
|
Reference in New Issue
Block a user