1
0
mirror of https://github.com/msberends/AMR.git synced 2025-07-08 08:32:04 +02:00

con WHONET, filter ab class

This commit is contained in:
2019-03-05 22:47:42 +01:00
parent e835525cf6
commit 74e0ae21fd
73 changed files with 1174 additions and 375 deletions

268
R/filter_ab_class.R Normal file
View File

@ -0,0 +1,268 @@
# ==================================================================== #
# TITLE #
# Antimicrobial Resistance (AMR) Analysis #
# #
# SOURCE #
# https://gitlab.com/msberends/AMR #
# #
# LICENCE #
# (c) 2019 Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl) #
# #
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
# the Free Software Foundation. #
# #
# This R package was created for academic research and was publicly #
# released in the hope that it will be useful, but it comes WITHOUT #
# ANY WARRANTY OR LIABILITY. #
# Visit our website for more info: https://msberends.gitlab.io/AMR. #
# ==================================================================== #
#' Filter on antibiotic class
#'
#' Filter on specific antibiotic variables based on their class (ATC groups).
#' @param tbl a data set
#' @param ab_class an antimicrobial class, like \code{"carbapenems"}
#' @param result an antibiotic result: S, I or R (or a combination of more of them)
#' @param scope the scope to check which variables to check, can be \code{"any"} (default) or \code{"all"}
#' @param ... parameters passed on to \code{\link[dplyr]{filter_at}}
#' @details The \code{\link{antibiotics}} data set will be searched for \code{ab_class} in the columns \code{atc_group1} and \code{atc_group2} (case-insensitive). Next, \code{tbl} will be checked for column names with a value in any abbreviations, codes or official names found in the \code{antibiotics} data set.
#' @return The rows of \code{tbl} for which any/all (see \code{scope}) of the matching antibiotic columns contain one of \code{result}. When no column of \code{tbl} matches the class, a warning is given and \code{tbl} is returned unchanged.
#' @rdname filter_ab_class
#' @importFrom dplyr filter_at %>% select vars any_vars all_vars
#' @importFrom crayon bold blue
#' @export
#' @examples
#' library(dplyr)
#'
#' # filter on isolates that have any result for any aminoglycoside
#' septic_patients %>% filter_aminoglycosides()
#'
#' # this is essentially the same as:
#' septic_patients %>%
#'   filter_at(.vars = vars(c("gent", "tobr", "amik", "kana")),
#'             .vars_predicate = any_vars(. %in% c("S", "I", "R")))
#'
#'
#' # filter on isolates that show resistance to ANY aminoglycoside
#' septic_patients %>% filter_aminoglycosides("R")
#'
#' # filter on isolates that show resistance to ALL aminoglycosides
#' septic_patients %>% filter_aminoglycosides("R", "all")
#'
#' # filter on isolates that show resistance to
#' # any aminoglycoside and any fluoroquinolone
#' septic_patients %>%
#'   filter_aminoglycosides("R", "any") %>%
#'   filter_fluoroquinolones("R", "any")
filter_ab_class <- function(tbl,
                            ab_class,
                            result = NULL,
                            scope = "any",
                            ...) {
  # only the first element of `scope` is considered
  scope <- scope[1L]
  # no explicit result means: any valid interpretation
  if (is.null(result)) {
    result <- c("S", "I", "R")
  }
  if (!all(result %in% c("S", "I", "R"))) {
    stop("`result` must be one or more of: S, I, R", call. = FALSE)
  }
  # the length check is needed because `NULL[1L]` is still NULL and
  # all(NULL %in% ...) is TRUE, which would let a NULL scope slip through and
  # fail later with an unrelated error
  if (length(scope) != 1L || !scope %in% c("any", "all")) {
    stop("`scope` must be one of: any, all", call. = FALSE)
  }

  # columns of `tbl` whose (case-insensitive) name is a known name of the class
  vars_df <- colnames(tbl)[tolower(colnames(tbl)) %in% tolower(ab_class_vars(ab_class))]
  # readable class label used in the message and the warning below
  atc_groups <- ab_class_atcgroups(ab_class)

  if (length(vars_df) > 0) {
    if (length(result) == 1) {
      operator <- " is "
    } else {
      operator <- " is one of "
    }
    if (scope == "any") {
      scope_txt <- " or "
      scope_fn <- any_vars
    } else {
      scope_txt <- " and "
      scope_fn <- all_vars
    }
    # tell the user which columns are filtered and how
    message(blue(paste0("Filtering on ", atc_groups, ": ", scope, " of ",
                        paste(bold(vars_df), collapse = scope_txt), operator, toString(result))))
    tbl %>%
      filter_at(.vars = vars(vars_df),
                .vars_predicate = scope_fn(. %in% result),
                ...)
  } else {
    warning(paste0("no antibiotics of class ", atc_groups, " found, leaving data unchanged"), call. = FALSE)
    tbl
  }
}
#' @rdname filter_ab_class
#' @export
filter_aminoglycosides <- function(tbl, result = NULL, scope = "any", ...) {
  # thin wrapper: filter_ab_class() with the class fixed to aminoglycosides
  filter_ab_class(tbl = tbl, ab_class = "aminoglycoside",
                  result = result, scope = scope, ...)
}
#' @rdname filter_ab_class
#' @export
filter_carbapenems <- function(tbl, result = NULL, scope = "any", ...) {
  # thin wrapper: filter_ab_class() with the class fixed to carbapenems
  filter_ab_class(tbl = tbl, ab_class = "carbapenem",
                  result = result, scope = scope, ...)
}
#' @rdname filter_ab_class
#' @export
filter_cephalosporins <- function(tbl, result = NULL, scope = "any", ...) {
  # thin wrapper: filter_ab_class() with the class fixed to cephalosporins
  filter_ab_class(tbl = tbl, ab_class = "cephalosporin",
                  result = result, scope = scope, ...)
}
#' @rdname filter_ab_class
#' @export
filter_1st_cephalosporins <- function(tbl, result = NULL, scope = "any", ...) {
  # thin wrapper: filter_ab_class() fixed to first-generation cephalosporins
  filter_ab_class(tbl = tbl, ab_class = "first-generation cephalosporin",
                  result = result, scope = scope, ...)
}
#' @rdname filter_ab_class
#' @export
filter_2nd_cephalosporins <- function(tbl, result = NULL, scope = "any", ...) {
  # thin wrapper: filter_ab_class() fixed to second-generation cephalosporins
  filter_ab_class(tbl = tbl, ab_class = "second-generation cephalosporin",
                  result = result, scope = scope, ...)
}
#' @rdname filter_ab_class
#' @export
filter_3rd_cephalosporins <- function(tbl, result = NULL, scope = "any", ...) {
  # thin wrapper: filter_ab_class() fixed to third-generation cephalosporins
  filter_ab_class(tbl = tbl, ab_class = "third-generation cephalosporin",
                  result = result, scope = scope, ...)
}
#' @rdname filter_ab_class
#' @export
filter_4th_cephalosporins <- function(tbl, result = NULL, scope = "any", ...) {
  # thin wrapper: filter_ab_class() fixed to fourth-generation cephalosporins
  filter_ab_class(tbl = tbl, ab_class = "fourth-generation cephalosporin",
                  result = result, scope = scope, ...)
}
#' @rdname filter_ab_class
#' @export
filter_fluoroquinolones <- function(tbl, result = NULL, scope = "any", ...) {
  # thin wrapper: filter_ab_class() with the class fixed to fluoroquinolones
  filter_ab_class(tbl = tbl, ab_class = "fluoroquinolone",
                  result = result, scope = scope, ...)
}
#' @rdname filter_ab_class
#' @export
filter_glycopeptides <- function(tbl, result = NULL, scope = "any", ...) {
  # thin wrapper: filter_ab_class() with the class fixed to glycopeptides
  filter_ab_class(tbl = tbl, ab_class = "glycopeptide",
                  result = result, scope = scope, ...)
}
#' @rdname filter_ab_class
#' @export
filter_macrolides <- function(tbl, result = NULL, scope = "any", ...) {
  # thin wrapper: filter_ab_class() with the class fixed to macrolides
  filter_ab_class(tbl = tbl, ab_class = "macrolide",
                  result = result, scope = scope, ...)
}
#' @rdname filter_ab_class
#' @export
filter_tetracyclines <- function(tbl, result = NULL, scope = "any", ...) {
  # thin wrapper: filter_ab_class() with the class fixed to tetracyclines
  filter_ab_class(tbl = tbl, ab_class = "tetracycline",
                  result = result, scope = scope, ...)
}
#' @importFrom dplyr %>% filter_at any_vars select
ab_class_vars <- function(ab_class) {
  # Collect every value found in the columns `atc` up to `trade_name` of the
  # antibiotics that match `ab_class` in `atc_group1` or `atc_group2`.
  # Returns the unique, non-missing values as a character vector; these serve
  # as candidate column names for filter_ab_class().
  ab_vars <- AMR::antibiotics %>%
    filter_at(vars(c("atc_group1", "atc_group2")), any_vars(. %like% ab_class)) %>%
    select(atc:trade_name) %>%
    as.matrix() %>%
    as.character()
  # drop missing values BEFORE pasting: paste() turns NA into the literal
  # string "NA", which would then survive any later is.na() check and be
  # treated as a valid name
  ab_vars <- ab_vars[!is.na(ab_vars)]
  # a single cell can hold several values separated by "|", so glue all cells
  # together with that separator and split on it again
  ab_vars %>%
    paste(collapse = "|") %>%
    strsplit("|", fixed = TRUE) %>%
    unlist() %>%
    unique()
}
#' @importFrom dplyr %>% filter pull
ab_class_atcgroups <- function(ab_class) {
  # rows of the antibiotics data set whose `atc` value is one of the names
  # returned by ab_class_vars() for this class
  class_rows <- filter(AMR::antibiotics, atc %in% ab_class_vars(ab_class))
  # distinct `atc_group2` values of those rows, lower-cased
  group_names <- tolower(unique(pull(class_rows, "atc_group2")))
  # one label, groups separated by a slash
  paste(group_names, collapse = "/")
}

51
R/mo.R
View File

@ -174,14 +174,26 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE,
# check onLoad() in R/zzz.R: data tables are created there.
}
if (deparse(substitute(reference_df)) == "get_mo_source()"
if (mo_source_isvalid(reference_df)
& isFALSE(Becker)
& isFALSE(Lancefield)
& !is.null(reference_df)
& all(x %in% reference_df[,1])) {
& all(x %in% reference_df[,1][[1]])) {
# has valid own reference_df
# (data.table not faster here)
reference_df <- reference_df %>% filter(!is.na(mo))
# keep only first two columns, second must be mo
if (colnames(reference_df)[1] == "mo") {
reference_df <- reference_df[, c(2, 1)]
} else {
reference_df <- reference_df[, c(1, 2)]
}
colnames(reference_df)[1] <- "x"
# remove factors, just keep characters
suppressWarnings(
reference_df[] <- lapply(reference_df, as.character)
)
suppressWarnings(
y <- data.frame(x = x, stringsAsFactors = FALSE) %>%
left_join(reference_df, by = "x") %>%
@ -277,8 +289,12 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
# only check the uniques, which is way faster
x <- unique(x)
# remove empty values (to later fill them in again with NAs)
# ("xxx" is WHONET code for 'no growth')
x <- x[!is.na(x) & !is.null(x) & !identical(x, "") & !identical(x, "xxx")]
# ("xxx" is WHONET code for 'no growth' and "con" is WHONET code for 'contamination')
x <- x[!is.na(x)
& !is.null(x)
& !identical(x, "")
& !identical(x, "xxx")
& !identical(x, "con")]
# conversion of old MO codes from v0.5.0 (ITIS) to later versions (Catalogue of Life)
if (any(x %like% "^[BFP]_[A-Z]{3,7}") & !all(x %in% microorganisms$mo)) {
@ -292,14 +308,18 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
# defined df to check for
if (!is.null(reference_df)) {
if (!is.data.frame(reference_df) | NCOL(reference_df) < 2) {
stop('`reference_df` must be a data.frame with at least two columns.', call. = FALSE)
}
if (!"mo" %in% colnames(reference_df)) {
if (!mo_source_isvalid(reference_df)) {
stop("`reference_df` must contain a column `mo` with values from the 'microorganisms' data set.", call. = FALSE)
}
reference_df <- reference_df %>% filter(!is.na(mo))
# # remove factors, just keep characters
# keep only first two columns, second must be mo
if (colnames(reference_df)[1] == "mo") {
reference_df <- reference_df[, c(2, 1)]
} else {
reference_df <- reference_df[, c(1, 2)]
}
colnames(reference_df)[1] <- "x"
# remove factors, just keep characters
suppressWarnings(
reference_df[] <- lapply(reference_df, as.character)
)
@ -314,8 +334,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
return(rep(NA_character_, length(x_input)))
}
} else if (all(x %in% reference_df[, 1])
& all(reference_df[, "mo"] %in% AMR::microorganisms$mo)) {
} else if (all(x %in% reference_df[, 1][[1]])) {
# all in reference df
colnames(reference_df)[1] <- "x"
suppressWarnings(
@ -420,12 +439,12 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
next
}
if (any(x_trimmed[i] %in% c(NA, ""))) {
if (any(x_trimmed[i] %in% c(NA, "", "xxx", "con"))) {
x[i] <- NA_character_
next
}
if (tolower(x_trimmed[i]) %in% c("xxx", "other", "none", "unknown")) {
if (tolower(x_trimmed[i]) %in% c("other", "none", "unknown")) {
# empty and nonsense values, ignore without warning
x[i] <- microorganismsDT[mo == "UNKNOWN", ..property][[1]]
next
@ -959,7 +978,11 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
# Wrap up ----------------------------------------------------------------
# comply to x, which is also unique and without empty values
x_input_unique_nonempty <- unique(x_input[!is.na(x_input) & !is.null(x_input) & !identical(x_input, "") & !identical(x_input, "xxx")])
x_input_unique_nonempty <- unique(x_input[!is.na(x_input)
& !is.null(x_input)
& !identical(x_input, "")
& !identical(x_input, "xxx")
& !identical(x_input, "con")])
# left join the found results to the original input values (x_input)
df_found <- data.frame(input = as.character(x_input_unique_nonempty),

View File

@ -117,22 +117,6 @@ set_mo_source <- function(path) {
stop("File not found: ", path)
}
is_valid <- function(df) {
valid <- TRUE
if (!is.data.frame(df)) {
valid <- FALSE
} else if (!"mo" %in% colnames(df)) {
valid <- FALSE
} else if (all(as.data.frame(df)[, 1] == "")) {
valid <- FALSE
} else if (!all(df$mo %in% c("", AMR::microorganisms$mo))) {
valid <- FALSE
} else if (NCOL(df) < 2) {
valid <- FALSE
}
valid
}
if (path %like% '[.]rds$') {
df <- readRDS(path)
@ -151,13 +135,13 @@ set_mo_source <- function(path) {
try(
df <- utils::read.table(header = TRUE, sep = ",", stringsAsFactors = FALSE),
silent = TRUE)
if (!is_valid(df)) {
if (!mo_source_isvalid(df)) {
# try tab
try(
df <- utils::read.table(header = TRUE, sep = "\t", stringsAsFactors = FALSE),
silent = TRUE)
}
if (!is_valid(df)) {
if (!mo_source_isvalid(df)) {
# try pipe
try(
df <- utils::read.table(header = TRUE, sep = "|", stringsAsFactors = FALSE),
@ -165,10 +149,12 @@ set_mo_source <- function(path) {
}
}
if (!is_valid(df)) {
if (!mo_source_isvalid(df)) {
stop("File must contain a column with self-defined values and a reference column `mo` with valid values from the `microorganisms` data set.")
}
df <- df %>% filter(!is.na(mo))
# keep only first two columns, second must be mo
if (colnames(df)[1] == "mo") {
df <- df[, c(2, 1)]
@ -213,3 +199,22 @@ get_mo_source <- function() {
readRDS("~/.mo_source.rds")
}
mo_source_isvalid <- function(x) {
  # Check whether `x` is acceptable as a reference table for microorganism codes.
  # Returns TRUE when `x` is the literal call `get_mo_source()`, is identical to
  # its result, or is NULL; FALSE when `x` is not a data.frame or has no `mo`
  # column; otherwise TRUE only if every `mo` value is "" or occurs in
  # AMR::microorganisms$mo.

  # deparse() returns several strings when the argument expression is long;
  # identical() keeps the condition a single TRUE/FALSE in that case, whereas
  # `==` would hand a vector to if()
  if (identical(deparse(substitute(x)), "get_mo_source()")) {
    return(TRUE)
  }
  if (identical(x, get_mo_source())) {
    return(TRUE)
  }
  if (is.null(x)) {
    return(TRUE)
  }
  if (!is.data.frame(x)) {
    return(FALSE)
  }
  if (!"mo" %in% colnames(x)) {
    return(FALSE)
  }
  all(x$mo %in% c("", AMR::microorganisms$mo))
}