con WHONET, filter ab class

2025-08-24 23:42:18 +02:00 · 2019-03-05 22:47:42 +01:00
parent e835525cf6
commit 74e0ae21fd
73 changed files with 1174 additions and 375 deletions
--- a/R/filter_ab_class.R
+++ b/R/filter_ab_class.R
@@ -0,0 +1,268 @@
+# ==================================================================== #
+# TITLE                                                                #
+# Antimicrobial Resistance (AMR) Analysis                              #
+#                                                                      #
+# SOURCE                                                               #
+# https://gitlab.com/msberends/AMR                                     #
+#                                                                      #
+# LICENCE                                                              #
+# (c) 2019 Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl)  #
+#                                                                      #
+# This R package is free software; you can freely use and distribute   #
+# it for both personal and commercial purposes under the terms of the  #
+# GNU General Public License version 2.0 (GNU GPL-2), as published by  #
+# the Free Software Foundation.                                        #
+#                                                                      #
+# This R package was created for academic research and was publicly    #
+# released in the hope that it will be useful, but it comes WITHOUT    #
+# ANY WARRANTY OR LIABILITY.                                           #
+# Visit our website for more info: https://msberends.gitlab.io/AMR.    #
+# ==================================================================== #
+
+#' Filter on antibiotic class
+#'
+#' Filter on specific antibiotic variables based on their class (ATC groups).
+#' @param tbl a data set
+#' @param ab_class an antimicrobial class, like \code{"carbapenems"}
+#' @param result an antibiotic result: S, I or R (or a combination of more of them)
+#' @param scope whether \code{result} must be found in \code{"any"} (default) or in \code{"all"} of the antibiotic columns of the class
+#' @param ... parameters passed on to \code{\link[dplyr]{filter_at}}
+#' @details The \code{\link{antibiotics}} data set will be searched for \code{ab_class} in the columns \code{atc_group1} and \code{atc_group2} (case-insensitive). Next, \code{tbl} will be checked for column names with a value in any abbreviations, codes or official names found in the \code{antibiotics} data set.
+#' @rdname filter_ab_class
+#' @importFrom dplyr filter_at %>% select vars any_vars all_vars
+#' @importFrom crayon bold blue
+#' @export
+#' @examples
+#' library(dplyr)
+#'
+#' # filter on isolates that have any result for any aminoglycoside
+#' septic_patients %>% filter_aminoglycosides()
+#'
+#' # this is essentially the same as:
+#' septic_patients %>%
+#'   filter_at(.vars = vars(c("gent", "tobr", "amik", "kana")),
+#'             .vars_predicate = any_vars(. %in% c("S", "I", "R")))
+#'
+#'
+#' # filter on isolates that show resistance to ANY aminoglycoside
+#' septic_patients %>% filter_aminoglycosides("R")
+#'
+#' # filter on isolates that show resistance to ALL aminoglycosides
+#' septic_patients %>% filter_aminoglycosides("R", "all")
+#'
+#' # filter on isolates that show resistance to
+#' # any aminoglycoside and any fluoroquinolone
+#' septic_patients %>%
+#'   filter_aminoglycosides("R", "any") %>%
+#'   filter_fluoroquinolones("R", "any")
+filter_ab_class <- function(tbl,
+                            ab_class,
+                            result = NULL,
+                            scope = "any",
+                            ...) {
+  # only the first element of `scope` is considered
+  scope <- scope[1L]
+  # without an explicit `result`, every S/I/R value counts as a hit
+  if (is.null(result)) {
+    result <- c("S", "I", "R")
+  }
+
+  if (!all(result %in% c("S", "I", "R"))) {
+    stop("`result` must be one or more of: S, I, R", call. = FALSE)
+  }
+  # length test: NULL/empty `scope` passes `%in%` but breaks the `if` below
+  if (length(scope) != 1L || !scope %in% c("any", "all")) {
+    stop("`scope` must be one of: any, all", call. = FALSE)
+  }
+
+  # columns of `tbl` whose name equals (ignoring case) a value from ab_class_vars()
+  vars_df <- colnames(tbl)[tolower(colnames(tbl)) %in% tolower(ab_class_vars(ab_class))]
+  atc_groups <- ab_class_atcgroups(ab_class)
+  if (!nzchar(atc_groups)) {
+    # no antibiotic matched `ab_class`: label messages with the search term
+    atc_groups <- toString(ab_class)
+  }
+
+  if (length(vars_df) == 0) {
+    warning(paste0("no antibiotics of class ", atc_groups, " found, leaving data unchanged"), call. = FALSE)
+    return(tbl)
+  }
+
+  operator <- if (length(result) == 1) " is " else " is one of "
+  scope_txt <- if (scope == "any") " or " else " and "
+  # any_vars()/all_vars() decide whether one or every column must match
+  scope_fn <- if (scope == "any") any_vars else all_vars
+  message(blue(paste0("Filtering on ", atc_groups, ": ", scope, " of ",
+                      paste(bold(vars_df), collapse = scope_txt), operator, toString(result))))
+  tbl %>%
+    filter_at(.vars = vars(vars_df),
+              .vars_predicate = scope_fn(. %in% result),
+              ...)
+}
+
+#' @rdname filter_ab_class
+#' @export
+filter_aminoglycosides <- function(tbl, result = NULL, scope = "any", ...) {
+  # Thin wrapper: fixes `ab_class` and forwards all other arguments unchanged.
+  filter_ab_class(
+    tbl = tbl,
+    ab_class = "aminoglycoside",
+    result = result,
+    scope = scope,
+    ...
+  )
+}
+
+#' @rdname filter_ab_class
+#' @export
+filter_carbapenems <- function(tbl, result = NULL, scope = "any", ...) {
+  # Thin wrapper: fixes `ab_class` and forwards all other arguments unchanged.
+  filter_ab_class(
+    tbl = tbl,
+    ab_class = "carbapenem",
+    result = result,
+    scope = scope,
+    ...
+  )
+}
+
+#' @rdname filter_ab_class
+#' @export
+filter_cephalosporins <- function(tbl, result = NULL, scope = "any", ...) {
+  # Thin wrapper: fixes `ab_class` and forwards all other arguments unchanged.
+  filter_ab_class(
+    tbl = tbl,
+    ab_class = "cephalosporin",
+    result = result,
+    scope = scope,
+    ...
+  )
+}
+
+#' @rdname filter_ab_class
+#' @export
+filter_1st_cephalosporins <- function(tbl, result = NULL, scope = "any", ...) {
+  # Thin wrapper: fixes `ab_class` and forwards all other arguments unchanged.
+  filter_ab_class(
+    tbl = tbl,
+    ab_class = "first-generation cephalosporin",
+    result = result,
+    scope = scope,
+    ...
+  )
+}
+
+#' @rdname filter_ab_class
+#' @export
+filter_2nd_cephalosporins <- function(tbl, result = NULL, scope = "any", ...) {
+  # Thin wrapper: fixes `ab_class` and forwards all other arguments unchanged.
+  filter_ab_class(
+    tbl = tbl,
+    ab_class = "second-generation cephalosporin",
+    result = result,
+    scope = scope,
+    ...
+  )
+}
+
+#' @rdname filter_ab_class
+#' @export
+filter_3rd_cephalosporins <- function(tbl, result = NULL, scope = "any", ...) {
+  # Thin wrapper: fixes `ab_class` and forwards all other arguments unchanged.
+  filter_ab_class(
+    tbl = tbl,
+    ab_class = "third-generation cephalosporin",
+    result = result,
+    scope = scope,
+    ...
+  )
+}
+
+#' @rdname filter_ab_class
+#' @export
+filter_4th_cephalosporins <- function(tbl, result = NULL, scope = "any", ...) {
+  # Thin wrapper: fixes `ab_class` and forwards all other arguments unchanged.
+  filter_ab_class(
+    tbl = tbl,
+    ab_class = "fourth-generation cephalosporin",
+    result = result,
+    scope = scope,
+    ...
+  )
+}
+
+#' @rdname filter_ab_class
+#' @export
+filter_fluoroquinolones <- function(tbl, result = NULL, scope = "any", ...) {
+  # Thin wrapper: fixes `ab_class` and forwards all other arguments unchanged.
+  filter_ab_class(
+    tbl = tbl,
+    ab_class = "fluoroquinolone",
+    result = result,
+    scope = scope,
+    ...
+  )
+}
+
+#' @rdname filter_ab_class
+#' @export
+filter_glycopeptides <- function(tbl, result = NULL, scope = "any", ...) {
+  # Thin wrapper: fixes `ab_class` and forwards all other arguments unchanged.
+  filter_ab_class(
+    tbl = tbl,
+    ab_class = "glycopeptide",
+    result = result,
+    scope = scope,
+    ...
+  )
+}
+
+#' @rdname filter_ab_class
+#' @export
+filter_macrolides <- function(tbl, result = NULL, scope = "any", ...) {
+  # Thin wrapper: fixes `ab_class` and forwards all other arguments unchanged.
+  filter_ab_class(
+    tbl = tbl,
+    ab_class = "macrolide",
+    result = result,
+    scope = scope,
+    ...
+  )
+}
+
+#' @rdname filter_ab_class
+#' @export
+filter_tetracyclines <- function(tbl, result = NULL, scope = "any", ...) {
+  # Thin wrapper: fixes `ab_class` and forwards all other arguments unchanged.
+  filter_ab_class(
+    tbl = tbl,
+    ab_class = "tetracycline",
+    result = result,
+    scope = scope,
+    ...
+  )
+}
+
+#' @importFrom dplyr %>% filter_at any_vars select
+ab_class_vars <- function(ab_class) {
+  # values in columns atc:trade_name of rows whose ATC group 1 or 2 is %like% `ab_class`
+  ab_vars <- AMR::antibiotics %>%
+    filter_at(vars(c("atc_group1", "atc_group2")), any_vars(. %like% ab_class)) %>%
+    select(atc:trade_name) %>%
+    as.matrix() %>%
+    as.character()
+  # drop NAs *before* paste(): paste() would coerce them to the string "NA"
+  ab_vars <- paste(ab_vars[!is.na(ab_vars)], collapse = "|")
+  # splitting on "|" undoes the join and separates cells that contain "|" themselves
+  unique(unlist(strsplit(ab_vars, "|", fixed = TRUE)))
+}
+
+#' @importFrom dplyr %>% filter pull
+ab_class_atcgroups <- function(ab_class) {
+  # distinct, lower-cased atc_group2 values of all antibiotics whose ATC code
+  # is among ab_class_vars(ab_class), joined with "/" into a single string
+  matched <- AMR::antibiotics %>%
+    filter(atc %in% ab_class_vars(ab_class))
+  groups <- tolower(unique(pull(matched, "atc_group2")))
+  paste(groups, collapse = "/")
+}
--- a/R/mo.R
+++ b/R/mo.R
@@ -174,14 +174,26 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE,
    # check onLoad() in R/zzz.R: data tables are created there.
  }

-  if (deparse(substitute(reference_df)) == "get_mo_source()"
+  if (mo_source_isvalid(reference_df)
      & isFALSE(Becker)
      & isFALSE(Lancefield)
      & !is.null(reference_df)
-      & all(x %in% reference_df[,1])) {
+      & all(x %in% reference_df[,1][[1]])) {
+
    # has valid own reference_df
    # (data.table not faster here)
+    reference_df <- reference_df %>% filter(!is.na(mo))
+    # keep only first two columns, second must be mo
+    if (colnames(reference_df)[1] == "mo") {
+      reference_df <- reference_df[, c(2, 1)]
+    } else {
+      reference_df <- reference_df[, c(1, 2)]
+    }
    colnames(reference_df)[1] <- "x"
+    # remove factors, just keep characters
+    suppressWarnings(
+      reference_df[] <- lapply(reference_df, as.character)
+    )
    suppressWarnings(
      y <- data.frame(x = x, stringsAsFactors = FALSE) %>%
        left_join(reference_df, by = "x") %>%
@@ -277,8 +289,12 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
  # only check the uniques, which is way faster
  x <- unique(x)
  # remove empty values (to later fill them in again with NAs)
-  # ("xxx" is WHONET code for 'no growth')
-  x <- x[!is.na(x) & !is.null(x) & !identical(x, "") & !identical(x, "xxx")]
+  # ("xxx" is WHONET code for 'no growth' and "con" is WHONET code for 'contamination')
+  x <- x[!is.na(x)
+         & !is.null(x)
+         & !identical(x, "")
+         & !identical(x, "xxx")
+         & !identical(x, "con")]

  # conversion of old MO codes from v0.5.0 (ITIS) to later versions (Catalogue of Life)
  if (any(x %like% "^[BFP]_[A-Z]{3,7}") & !all(x %in% microorganisms$mo)) {
@@ -292,14 +308,18 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,

  # defined df to check for
  if (!is.null(reference_df)) {
-    if (!is.data.frame(reference_df) | NCOL(reference_df) < 2) {
-      stop('`reference_df` must be a data.frame with at least two columns.', call. = FALSE)
-    }
-    if (!"mo" %in% colnames(reference_df)) {
+    if (!mo_source_isvalid(reference_df)) {
      stop("`reference_df` must contain a column `mo` with values from the 'microorganisms' data set.", call. = FALSE)
    }
    reference_df <- reference_df %>% filter(!is.na(mo))
-    # # remove factors, just keep characters
+    # keep only first two columns, second must be mo
+    if (colnames(reference_df)[1] == "mo") {
+      reference_df <- reference_df[, c(2, 1)]
+    } else {
+      reference_df <- reference_df[, c(1, 2)]
+    }
+    colnames(reference_df)[1] <- "x"
+    # remove factors, just keep characters
    suppressWarnings(
      reference_df[] <- lapply(reference_df, as.character)
    )
@@ -314,8 +334,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
      return(rep(NA_character_, length(x_input)))
    }

-  } else if (all(x %in% reference_df[, 1])
-             & all(reference_df[, "mo"] %in% AMR::microorganisms$mo)) {
+  } else if (all(x %in% reference_df[, 1][[1]])) {
    # all in reference df
    colnames(reference_df)[1] <- "x"
    suppressWarnings(
@@ -420,12 +439,12 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
        next
      }

-      if (any(x_trimmed[i] %in% c(NA, ""))) {
+      if (any(x_trimmed[i] %in% c(NA, "", "xxx", "con"))) {
        x[i] <- NA_character_
        next
      }

-      if (tolower(x_trimmed[i]) %in% c("xxx", "other", "none", "unknown")) {
+      if (tolower(x_trimmed[i]) %in% c("other", "none", "unknown")) {
        # empty and nonsense values, ignore without warning
        x[i] <- microorganismsDT[mo == "UNKNOWN", ..property][[1]]
        next
@@ -959,7 +978,11 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
  # Wrap up ----------------------------------------------------------------

  # comply to x, which is also unique and without empty values
-  x_input_unique_nonempty <- unique(x_input[!is.na(x_input) & !is.null(x_input) & !identical(x_input, "") & !identical(x_input, "xxx")])
+  x_input_unique_nonempty <- unique(x_input[!is.na(x_input)
+                                            & !is.null(x_input)
+                                            & !identical(x_input, "")
+                                            & !identical(x_input, "xxx")
+                                            & !identical(x_input, "con")])

  # left join the found results to the original input values (x_input)
  df_found <- data.frame(input = as.character(x_input_unique_nonempty),
--- a/R/mo_source.R
+++ b/R/mo_source.R
@@ -117,22 +117,6 @@ set_mo_source <- function(path) {
    stop("File not found: ", path)
  }

-  is_valid <- function(df) {
-    valid <- TRUE
-    if (!is.data.frame(df)) {
-      valid <- FALSE
-    } else if (!"mo" %in% colnames(df)) {
-      valid <- FALSE
-    } else if (all(as.data.frame(df)[, 1] == "")) {
-      valid <- FALSE
-    } else if (!all(df$mo %in% c("", AMR::microorganisms$mo))) {
-      valid <- FALSE
-    } else if (NCOL(df) < 2) {
-      valid <- FALSE
-    }
-    valid
-  }
-
  if (path %like% '[.]rds$') {
    df <- readRDS(path)

@@ -151,13 +135,13 @@ set_mo_source <- function(path) {
    try(
      df <- utils::read.table(header = TRUE, sep = ",", stringsAsFactors = FALSE),
      silent = TRUE)
-    if (!is_valid(df)) {
+    if (!mo_source_isvalid(df)) {
      # try tab
      try(
        df <- utils::read.table(header = TRUE, sep = "\t", stringsAsFactors = FALSE),
        silent = TRUE)
    }
-    if (!is_valid(df)) {
+    if (!mo_source_isvalid(df)) {
      # try pipe
      try(
        df <- utils::read.table(header = TRUE, sep = "|", stringsAsFactors = FALSE),
@@ -165,10 +149,12 @@ set_mo_source <- function(path) {
    }
  }

-  if (!is_valid(df)) {
+  if (!mo_source_isvalid(df)) {
    stop("File must contain a column with self-defined values and a reference column `mo` with valid values from the `microorganisms` data set.")
  }

+  df <- df %>% filter(!is.na(mo))
+
  # keep only first two columns, second must be mo
  if (colnames(df)[1] == "mo") {
    df <- df[, c(2, 1)]
@@ -213,3 +199,22 @@ get_mo_source <- function() {

  readRDS("~/.mo_source.rds")
 }
+
+mo_source_isvalid <- function(x) {
+  # Valid: NULL, the value returned by get_mo_source(), or a data.frame whose
+  # `mo` column only holds "" or codes from AMR::microorganisms$mo.
+  # identical() keeps this condition length-1: deparse() may return several
+  # strings for a long expression. Only a literal get_mo_source() call matches.
+  if (identical(deparse(substitute(x)), "get_mo_source()")) {
+    return(TRUE)
+  }
+  # NULL is tested before get_mo_source() is called, saving a needless read
+  if (is.null(x) || identical(x, get_mo_source())) {
+    return(TRUE)
+  }
+  if (!is.data.frame(x) || !"mo" %in% colnames(x)) {
+    return(FALSE)
+  }
+  # every reference code must be known ("" is tolerated)
+  all(x$mo %in% c("", AMR::microorganisms$mo))
+}