(v1.2.0.9001) filter_ab_class() update

2025-07-09 04:42:22 +02:00 · 2020-06-03 11:48:00 +02:00
parent 02d07b9fb3
commit 5dc4c96b7d
18 changed files with 159 additions and 77 deletions
--- a/R/filter_ab_class.R
+++ b/R/filter_ab_class.R
@ -21,14 +21,14 @@

 #' Filter isolates on result in antimicrobial class
 #'
-#' Filter isolates on results in specific antimicrobial classes. This makes it easy to filter on isolates that were tested for e.g. any aminoglycoside.
+#' Filter isolates on results in specific antimicrobial classes. This makes it easy to filter on isolates that were tested for e.g. any aminoglycoside, or to filter on carbapenem-resistant isolates without the need to specify the drugs.
 #' @inheritSection lifecycle Stable lifecycle
 #' @param x a data set
 #' @param ab_class an antimicrobial class, like `"carbapenems"`, as can be found in [`antibiotics$group`][antibiotics]
 #' @param result an antibiotic result: S, I or R (or a combination of more of them)
 #' @param scope the scope to check which variables to check, can be `"any"` (default) or `"all"`
 #' @param ... parameters passed on to `filter_at` from the `dplyr` package
-#' @details The `group` column in [antibiotics] data set will be searched for `ab_class` (case-insensitive). If no results are found, the `atc_group1` and `atc_group2` columns will be searched. Next, `x` will be checked for column names with a value in any abbreviations, codes or official names found in the [antibiotics] data set.
+#' @details The columns `group`, `atc_group1` and `atc_group2` of the [antibiotics] data set will be searched for the input given in `ab_class` (case-insensitive). Next, `x` will be checked for column names with a value in any abbreviation, code or official name found in the [antibiotics] data set.
 #' @rdname filter_ab_class
 #' @export
 #' @examples
@ -36,6 +36,7 @@
 #' library(dplyr)
 #'
 #' # filter on isolates that have any result for any aminoglycoside
+#' example_isolates %>% filter_ab_class("aminoglycoside")
 #' example_isolates %>% filter_aminoglycosides()
 #'
 #' # this is essentially the same as (but without determination of column names):
@ -45,7 +46,7 @@
 #'
 #'
 #' # filter on isolates that show resistance to ANY aminoglycoside
-#' example_isolates %>% filter_aminoglycosides("R")
+#' example_isolates %>% filter_aminoglycosides("R", "any")
 #'
 #' # filter on isolates that show resistance to ALL aminoglycosides
 #' example_isolates %>% filter_aminoglycosides("R", "all")
@ -76,13 +77,14 @@ filter_ab_class <- function(x,
  
  # save to return later
  x_class <- class(x)
+  x.bak <- x
  x <- as.data.frame(x, stringsAsFactors = FALSE)
  
  scope <- scope[1L]
  if (is.null(result)) {
    result <- c("S", "I", "R")
  }
-  # make result = "SI" work too:
+  # make result = "SI" works too:
  result <- unlist(strsplit(result, ""))
  
  if (!all(result %in% c("S", "I", "R"))) {
@ -92,46 +94,62 @@ filter_ab_class <- function(x,
    stop("`scope` must be one of: any, all", call. = FALSE)
  }
  
-  # get only columns with class ab, mic or disk - those are AMR results
-  vars_df <- colnames(x)[sapply(x, function(y) is.rsi(y) | is.mic(y) | is.disk(y))]
-  vars_df_ab <- suppressWarnings(as.ab(vars_df))
-  # get the columns with a group names in the chosen ab class
-  vars_df <- vars_df[which(ab_group(vars_df_ab) %like% ab_class | 
-                             ab_atc_group1(vars_df_ab) %like% ab_class |
-                             ab_atc_group2(vars_df_ab) %like% ab_class)]
-  ab_group <- find_ab_group(ab_class)
-  
-  if (length(vars_df) > 0) {
-    if (length(result) == 1) {
-      operator <- " is "
-    } else {
-      operator <- " is one of "
-    }
-    if (scope == "any") {
-      scope_txt <- " or "
-      scope_fn <- any
-    } else {
-      scope_txt <- " and "
-      scope_fn <- all
-      if (length(vars_df) > 1) {
-        operator <- gsub("is", "are", operator)
-      }
-    }
-    if (length(vars_df) > 1) {
-      scope <- paste(scope, "of columns ")
-    } else {
-      scope <- "column "
-    }
-    message(font_blue(paste0("Filtering on ", ab_group, ": ", scope,
-                             paste0(font_bold(paste0("`", vars_df, "`"), collapse = NULL), collapse = scope_txt), operator, toString(result))))
-    filtered <<- as.logical(by(x, seq_len(nrow(x)),
-                              function(row) scope_fn(unlist(row[, vars_df]) %in% result, na.rm = TRUE)))
-    x <- x[which(filtered), , drop = FALSE]
-  } else {
-    message(font_blue(paste0("NOTE: no antimicrobial agents of class ", ab_group, 
-                             " (such as ", find_ab_names(ab_group), 
-                             ") found, data left unchanged.")))
+  # get all columns in data with names that resemble antibiotics
+  ab_in_data <- suppressMessages(get_column_abx(x))
+  if (length(ab_in_data) == 0) {
+    message(font_blue("NOTE: no antimicrobial agents found, data left unchanged."))
+    return(x.bak)
  }
+  # get reference data
+  ab_class <- gsub("[^a-zA-Z0-9]+", ".*", ab_class)
+  ab_class <- gsub("(ph|f)", "(ph|f)", ab_class)
+  ab_class <- gsub("(t|th)", "(t|th)", ab_class)
+  ab_reference <- subset(antibiotics,
+                         group %like% ab_class | 
+                           atc_group1 %like% ab_class | 
+                           atc_group2 %like% ab_class)
+  ab_group <- find_ab_group(ab_class)
+  # get the columns with a group names in the chosen ab class
+  agents <- ab_in_data[names(ab_in_data) %in% ab_reference$ab]
+  if (length(agents) == 0) {
+    message(font_blue(paste0("NOTE: no antimicrobial agents of class ", ab_group, 
+                             " found (such as ", find_ab_names(ab_class, 2), 
+                             "), data left unchanged.")))
+    return(x.bak)
+  }
+  
+  if (length(result) == 1) {
+    operator <- " is "
+  } else {
+    operator <- " is one of "
+  }
+  if (scope == "any") {
+    scope_txt <- " or "
+    scope_fn <- any
+  } else {
+    scope_txt <- " and "
+    scope_fn <- all
+    if (length(agents) > 1) {
+      operator <- gsub("is", "are", operator)
+    }
+  }
+  if (length(agents) > 1) {
+    scope <- paste(scope, "of columns ")
+  } else {
+    scope <- "column "
+  }
+  
+  # sort columns on official name
+  agents <- agents[order(ab_name(names(agents), language = NULL))]
+  
+  message(font_blue(paste0("Filtering on ", ab_group, ": ", scope,
+                           paste(paste0("`", font_bold(agents, collapse = NULL),
+                                        "` (", ab_name(names(agents), tolower = TRUE, language = NULL), ")"),
+                                 collapse = scope_txt),
+                           operator, toString(result))))
+  filtered <- as.logical(by(x, seq_len(nrow(x)),
+                            function(row) scope_fn(unlist(row[, agents]) %in% result, na.rm = TRUE)))
+  x <- x[which(filtered), , drop = FALSE]
  class(x) <- x_class
  x
 }
@ -279,6 +297,19 @@ filter_macrolides <- function(x,
                  ...)
 }

+#' @rdname filter_ab_class
+#' @export
+filter_penicillins <- function(x,
+                               result = NULL,
+                               scope = "any",
+                               ...) {
+  filter_ab_class(x = x,
+                  ab_class = "penicillin",
+                  result = result,
+                  scope = scope,
+                  ...)
+}
+
 #' @rdname filter_ab_class
 #' @export
 filter_tetracyclines <- function(x,
@ -312,9 +343,9 @@ find_ab_group <- function(ab_class) {
  )
 }

-find_ab_names <- function(ab_group) {
+find_ab_names <- function(ab_group, n = 3) {
  drugs <- antibiotics[which(antibiotics$group %like% ab_group), "name"]
-  paste0(ab_name(sample(drugs, size = min(4, length(drugs)), replace = FALSE),
-                 tolower = TRUE, language = NULL), 
+  paste0(sort(ab_name(sample(drugs, size = min(n, length(drugs)), replace = FALSE),
+                      tolower = TRUE, language = NULL)), 
         collapse = ", ")
 }
--- a/R/guess_ab_col.R
+++ b/R/guess_ab_col.R
@ -160,6 +160,11 @@ get_column_abx <- function(x,
    x <- x[!is.na(x)]
  }

+  if (length(x) == 0) {
+    message(font_blue("No columns found."))
+    return(x)
+  }
+  
  # sort on name
  x <- x[order(names(x), x)]
  duplicates <- c(x[base::duplicated(x)], x[base::duplicated(names(x))]) 
@ -167,7 +172,7 @@ get_column_abx <- function(x,
  x <- c(x[!names(x) %in% names(duplicates)], duplicates)
  x <- x[order(names(x), x)]
  
-  # succeeded with aut-guessing
+  # succeeded with auto-guessing
  message(font_blue("OK."))

  for (i in seq_len(length(x))) {
--- a/R/join_microorganisms.R
+++ b/R/join_microorganisms.R
@ -55,7 +55,9 @@
 #' }
 inner_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) {
  check_dataset_integrity()
+  check_groups_before_join(x, "inner_join_microorganisms")
  checked <- joins_check_df(x, by)
+  x_class <- class(x)
  x <- checked$x
  by <- checked$by
  join <- suppressWarnings(
@ -64,6 +66,7 @@ inner_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) {
  if (NROW(join) > NROW(x)) {
    warning("The newly joined tbl contains ", nrow(join) - nrow(x), " rows more that its original.")
  }
+  class(join) <- x_class
  join
 }

@ -71,7 +74,9 @@ inner_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) {
 #' @export
 left_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) {
  check_dataset_integrity()
+  check_groups_before_join(x, "left_join_microorganisms")
  checked <- joins_check_df(x, by)
+  x_class <- class(x)
  x <- checked$x
  by <- checked$by
  join <- suppressWarnings(
@ -80,6 +85,7 @@ left_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) {
  if (NROW(join) > NROW(x)) {
    warning("The newly joined tbl contains ", nrow(join) - nrow(x), " rows more that its original.")
  }
+  class(join) <- x_class
  join
 }

@ -87,7 +93,9 @@ left_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) {
 #' @export
 right_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) {
  check_dataset_integrity()
+  check_groups_before_join(x, "right_join_microorganisms")
  checked <- joins_check_df(x, by)
+  x_class <- class(x)
  x <- checked$x
  by <- checked$by
  join <- suppressWarnings(
@ -96,6 +104,7 @@ right_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) {
  if (NROW(join) > NROW(x)) {
    warning("The newly joined tbl contains ", nrow(join) - nrow(x), " rows more that its original.")
  }
+  class(join) <- x_class
  join
 }

@ -103,7 +112,9 @@ right_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) {
 #' @export
 full_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) {
  check_dataset_integrity()
+  check_groups_before_join(x, "full_join_microorganisms")
  checked <- joins_check_df(x, by)
+  x_class <- class(x)
  x <- checked$x
  by <- checked$by
  join <- suppressWarnings(
@ -112,6 +123,7 @@ full_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) {
  if (NROW(join) > NROW(x)) {
    warning("The newly joined tbl contains ", nrow(join) - nrow(x), " rows more that its original.")
  }
+  class(join) <- x_class
  join
 }

@ -119,24 +131,32 @@ full_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) {
 #' @export
 semi_join_microorganisms <- function(x, by = NULL, ...) {
  check_dataset_integrity()
+  check_groups_before_join(x, "semi_join_microorganisms")
+  x_class <- class(x)
  checked <- joins_check_df(x, by)
  x <- checked$x
  by <- checked$by
-  suppressWarnings(
+  join <- suppressWarnings(
    semi_join(x = x, y = microorganisms, by = by, ...)
  )
+  class(join) <- x_class
+  join
 }

 #' @rdname join
 #' @export
 anti_join_microorganisms <- function(x, by = NULL, ...) {
  check_dataset_integrity()
+  check_groups_before_join(x, "anti_join_microorganisms")
  checked <- joins_check_df(x, by)
+  x_class <- class(x)
  x <- checked$x
  by <- checked$by
-  suppressWarnings(
+  join <- suppressWarnings(
    anti_join(x = x, y = microorganisms, by = by, ...)
  )
+  class(join) <- x_class
+  join
 }

 joins_check_df <- function(x, by) {
@ -146,6 +166,7 @@ joins_check_df <- function(x, by) {
      by <- "mo"
    }
  }
+  x <- as.data.frame(x, stringsAsFactors = FALSE)
  if (is.null(by)) {
    # search for column with class `mo` and return first one found
    by <- colnames(x)[lapply(x, is.mo) == TRUE][1]
@ -168,3 +189,9 @@ joins_check_df <- function(x, by) {
  list(x = x,
       by = joinby)
 }
+
+check_groups_before_join <- function(x, fn) {
+  if (is.data.frame(x) && !is.null(attributes(x)$groups)) {
+    warning("Groups are dropped, since the ", fn, "() function relies on merge() from base R, not on join() from dplyr.", call. = FALSE)
+  }
+}
--- a/R/sysdata.rda
+++ b/R/sysdata.rda