is.rsi.eligible update

2025-07-09 02:03:04 +02:00 · 2019-02-04 12:24:07 +01:00
parent cd07d65734
commit 587e00b7be
25 changed files with 1115 additions and 491 deletions
--- a/R/availability.R
+++ b/R/availability.R
@ -0,0 +1,61 @@
+# ==================================================================== #
+# TITLE                                                                #
+# Antimicrobial Resistance (AMR) Analysis                              #
+#                                                                      #
+# SOURCE                                                               #
+# https://gitlab.com/msberends/AMR                                     #
+#                                                                      #
+# LICENCE                                                              #
+# (c) 2019 Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl)  #
+#                                                                      #
+# This R package is free software; you can freely use and distribute   #
+# it for both personal and commercial purposes under the terms of the  #
+# GNU General Public License version 2.0 (GNU GPL-2), as published by  #
+# the Free Software Foundation.                                        #
+#                                                                      #
+# This R package was created for academic research and was publicly    #
+# released in the hope that it will be useful, but it comes WITHOUT    #
+# ANY WARRANTY OR LIABILITY.                                           #
+# Visit our website for more info: https://msberends.gitlab.io/AMR.    #
+# ==================================================================== #
+
+#' Check availability of columns
+#'
+#' Easy check for availability of columns in a data set. This makes it easy to get an idea of which antibiotic combination can be used for calculation with e.g. \code{\link{portion_IR}}.
+#' @param tbl a \code{data.frame} or \code{list}
+#' @return \code{data.frame} with column names of \code{tbl} as row names and columns: \code{percent_IR}, \code{count}, \code{percent}, \code{visual_availability}.
+#' @export
+#' @examples
+#' availability(septic_patients)
+#'
+#' library(dplyr)
+#' septic_patients %>% availability()
+#'
+#' septic_patients %>%
+#'   select_if(is.rsi) %>%
+#'   availability()
+#'
+#' septic_patients %>%
+#'   filter(mo == as.mo("E. coli")) %>%
+#'   select_if(is.rsi) %>%
+#'   availability()
+availability <- function(tbl) {
+  # Summarises, per column (or list element) of `tbl`, how much data is
+  # available: the count and percentage of non-missing values, a text bar
+  # visualising that percentage and, for columns of class `rsi`, the
+  # percentage returned by portion_IR().
+
+  # fraction of non-missing values per column (NaN for zero-length columns)
+  x <- base::vapply(tbl,
+                    function(col) {
+                      1 - base::sum(base::is.na(col)) / base::length(col)
+                    },
+                    FUN.VALUE = numeric(1))
+  # number of non-missing values per column; its names become the row names
+  n <- base::vapply(tbl,
+                    function(col) base::length(col[!base::is.na(col)]),
+                    FUN.VALUE = integer(1))
+  # percentage from portion_IR() for `rsi` columns; "NaN" marks all other
+  # columns so they end up as an empty string below
+  IR <- base::vapply(tbl,
+                     function(col) {
+                       if (is.rsi(col)) {
+                         pct_IR <- portion_IR(col, minimum = 0) * 100
+                         base::as.character(base::round(pct_IR, 1))
+                       } else {
+                         "NaN"
+                       }
+                     },
+                     FUN.VALUE = character(1),
+                     USE.NAMES = FALSE)
+  IR <- gsub("NaN%", "", paste0(IR, "%"))
+
+  # Width of the bar in characters. The filled part is computed with integer
+  # arithmetic: dividing the rounded fraction by (1 / max_chars) and letting
+  # strrep() truncate can land one character short due to floating-point
+  # error.
+  max_chars <- 50L
+  frac <- x
+  frac[base::is.na(frac)] <- 0 # zero-length columns get an empty bar
+  pct <- base::as.integer(base::round(frac * 100))
+  n_filled <- (pct * max_chars) %/% 100L
+  x_chars <- strrep("#", n_filled)
+  x_chars_empty <- strrep("-", max_chars - n_filled)
+
+  # NOTE(review): the roxygen header documents the last column as
+  # `visual_availability`; the spelling below is kept as-is so existing
+  # callers keep working.
+  data.frame(percent_IR = IR,
+             count = n,
+             percent = paste0(round(x * 100, 1), "%"),
+             visual_availabilty = paste0("|", x_chars, x_chars_empty, "|"))
+}
--- a/R/data.R
+++ b/R/data.R
@ -211,7 +211,7 @@
 #' \describe{
 #'   \item{\code{Identification number}}{ID of the sample}
 #'   \item{\code{Specimen number}}{ID of the specimen}
-#'   \item{\code{Organism}}{Microorganisms, can be coerced with \code{\link{as.mo}}}
+#'   \item{\code{Organism}}{Name of the microorganism. Before analysis, you should transform this to a valid microbial class, using \code{\link{as.mo}}.}
 #'   \item{\code{Country}}{Country of origin}
 #'   \item{\code{Laboratory}}{Name of laboratory}
 #'   \item{\code{Last name}}{Last name of patient}
@ -234,7 +234,7 @@
 #'   \item{\code{Inducible clindamycin resistance}}{Clindamycin can be induced?}
 #'   \item{\code{Comment}}{Other comments}
 #'   \item{\code{Date of data entry}}{Date this data was entered in WHONET}
-#'   \item{\code{AMP_ND10:CIP_EE}}{27 different antibiotics. You can lookup the abbreviatons in the \code{\link{antibiotics}} data set, or use e.g. \code{\link{atc_name}("AMP")} to get the official name immediately.}
+#'   \item{\code{AMP_ND10:CIP_EE}}{27 different antibiotics. You can look up the abbreviations in the \code{\link{antibiotics}} data set, or use e.g. \code{\link{atc_name}("AMP")} to get the official name immediately. Before analysis, you should transform this to a valid antibiotic class, using \code{\link{as.rsi}}.}
 #' }
 #' @inheritSection AMR Read more on our website!
 "WHONET"
--- a/R/first_isolate.R
+++ b/R/first_isolate.R
@ -381,7 +381,7 @@ first_isolate <- function(tbl,

  if (abs(row.start) == Inf | abs(row.end) == Inf) {
    if (info == TRUE) {
-      message('No isolates found.')
+      message(paste("=> Found", bold("no isolates")))
    }
    # NAs where genus is unavailable
    return(tbl %>%
--- a/R/rsi.R
+++ b/R/rsi.R
@ -24,6 +24,7 @@
 #' This transforms a vector to a new class \code{rsi}, which is an ordered factor with levels \code{S < I < R}. Invalid antimicrobial interpretations will be translated as \code{NA} with a warning.
 #' @rdname as.rsi
 #' @param x vector
+#' @param threshold maximum fraction of \code{x} that is allowed to fail transformation, see Examples
 #' @details The function \code{is.rsi.eligible} returns \code{TRUE} when a columns contains only valid antimicrobial interpretations (S and/or I and/or R), and \code{FALSE} otherwise.
 #' @return Ordered factor with new class \code{rsi}
 #' @keywords rsi
@ -48,10 +49,15 @@
 #' septic_patients %>%
 #'   mutate_at(vars(peni:rifa), as.rsi)
 #'
+#'
 #' # fastest way to transform all columns with already valid AB results to class `rsi`:
 #' septic_patients %>%
 #'   mutate_if(is.rsi.eligible,
 #'             as.rsi)
+#'
+#' # default threshold of `is.rsi.eligible` is 5%.
+#' is.rsi.eligible(WHONET$`First name`) # fails, >80% is invalid
+#' is.rsi.eligible(WHONET$`First name`, threshold = 0.9) # succeeds
 as.rsi <- function(x) {
  if (is.rsi(x)) {
    x
@ -99,28 +105,37 @@ as.rsi <- function(x) {

 #' @rdname as.rsi
 #' @export
-#' @importFrom dplyr %>%
 is.rsi <- function(x) {
-  class(x) %>% identical(c('rsi', 'ordered', 'factor'))
+  identical(class(x), # TRUE only when the class vector is exactly this one
+            c('rsi', 'ordered', 'factor'))
 }

 #' @rdname as.rsi
 #' @export
-#' @importFrom dplyr %>%
-is.rsi.eligible <- function(x) {
-  if (is.logical(x)
-      | is.numeric(x)
-      | is.mo(x)
-      | identical(class(x), "Date")
-      | is.rsi(x)) {
+is.rsi.eligible <- function(x, threshold = 0.05) {
+  # Tells whether `x` is worth transforming with as.rsi(): TRUE when at most
+  # `threshold` (a fraction) of its non-missing, non-empty values fail the
+  # transformation; FALSE for classes listed below and for vectors without
+  # any usable value. Stops when `x` has more than one column.
+  if (NCOL(x) > 1) {
+    stop('`x` must be a one-dimensional vector.')
+  }
+  if (any(c("logical",
+            "numeric",
+            "integer",
+            "mo",
+            "Date",
+            "POSIXct",
+            "rsi",
+            "raw",
+            "hms")
+          %in% class(x))) {
    # no transformation needed
    FALSE
  } else {
-    # check all but a-z
-    y <- unique(gsub("[^RSIrsi]+", "", unique(x)))
-    !all(y %in% c("", NA_character_)) &
-      all(y %in% c("R", "I", "S", "", NA_character_)) &
-      max(nchar(as.character(x)), na.rm = TRUE) < 8
+    # drop missing and empty values element by element; is.null() and
+    # identical() test the vector as a whole, so they cannot be used to
+    # filter out individual empty strings
+    x <- x[!is.na(x) & nzchar(as.character(x))]
+    if (length(x) == 0) {
+      return(FALSE)
+    }
+    # invalid interpretations are translated to NA by as.rsi(); the warning
+    # it raises for them is not relevant here
+    checked <- suppressWarnings(as.rsi(x))
+    outcome <- sum(is.na(checked)) / length(x)
+    outcome <= threshold
  }
 }