(v2.1.1.9043) fix sir translation with as.double

2025-07-10 22:22:19 +02:00 · 2024-06-10 10:34:45 +02:00
parent a3071cf58b
commit 31207952d3
10 changed files with 68 additions and 38 deletions
--- a/R/export_biosample.R
+++ b/R/export_biosample.R
@ -33,12 +33,12 @@
 #' @param x a data set 
 #' @param filename a character string specifying the file name
 #' @param type a character string specifying the type of data set, either "pathogen MIC" or "beta-lactamase MIC", see <https://www.ncbi.nlm.nih.gov/biosample/docs/>
-#' @param save_as_xlsx
-export_biosample <- function(x,
-                             filename = paste0("biosample_", format(Sys.time(), "%Y-%m-%d-%H%M%S"), ".xlsx"),
-                             type = "pathogen MIC",
-                             columns = where(is.mic),
-                             save_as_xlsx = TRUE) {
+#' @keywords internal
+export_ncbi_biosample <- function(x,
+                                  filename = paste0("biosample_", format(Sys.time(), "%Y-%m-%d-%H%M%S"), ".xlsx"),
+                                  type = "pathogen MIC",
+                                  columns = where(is.mic),
+                                  save_as_xlsx = TRUE) {
  meet_criteria(x, allow_class = "data.frame") # also checks dimensions to be >0
  meet_criteria(filename, allow_class = "character", has_length = 1)
  meet_criteria(type, allow_class = "character", has_length = 1, is_in = c("pathogen MIC", "beta-lactamase MIC"))
--- a/R/sir.R
+++ b/R/sir.R
@ -29,7 +29,7 @@

 #' Translate MIC and Disk Diffusion to SIR, or Clean Existing SIR Data
 #'
-#' @description Clean up existing SIR values, or interpret minimum inhibitory concentration (MIC) values and disk diffusion diameters according to EUCAST or CLSI. [as.sir()] transforms the input to a new class [`sir`], which is an ordered [factor].
+#' @description Clean up existing SIR values, or interpret minimum inhibitory concentration (MIC) values and disk diffusion diameters according to EUCAST or CLSI. [as.sir()] transforms the input to a new class [`sir`], which is an ordered [factor] containing the levels `S`, `SDD`, `I`, `R`, `N`.
 #' 
 #' These breakpoints are currently implemented:
 #' - For **clinical microbiology**: EUCAST `r min(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "EUCAST" & type == "human")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "EUCAST" & type == "human")$guideline)))` and CLSI `r min(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "CLSI" & type == "human")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "CLSI" & type == "human")$guideline)))`;
@ -123,8 +123,10 @@
 #' ### Other
 #'
 #' The function [is.sir()] detects if the input contains class `sir`. If the input is a [data.frame], it iterates over all columns and returns a [logical] vector.
+#' 
+#' The base R function [as.double()] can be used to retrieve quantitative values from a `sir` object: `"S"` = 1, `"I"`/`"SDD"` = 2, `"R"` = 3. All other values are rendered `NA` . **Note:** Do not use `as.integer()`, since that (because of how R works internally) will return the factor level indices, and not these aforementioned quantitative values.
 #'
-#' The function [is_sir_eligible()] returns `TRUE` when a columns contains at most 5% invalid antimicrobial interpretations (not S and/or I and/or R and/or N and/or SDD), and `FALSE` otherwise. The threshold of 5% can be set with the `threshold` argument. If the input is a [data.frame], it iterates over all columns and returns a [logical] vector.
+#' The function [is_sir_eligible()] returns `TRUE` when a column contains at most 5% invalid antimicrobial interpretations (not S and/or I and/or R and/or N and/or SDD), and `FALSE` otherwise. The threshold of 5% can be set with the `threshold` argument. If the input is a [data.frame], it iterates over all columns and returns a [logical] vector.
 #' @section Interpretation of SIR:
 #' In 2019, the European Committee on Antimicrobial Susceptibility Testing (EUCAST) has decided to change the definitions of susceptibility testing categories S, I, and R as shown below (<https://www.eucast.org/newsiandr>):
 #'
@ -224,7 +226,12 @@
 #' is.sir(sir_data)
 #' plot(sir_data) # for percentages
 #' barplot(sir_data) # for frequencies
-#'
+#' 
+#' # as common in R, you can use as.integer() to return factor indices:
+#' as.integer(as.sir(c("S", "SDD", "I", "R", "N", NA)))
+#' # but for computational use, as.double() will return 1 for S, 2 for I/SDD, and 3 for R:
+#' as.double(as.sir(c("S", "SDD", "I", "R", "N", NA)))
+#' 
 #' # the dplyr way
 #' if (require("dplyr")) {
 #'   example_isolates %>%
@ -1394,6 +1401,17 @@ print.sir <- function(x, ...) {
  print(as.character(x), quote = FALSE)
 }

+
+#' @method as.double sir
+#' @export
+as.double.sir <- function(x, ...) {
+  dbls <- rep(NA_real_, length(x))
+  dbls[x == "S"] <- 1
+  dbls[x %in% c("SDD", "I")] <- 2
+  dbls[x == "R"] <- 3
+  dbls
+}
+
 #' @method droplevels sir
 #' @export
 #' @noRd
@ -1410,8 +1428,10 @@ summary.sir <- function(object, ...) {
  x <- object
  n <- sum(!is.na(x))
  S <- sum(x == "S", na.rm = TRUE)
+  SDD <- sum(x == "SDD", na.rm = TRUE)
  I <- sum(x == "I", na.rm = TRUE)
  R <- sum(x == "R", na.rm = TRUE)
+  N <- sum(x == "N", na.rm = TRUE)
  pad <- function(x) {
    if (is.na(x)) {
      return("??")
@ -1426,10 +1446,11 @@ summary.sir <- function(object, ...) {
  }
  value <- c(
    "Class" = "sir",
+    "%S" = paste0(pad(percentage(S / n, digits = 1)), " (n=", S, ")"),
+    "%SDD" = paste0(pad(percentage(SDD / n, digits = 1)), " (n=", SDD, ")"),
+    "%I" = paste0(pad(percentage(I / n, digits = 1)), " (n=", I, ")"),
    "%R" = paste0(pad(percentage(R / n, digits = 1)), " (n=", R, ")"),
-    "%SI" = paste0(pad(percentage((S + I) / n, digits = 1)), " (n=", S + I, ")"),
-    "- %S" = paste0(pad(percentage(S / n, digits = 1)), " (n=", S, ")"),
-    "- %I" = paste0(pad(percentage(I / n, digits = 1)), " (n=", I, ")")
+    "%N" = paste0(pad(percentage(N / n, digits = 1)), " (n=", N, ")")
  )
  class(value) <- c("summaryDefault", "table")
  value
--- a/R/sir_calc.R
+++ b/R/sir_calc.R
@ -135,20 +135,13 @@ sir_calc <- function(...,

    x_transposed <- as.list(as.data.frame(t(x), stringsAsFactors = FALSE))
    if (isTRUE(only_all_tested)) {
-      get_integers <- function(x) {
-        ints <- rep(NA_integer_, length(x))
-        ints[x == "S"] <- 1L
-        ints[x %in% c("SDD", "I")] <- 2L
-        ints[x == "R"] <- 3L
-        ints
-      }
      # no NAs in any column
      y <- apply(
-        X = as.data.frame(lapply(x, get_integers), stringsAsFactors = FALSE),
+        X = as.data.frame(lapply(x, as.double), stringsAsFactors = FALSE),
        MARGIN = 1,
        FUN = min
      )
-      numerator <- sum(!is.na(y) & y %in% get_integers(ab_result), na.rm = TRUE)
+      numerator <- sum(!is.na(y) & y %in% as.double(ab_result), na.rm = TRUE)
      denominator <- sum(vapply(FUN.VALUE = logical(1), x_transposed, function(y) !(anyNA(y))))
    } else {
      # may contain NAs in any column
@ -364,7 +357,11 @@ sir_calc_df <- function(type, # "proportion", "count" or "both"
  } else {
    # don't use as.sir() here, as it would add the class 'sir' and we would like
    # the same data structure as output, regardless of input
-    out$interpretation <- factor(out$interpretation, levels = c("S", "SDD", "I", "R", "N"), ordered = TRUE)
+    if (out$value[out$interpretation == "SDD"] > 0) {
+      out$interpretation <- factor(out$interpretation, levels = c("S", "SDD", "I", "R"), ordered = TRUE)  
+    } else {
+      out$interpretation <- factor(out$interpretation, levels = c("S", "I", "R"), ordered = TRUE)
+    }
  }
  
  out <- out[!is.na(out$interpretation), , drop = FALSE]