update sir to include interpretation details

2025-12-21 13:00:23 +01:00 · 2025-08-01 15:15:49 +02:00
parent fc72cf9324
commit d232666e49
3 changed files with 192 additions and 62 deletions
--- a/3
+++ b/3
@@ -12,6 +12,7 @@ S3method("[",deprecated_amr_dataset)
 S3method("[",disk)
 S3method("[",mic)
 S3method("[",mo)
 S3method("[",sir)
 S3method("[<-",ab)
 S3method("[<-",av)
 S3method("[<-",disk)
@@ -24,6 +25,7 @@ S3method("[[",deprecated_amr_dataset)
 S3method("[[",disk)
 S3method("[[",mic)
 S3method("[[",mo)
 S3method("[[",sir)
 S3method("[[<-",ab)
 S3method("[[<-",av)
 S3method("[[<-",disk)
@@ -99,6 +101,7 @@ S3method(print,custom_eucast_rules)
 S3method(print,custom_mdro_guideline)
 S3method(print,deprecated_amr_dataset)
 S3method(print,disk)
 S3method(print,interpreted_sir)
 S3method(print,mic)
 S3method(print,mo)
 S3method(print,mo_renamed)
--- a/R/sir.R
+++ b/R/sir.R
@@ -385,26 +385,15 @@ as.sir <- function(x, ...) {
  UseMethod("as.sir")
 }
-as_sir_structure <- function(x,
+as_sir_structure <- function(x) {
-                             guideline = NULL,
+  int <- attr(x, "interpretation_details")
                             mo = NULL,
                             ab = NULL,
                             method = NULL,
                             ref_tbl = NULL,
                             ref_breakpoints = NULL) {
  structure(
    factor(as.character(unlist(unname(x))),
      levels = c("S", "SDD", "I", "R", "NI"),
      ordered = TRUE
    ),
-    # TODO for #170
+    interpretation_details = int,
-    # guideline = guideline,
+    class = c(if (!is.null(int)) "interpreted_sir" else NULL, "sir", "ordered", "factor")
    # mo = mo,
    # ab = ab,
    # method = method,
    # ref_tbl = ref_tbl,
    # ref_breakpoints = ref_breakpoints,
    class = c("sir", "ordered", "factor")
  )
 }
@@ -1649,9 +1638,11 @@ as_sir_method <- function(method_short,
        breakpoint_S_R = vectorise_log_entry(NA_character_, length(rows)),
        stringsAsFactors = FALSE
      )
      attr(new_sir, "interpretation_details") <- out
      out <- subset(out, !is.na(input_given))
      AMR_env$sir_interpretation_history <- rbind_AMR(AMR_env$sir_interpretation_history, out)
      notes <- c(notes, notes_current)
      df[rows, "result"] <- new_sir
      next
    }
@@ -1827,6 +1818,7 @@ as_sir_method <- function(method_short,
        breakpoint_S_R = vectorise_log_entry(paste0(breakpoints_current[, "breakpoint_S", drop = TRUE], "-", breakpoints_current[, "breakpoint_R", drop = TRUE]), length(rows)),
        stringsAsFactors = FALSE
      )
      attr(new_sir, "interpretation_details") <- out
      out <- subset(out, !is.na(input_given))
      AMR_env$sir_interpretation_history <- rbind_AMR(AMR_env$sir_interpretation_history, out)
    }
@@ -1871,20 +1863,33 @@ as_sir_method <- function(method_short,
  new_part <- new_part[order(new_part$index), , drop = FALSE]
  AMR_env$sir_interpretation_history <- rbind_AMR(old_part, new_part)
-  df$result
+  as_sir_structure(df$result)
 }
 #' @rdname as.sir
 #' @param sir_values SIR values that were interpreted from MIC or disk diffusion values using [as.sir()].
 #' @param clean A [logical] to indicate whether previously stored results should be forgotten after returning the 'logbook' with results.
 #' @export
-sir_interpretation_history <- function(clean = FALSE) {
+sir_interpretation_history <- function(sir_values = NULL, clean = FALSE) {
  # for AMR v3.0.0 and lower, the first argument was `clean`, so allow `sir_interpretation_history(TRUE)` to keep working
  if (is.logical(sir_values) && missing(clean)) {
    clean <- sir_values
    sir_values <- NULL
    warning_("For `sir_interpretation_history()`, the `clean` argument is no longer the first argument, please update your code to explicitly state 'clean': `sir_interpretation_history(clean = ", clean, ")`.")
  }
  meet_criteria(sir_values, allow_class = "sir", allow_NULL = TRUE)
  meet_criteria(clean, allow_class = "logical", has_length = 1)
-  out <- AMR_env$sir_interpretation_history
+
-  out <- out[which(!is.na(out$datetime)), , drop = FALSE]
+  if (!is.null(sir_values)) {
-  out$outcome <- as.sir(out$outcome)
+    out <- attr(sir_values, "interpretation_details")
-  out$site <- as.character(out$site)
+  } else {
-  if (isTRUE(clean)) {
+    out <- AMR_env$sir_interpretation_history
-    AMR_env$sir_interpretation_history <- AMR_env$sir_interpretation_history[0, , drop = FALSE]
+    out <- out[which(!is.na(out$datetime)), , drop = FALSE]
    out$outcome <- as.sir(out$outcome)
    out$site <- as.character(out$site)
    if (isTRUE(clean)) {
      AMR_env$sir_interpretation_history <- AMR_env$sir_interpretation_history[0, , drop = FALSE]
    }
  }
  if (pkg_is_available("tibble")) {
    out <- import_fn("as_tibble", "tibble")(out)
@@ -2008,21 +2013,60 @@ get_skimmers.sir <- function(column) {
 #' @export
 #' @noRd
 print.sir <- function(x, ...) {
  x_name <- deparse(substitute(x))
  cat("Class 'sir'\n")
  # TODO for #170
  # if (!is.null(attributes(x)$guideline) && !all(is.na(attributes(x)$guideline))) {
  #   cat(font_blue(word_wrap("These values were interpreted using ",
  #                           font_bold(vector_and(attributes(x)$guideline, quotes = FALSE)),
  #                           " based on ",
  #                           vector_and(attributes(x)$method, quotes = FALSE),
  #                           " values. ",
  #                           "Use `sir_interpretation_history(", x_name, ")` to return a full logbook.")))
  #   cat("\n")
  # }
  print(as.character(x), quote = FALSE)
 }
 #' @method print interpreted_sir
 #' @export
 #' @noRd
 print.interpreted_sir <- function(x, ...) {
  cat("Class 'sir'\n")
  print(as.character(x), quote = FALSE)
  if (length(x) == 0) {
    return(invisible())
  }
  int <- attr(x, "interpretation_details")
  if (NROW(int) == 0) {
    if (length(x) == 1) {
      cat(font_blue(word_wrap("Source data were lost for this interpreted value.")))
    } else {
      cat(font_blue(word_wrap("Source data were lost for these interpreted values.")))
    }
  } else {
    relevant_cols <- int[, c("guideline", "method", "ab", "mo"), drop = FALSE]
    relevant_cols <- unique(relevant_cols)
    vals1_plural <- ifelse(length(x) == 1, "This value was", "These values were")
    vals2_plural <- ifelse(length(x) == 1, "value", "values")
    method_fn <- ifelse(relevant_cols$method == "MIC", "MIC", "disk diffusion")
    if (NROW(relevant_cols) == 1) {
      in_host <- ifelse(relevant_cols$host == "human", "", paste0(" in ", relevant_cols$host))
      cat(font_blue(word_wrap(
        vals1_plural, " interpreted using ",
        relevant_cols$guideline,
        " based on the ",
        method_fn,
        " ", vals2_plural, " for ",
        ab_name(relevant_cols$ab, language = NULL, info = FALSE, tolower = TRUE), " in ",
        italicise_taxonomy(mo_name(relevant_cols$mo, language = NULL, info = FALSE), type = "ansi"),
        in_host,
        "."
      )))
    } else {
      cat(font_blue(word_wrap(
        vals1_plural, " interpreted using ",
        vector_and(relevant_cols$guideline, quotes = FALSE),
        " based on ",
        vector_and(method_fn, quotes = FALSE),
        " ", vals2_plural, "."
      )))
    }
    cat(font_blue(word_wrap("\nUse `sir_interpretation_history()` on this object to return a full logbook.\n")))
  }
 }
 #' @method as.double sir
 #' @export
@@ -2078,51 +2122,132 @@ summary.sir <- function(object, ...) {
  value
 }
 #' @method [ sir
 #' @export
 #' @noRd
 "[.sir" <- function(x, ...) {
  y <- NextMethod()
  det <- attr(x, "interpretation_details")
  if (!is.null(det)) {
    subset_idx <- seq_along(x)[...]
    # safer than relying on implicit eval inside NextMethod()
    attr(y, "interpretation_details") <- det[subset_idx, , drop = FALSE]
  }
  y
 }
 #' @method [[ sir
 #' @export
 #' @noRd
 "[[.sir" <- function(x, i, ...) {
  if (length(i) != 1L) {
    stop("attempt to select more than one element with [[.", call. = FALSE)
  }
  x[i] # calls `[.sir`, ensures attr alignment
 }
 #' @method [<- sir
 #' @export
 #' @noRd
 "[<-.sir" <- function(i, j, ..., value) {
  value <- as.sir(value)
  y <- NextMethod()
-  attributes(y) <- attributes(i)
+
  old_det <- attr(i, "interpretation_details")
  new_det <- attr(value, "interpretation_details")
  len_y <- length(y)
  # Neither i nor value have details -> do nothing
  if (is.null(old_det) && is.null(new_det)) {
    return(y)
  }
  # Start building full_det as copy of old_det or empty
  full_det <- if (!is.null(old_det)) old_det else data.frame(row = seq_along(i))
  # Ensure full_det has correct row count and order
  if (nrow(full_det) != length(i)) {
    attr(y, "interpretation_details") <- NULL
    return(y)
  }
  # Which rows are being assigned?
  assign_idx <- if (missing(j)) seq_along(i) else j
  assign_idx <- as.integer(assign_idx)
  # If new_det is missing or too short, fill it
  if (is.null(new_det)) {
    new_det <- data.frame(row = assign_idx)
  } else if (nrow(new_det) != length(value)) {
    new_det <- data.frame(row = assign_idx)
  }
  # Add temporary .row to track positions
  full_det$.row <- seq_len(nrow(full_det))
  new_det$.row <- assign_idx
  # Replace old rows with new rows
  full_det <- rbind(
    subset(full_det, !.row %in% assign_idx),
    new_det
  )
  full_det <- full_det[order(full_det$.row), , drop = FALSE]
  full_det$.row <- NULL
  # Clean up: ensure right number of rows
  if (nrow(full_det) == len_y) {
    attr(y, "interpretation_details") <- full_det
  } else {
    attr(y, "interpretation_details") <- NULL
  }
  y
 }
 #' @method [[<- sir
 #' @export
 #' @noRd
 "[[<-.sir" <- function(i, j, ..., value) {
-  value <- as.sir(value)
+  if (!is.null(det) && length(i) == 1 && nrow(det) >= i) {
-  y <- NextMethod()
+    i[j] <- value
-  attributes(y) <- attributes(i)
+    i
-  y
+  } else {
    NextMethod()
  }
 }
 #' @method c sir
 #' @export
 #' @noRd
-c.sir <- function(...) {
+c.sir <- function(..., recursive = FALSE) {
-  lst <- list(...)
+  lst <- lapply(
    list(...),
    function(x) {
      list(
        values = as.character(x),
        interpretation_details = attr(x, "interpretation_details")
      )
    }
  )
  x <- unlist(lapply(lst, `[[`, "values"), use.names = FALSE)
  details <- lapply(lst, `[[`, "interpretation_details")
  has_details <- vapply(details, is.data.frame, logical(1))
  if (!any(has_details)) {
    return(as_sir_structure(x))
  }
-  # TODO for #170
+  # Pre-allocate details (no Map, no matrix allocation)
-  # guideline <- vapply(FUN.VALUE = character(1), lst, function(x) attributes(x)$guideline %or% NA_character_)
+  combined_details <- do.call(rbind, lapply(seq_along(details), function(i) {
-  # mo <- vapply(FUN.VALUE = character(1), lst, function(x) attributes(x)$mo %or% NA_character_)
+    d <- details[[i]]
-  # ab <- vapply(FUN.VALUE = character(1), lst, function(x) attributes(x)$ab %or% NA_character_)
+    if (is.null(d)) {
-  # method <- vapply(FUN.VALUE = character(1), lst, function(x) attributes(x)$method %or% NA_character_)
+      # generate NA rows of correct length, but fast
-  # ref_tbl <- vapply(FUN.VALUE = character(1), lst, function(x) attributes(x)$ref_tbl %or% NA_character_)
+      n <- length(details[[i]])
-  # ref_breakpoints <- vapply(FUN.VALUE = character(1), lst, function(x) attributes(x)$ref_breakpoints %or% NA_character_)
+      as.data.frame(matrix(NA, nrow = n, ncol = 0))
    } else {
      d
    }
  }))
-  out <- as.sir(unlist(lapply(list(...), as.character)))
+  attr(x, "interpretation_details") <- combined_details
-
+  as_sir_structure(x)
  # TODO for #170
  # if (!all(is.na(guideline))) {
  #   attributes(out)$guideline <- guideline
  #   attributes(out)$mo <- mo
  #   attributes(out)$ab <- ab
  #   attributes(out)$method <- method
  #   attributes(out)$ref_tbl <- ref_tbl
  #   attributes(out)$ref_breakpoints <- ref_breakpoints
  # }
  out
 }
 #' @method unique sir
--- a/man/as.sir.Rd
+++ b/man/as.sir.Rd
@@ -70,7 +70,7 @@ is_sir_eligible(x, threshold = 0.05)
  language = get_AMR_locale(), verbose = FALSE, info = interactive(),
  parallel = FALSE, max_cores = -1, conserve_capped_values = NULL)
-sir_interpretation_history(clean = FALSE)
+sir_interpretation_history(sir_values = NULL, clean = FALSE)
 }
 \arguments{
 \item{x}{Vector of values (for class \code{\link{mic}}: MIC values in mg/L, for class \code{\link{disk}}: a disk diffusion radius in millimetres).}
@@ -147,6 +147,8 @@ The default \code{"standard"} setting ensures cautious handling of uncertain val
 \item{max_cores}{Maximum number of cores to use if \code{parallel = TRUE}. Use a negative value to subtract that number from the available number of cores, e.g. a value of \code{-2} on an 8-core machine means that at most 6 cores will be used. Defaults to \code{-1}. There will never be used more cores than variables to analyse. The available number of cores are detected using \code{\link[parallelly:availableCores]{parallelly::availableCores()}} if that package is installed, and base \R's \code{\link[parallel:detectCores]{parallel::detectCores()}} otherwise.}
 \item{sir_values}{SIR values that were interpreted from MIC or disk diffusion values using \code{\link[=as.sir]{as.sir()}}.}
 \item{clean}{A \link{logical} to indicate whether previously stored results should be forgotten after returning the 'logbook' with results.}
 }
 \value{