From d232666e49bdf007723b9b04214d0f15326306e7 Mon Sep 17 00:00:00 2001 From: Matthijs Berends Date: Fri, 1 Aug 2025 15:15:49 +0200 Subject: [PATCH] update sir to include interpretation details --- NAMESPACE | 3 + R/sir.R | 247 +++++++++++++++++++++++++++++++++++++------------- man/as.sir.Rd | 4 +- 3 files changed, 192 insertions(+), 62 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 2b2cb9f33..5fc99890f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -12,6 +12,7 @@ S3method("[",deprecated_amr_dataset) S3method("[",disk) S3method("[",mic) S3method("[",mo) +S3method("[",sir) S3method("[<-",ab) S3method("[<-",av) S3method("[<-",disk) @@ -24,6 +25,7 @@ S3method("[[",deprecated_amr_dataset) S3method("[[",disk) S3method("[[",mic) S3method("[[",mo) +S3method("[[",sir) S3method("[[<-",ab) S3method("[[<-",av) S3method("[[<-",disk) @@ -99,6 +101,7 @@ S3method(print,custom_eucast_rules) S3method(print,custom_mdro_guideline) S3method(print,deprecated_amr_dataset) S3method(print,disk) +S3method(print,interpreted_sir) S3method(print,mic) S3method(print,mo) S3method(print,mo_renamed) diff --git a/R/sir.R b/R/sir.R index 042643d1f..c0e4e9766 100755 --- a/R/sir.R +++ b/R/sir.R @@ -385,26 +385,15 @@ as.sir <- function(x, ...) { UseMethod("as.sir") } -as_sir_structure <- function(x, - guideline = NULL, - mo = NULL, - ab = NULL, - method = NULL, - ref_tbl = NULL, - ref_breakpoints = NULL) { +as_sir_structure <- function(x) { + int <- attr(x, "interpretation_details") structure( factor(as.character(unlist(unname(x))), levels = c("S", "SDD", "I", "R", "NI"), ordered = TRUE ), - # TODO for #170 - # guideline = guideline, - # mo = mo, - # ab = ab, - # method = method, - # ref_tbl = ref_tbl, - # ref_breakpoints = ref_breakpoints, - class = c("sir", "ordered", "factor") + interpretation_details = int, + class = c(if (!is.null(int)) "interpreted_sir" else NULL, "sir", "ordered", "factor") ) } @@ -1649,9 +1638,11 @@ as_sir_method <- function(method_short, breakpoint_S_R = vectorise_log_entry(NA_character_, length(rows)), stringsAsFactors = FALSE ) + attr(new_sir, "interpretation_details") <- out out <- subset(out, !is.na(input_given)) AMR_env$sir_interpretation_history <- rbind_AMR(AMR_env$sir_interpretation_history, out) notes <- c(notes, notes_current) + df[rows, "result"] <- new_sir next } @@ -1827,6 +1818,7 @@ as_sir_method <- function(method_short, breakpoint_S_R = vectorise_log_entry(paste0(breakpoints_current[, "breakpoint_S", drop = TRUE], "-", breakpoints_current[, "breakpoint_R", drop = TRUE]), length(rows)), stringsAsFactors = FALSE ) + attr(new_sir, "interpretation_details") <- out out <- subset(out, !is.na(input_given)) AMR_env$sir_interpretation_history <- rbind_AMR(AMR_env$sir_interpretation_history, out) } @@ -1871,20 +1863,33 @@ as_sir_method <- function(method_short, new_part <- new_part[order(new_part$index), , drop = FALSE] AMR_env$sir_interpretation_history <- rbind_AMR(old_part, new_part) - df$result + as_sir_structure(df$result) } #' @rdname as.sir +#' @param sir_values SIR values that were interpreted from MIC or disk diffusion values using [as.sir()]. #' @param clean A [logical] to indicate whether previously stored results should be forgotten after returning the 'logbook' with results. #' @export -sir_interpretation_history <- function(clean = FALSE) { +sir_interpretation_history <- function(sir_values = NULL, clean = FALSE) { + # for AMR v3.0.0 and lower, the first argument was `clean`, so allow `sir_interpretation_history(TRUE)` to keep working + if (is.logical(sir_values) && missing(clean)) { + clean <- sir_values + sir_values <- NULL + warning_("For `sir_interpretation_history()`, the `clean` argument is no longer the first argument, please update your code to explicitly state 'clean': `sir_interpretation_history(clean = ", clean, ")`.") + } + meet_criteria(sir_values, allow_class = "sir", allow_NULL = TRUE) meet_criteria(clean, allow_class = "logical", has_length = 1) - out <- AMR_env$sir_interpretation_history - out <- out[which(!is.na(out$datetime)), , drop = FALSE] - out$outcome <- as.sir(out$outcome) - out$site <- as.character(out$site) - if (isTRUE(clean)) { - AMR_env$sir_interpretation_history <- AMR_env$sir_interpretation_history[0, , drop = FALSE] + + if (!is.null(sir_values)) { + out <- attr(sir_values, "interpretation_details") + } else { + out <- AMR_env$sir_interpretation_history + out <- out[which(!is.na(out$datetime)), , drop = FALSE] + out$outcome <- as.sir(out$outcome) + out$site <- as.character(out$site) + if (isTRUE(clean)) { + AMR_env$sir_interpretation_history <- AMR_env$sir_interpretation_history[0, , drop = FALSE] + } } if (pkg_is_available("tibble")) { out <- import_fn("as_tibble", "tibble")(out) @@ -2008,21 +2013,60 @@ get_skimmers.sir <- function(column) { #' @export #' @noRd print.sir <- function(x, ...) { - x_name <- deparse(substitute(x)) cat("Class 'sir'\n") - # TODO for #170 - # if (!is.null(attributes(x)$guideline) && !all(is.na(attributes(x)$guideline))) { - # cat(font_blue(word_wrap("These values were interpreted using ", - # font_bold(vector_and(attributes(x)$guideline, quotes = FALSE)), - # " based on ", - # vector_and(attributes(x)$method, quotes = FALSE), - # " values. ", - # "Use `sir_interpretation_history(", x_name, ")` to return a full logbook."))) - # cat("\n") - # } print(as.character(x), quote = FALSE) } +#' @method print interpreted_sir +#' @export +#' @noRd +print.interpreted_sir <- function(x, ...) { + cat("Class 'sir'\n") + print(as.character(x), quote = FALSE) + + if (length(x) == 0) { + return(invisible()) + } + + int <- attr(x, "interpretation_details") + if (NROW(int) == 0) { + if (length(x) == 1) { + cat(font_blue(word_wrap("Source data were lost for this interpreted value."))) + } else { + cat(font_blue(word_wrap("Source data were lost for these interpreted values."))) + } + } else { + relevant_cols <- int[, c("guideline", "method", "ab", "mo"), drop = FALSE] + relevant_cols <- unique(relevant_cols) + vals1_plural <- ifelse(length(x) == 1, "This value was", "These values were") + vals2_plural <- ifelse(length(x) == 1, "value", "values") + method_fn <- ifelse(relevant_cols$method == "MIC", "MIC", "disk diffusion") + if (NROW(relevant_cols) == 1) { + in_host <- ifelse(relevant_cols$host == "human", "", paste0(" in ", relevant_cols$host)) + cat(font_blue(word_wrap( + vals1_plural, " interpreted using ", + relevant_cols$guideline, + " based on the ", + method_fn, + " ", vals2_plural, " for ", + ab_name(relevant_cols$ab, language = NULL, info = FALSE, tolower = TRUE), " in ", + italicise_taxonomy(mo_name(relevant_cols$mo, language = NULL, info = FALSE), type = "ansi"), + in_host, + "." + ))) + } else { + cat(font_blue(word_wrap( + vals1_plural, " interpreted using ", + vector_and(relevant_cols$guideline, quotes = FALSE), + " based on ", + vector_and(method_fn, quotes = FALSE), + " ", vals2_plural, "." + ))) + } + cat(font_blue(word_wrap("\nUse `sir_interpretation_history()` on this object to return a full logbook.\n"))) + } +} + #' @method as.double sir #' @export @@ -2078,51 +2122,132 @@ summary.sir <- function(object, ...) { value } +#' @method [ sir +#' @export +#' @noRd +"[.sir" <- function(x, ...) { + y <- NextMethod() + det <- attr(x, "interpretation_details") + if (!is.null(det)) { + subset_idx <- seq_along(x)[...] + # safer than relying on implicit eval inside NextMethod() + attr(y, "interpretation_details") <- det[subset_idx, , drop = FALSE] + } + y +} +#' @method [[ sir +#' @export +#' @noRd +"[[.sir" <- function(x, i, ...) { + if (length(i) != 1L) { + stop("attempt to select more than one element with [[.", call. = FALSE) + } + x[i] # calls `[.sir`, ensures attr alignment +} + #' @method [<- sir #' @export #' @noRd "[<-.sir" <- function(i, j, ..., value) { value <- as.sir(value) y <- NextMethod() - attributes(y) <- attributes(i) + + old_det <- attr(i, "interpretation_details") + new_det <- attr(value, "interpretation_details") + + len_y <- length(y) + + # Neither i nor value have details -> do nothing + if (is.null(old_det) && is.null(new_det)) { + return(y) + } + + # Start building full_det as copy of old_det or empty + full_det <- if (!is.null(old_det)) old_det else data.frame(row = seq_along(i)) + + # Ensure full_det has correct row count and order + if (nrow(full_det) != length(i)) { + attr(y, "interpretation_details") <- NULL + return(y) + } + + # Which rows are being assigned? + assign_idx <- if (missing(j)) seq_along(i) else j + assign_idx <- as.integer(assign_idx) + + # If new_det is missing or too short, fill it + if (is.null(new_det)) { + new_det <- data.frame(row = assign_idx) + } else if (nrow(new_det) != length(value)) { + new_det <- data.frame(row = assign_idx) + } + + # Add temporary .row to track positions + full_det$.row <- seq_len(nrow(full_det)) + new_det$.row <- assign_idx + + # Replace old rows with new rows + full_det <- rbind( + subset(full_det, !.row %in% assign_idx), + new_det + ) + full_det <- full_det[order(full_det$.row), , drop = FALSE] + full_det$.row <- NULL + + # Clean up: ensure right number of rows + if (nrow(full_det) == len_y) { + attr(y, "interpretation_details") <- full_det + } else { + attr(y, "interpretation_details") <- NULL + } + y } #' @method [[<- sir #' @export #' @noRd "[[<-.sir" <- function(i, j, ..., value) { - value <- as.sir(value) - y <- NextMethod() - attributes(y) <- attributes(i) - y + if (!is.null(det) && length(i) == 1 && nrow(det) >= i) { + i[j] <- value + i + } else { + NextMethod() + } } #' @method c sir #' @export #' @noRd -c.sir <- function(...) { - lst <- list(...) +c.sir <- function(..., recursive = FALSE) { + lst <- lapply( + list(...), + function(x) { + list( + values = as.character(x), + interpretation_details = attr(x, "interpretation_details") + ) + } + ) + x <- unlist(lapply(lst, `[[`, "values"), use.names = FALSE) + details <- lapply(lst, `[[`, "interpretation_details") + has_details <- vapply(details, is.data.frame, logical(1)) + if (!any(has_details)) { + return(as_sir_structure(x)) + } - # TODO for #170 - # guideline <- vapply(FUN.VALUE = character(1), lst, function(x) attributes(x)$guideline %or% NA_character_) - # mo <- vapply(FUN.VALUE = character(1), lst, function(x) attributes(x)$mo %or% NA_character_) - # ab <- vapply(FUN.VALUE = character(1), lst, function(x) attributes(x)$ab %or% NA_character_) - # method <- vapply(FUN.VALUE = character(1), lst, function(x) attributes(x)$method %or% NA_character_) - # ref_tbl <- vapply(FUN.VALUE = character(1), lst, function(x) attributes(x)$ref_tbl %or% NA_character_) - # ref_breakpoints <- vapply(FUN.VALUE = character(1), lst, function(x) attributes(x)$ref_breakpoints %or% NA_character_) + # Pre-allocate details (no Map, no matrix allocation) + combined_details <- do.call(rbind, lapply(seq_along(details), function(i) { + d <- details[[i]] + if (is.null(d)) { + # generate NA rows of correct length, but fast + n <- length(details[[i]]) + as.data.frame(matrix(NA, nrow = n, ncol = 0)) + } else { + d + } + })) - out <- as.sir(unlist(lapply(list(...), as.character))) - - # TODO for #170 - # if (!all(is.na(guideline))) { - # attributes(out)$guideline <- guideline - # attributes(out)$mo <- mo - # attributes(out)$ab <- ab - # attributes(out)$method <- method - # attributes(out)$ref_tbl <- ref_tbl - # attributes(out)$ref_breakpoints <- ref_breakpoints - # } - - out + attr(x, "interpretation_details") <- combined_details + as_sir_structure(x) } #' @method unique sir diff --git a/man/as.sir.Rd b/man/as.sir.Rd index 5a20480b9..81bedad8c 100644 --- a/man/as.sir.Rd +++ b/man/as.sir.Rd @@ -70,7 +70,7 @@ is_sir_eligible(x, threshold = 0.05) language = get_AMR_locale(), verbose = FALSE, info = interactive(), parallel = FALSE, max_cores = -1, conserve_capped_values = NULL) -sir_interpretation_history(clean = FALSE) +sir_interpretation_history(sir_values = NULL, clean = FALSE) } \arguments{ \item{x}{Vector of values (for class \code{\link{mic}}: MIC values in mg/L, for class \code{\link{disk}}: a disk diffusion radius in millimetres).} @@ -147,6 +147,8 @@ The default \code{"standard"} setting ensures cautious handling of uncertain val \item{max_cores}{Maximum number of cores to use if \code{parallel = TRUE}. Use a negative value to subtract that number from the available number of cores, e.g. a value of \code{-2} on an 8-core machine means that at most 6 cores will be used. Defaults to \code{-1}. There will never be used more cores than variables to analyse. The available number of cores are detected using \code{\link[parallelly:availableCores]{parallelly::availableCores()}} if that package is installed, and base \R's \code{\link[parallel:detectCores]{parallel::detectCores()}} otherwise.} +\item{sir_values}{SIR values that were interpreted from MIC or disk diffusion values using \code{\link[=as.sir]{as.sir()}}.} + \item{clean}{A \link{logical} to indicate whether previously stored results should be forgotten after returning the 'logbook' with results.} } \value{