From d214f74e25d6ed609314c7a4cda444d97e1effae Mon Sep 17 00:00:00 2001 From: Matthijs Berends Date: Mon, 20 May 2024 21:29:13 +0200 Subject: [PATCH] allow column name for `ab` in `as.sir()` --- DESCRIPTION | 2 +- NEWS.md | 3 +- R/aa_helper_functions.R | 7 +- R/ab.R | 27 +++--- R/ab_from_text.R | 4 + R/count.R | 2 +- R/sir.R | 175 ++++++++++++++++++++----------------- R/sir_calc.R | 13 ++- inst/tinytest/test-count.R | 6 +- man/as.sir.Rd | 6 +- 10 files changed, 139 insertions(+), 106 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 93a2c9f7..1bfeea3a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: AMR -Version: 2.1.1.9031 +Version: 2.1.1.9032 Date: 2024-05-20 Title: Antimicrobial Resistance Data Analysis Description: Functions to simplify and standardise antimicrobial resistance (AMR) diff --git a/NEWS.md b/NEWS.md index 52848c78..12d3dbfb 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# AMR 2.1.1.9031 +# AMR 2.1.1.9032 *(this beta version will eventually become v3.0. We're happy to reach a new major milestone soon, which will be all about the new One Health support!)* @@ -21,6 +21,7 @@ This package now supports not only tools for AMR data analysis in clinical setti * Function `mo_group_members()` to retrieve the member microorganisms. For example, `mo_group_members("Strep group C")` returns a vector of all microorganisms that are in that group. ## Changed +* For SIR interpretation, it is now possible to use column names for argument `ab` and `mo`: `as.sir(..., ab = "column1", mo = "column2")`. This greatly improves the flexibility for users. * For MICs: * Added as valid levels: 4096, 6 powers of 0.0625, and 5 powers of 192 (192, 384, 576, 768, 960) * Added new argument `keep_operators` to `as.mic()`. This can be `"all"` (default), `"none"`, or `"edges"`. This argument is also available in the new `limit_mic_range()` and `scale_*_mic()` functions. diff --git a/R/aa_helper_functions.R b/R/aa_helper_functions.R index e336aa4c..509110a8 100644 --- a/R/aa_helper_functions.R +++ b/R/aa_helper_functions.R @@ -1049,10 +1049,15 @@ get_current_column <- function() { if (tryCatch(!is.null(env$i), error = function(e) FALSE)) { if (!is.null(env$tibble_vars)) { # for mutate_if() + # TODO remove later, was part of older dplyr versions (at least not in dplyr 1.1.4) env$tibble_vars[env$i] } else { # for mutate(across()) - df <- tryCatch(get_current_data(NA, 0), error = function(e) NULL) + if (!is.null(env$data) && is.data.frame(env$data)) { + df <- env$data + } else { + df <- tryCatch(get_current_data(NA, 0), error = function(e) NULL) + } if (is.data.frame(df)) { colnames(df)[env$i] } else { diff --git a/R/ab.R b/R/ab.R index c53a6efc..9bddfb89 100755 --- a/R/ab.R +++ b/R/ab.R @@ -322,13 +322,12 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) { } # INITIAL SEARCH - More uncertain results ---- - if (loop_time <= 2 && fast_mode == FALSE) { # only run on first and second try # try by removing all spaces if (x[i] %like% " ") { - found <- suppressWarnings(as.ab(gsub(" +", "", x[i], perl = TRUE), loop_time = loop_time + 1)) + found <- suppressWarnings(as.ab(gsub(" +", "", x[i], perl = TRUE), loop_time = loop_time + 2)) if (length(found) > 0 && !is.na(found)) { x_new[i] <- note_if_more_than_one_found(found, i, from_text) next @@ -337,7 +336,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) { # try by removing all spaces and numbers if (x[i] %like% " " || x[i] %like% "[0-9]") { - found <- suppressWarnings(as.ab(gsub("[ 0-9]", "", x[i], perl = TRUE), loop_time = loop_time + 1)) + found <- suppressWarnings(as.ab(gsub("[ 0-9]", "", x[i], perl = TRUE), loop_time = loop_time + 2)) if (length(found) > 0 && !is.na(found)) { x_new[i] <- note_if_more_than_one_found(found, i, from_text) next @@ -363,7 +362,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) { )[[1]], collapse = "/" ) - x_translated_guess <- suppressWarnings(as.ab(x_translated, loop_time = loop_time + 1)) + x_translated_guess <- suppressWarnings(as.ab(x_translated, loop_time = loop_time + 2)) if (!is.na(x_translated_guess)) { x_new[i] <- x_translated_guess next @@ -375,7 +374,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) { strsplit(x_translated, "[^A-Z0-9 ]"), function(y) { for (i in seq_len(length(y))) { - y_name <- suppressWarnings(ab_name(y[i], language = NULL, loop_time = loop_time + 1)) + y_name <- suppressWarnings(ab_name(y[i], language = NULL, loop_time = loop_time + 2)) y[i] <- ifelse(!is.na(y_name), y_name, y[i] @@ -386,7 +385,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) { )[[1]], collapse = "/" ) - x_translated_guess <- suppressWarnings(as.ab(x_translated, loop_time = loop_time + 1)) + x_translated_guess <- suppressWarnings(as.ab(x_translated, loop_time = loop_time + 2)) if (!is.na(x_translated_guess)) { x_new[i] <- x_translated_guess next @@ -394,7 +393,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) { # try by removing all trailing capitals if (x[i] %like_case% "[a-z]+[A-Z]+$") { - found <- suppressWarnings(as.ab(gsub("[A-Z]+$", "", x[i], perl = TRUE), loop_time = loop_time + 1)) + found <- suppressWarnings(as.ab(gsub("[A-Z]+$", "", x[i], perl = TRUE), loop_time = loop_time + 2)) if (!is.na(found)) { x_new[i] <- note_if_more_than_one_found(found, i, from_text) next @@ -402,7 +401,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) { } # keep only letters - found <- suppressWarnings(as.ab(gsub("[^A-Z]", "", x[i], perl = TRUE), loop_time = loop_time + 1)) + found <- suppressWarnings(as.ab(gsub("[^A-Z]", "", x[i], perl = TRUE), loop_time = loop_time + 2)) if (!is.na(found)) { x_new[i] <- note_if_more_than_one_found(found, i, from_text) next @@ -413,7 +412,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) { if (flag_multiple_results == TRUE) { found <- from_text[1L] } else { - found <- tryCatch(suppressWarnings(ab_from_text(x[i], loop_time = loop_time + 1, translate_ab = FALSE)[[1]][1L]), + found <- tryCatch(suppressWarnings(ab_from_text(x[i], loop_time = loop_time + 2, translate_ab = FALSE)[[1]][1L]), error = function(e) NA_character_ ) } @@ -423,12 +422,12 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) { } # first 5 except for cephalosporins, then first 7 (those cephalosporins all start quite the same!) - found <- suppressWarnings(as.ab(substr(x[i], 1, 5), loop_time = loop_time + 1)) + found <- suppressWarnings(as.ab(substr(x[i], 1, 5), loop_time = loop_time + 2)) if (!is.na(found) && ab_group(found, loop_time = loop_time + 1) %unlike% "cephalosporins") { x_new[i] <- note_if_more_than_one_found(found, i, from_text) next } - found <- suppressWarnings(as.ab(substr(x[i], 1, 7), loop_time = loop_time + 1)) + found <- suppressWarnings(as.ab(substr(x[i], 1, 7), loop_time = loop_time + 2)) if (!is.na(found)) { x_new[i] <- note_if_more_than_one_found(found, i, from_text) next @@ -436,7 +435,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) { # make all consonants facultative search_str <- gsub("([BCDFGHJKLMNPQRSTVWXZ])", "\\1*", x[i], perl = TRUE) - found <- suppressWarnings(as.ab(search_str, loop_time = loop_time + 1, already_regex = TRUE)) + found <- suppressWarnings(as.ab(search_str, loop_time = loop_time + 2, already_regex = TRUE)) # keep at least 4 normal characters if (nchar(gsub(".\\*", "", search_str, perl = TRUE)) < 4) { found <- NA @@ -448,7 +447,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) { # make all vowels facultative search_str <- gsub("([AEIOUY])", "\\1*", x[i], perl = TRUE) - found <- suppressWarnings(as.ab(search_str, loop_time = loop_time + 1, already_regex = TRUE)) + found <- suppressWarnings(as.ab(search_str, loop_time = loop_time + 2, already_regex = TRUE)) # keep at least 5 normal characters if (nchar(gsub(".\\*", "", search_str, perl = TRUE)) < 5) { found <- NA @@ -464,7 +463,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) { x_spelling <- gsub("I+", "[AEIOU]+", x_spelling, fixed = TRUE) x_spelling <- gsub("O+", "[AEIOU]+", x_spelling, fixed = TRUE) x_spelling <- gsub("U+", "[AEIOU]+", x_spelling, fixed = TRUE) - found <- suppressWarnings(as.ab(x_spelling, loop_time = loop_time + 1, already_regex = TRUE)) + found <- suppressWarnings(as.ab(x_spelling, loop_time = loop_time + 2, already_regex = TRUE)) if (!is.na(found)) { x_new[i] <- note_if_more_than_one_found(found, i, from_text) next diff --git a/R/ab_from_text.R b/R/ab_from_text.R index 43336c80..f2d7469a 100755 --- a/R/ab_from_text.R +++ b/R/ab_from_text.R @@ -129,6 +129,10 @@ ab_from_text <- function(text, text_split_all <- text_split_all[nchar(text_split_all) >= 4 & grepl("[a-z]+", text_split_all)] result <- lapply(text_split_all, function(text_split) { progress$tick() + text_split <- text_split[text_split %like% "[A-Z]" & text_split %unlike% "[0-9]"] + if (length(text_split) == 0) { + return(as.ab(NA_character_)) + } suppressWarnings( as.ab(text_split, ...) ) diff --git a/R/count.R b/R/count.R index b5817996..772ff43b 100755 --- a/R/count.R +++ b/R/count.R @@ -135,7 +135,7 @@ count_resistant <- function(..., only_all_tested = FALSE) { count_susceptible <- function(..., only_all_tested = FALSE) { tryCatch( sir_calc(..., - ab_result = c("S", "I"), + ab_result = c("S", "SDD", "I"), only_all_tested = only_all_tested, only_count = TRUE ), diff --git a/R/sir.R b/R/sir.R index 1f314635..d5ab34d7 100755 --- a/R/sir.R +++ b/R/sir.R @@ -39,8 +39,8 @@ #' All breakpoints used for interpretation are available in our [clinical_breakpoints] data set. #' @rdname as.sir #' @param x vector of values (for class [`mic`]: MIC values in mg/L, for class [`disk`]: a disk diffusion radius in millimetres) -#' @param mo any (vector of) text that can be coerced to valid microorganism codes with [as.mo()], can be left empty to determine it automatically -#' @param ab any (vector of) text that can be coerced to a valid antimicrobial drug code with [as.ab()] +#' @param mo a vector (or column name) with [character]s that can be coerced to valid microorganism codes with [as.mo()], can be left empty to determine it automatically +#' @param ab a vector (or column name) with [character]s that can be coerced to a valid antimicrobial drug code with [as.ab()] #' @param uti (Urinary Tract Infection) A vector with [logical]s (`TRUE` or `FALSE`) to specify whether a UTI specific interpretation from the guideline should be chosen. For using [as.sir()] on a [data.frame], this can also be a column containing [logical]s or when left blank, the data set will be searched for a column 'specimen', and rows within this column containing 'urin' (such as 'urine', 'urina') will be regarded isolates from a UTI. See *Examples*. #' @inheritParams first_isolate #' @param guideline defaults to EUCAST `r max(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "EUCAST")$guideline)))` (the latest implemented EUCAST guideline in the [AMR::clinical_breakpoints] data set), but can be set with the [package option][AMR-options] [`AMR_guideline`][AMR-options]. Currently supports EUCAST (`r min(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "EUCAST")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "EUCAST")$guideline)))`) and CLSI (`r min(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "CLSI")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "CLSI")$guideline)))`), see *Details*. @@ -191,7 +191,7 @@ #' df %>% mutate(across(AMP:TOB, as.sir)) #' #' df %>% -#' mutate_at(vars(AMP:TOB), as.sir, mo = .$microorganism) +#' mutate_at(vars(AMP:TOB), as.sir, mo = "microorganism") #' #' # to include information about urinary tract infections (UTI) #' data.frame( @@ -759,7 +759,7 @@ as_sir_method <- function(method_short, ...) { meet_criteria(x, allow_NA = TRUE, .call_depth = -2) meet_criteria(mo, allow_class = c("mo", "character"), allow_NULL = TRUE, .call_depth = -2) - meet_criteria(ab, allow_class = c("ab", "character"), has_length = 1, .call_depth = -2) + meet_criteria(ab, allow_class = c("ab", "character"), .call_depth = -2) meet_criteria(guideline, allow_class = "character", has_length = 1, .call_depth = -2) meet_criteria(uti, allow_class = "logical", has_length = c(1, length(x)), allow_NULL = TRUE, allow_NA = TRUE, .call_depth = -2) meet_criteria(conserve_capped_values, allow_class = "logical", has_length = 1, .call_depth = -2) @@ -808,37 +808,49 @@ as_sir_method <- function(method_short, message_("Please note that in the absence of specific veterinary breakpoints for certain animal hosts, breakpoints for dogs, cattle, swine, cats, horse, aquatic, and poultry, in that order, are used as substitutes.\n\n") } - # for dplyr's across() - cur_column_dplyr <- import_fn("cur_column", "dplyr", error_on_fail = FALSE) - if (!is.null(cur_column_dplyr) && tryCatch(is.data.frame(get_current_data("ab", call = 0)), error = function(e) FALSE)) { - # try to get current column, which will only be available when in across() - ab <- tryCatch(cur_column_dplyr(), - error = function(e) ab - ) - } + current_df <- tryCatch(get_current_data(NA, 0), error = function(e) NULL) - # for auto-determining mo - mo_var_found <- "" - if (is.null(mo)) { - tryCatch( - { - df <- get_current_data(arg_name = "mo", call = -3) # will return an error if not found - mo <- NULL - try( - { - mo <- suppressMessages(search_type_in_df(df, "mo")) - }, - silent = TRUE - ) - if (!is.null(df) && !is.null(mo) && is.data.frame(df)) { - mo_var_found <- paste0(" based on column '", font_bold(mo), "'") - mo <- df[, mo, drop = TRUE] + # get ab + if (!is.null(current_df) && length(ab) == 1 && ab %in% colnames(current_df) && any(current_df[[ab]] %like% "[A-Z]", na.rm = TRUE)) { + ab <- current_df[[ab]] + } else { + # for dplyr's across() + cur_column_dplyr <- import_fn("cur_column", "dplyr", error_on_fail = FALSE) + if (!is.null(cur_column_dplyr) && is.data.frame(current_df)) { + # try to get current column, which will only be available when in across() + ab <- tryCatch(cur_column_dplyr(), + error = function(e) ab + ) + } + } + + # get mo + if (!is.null(current_df) && length(mo) == 1 && mo %in% colnames(current_df)) { + mo_var_found <- paste0(" based on column '", font_bold(mo), "'") + mo <- current_df[[mo]] + } else { + mo_var_found <- "" + if (is.null(mo)) { + tryCatch( + { + df <- get_current_data(arg_name = "mo", call = -3) # will return an error if not found + mo <- NULL + try( + { + mo <- suppressMessages(search_type_in_df(df, "mo")) + }, + silent = TRUE + ) + if (!is.null(df) && !is.null(mo) && is.data.frame(df)) { + mo_var_found <- paste0(" based on column '", font_bold(mo), "'") + mo <- df[, mo, drop = TRUE] + } + }, + error = function(e) { + mo <- NULL } - }, - error = function(e) { - mo <- NULL - } - ) + ) + } } if (is.null(mo)) { stop_("No information was supplied about the microorganisms (missing argument `mo` and no column of class 'mo' found). See ?as.sir.\n\n", @@ -861,9 +873,9 @@ as_sir_method <- function(method_short, } # be sure to take current taxonomy, as the 'clinical_breakpoints' data set only contains current taxonomy mo <- suppressWarnings(suppressMessages(as.mo(mo, keep_synonyms = FALSE, info = FALSE))) - if (is.na(ab)) { - message_("Returning NAs for unknown antibiotic: '", font_bold(ab.bak), - "'. Rename this column to a valid name or code, and check the output with `as.ab()`.", + if (all(is.na(ab))) { + message_("Returning NAs for unknown antibiotic: ", vector_and(ab.bak, sort = FALSE, quotes = TRUE), + ". Rename this column to a valid name or code, and check the output with `as.ab()`.", add_fn = font_red, as_note = FALSE ) @@ -887,25 +899,20 @@ as_sir_method <- function(method_short, } } - agent_formatted <- paste0("'", font_bold(ab.bak), "'") + agent_formatted <- paste0("'", font_bold(ab.bak, collapse = NULL), "'") agent_name <- ab_name(ab, tolower = TRUE, language = NULL) - if (generalise_antibiotic_name(ab.bak) == generalise_antibiotic_name(agent_name)) { - agent_formatted <- paste0( - agent_formatted, - " (", ab, ")" - ) - } else if (generalise_antibiotic_name(ab) != generalise_antibiotic_name(agent_name)) { - agent_formatted <- paste0( - agent_formatted, - " (", ifelse(ab.bak == ab, "", - paste0(ab, ", ") - ), agent_name, ")" - ) - } - + same_ab <- generalise_antibiotic_name(ab) == generalise_antibiotic_name(agent_name) + same_ab.bak <- generalise_antibiotic_name(ab.bak) == generalise_antibiotic_name(agent_name) + agent_formatted[same_ab.bak] <- paste0(agent_formatted[same_ab.bak], " (", ab, ")") + agent_formatted[same_ab.bak & !same_ab] <- paste0(agent_formatted[same_ab.bak & !same_ab], + " (", ifelse(ab.bak[same_ab.bak & !same_ab] == ab[same_ab.bak & !same_ab], + "", + paste0(ab[same_ab.bak & !same_ab], ", ")), + agent_name[same_ab.bak & !same_ab], + ")") # this intro text will also be printed in the progress bar in the `progress` package is installed intro_txt <- paste0("Interpreting ", method_long, ": ", ifelse(isTRUE(list(...)$is_data.frame), "column ", ""), - agent_formatted, + ifelse(length(agent_formatted) == 1, agent_formatted, ""), mo_var_found, ifelse(identical(reference_data, AMR::clinical_breakpoints), paste0(", ", font_bold(guideline_coerced)), @@ -928,23 +935,6 @@ as_sir_method <- function(method_short, metadata_mo <- get_mo_uncertainties() - df <- data.frame( - values = x, - mo = mo, - result = NA_sir_, - uti = uti, - host = host, - stringsAsFactors = FALSE - ) - if (method == "mic") { - # when as.sir.mic is called directly - df$values <- as.mic(df$values) - } else if (method == "disk") { - # when as.sir.disk is called directly - df$values <- as.disk(df$values) - } - df_unique <- unique(df[ , c("mo", "uti", "host"), drop = FALSE]) - rise_warning <- FALSE rise_note <- FALSE method_coerced <- toupper(method) @@ -952,20 +942,41 @@ as_sir_method <- function(method_short, if (identical(reference_data, AMR::clinical_breakpoints)) { breakpoints <- reference_data %pm>% - subset(guideline == guideline_coerced & method == method_coerced & ab == ab_coerced) - if (ab_coerced == "AMX" && nrow(breakpoints) == 0) { - ab_coerced <- "AMP" + subset(guideline == guideline_coerced & method == method_coerced & ab %in% ab_coerced) + if (any(ab_coerced == "AMX") && nrow(breakpoints[breakpoints$ab == "AMX", , drop = FALSE]) == 0) { + ab_coerced[ab_coerced == "AMX"] <- "AMP" breakpoints <- reference_data %pm>% - subset(guideline == guideline_coerced & method == method_coerced & ab == ab_coerced) + subset(guideline == guideline_coerced & method == method_coerced & ab %in% ab_coerced) } } else { breakpoints <- reference_data %pm>% - subset(method == method_coerced & ab == ab_coerced) + subset(method == method_coerced & ab %in% ab_coerced) } - + + # create the unique data frame to be filled to save time + df <- data.frame( + values = x, + mo = mo, + ab = ab, + result = NA_sir_, + uti = uti, + host = host, + stringsAsFactors = FALSE + ) + + if (method == "mic") { + # when as.sir.mic is called directly + df$values <- as.mic(df$values) + } else if (method == "disk") { + # when as.sir.disk is called directly + df$values <- as.disk(df$values) + } + df_unique <- unique(df[ , c("mo", "ab", "uti", "host"), drop = FALSE]) + + # get all breakpoints breakpoints <- breakpoints %pm>% subset(type == breakpoint_type) - + if (isFALSE(include_screening)) { # remove screening rules from the breakpoints table breakpoints <- breakpoints %pm>% @@ -1003,6 +1014,7 @@ as_sir_method <- function(method_short, for (i in seq_len(nrow(df_unique))) { p$tick() mo_current <- df_unique[i, "mo", drop = TRUE] + ab_current <- df_unique[i, "ab", drop = TRUE] uti_current <- df_unique[i, "uti", drop = TRUE] if (is.na(uti_current)) { # no preference, so no filter on UTIs @@ -1030,16 +1042,17 @@ as_sir_method <- function(method_short, # formatted for notes mo_formatted <- mo_current_name if (!mo_current_rank %in% c("kingdom", "phylum", "class", "order")) { - mo_formatted <- font_italic(mo_formatted) + mo_formatted <- font_italic(mo_formatted, collapse = NULL) } ab_formatted <- paste0( - suppressMessages(suppressWarnings(ab_name(ab_coerced, language = NULL, tolower = TRUE))), - " (", ab_coerced, ")" + suppressMessages(suppressWarnings(ab_name(ab_current, language = NULL, tolower = TRUE))), + " (", ab_current, ")" ) # gather all available breakpoints for current MO breakpoints_current <- breakpoints %pm>% + subset(ab == ab_current) %pm>% subset(mo %in% c( mo_current, mo_current_genus, mo_current_family, mo_current_order, mo_current_class, @@ -1155,9 +1168,9 @@ as_sir_method <- function(method_short, data.frame( datetime = rep(Sys.time(), length(rows)), index = rows, - ab_user = rep(ab.bak, length(rows)), + ab_user = rep(ab.bak[match(ab_current, df$ab)][1], length(rows)), mo_user = rep(mo.bak[match(mo_current, df$mo)][1], length(rows)), - ab = rep(ab_coerced, length(rows)), + ab = rep(ab_current, length(rows)), mo = rep(breakpoints_current[, "mo", drop = TRUE], length(rows)), input = as.double(values), outcome = as.sir(new_sir), diff --git a/R/sir_calc.R b/R/sir_calc.R index ec30b45d..8a962dc5 100755 --- a/R/sir_calc.R +++ b/R/sir_calc.R @@ -135,13 +135,20 @@ sir_calc <- function(..., x_transposed <- as.list(as.data.frame(t(x), stringsAsFactors = FALSE)) if (isTRUE(only_all_tested)) { + get_integers <- function(x) { + ints <- rep(NA_integer_, length(x)) + ints[x == "S"] <- 1L + ints[x %in% c("SDD", "I")] <- 2L + ints[x == "R"] <- 3L + ints + } # no NAs in any column y <- apply( - X = as.data.frame(lapply(x, as.integer), stringsAsFactors = FALSE), + X = as.data.frame(lapply(x, get_integers), stringsAsFactors = FALSE), MARGIN = 1, FUN = min ) - numerator <- sum(as.integer(y) %in% as.integer(ab_result), na.rm = TRUE) + numerator <- sum(!is.na(y) & y %in% get_integers(ab_result), na.rm = TRUE) denominator <- sum(vapply(FUN.VALUE = logical(1), x_transposed, function(y) !(anyNA(y)))) } else { # may contain NAs in any column @@ -359,6 +366,8 @@ sir_calc_df <- function(type, # "proportion", "count" or "both" # the same data structure as output, regardless of input out$interpretation <- factor(out$interpretation, levels = c("S", "SDD", "I", "R", "N"), ordered = TRUE) } + + out <- out[!is.na(out$interpretation), , drop = FALSE] if (data_has_groups) { # ordering by the groups and two more: "antibiotic" and "interpretation" diff --git a/inst/tinytest/test-count.R b/inst/tinytest/test-count.R index 604ac307..ea82baa5 100644 --- a/inst/tinytest/test-count.R +++ b/inst/tinytest/test-count.R @@ -69,7 +69,7 @@ if (AMR:::pkg_is_available("dplyr", min_version = "1.0.0", also_load = TRUE)) { example_isolates %>% count_susceptible(AMC, GEN, only_all_tested = TRUE) + example_isolates %>% count_resistant(AMC, GEN, only_all_tested = TRUE) ) - + # count of cases expect_equal( example_isolates %>% @@ -95,8 +95,10 @@ if (AMR:::pkg_is_available("dplyr", min_version = "1.0.0", also_load = TRUE)) { example_isolates %>% select(AMX) %>% count_df(combine_SI = FALSE) %>% pull(value), c( suppressWarnings(example_isolates$AMX %>% count_S()), + 0, example_isolates$AMX %>% count_I(), - example_isolates$AMX %>% count_R() + example_isolates$AMX %>% count_R(), + 0 ) ) diff --git a/man/as.sir.Rd b/man/as.sir.Rd index 8e2ae050..1a468a2a 100644 --- a/man/as.sir.Rd +++ b/man/as.sir.Rd @@ -98,9 +98,9 @@ sir_interpretation_history(clean = FALSE) \item{S, I, R, N, SDD}{a case-indepdendent \link[base:regex]{regular expression} to translate input to this result. This regular expression will be run \emph{after} all non-letters are removed from the input.} -\item{mo}{any (vector of) text that can be coerced to valid microorganism codes with \code{\link[=as.mo]{as.mo()}}, can be left empty to determine it automatically} +\item{mo}{a vector (or column name) with \link{character}s that can be coerced to valid microorganism codes with \code{\link[=as.mo]{as.mo()}}, can be left empty to determine it automatically} -\item{ab}{any (vector of) text that can be coerced to a valid antimicrobial drug code with \code{\link[=as.ab]{as.ab()}}} +\item{ab}{a vector (or column name) with \link{character}s that can be coerced to a valid antimicrobial drug code with \code{\link[=as.ab]{as.ab()}}} \item{guideline}{defaults to EUCAST 2023 (the latest implemented EUCAST guideline in the \link{clinical_breakpoints} data set), but can be set with the \link[=AMR-options]{package option} \code{\link[=AMR-options]{AMR_guideline}}. Currently supports EUCAST (2011-2023) and CLSI (2011-2023), see \emph{Details}.} @@ -284,7 +284,7 @@ if (require("dplyr")) { df \%>\% mutate(across(AMP:TOB, as.sir)) df \%>\% - mutate_at(vars(AMP:TOB), as.sir, mo = .$microorganism) + mutate_at(vars(AMP:TOB), as.sir, mo = "microorganism") # to include information about urinary tract infections (UTI) data.frame(