diff --git a/DESCRIPTION b/DESCRIPTION index ec73018b..54bff7fd 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR -Version: 0.4.0.9004 -Date: 2018-10-17 +Version: 0.4.0.9005 +Date: 2018-10-19 Title: Antimicrobial Resistance Analysis Authors@R: c( person( diff --git a/NAMESPACE b/NAMESPACE index 7554b16a..b29d1ce5 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -175,6 +175,8 @@ importFrom(data.table,as.data.table) importFrom(data.table,data.table) importFrom(data.table,setkey) importFrom(dplyr,"%>%") +importFrom(dplyr,all_vars) +importFrom(dplyr,any_vars) importFrom(dplyr,arrange) importFrom(dplyr,arrange_at) importFrom(dplyr,as_tibble) @@ -184,7 +186,9 @@ importFrom(dplyr,case_when) importFrom(dplyr,desc) importFrom(dplyr,everything) importFrom(dplyr,filter) +importFrom(dplyr,filter_all) importFrom(dplyr,full_join) +importFrom(dplyr,funs) importFrom(dplyr,group_by) importFrom(dplyr,group_by_at) importFrom(dplyr,group_vars) @@ -192,6 +196,7 @@ importFrom(dplyr,if_else) importFrom(dplyr,lag) importFrom(dplyr,left_join) importFrom(dplyr,mutate) +importFrom(dplyr,mutate_all) importFrom(dplyr,mutate_at) importFrom(dplyr,n_distinct) importFrom(dplyr,progress_estimated) diff --git a/NEWS.md b/NEWS.md index 6cc75700..567fb82e 100755 --- a/NEWS.md +++ b/NEWS.md @@ -11,10 +11,11 @@ * Better error handling when rules cannot be applied (i.e. new values could not be inserted) * The amount of affected values will now only be measured once per row/column combination * Data set `septic_patients` now reflects these changes -* Empty values as input for `as.mo` will be processed faster +* Tremendous speed improvement for `as.mo` (and consequently all `mo_*` functions), as empty values wil be ignored a priori * Fewer than 3 characters as input for `as.mo` will return NA * Added parameter `combine_IR` (TRUE/FALSE) to functions `portion_df` and `count_df`, to indicate that all values of I and R must be merged into one, so the output only consists of S vs. IR (susceptible vs. non-susceptible) * Fix for `portion_*(..., as_percent = TRUE)` when minimal amount of isolates would not be met +* Added parameter `also_single_tested` for `portion_*` and `count_*` functions to also include cases where not all antibiotics were tested but at least one of the tested antibiotics includes the target antimicribial interpretation, see `?portion` * Using `portion_*` functions now throws a warning when total available isolate is below parameter `minimum` * Functions `as.mo`, `as.rsi` and `as.mic` will not set package name as attribute anymore * Data set `septic_patients` is now a `data.frame`, not a tibble anymore diff --git a/R/count.R b/R/count.R index 6dbcdbb4..d0bf15e7 100644 --- a/R/count.R +++ b/R/count.R @@ -92,56 +92,61 @@ #' group_by(hospital_id) %>% #' count_df(translate = FALSE) #' -count_R <- function(...) { +count_R <- function(..., also_single_tested = FALSE) { rsi_calc(..., type = "R", include_I = FALSE, minimum = 0, as_percent = FALSE, + also_single_tested = FALSE, only_count = TRUE) } #' @rdname count #' @export -count_IR <- function(...) { +count_IR <- function(..., also_single_tested = FALSE) { rsi_calc(..., type = "R", include_I = TRUE, minimum = 0, as_percent = FALSE, + also_single_tested = FALSE, only_count = TRUE) } #' @rdname count #' @export -count_I <- function(...) { +count_I <- function(..., also_single_tested = FALSE) { rsi_calc(..., type = "I", include_I = FALSE, minimum = 0, as_percent = FALSE, + also_single_tested = FALSE, only_count = TRUE) } #' @rdname count #' @export -count_SI <- function(...) { +count_SI <- function(..., also_single_tested = FALSE) { rsi_calc(..., type = "S", include_I = TRUE, minimum = 0, as_percent = FALSE, + also_single_tested = FALSE, only_count = TRUE) } #' @rdname count #' @export -count_S <- function(...) { +count_S <- function(..., also_single_tested = FALSE) { rsi_calc(..., type = "S", include_I = FALSE, minimum = 0, as_percent = FALSE, + also_single_tested = FALSE, only_count = TRUE) } diff --git a/R/data.R b/R/data.R index d919ffdb..9f07703b 100755 --- a/R/data.R +++ b/R/data.R @@ -175,7 +175,7 @@ #' Translation table for Certe #' #' A data set containing all bacteria codes of Certe MMB. These codes can be joined to data with an ID from \code{\link{microorganisms}$mo} (using \code{\link{left_join_microorganisms}}). GLIMS codes can also be translated to valid \code{MO}s with \code{\link{guess_mo}}. -#' @format A \code{\link{tibble}} with 2,664 observations and 2 variables: +#' @format A \code{\link{tibble}} with 2,665 observations and 2 variables: #' \describe{ #' \item{\code{certe}}{Code of microorganism according to Certe MMB} #' \item{\code{mo}}{Code of microorganism in \code{\link{microorganisms}}} diff --git a/R/eucast.R b/R/eucast.R index 47f395aa..220103ea 100755 --- a/R/eucast.R +++ b/R/eucast.R @@ -361,15 +361,20 @@ EUCAST_rules <- function(tbl, changed_results <<- changed_results + sum(before != after, na.rm = TRUE) # will be reset at start of every rule if (verbose == TRUE) { - verbose_new <- data.frame(rule_type = rule[1], - rule_set = rule[2], - force_to = to, - found = length(before), - changed = sum(before != after, na.rm = TRUE), - stringsAsFactors = FALSE) - verbose_new$target_columns <- list(unname(cols)) - verbose_new$target_rows <- list(unname(rows)) - verbose_info <<- rbind(verbose_info, verbose_new) + for (i in 1:length(cols)) { + # add new row for every affected column + verbose_new <- data.frame(rule_type = rule[1], + rule_set = rule[2], + force_to = to, + found = length(before), + changed = sum(before != after, na.rm = TRUE), + target_column = cols[i], + stringsAsFactors = FALSE) + verbose_new$target_rows <- list(unname(rows)) + rownames(verbose_new) <- NULL + verbose_info <<- rbind(verbose_info, verbose_new) + } + } } } @@ -410,15 +415,15 @@ EUCAST_rules <- function(tbl, # since ampicillin ^= amoxicillin, get the first from the latter (not in original table) if (!is.na(ampi) & !is.na(amox)) { if (verbose == TRUE) { - cat("\n VERBOSE: transforming", - length(which(tbl[, amox] == "S" & !tbl[, ampi] %in% c("S", "I", "R"))), - "empty ampicillin fields to 'S' based on amoxicillin.") - cat("\n VERBOSE: transforming", - length(which(tbl[, amox] == "I" & !tbl[, ampi] %in% c("S", "I", "R"))), - "empty ampicillin fields to 'I' based on amoxicillin.") - cat("\n VERBOSE: transforming", - length(which(tbl[, amox] == "R" & !tbl[, ampi] %in% c("S", "I", "R"))), - "empty ampicillin fields to 'R' based on amoxicillin.\n") + cat(bgGreen("\n VERBOSE: transforming", + length(which(tbl[, amox] == "S" & !tbl[, ampi] %in% c("S", "I", "R"))), + "empty ampicillin fields to 'S' based on amoxicillin. ")) + cat(bgGreen("\n VERBOSE: transforming", + length(which(tbl[, amox] == "I" & !tbl[, ampi] %in% c("S", "I", "R"))), + "empty ampicillin fields to 'I' based on amoxicillin. ")) + cat(bgGreen("\n VERBOSE: transforming", + length(which(tbl[, amox] == "R" & !tbl[, ampi] %in% c("S", "I", "R"))), + "empty ampicillin fields to 'R' based on amoxicillin. \n")) } tbl[which(tbl[, amox] == "S" & !tbl[, ampi] %in% c("S", "I", "R")), ampi] <- "S" tbl[which(tbl[, amox] == "I" & !tbl[, ampi] %in% c("S", "I", "R")), ampi] <- "I" diff --git a/R/mo.R b/R/mo.R index 6f3990e7..492b5dd8 100644 --- a/R/mo.R +++ b/R/mo.R @@ -195,7 +195,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = if (all(x %in% AMR::microorganisms[, property])) { # already existing mo } else if (all(x %in% AMR::microorganisms[, "mo"])) { - # existing mo codes + # existing mo codes when not looking for property "mo" suppressWarnings( x <- data.frame(mo = x, stringsAsFactors = FALSE) %>% left_join(AMR::microorganisms, by = "mo") %>% @@ -661,13 +661,12 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = x[x == MOs[mo == 'B_STRPTC_SAL', ..property][[1]][1L]] <- MOs[mo == 'B_STRPTC_GRK', ..property][[1]][1L] } - x_input_unique <- unique(x_input) - # fill in empty values again - x[is.na(x_input_unique) | is.null(x_input_unique) | identical(x_input_unique, "")] <- NA + # comply to x, which is also unique and without empty values + x_input_unique_nonempty <- unique(x_input[!is.na(x_input) & !is.null(x_input) & !identical(x_input, "")]) # left join the found results to the original input values (x_input) - df_found <- data.frame(input = as.character(x_input_unique), - found = x, + df_found <- data.frame(input = as.character(x_input_unique_nonempty), + found = as.character(x), stringsAsFactors = FALSE) df_input <- data.frame(input = as.character(x_input), stringsAsFactors = FALSE) diff --git a/R/portion.R b/R/portion.R index 0ad593fa..cc89e09a 100755 --- a/R/portion.R +++ b/R/portion.R @@ -24,6 +24,7 @@ #' @param ... one or more vectors (or columns) with antibiotic interpretations. They will be transformed internally with \code{\link{as.rsi}} if needed. Use multiple columns to calculate (the lack of) co-resistance: the probability where one of two drugs have a resistant or susceptible result. See Examples. #' @param minimum the minimal amount of available isolates. Any number lower than \code{minimum} will return \code{NA} with a warning. The default number of \code{30} isolates is advised by the Clinical and Laboratory Standards Institute (CLSI) as best practice, see Source. #' @param as_percent a logical to indicate whether the output must be returned as a hundred fold with \% sign (a character). A value of \code{0.123456} will then be returned as \code{"12.3\%"}. +#' @param also_single_tested a logical to indicate whether (in combination therapies) also observations should be included where not all antibiotics were tested, but at least one of the tested antibiotics contains a target interpretation (e.g. S in case of \code{portion_S} and R in case of \code{portion_R}). \strong{This would lead to selection bias in almost all cases.} #' @param data a \code{data.frame} containing columns with class \code{rsi} (see \code{\link{as.rsi}}) #' @param translate_ab a column name of the \code{\link{antibiotics}} data set to translate the antibiotic abbreviations to, using \code{\link{abname}}. This can be set with \code{\link{getOption}("get_antibiotic_names")}. #' @param combine_IR a logical to indicate whether all values of I and R must be merged into one, so the output only consists of S vs. IR (susceptible vs. non-susceptible) @@ -134,12 +135,14 @@ #' } portion_R <- function(..., minimum = 30, - as_percent = FALSE) { + as_percent = FALSE, + also_single_tested = FALSE) { rsi_calc(..., type = "R", include_I = FALSE, minimum = minimum, as_percent = as_percent, + also_single_tested = also_single_tested, only_count = FALSE) } @@ -147,12 +150,14 @@ portion_R <- function(..., #' @export portion_IR <- function(..., minimum = 30, - as_percent = FALSE) { + as_percent = FALSE, + also_single_tested = FALSE) { rsi_calc(..., type = "R", include_I = TRUE, minimum = minimum, as_percent = as_percent, + also_single_tested = also_single_tested, only_count = FALSE) } @@ -160,12 +165,14 @@ portion_IR <- function(..., #' @export portion_I <- function(..., minimum = 30, - as_percent = FALSE) { + as_percent = FALSE, + also_single_tested = FALSE) { rsi_calc(..., type = "I", include_I = FALSE, minimum = minimum, as_percent = as_percent, + also_single_tested = also_single_tested, only_count = FALSE) } @@ -173,12 +180,14 @@ portion_I <- function(..., #' @export portion_SI <- function(..., minimum = 30, - as_percent = FALSE) { + as_percent = FALSE, + also_single_tested = FALSE) { rsi_calc(..., type = "S", include_I = TRUE, minimum = minimum, as_percent = as_percent, + also_single_tested = also_single_tested, only_count = FALSE) } @@ -186,12 +195,14 @@ portion_SI <- function(..., #' @export portion_S <- function(..., minimum = 30, - as_percent = FALSE) { + as_percent = FALSE, + also_single_tested = FALSE) { rsi_calc(..., type = "S", include_I = FALSE, minimum = minimum, as_percent = as_percent, + also_single_tested = also_single_tested, only_count = FALSE) } diff --git a/R/rsi_calc.R b/R/rsi_calc.R index 83addfe9..15a1f030 100644 --- a/R/rsi_calc.R +++ b/R/rsi_calc.R @@ -16,12 +16,13 @@ # GNU General Public License for more details. # # ==================================================================== # -#' @importFrom dplyr %>% pull +#' @importFrom dplyr %>% pull all_vars any_vars filter_all funs mutate_all rsi_calc <- function(..., type, include_I, minimum, as_percent, + also_single_tested, only_count) { if (!is.logical(include_I)) { @@ -33,6 +34,9 @@ rsi_calc <- function(..., if (!is.logical(as_percent)) { stop('`as_percent` must be logical', call. = FALSE) } + if (!is.logical(also_single_tested)) { + stop('`also_single_tested` must be logical', call. = FALSE) + } dots_df <- ...elt(1) # it needs this evaluation dots <- base::eval(base::substitute(base::alist(...))) @@ -67,23 +71,53 @@ rsi_calc <- function(..., } print_warning <- FALSE - # check integrity of columns: force rsi class + + type_trans <- as.integer(as.rsi(type)) + type_others <- setdiff(1:3, type_trans) + if (is.data.frame(x)) { + rsi_integrity_check <- character(0) for (i in 1:ncol(x)) { + # check integrity of columns: force rsi class if (!is.rsi(x %>% pull(i))) { - x[, i] <- as.rsi(x[, i]) + rsi_integrity_check <- c(rsi_integrity_check, x %>% pull(i) %>% as.character()) + x[, i] <- suppressWarnings(as.rsi(x[, i])) # warning will be given later print_warning <- TRUE } x[, i] <- x %>% pull(i) %>% as.integer() } - x <- apply(X = x, - MARGIN = 1, - FUN = min) + if (length(rsi_integrity_check) > 0) { + # this will give a warning for invalid results, of all input columns (so only 1 warning) + rsi_integrity_check <- as.rsi(rsi_integrity_check) + } + + if (include_I == TRUE) { + x <- x %>% mutate_all(funs(ifelse(. == 2, type_trans, .))) + } + + if (also_single_tested == TRUE) { + # THE CHANCE THAT AT LEAST ONE RESULT IS type + found <- x %>% filter_all(any_vars(. == type_trans)) %>% nrow() + # THE CHANCE THAT AT LEAST ONE RESULT IS type OR ALL ARE TESTED + total <- found + x %>% filter_all(all_vars(. %in% type_others)) %>% nrow() + } else { + x <- apply(X = x, + MARGIN = 1, + FUN = min) + found <- sum(as.integer(x) == type_trans, na.rm = TRUE) + total <- length(x) - sum(is.na(x)) + } } else { if (!is.rsi(x)) { x <- as.rsi(x) print_warning <- TRUE } + x <- as.integer(x) + if (include_I == TRUE) { + x[x == 2] <- type_trans + } + found <- sum(x == type_trans, na.rm = TRUE) + total <- length(x) - sum(is.na(x)) } if (print_warning == TRUE) { @@ -91,21 +125,10 @@ rsi_calc <- function(..., call. = FALSE) } - if (type == "S") { - found <- sum(as.integer(x) <= 1 + include_I, na.rm = TRUE) - } else if (type == "I") { - found <- sum(as.integer(x) == 2, na.rm = TRUE) - } else if (type == "R") { - found <- sum(as.integer(x) >= 3 - include_I, na.rm = TRUE) - } else { - stop("invalid type") - } - if (only_count == TRUE) { return(found) } - total <- length(x) - sum(is.na(x)) if (total < minimum) { warning("Introducing NA: only ", total, " results available (minimum set to ", minimum, ").", call. = FALSE) result <- NA diff --git a/data/microorganisms.certe.rda b/data/microorganisms.certe.rda index 15fe6e6c..33bc1999 100644 Binary files a/data/microorganisms.certe.rda and b/data/microorganisms.certe.rda differ diff --git a/man/count.Rd b/man/count.Rd index 17e32403..9137ff6f 100644 --- a/man/count.Rd +++ b/man/count.Rd @@ -15,15 +15,15 @@ Wickham H. \strong{Tidy Data.} The Journal of Statistical Software, vol. 59, 2014. \url{http://vita.had.co.nz/papers/tidy-data.html} } \usage{ -count_R(...) +count_R(..., also_single_tested = FALSE) -count_IR(...) +count_IR(..., also_single_tested = FALSE) -count_I(...) +count_I(..., also_single_tested = FALSE) -count_SI(...) +count_SI(..., also_single_tested = FALSE) -count_S(...) +count_S(..., also_single_tested = FALSE) count_all(...) @@ -35,6 +35,8 @@ count_df(data, translate_ab = getOption("get_antibiotic_names", \arguments{ \item{...}{one or more vectors (or columns) with antibiotic interpretations. They will be transformed internally with \code{\link{as.rsi}} if needed.} +\item{also_single_tested}{a logical to indicate whether (in combination therapies) also observations should be included where not all antibiotics were tested, but at least one of the tested antibiotics contains a target interpretation (e.g. S in case of \code{portion_S} and R in case of \code{portion_R}). \strong{This would lead to selection bias in almost all cases.}} + \item{data}{a \code{data.frame} containing columns with class \code{rsi} (see \code{\link{as.rsi}})} \item{translate_ab}{a column name of the \code{\link{antibiotics}} data set to translate the antibiotic abbreviations to, using \code{\link{abname}}. This can be set with \code{\link{getOption}("get_antibiotic_names")}.} diff --git a/man/microorganisms.certe.Rd b/man/microorganisms.certe.Rd index 7b8ff185..b15bff2c 100644 --- a/man/microorganisms.certe.Rd +++ b/man/microorganisms.certe.Rd @@ -4,7 +4,7 @@ \name{microorganisms.certe} \alias{microorganisms.certe} \title{Translation table for Certe} -\format{A \code{\link{tibble}} with 2,664 observations and 2 variables: +\format{A \code{\link{tibble}} with 2,665 observations and 2 variables: \describe{ \item{\code{certe}}{Code of microorganism according to Certe MMB} \item{\code{mo}}{Code of microorganism in \code{\link{microorganisms}}} diff --git a/man/portion.Rd b/man/portion.Rd index 57368a68..3b8820dd 100644 --- a/man/portion.Rd +++ b/man/portion.Rd @@ -15,15 +15,20 @@ Wickham H. \strong{Tidy Data.} The Journal of Statistical Software, vol. 59, 2014. \url{http://vita.had.co.nz/papers/tidy-data.html} } \usage{ -portion_R(..., minimum = 30, as_percent = FALSE) +portion_R(..., minimum = 30, as_percent = FALSE, + also_single_tested = FALSE) -portion_IR(..., minimum = 30, as_percent = FALSE) +portion_IR(..., minimum = 30, as_percent = FALSE, + also_single_tested = FALSE) -portion_I(..., minimum = 30, as_percent = FALSE) +portion_I(..., minimum = 30, as_percent = FALSE, + also_single_tested = FALSE) -portion_SI(..., minimum = 30, as_percent = FALSE) +portion_SI(..., minimum = 30, as_percent = FALSE, + also_single_tested = FALSE) -portion_S(..., minimum = 30, as_percent = FALSE) +portion_S(..., minimum = 30, as_percent = FALSE, + also_single_tested = FALSE) portion_df(data, translate_ab = getOption("get_antibiotic_names", "official"), minimum = 30, as_percent = FALSE, combine_IR = FALSE) @@ -35,6 +40,8 @@ portion_df(data, translate_ab = getOption("get_antibiotic_names", \item{as_percent}{a logical to indicate whether the output must be returned as a hundred fold with \% sign (a character). A value of \code{0.123456} will then be returned as \code{"12.3\%"}.} +\item{also_single_tested}{a logical to indicate whether (in combination therapies) also observations should be included where not all antibiotics were tested, but at least one of the tested antibiotics contains a target interpretation (e.g. S in case of \code{portion_S} and R in case of \code{portion_R}). \strong{This would lead to selection bias in almost all cases.}} + \item{data}{a \code{data.frame} containing columns with class \code{rsi} (see \code{\link{as.rsi}})} \item{translate_ab}{a column name of the \code{\link{antibiotics}} data set to translate the antibiotic abbreviations to, using \code{\link{abname}}. This can be set with \code{\link{getOption}("get_antibiotic_names")}.} diff --git a/tests/testthat/test-portion.R b/tests/testthat/test-portion.R index 8ad11ac5..60df5fd0 100755 --- a/tests/testthat/test-portion.R +++ b/tests/testthat/test-portion.R @@ -17,6 +17,9 @@ test_that("portions works", { expect_equal(septic_patients %>% portion_S(amcl, gent), 0.9210074, tolerance = 0.001) + expect_equal(septic_patients %>% portion_S(amcl, gent, also_single_tested = TRUE), + 0.9239669, + tolerance = 0.001) # amcl+genta susceptibility around 92.1% expect_equal(suppressWarnings(rsi(septic_patients$amcl, @@ -66,6 +69,7 @@ test_that("portions works", { expect_error(portion_I("test", as_percent = "test")) expect_error(portion_S("test", minimum = "test")) expect_error(portion_S("test", as_percent = "test")) + expect_error(portion_S("test", also_single_tested = "test")) # check too low amount of isolates expect_identical(suppressWarnings(portion_R(septic_patients$amox, minimum = nrow(septic_patients) + 1)),