diff --git a/DESCRIPTION b/DESCRIPTION index a531a7e1..fc37da37 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR -Version: 0.4.0.9002 -Date: 2018-10-12 +Version: 0.4.0.9003 +Date: 2018-10-16 Title: Antimicrobial Resistance Analysis Authors@R: c( person( diff --git a/NEWS.md b/NEWS.md index a263e706..96148302 100755 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,7 @@ * Function `count_all` to get all available isolates (that like all `portion_*` and `count_*` functions also supports `summarise` and `group_by`), the old `n_rsi` is now an alias of `count_all` #### Changed +* Added parameter `combine_IR` (TRUE/FALSE) to functions `portion_df` and `count_df`, to indicate that all values of I and R must be merged into one, so the output only consists of S vs. IR (susceptible vs. non-susceptible) * Fix for `portion_*(..., as_percent = TRUE)` when minimal amount of isolates would not be met * Using `portion_*` functions now throws a warning when total available isolate is below parameter `minimum` * `as.mo` will not set package name as attribute anymore @@ -18,6 +19,7 @@ * `"MSSA"` -> *Staphylococcus aureus* * `"MSSE"` -> *Staphylococcus epidermidis* * Fix for `join` functions +* In `g.test`, when `sum(x)` is below 1000, suggest Fisher's Exact Test #### Other * Updated vignettes to comply with README diff --git a/R/count.R b/R/count.R index 3a22392e..6dbcdbb4 100644 --- a/R/count.R +++ b/R/count.R @@ -163,7 +163,8 @@ n_rsi <- function(...) { #' @importFrom dplyr %>% select_if bind_rows summarise_if mutate group_vars select everything #' @export count_df <- function(data, - translate_ab = getOption("get_antibiotic_names", "official")) { + translate_ab = getOption("get_antibiotic_names", "official"), + combine_IR = FALSE) { if (!"data.frame" %in% class(data)) { stop("`count_df` must be called on a data.frame") @@ -184,23 +185,37 @@ count_df <- function(data, mutate(Interpretation = "S") %>% select(Interpretation, everything()) - resI <- summarise_if(.tbl = data, - .predicate = is.rsi, - .funs = count_I) %>% - mutate(Interpretation = "I") %>% - select(Interpretation, everything()) + if (combine_IR == FALSE) { + resI <- summarise_if(.tbl = data, + .predicate = is.rsi, + .funs = count_I) %>% + mutate(Interpretation = "I") %>% + select(Interpretation, everything()) - resR <- summarise_if(.tbl = data, - .predicate = is.rsi, - .funs = count_R) %>% - mutate(Interpretation = "R") %>% - select(Interpretation, everything()) + resR <- summarise_if(.tbl = data, + .predicate = is.rsi, + .funs = count_R) %>% + mutate(Interpretation = "R") %>% + select(Interpretation, everything()) - data.groups <- group_vars(data) + data.groups <- group_vars(data) - res <- bind_rows(resS, resI, resR) %>% - mutate(Interpretation = factor(Interpretation, levels = c("R", "I", "S"), ordered = TRUE)) %>% - tidyr::gather(Antibiotic, Value, -Interpretation, -data.groups) + res <- bind_rows(resS, resI, resR) %>% + mutate(Interpretation = factor(Interpretation, levels = c("R", "I", "S"), ordered = TRUE)) %>% + tidyr::gather(Antibiotic, Value, -Interpretation, -data.groups) + } else { + resIR <- summarise_if(.tbl = data, + .predicate = is.rsi, + .funs = count_IR) %>% + mutate(Interpretation = "IR") %>% + select(Interpretation, everything()) + + data.groups <- group_vars(data) + + res <- bind_rows(resS, resIR) %>% + mutate(Interpretation = factor(Interpretation, levels = c("IR", "S"), ordered = TRUE)) %>% + tidyr::gather(Antibiotic, Value, -Interpretation, -data.groups) + } if (!translate_ab == FALSE) { if (!tolower(translate_ab) %in% tolower(colnames(AMR::antibiotics))) { diff --git a/R/g.test.R b/R/g.test.R index 8cb1ebfd..cb14f987 100644 --- a/R/g.test.R +++ b/R/g.test.R @@ -217,8 +217,15 @@ g.test <- function(x, } names(STATISTIC) <- "X-squared" names(PARAMETER) <- "df" - if (any(E < 5) && is.finite(PARAMETER)) - warning("G-statistic approximation may be incorrect") + # if (any(E < 5) && is.finite(PARAMETER)) + # warning("G-statistic approximation may be incorrect") + + # suggest fisher.test when total is < 1000 (John McDonald, Handbook of Biological Statistics, 2014) + if (sum(x, na.rm = TRUE) < 1000 && is.finite(PARAMETER)) { + warning("G-statistic approximation may be incorrect, consider Fisher's Exact test") + } else if (any(E < 5) && is.finite(PARAMETER)) { + warning("G-statistic approximation may be incorrect, consider Fisher's Exact test") + } structure(list(statistic = STATISTIC, parameter = PARAMETER, p.value = PVAL, method = METHOD, data.name = DNAME, observed = x, expected = E, residuals = (x - E)/sqrt(E), diff --git a/R/portion.R b/R/portion.R index 05f80fc7..0ad593fa 100755 --- a/R/portion.R +++ b/R/portion.R @@ -22,10 +22,11 @@ #' #' \code{portion_R} and \code{portion_IR} can be used to calculate resistance, \code{portion_S} and \code{portion_SI} can be used to calculate susceptibility.\cr #' @param ... one or more vectors (or columns) with antibiotic interpretations. They will be transformed internally with \code{\link{as.rsi}} if needed. Use multiple columns to calculate (the lack of) co-resistance: the probability where one of two drugs have a resistant or susceptible result. See Examples. -#' @param minimum minimal amount of available isolates. Any number lower than \code{minimum} will return \code{NA} with a warning. The default number of \code{30} isolates is advised by the Clinical and Laboratory Standards Institute (CLSI) as best practice, see Source. -#' @param as_percent logical to indicate whether the output must be returned as a hundred fold with \% sign (a character). A value of \code{0.123456} will then be returned as \code{"12.3\%"}. +#' @param minimum the minimal amount of available isolates. Any number lower than \code{minimum} will return \code{NA} with a warning. The default number of \code{30} isolates is advised by the Clinical and Laboratory Standards Institute (CLSI) as best practice, see Source. +#' @param as_percent a logical to indicate whether the output must be returned as a hundred fold with \% sign (a character). A value of \code{0.123456} will then be returned as \code{"12.3\%"}. #' @param data a \code{data.frame} containing columns with class \code{rsi} (see \code{\link{as.rsi}}) #' @param translate_ab a column name of the \code{\link{antibiotics}} data set to translate the antibiotic abbreviations to, using \code{\link{abname}}. This can be set with \code{\link{getOption}("get_antibiotic_names")}. +#' @param combine_IR a logical to indicate whether all values of I and R must be merged into one, so the output only consists of S vs. IR (susceptible vs. non-susceptible) #' @details \strong{Remember that you should filter your table to let it contain only first isolates!} Use \code{\link{first_isolate}} to determine them in your data set. #' #' These functions are not meant to count isolates, but to calculate the portion of resistance/susceptibility. If a column has been transformed with \code{\link{as.rsi}}, just use e.g. \code{isolates[isolates == "R"]} to get the resistant ones. You could then calculate the \code{\link{length}} of it. @@ -200,7 +201,8 @@ portion_S <- function(..., portion_df <- function(data, translate_ab = getOption("get_antibiotic_names", "official"), minimum = 30, - as_percent = FALSE) { + as_percent = FALSE, + combine_IR = FALSE) { if (!"data.frame" %in% class(data)) { stop("`portion_df` must be called on a data.frame") @@ -223,27 +225,43 @@ portion_df <- function(data, mutate(Interpretation = "S") %>% select(Interpretation, everything()) - resI <- summarise_if(.tbl = data, - .predicate = is.rsi, - .funs = portion_I, - minimum = minimum, - as_percent = as_percent) %>% - mutate(Interpretation = "I") %>% - select(Interpretation, everything()) + if (combine_IR == FALSE) { + resI <- summarise_if(.tbl = data, + .predicate = is.rsi, + .funs = portion_I, + minimum = minimum, + as_percent = as_percent) %>% + mutate(Interpretation = "I") %>% + select(Interpretation, everything()) - resR <- summarise_if(.tbl = data, - .predicate = is.rsi, - .funs = portion_R, - minimum = minimum, - as_percent = as_percent) %>% - mutate(Interpretation = "R") %>% - select(Interpretation, everything()) + resR <- summarise_if(.tbl = data, + .predicate = is.rsi, + .funs = portion_R, + minimum = minimum, + as_percent = as_percent) %>% + mutate(Interpretation = "R") %>% + select(Interpretation, everything()) - data.groups <- group_vars(data) + data.groups <- group_vars(data) - res <- bind_rows(resS, resI, resR) %>% - mutate(Interpretation = factor(Interpretation, levels = c("R", "I", "S"), ordered = TRUE)) %>% - tidyr::gather(Antibiotic, Value, -Interpretation, -data.groups) + res <- bind_rows(resS, resI, resR) %>% + mutate(Interpretation = factor(Interpretation, levels = c("R", "I", "S"), ordered = TRUE)) %>% + tidyr::gather(Antibiotic, Value, -Interpretation, -data.groups) + } else { + resIR <- summarise_if(.tbl = data, + .predicate = is.rsi, + .funs = portion_IR, + minimum = minimum, + as_percent = as_percent) %>% + mutate(Interpretation = "IR") %>% + select(Interpretation, everything()) + + data.groups <- group_vars(data) + + res <- bind_rows(resS, resIR) %>% + mutate(Interpretation = factor(Interpretation, levels = c("IR", "S"), ordered = TRUE)) %>% + tidyr::gather(Antibiotic, Value, -Interpretation, -data.groups) + } if (!translate_ab == FALSE) { if (!tolower(translate_ab) %in% tolower(colnames(AMR::antibiotics))) { diff --git a/man/count.Rd b/man/count.Rd index 89091806..17e32403 100644 --- a/man/count.Rd +++ b/man/count.Rd @@ -30,7 +30,7 @@ count_all(...) n_rsi(...) count_df(data, translate_ab = getOption("get_antibiotic_names", - "official")) + "official"), combine_IR = FALSE) } \arguments{ \item{...}{one or more vectors (or columns) with antibiotic interpretations. They will be transformed internally with \code{\link{as.rsi}} if needed.} @@ -38,6 +38,8 @@ count_df(data, translate_ab = getOption("get_antibiotic_names", \item{data}{a \code{data.frame} containing columns with class \code{rsi} (see \code{\link{as.rsi}})} \item{translate_ab}{a column name of the \code{\link{antibiotics}} data set to translate the antibiotic abbreviations to, using \code{\link{abname}}. This can be set with \code{\link{getOption}("get_antibiotic_names")}.} + +\item{combine_IR}{a logical to indicate whether all values of I and R must be merged into one, so the output only consists of S vs. IR (susceptible vs. non-susceptible)} } \value{ Integer diff --git a/man/portion.Rd b/man/portion.Rd index a5420e64..57368a68 100644 --- a/man/portion.Rd +++ b/man/portion.Rd @@ -26,18 +26,20 @@ portion_SI(..., minimum = 30, as_percent = FALSE) portion_S(..., minimum = 30, as_percent = FALSE) portion_df(data, translate_ab = getOption("get_antibiotic_names", - "official"), minimum = 30, as_percent = FALSE) + "official"), minimum = 30, as_percent = FALSE, combine_IR = FALSE) } \arguments{ \item{...}{one or more vectors (or columns) with antibiotic interpretations. They will be transformed internally with \code{\link{as.rsi}} if needed. Use multiple columns to calculate (the lack of) co-resistance: the probability where one of two drugs have a resistant or susceptible result. See Examples.} -\item{minimum}{minimal amount of available isolates. Any number lower than \code{minimum} will return \code{NA} with a warning. The default number of \code{30} isolates is advised by the Clinical and Laboratory Standards Institute (CLSI) as best practice, see Source.} +\item{minimum}{the minimal amount of available isolates. Any number lower than \code{minimum} will return \code{NA} with a warning. The default number of \code{30} isolates is advised by the Clinical and Laboratory Standards Institute (CLSI) as best practice, see Source.} -\item{as_percent}{logical to indicate whether the output must be returned as a hundred fold with \% sign (a character). A value of \code{0.123456} will then be returned as \code{"12.3\%"}.} +\item{as_percent}{a logical to indicate whether the output must be returned as a hundred fold with \% sign (a character). A value of \code{0.123456} will then be returned as \code{"12.3\%"}.} \item{data}{a \code{data.frame} containing columns with class \code{rsi} (see \code{\link{as.rsi}})} \item{translate_ab}{a column name of the \code{\link{antibiotics}} data set to translate the antibiotic abbreviations to, using \code{\link{abname}}. This can be set with \code{\link{getOption}("get_antibiotic_names")}.} + +\item{combine_IR}{a logical to indicate whether all values of I and R must be merged into one, so the output only consists of S vs. IR (susceptible vs. non-susceptible)} } \value{ Double or, when \code{as_percent = TRUE}, a character. diff --git a/man/rsi.Rd b/man/rsi.Rd index 404842a3..def785c0 100644 --- a/man/rsi.Rd +++ b/man/rsi.Rd @@ -12,9 +12,9 @@ rsi(ab1, ab2 = NULL, interpretation = "IR", minimum = 30, \item{interpretation}{antimicrobial interpretation to check for} -\item{minimum}{minimal amount of available isolates. Any number lower than \code{minimum} will return \code{NA} with a warning. The default number of \code{30} isolates is advised by the Clinical and Laboratory Standards Institute (CLSI) as best practice, see Source.} +\item{minimum}{the minimal amount of available isolates. Any number lower than \code{minimum} will return \code{NA} with a warning. The default number of \code{30} isolates is advised by the Clinical and Laboratory Standards Institute (CLSI) as best practice, see Source.} -\item{as_percent}{logical to indicate whether the output must be returned as a hundred fold with \% sign (a character). A value of \code{0.123456} will then be returned as \code{"12.3\%"}.} +\item{as_percent}{a logical to indicate whether the output must be returned as a hundred fold with \% sign (a character). A value of \code{0.123456} will then be returned as \code{"12.3\%"}.} \item{...}{deprecated parameters to support usage on older versions} } diff --git a/tests/testthat/test-count.R b/tests/testthat/test-count.R index cb08de25..779fc4b8 100644 --- a/tests/testthat/test-count.R +++ b/tests/testthat/test-count.R @@ -27,8 +27,18 @@ test_that("counts work", { pull(combination), c(192, 446, 184, 474)) - expect_equal(septic_patients %>% select(amox, cipr) %>% count_df(translate_ab = "official") %>% nrow(), - 6) + # count_df + expect_equal( + septic_patients %>% select(amox) %>% count_df() %>% pull(Value), + c(septic_patients$amox %>% count_S(), + septic_patients$amox %>% count_I(), + septic_patients$amox %>% count_R()) + ) + expect_equal( + septic_patients %>% select(amox) %>% count_df(combine_IR = TRUE) %>% pull(Value), + c(septic_patients$amox %>% count_S(), + septic_patients$amox %>% count_IR()) + ) # warning for speed loss expect_warning(count_R(as.character(septic_patients$amcl))) diff --git a/tests/testthat/test-portion.R b/tests/testthat/test-portion.R index 87bdaa82..a179e93f 100755 --- a/tests/testthat/test-portion.R +++ b/tests/testthat/test-portion.R @@ -117,6 +117,12 @@ test_that("old rsi works", { septic_patients$amox %>% portion_I(), septic_patients$amox %>% portion_R()) ) + expect_equal( + septic_patients %>% select(amox) %>% portion_df(combine_IR = TRUE) %>% pull(Value), + c(septic_patients$amox %>% portion_S(), + septic_patients$amox %>% portion_IR()) + ) + })