parameter combine_IR

2025-07-10 05:41:59 +02:00 · 2018-10-16 09:59:31 +02:00
parent 5b5b95a47b
commit d5a41de711
10 changed files with 110 additions and 48 deletions
--- a/4
+++ b/4
@ -1,6 +1,6 @@
 Package: AMR
-Version: 0.4.0.9002
-Date: 2018-10-12
+Version: 0.4.0.9003
+Date: 2018-10-16
 Title: Antimicrobial Resistance Analysis
 Authors@R: c(
    person(
--- a/NEWS.md
+++ b/NEWS.md
@ -4,6 +4,7 @@
 * Function `count_all` to get all available isolates (that like all `portion_*` and `count_*` functions also supports `summarise` and `group_by`), the old `n_rsi` is now an alias of `count_all`

 #### Changed
+* Added parameter `combine_IR` (TRUE/FALSE) to functions `portion_df` and `count_df`, to indicate that all values of I and R must be merged into one, so the output only consists of S vs. IR (susceptible vs. non-susceptible)
 * Fix for `portion_*(..., as_percent = TRUE)` when minimal amount of isolates would not be met
 * Using `portion_*` functions now throws a warning when total available isolate is below parameter `minimum`
 * `as.mo` will not set package name as attribute anymore
@ -18,6 +19,7 @@
  * `"MSSA"` -> *Staphylococcus aureus*
  * `"MSSE"` -> *Staphylococcus epidermidis*
 * Fix for `join` functions
+* In `g.test`, when `sum(x)` is below 1000, suggest Fisher's Exact Test

 #### Other
 * Updated vignettes to comply with README
--- a/R/count.R
+++ b/R/count.R
@ -163,7 +163,8 @@ n_rsi <- function(...) {
 #' @importFrom dplyr %>% select_if bind_rows summarise_if mutate group_vars select everything
 #' @export
 count_df <- function(data,
-                     translate_ab = getOption("get_antibiotic_names", "official")) {
+                     translate_ab = getOption("get_antibiotic_names", "official"),
+                     combine_IR = FALSE) {

  if (!"data.frame" %in% class(data)) {
    stop("`count_df` must be called on a data.frame")
@ -184,23 +185,37 @@ count_df <- function(data,
    mutate(Interpretation = "S") %>%
    select(Interpretation, everything())

-  resI <- summarise_if(.tbl = data,
-                       .predicate = is.rsi,
-                       .funs = count_I) %>%
-    mutate(Interpretation = "I") %>%
-    select(Interpretation, everything())
+  if (combine_IR == FALSE) {
+    resI <- summarise_if(.tbl = data,
+                         .predicate = is.rsi,
+                         .funs = count_I) %>%
+      mutate(Interpretation = "I") %>%
+      select(Interpretation, everything())

-  resR <- summarise_if(.tbl = data,
-                       .predicate = is.rsi,
-                       .funs = count_R) %>%
-    mutate(Interpretation = "R") %>%
-    select(Interpretation, everything())
+    resR <- summarise_if(.tbl = data,
+                         .predicate = is.rsi,
+                         .funs = count_R) %>%
+      mutate(Interpretation = "R") %>%
+      select(Interpretation, everything())

-  data.groups <- group_vars(data)
+    data.groups <- group_vars(data)

-  res <- bind_rows(resS, resI, resR) %>%
-    mutate(Interpretation = factor(Interpretation, levels = c("R", "I", "S"), ordered = TRUE)) %>%
-    tidyr::gather(Antibiotic, Value, -Interpretation, -data.groups)
+    res <- bind_rows(resS, resI, resR) %>%
+      mutate(Interpretation = factor(Interpretation, levels = c("R", "I", "S"), ordered = TRUE)) %>%
+      tidyr::gather(Antibiotic, Value, -Interpretation, -data.groups)
+  } else {
+    resIR <- summarise_if(.tbl = data,
+                          .predicate = is.rsi,
+                          .funs = count_IR) %>%
+      mutate(Interpretation = "IR") %>%
+      select(Interpretation, everything())
+
+    data.groups <- group_vars(data)
+
+    res <- bind_rows(resS, resIR) %>%
+      mutate(Interpretation = factor(Interpretation, levels = c("IR", "S"), ordered = TRUE)) %>%
+      tidyr::gather(Antibiotic, Value, -Interpretation, -data.groups)
+  }

  if (!translate_ab == FALSE) {
    if (!tolower(translate_ab) %in% tolower(colnames(AMR::antibiotics))) {
--- a/R/g.test.R
+++ b/R/g.test.R
@ -217,8 +217,15 @@ g.test <- function(x,
  }
  names(STATISTIC) <- "X-squared"
  names(PARAMETER) <- "df"
-  if (any(E < 5) && is.finite(PARAMETER))
-    warning("G-statistic approximation may be incorrect")
+  # if (any(E < 5) && is.finite(PARAMETER))
+  #   warning("G-statistic approximation may be incorrect")
+
+  # suggest fisher.test when total is < 1000 (John McDonald, Handbook of Biological Statistics, 2014)
+  if (sum(x, na.rm = TRUE) < 1000 && is.finite(PARAMETER)) {
+    warning("G-statistic approximation may be incorrect, consider Fisher's Exact test")
+  } else if (any(E < 5) && is.finite(PARAMETER)) {
+    warning("G-statistic approximation may be incorrect, consider Fisher's Exact test")
+  }
  structure(list(statistic = STATISTIC, parameter = PARAMETER,
                 p.value = PVAL, method = METHOD, data.name = DNAME,
                 observed = x, expected = E, residuals = (x - E)/sqrt(E),
--- a/R/portion.R
+++ b/R/portion.R
@ -22,10 +22,11 @@
 #'
 #' \code{portion_R} and \code{portion_IR} can be used to calculate resistance, \code{portion_S} and \code{portion_SI} can be used to calculate susceptibility.\cr
 #' @param ... one or more vectors (or columns) with antibiotic interpretations. They will be transformed internally with \code{\link{as.rsi}} if needed. Use multiple columns to calculate (the lack of) co-resistance: the probability where one of two drugs have a resistant or susceptible result. See Examples.
-#' @param minimum minimal amount of available isolates. Any number lower than \code{minimum} will return \code{NA} with a warning. The default number of \code{30} isolates is advised by the Clinical and Laboratory Standards Institute (CLSI) as best practice, see Source.
-#' @param as_percent logical to indicate whether the output must be returned as a hundred fold with \% sign (a character). A value of \code{0.123456} will then be returned as \code{"12.3\%"}.
+#' @param minimum the minimal amount of available isolates. Any number lower than \code{minimum} will return \code{NA} with a warning. The default number of \code{30} isolates is advised by the Clinical and Laboratory Standards Institute (CLSI) as best practice, see Source.
+#' @param as_percent a logical to indicate whether the output must be returned as a hundred fold with \% sign (a character). A value of \code{0.123456} will then be returned as \code{"12.3\%"}.
 #' @param data a \code{data.frame} containing columns with class \code{rsi} (see \code{\link{as.rsi}})
 #' @param translate_ab a column name of the \code{\link{antibiotics}} data set to translate the antibiotic abbreviations to, using \code{\link{abname}}. This can be set with \code{\link{getOption}("get_antibiotic_names")}.
+#' @param combine_IR a logical to indicate whether all values of I and R must be merged into one, so the output only consists of S vs. IR (susceptible vs. non-susceptible)
 #' @details \strong{Remember that you should filter your table to let it contain only first isolates!} Use \code{\link{first_isolate}} to determine them in your data set.
 #'
 #' These functions are not meant to count isolates, but to calculate the portion of resistance/susceptibility. If a column has been transformed with \code{\link{as.rsi}}, just use e.g. \code{isolates[isolates == "R"]} to get the resistant ones. You could then calculate the \code{\link{length}} of it.
@ -200,7 +201,8 @@ portion_S <- function(...,
 portion_df <- function(data,
                       translate_ab = getOption("get_antibiotic_names", "official"),
                       minimum = 30,
-                       as_percent = FALSE) {
+                       as_percent = FALSE,
+                       combine_IR = FALSE) {

  if (!"data.frame" %in% class(data)) {
    stop("`portion_df` must be called on a data.frame")
@ -223,27 +225,43 @@ portion_df <- function(data,
    mutate(Interpretation = "S") %>%
    select(Interpretation, everything())

-  resI <- summarise_if(.tbl = data,
-                       .predicate = is.rsi,
-                       .funs = portion_I,
-                       minimum = minimum,
-                       as_percent = as_percent) %>%
-    mutate(Interpretation = "I") %>%
-    select(Interpretation, everything())
+  if (combine_IR == FALSE) {
+    resI <- summarise_if(.tbl = data,
+                         .predicate = is.rsi,
+                         .funs = portion_I,
+                         minimum = minimum,
+                         as_percent = as_percent) %>%
+      mutate(Interpretation = "I") %>%
+      select(Interpretation, everything())

-  resR <- summarise_if(.tbl = data,
-                       .predicate = is.rsi,
-                       .funs = portion_R,
-                       minimum = minimum,
-                       as_percent = as_percent) %>%
-    mutate(Interpretation = "R") %>%
-    select(Interpretation, everything())
+    resR <- summarise_if(.tbl = data,
+                         .predicate = is.rsi,
+                         .funs = portion_R,
+                         minimum = minimum,
+                         as_percent = as_percent) %>%
+      mutate(Interpretation = "R") %>%
+      select(Interpretation, everything())

-  data.groups <- group_vars(data)
+    data.groups <- group_vars(data)

-  res <- bind_rows(resS, resI, resR) %>%
-    mutate(Interpretation = factor(Interpretation, levels = c("R", "I", "S"), ordered = TRUE)) %>%
-    tidyr::gather(Antibiotic, Value, -Interpretation, -data.groups)
+    res <- bind_rows(resS, resI, resR) %>%
+      mutate(Interpretation = factor(Interpretation, levels = c("R", "I", "S"), ordered = TRUE)) %>%
+      tidyr::gather(Antibiotic, Value, -Interpretation, -data.groups)
+  } else {
+    resIR <- summarise_if(.tbl = data,
+                          .predicate = is.rsi,
+                          .funs = portion_IR,
+                          minimum = minimum,
+                          as_percent = as_percent) %>%
+      mutate(Interpretation = "IR") %>%
+      select(Interpretation, everything())
+
+    data.groups <- group_vars(data)
+
+    res <- bind_rows(resS, resIR) %>%
+      mutate(Interpretation = factor(Interpretation, levels = c("IR", "S"), ordered = TRUE)) %>%
+      tidyr::gather(Antibiotic, Value, -Interpretation, -data.groups)
+  }

  if (!translate_ab == FALSE) {
    if (!tolower(translate_ab) %in% tolower(colnames(AMR::antibiotics))) {
--- a/man/count.Rd
+++ b/man/count.Rd
@ -30,7 +30,7 @@ count_all(...)
 n_rsi(...)

 count_df(data, translate_ab = getOption("get_antibiotic_names",
-  "official"))
+  "official"), combine_IR = FALSE)
 }
 \arguments{
 \item{...}{one or more vectors (or columns) with antibiotic interpretations. They will be transformed internally with \code{\link{as.rsi}} if needed.}
@ -38,6 +38,8 @@ count_df(data, translate_ab = getOption("get_antibiotic_names",
 \item{data}{a \code{data.frame} containing columns with class \code{rsi} (see \code{\link{as.rsi}})}

 \item{translate_ab}{a column name of the \code{\link{antibiotics}} data set to translate the antibiotic abbreviations to, using \code{\link{abname}}. This can be set with \code{\link{getOption}("get_antibiotic_names")}.}
+
+\item{combine_IR}{a logical to indicate whether all values of I and R must be merged into one, so the output only consists of S vs. IR (susceptible vs. non-susceptible)}
 }
 \value{
 Integer
--- a/man/portion.Rd
+++ b/man/portion.Rd
@ -26,18 +26,20 @@ portion_SI(..., minimum = 30, as_percent = FALSE)
 portion_S(..., minimum = 30, as_percent = FALSE)

 portion_df(data, translate_ab = getOption("get_antibiotic_names",
-  "official"), minimum = 30, as_percent = FALSE)
+  "official"), minimum = 30, as_percent = FALSE, combine_IR = FALSE)
 }
 \arguments{
 \item{...}{one or more vectors (or columns) with antibiotic interpretations. They will be transformed internally with \code{\link{as.rsi}} if needed. Use multiple columns to calculate (the lack of) co-resistance: the probability where one of two drugs have a resistant or susceptible result. See Examples.}

-\item{minimum}{minimal amount of available isolates. Any number lower than \code{minimum} will return \code{NA} with a warning. The default number of \code{30} isolates is advised by the Clinical and Laboratory Standards Institute (CLSI) as best practice, see Source.}
+\item{minimum}{the minimal amount of available isolates. Any number lower than \code{minimum} will return \code{NA} with a warning. The default number of \code{30} isolates is advised by the Clinical and Laboratory Standards Institute (CLSI) as best practice, see Source.}

-\item{as_percent}{logical to indicate whether the output must be returned as a hundred fold with \% sign (a character). A value of \code{0.123456} will then be returned as \code{"12.3\%"}.}
+\item{as_percent}{a logical to indicate whether the output must be returned as a hundred fold with \% sign (a character). A value of \code{0.123456} will then be returned as \code{"12.3\%"}.}

 \item{data}{a \code{data.frame} containing columns with class \code{rsi} (see \code{\link{as.rsi}})}

 \item{translate_ab}{a column name of the \code{\link{antibiotics}} data set to translate the antibiotic abbreviations to, using \code{\link{abname}}. This can be set with \code{\link{getOption}("get_antibiotic_names")}.}
+
+\item{combine_IR}{a logical to indicate whether all values of I and R must be merged into one, so the output only consists of S vs. IR (susceptible vs. non-susceptible)}
 }
 \value{
 Double or, when \code{as_percent = TRUE}, a character.
--- a/man/rsi.Rd
+++ b/man/rsi.Rd
@ -12,9 +12,9 @@ rsi(ab1, ab2 = NULL, interpretation = "IR", minimum = 30,

 \item{interpretation}{antimicrobial interpretation to check for}

-\item{minimum}{minimal amount of available isolates. Any number lower than \code{minimum} will return \code{NA} with a warning. The default number of \code{30} isolates is advised by the Clinical and Laboratory Standards Institute (CLSI) as best practice, see Source.}
+\item{minimum}{the minimal amount of available isolates. Any number lower than \code{minimum} will return \code{NA} with a warning. The default number of \code{30} isolates is advised by the Clinical and Laboratory Standards Institute (CLSI) as best practice, see Source.}

-\item{as_percent}{logical to indicate whether the output must be returned as a hundred fold with \% sign (a character). A value of \code{0.123456} will then be returned as \code{"12.3\%"}.}
+\item{as_percent}{a logical to indicate whether the output must be returned as a hundred fold with \% sign (a character). A value of \code{0.123456} will then be returned as \code{"12.3\%"}.}

 \item{...}{deprecated parameters to support usage on older versions}
 }
--- a/tests/testthat/test-count.R
+++ b/tests/testthat/test-count.R
@ -27,8 +27,18 @@ test_that("counts work", {
                 pull(combination),
               c(192, 446, 184, 474))

-  expect_equal(septic_patients %>% select(amox, cipr) %>% count_df(translate_ab = "official") %>% nrow(),
-               6)
+  # count_df
+  expect_equal(
+    septic_patients %>% select(amox) %>% count_df() %>% pull(Value),
+    c(septic_patients$amox %>% count_S(),
+      septic_patients$amox %>% count_I(),
+      septic_patients$amox %>% count_R())
+  )
+  expect_equal(
+    septic_patients %>% select(amox) %>% count_df(combine_IR = TRUE) %>% pull(Value),
+    c(septic_patients$amox %>% count_S(),
+      septic_patients$amox %>% count_IR())
+  )

  # warning for speed loss
  expect_warning(count_R(as.character(septic_patients$amcl)))
--- a/tests/testthat/test-portion.R
+++ b/tests/testthat/test-portion.R
@ -117,6 +117,12 @@ test_that("old rsi works", {
      septic_patients$amox %>% portion_I(),
      septic_patients$amox %>% portion_R())
  )
+  expect_equal(
+    septic_patients %>% select(amox) %>% portion_df(combine_IR = TRUE) %>% pull(Value),
+    c(septic_patients$amox %>% portion_S(),
+      septic_patients$amox %>% portion_IR())
+  )
+

 })