removed ratio, better rsi_calc, update for freq

2026-07-14 20:30:59 +02:00 · 2018-08-24 11:08:20 +02:00
parent 0c0e538ef4
commit a100d07da6
13 changed files with 116 additions and 140 deletions
--- a/2
+++ b/2
@@ -1,5 +1,5 @@
 Package: AMR
-Version: 0.3.0.9002
+Version: 0.3.0.9003
 Date: 2018-08-23
 Title: Antimicrobial Resistance Analysis
 Authors@R: c(
--- a/2
+++ b/2
@@ -79,7 +79,6 @@ export(portion_R)
 export(portion_S)
 export(portion_SI)
 export(portion_df)
-export(ratio)
 export(resistance_predict)
 export(right_join_microorganisms)
 export(rsi)
@@ -127,7 +126,6 @@ importFrom(dplyr,arrange)
 importFrom(dplyr,arrange_at)
 importFrom(dplyr,as_tibble)
 importFrom(dplyr,between)
-importFrom(dplyr,bind_cols)
 importFrom(dplyr,bind_rows)
 importFrom(dplyr,case_when)
 importFrom(dplyr,desc)
--- a/NEWS.md
+++ b/NEWS.md
@@ -2,9 +2,12 @@

 #### New
 * Functions `count_R`, `count_IR`, `count_I`, `count_SI` and `count_S` to selectively count resistant or susceptible isolates
+  * New function `count_df` to get all counts of S, I and R of a data set with antibiotic columns, with support for grouped variables
 * Function `is.rsi.eligible` to check for columns that have valid antimicrobial results, but do not have the `rsi` class yet. Transform the columns of your raw data with: `data %>% mutate_if(is.rsi.eligible, as.rsi)`

 #### Changed
+* Removed function `ratio`
+* Fix in `as.mic` for values ending in zeroes after a real number
 * Added parameters `minimum` and `as_percent` to `portion_df`
 * Support for quasiquotation in the functions series `count_*` and `portions_*`, and `n_rsi`. This allows to check for more than 2 vectors or columns.
  * `septic_patients %>% select(amox, cipr) %>% count_R()`
@@ -14,6 +17,19 @@
 * Edited `ggplot_rsi` and `geom_rsi` so they can cope with `count_df`. The new `fun` parameter has value `portion_df` at default, but can be set to `count_df`.
 * Fix for `ggplot_rsi` when the `ggplot2` package was not loaded
 * Added possibility to set any parameter to `geom_rsi` (and `ggplot_rsi`) so you can set your own preferences
+* Support for types list and matrix for `freq`
+  ```r
+  my_matrix = with(septic_patients, matrix(c(age, sex), ncol = 2))
+  freq(my_matrix)
+  ```
+  * Subsetting also possible for lists:
+    ```r
+    my_list = list(age = septic_patients$age, sex = septic_patients$sex)
+    my_list %>% freq(age)
+    ```
+    
+#### Other
+* More unit tests to ensure better integrity of functions

 # 0.3.0 (latest stable version)
 **Published on CRAN: 2018-08-14**
--- a/R/classes.R
+++ b/R/classes.R
@@ -240,10 +240,13 @@ as.mic <- function(x, na.rm = FALSE) {
    # remove all after last digit
    x <- gsub('[^0-9]+$', '', x)
    # remove last zeroes
-    x <- gsub('[.]?0+$', '', x)
+    x <- gsub('([.].?)0+$', '\\1', x)
    # force to be character
    x <- as.character(x)

+    # previously unempty values now empty - should return a warning later on
+    x[x.bak != "" & x == ""] <- "invalid"
+
    # these are alllowed MIC values and will become factor levels
    lvls <- c("<0.002", "<=0.002", "0.002", ">=0.002", ">0.002",
              "<0.003", "<=0.003", "0.003", ">=0.003", ">0.003",
@@ -275,7 +278,7 @@ as.mic <- function(x, na.rm = FALSE) {
              "<0.25", "<=0.25", "0.25", ">=0.25", ">0.25",
              "<0.256", "<=0.256", "0.256", ">=0.256", ">0.256",
              "<0.28", "<=0.28", "0.28", ">=0.28", ">0.28",
-              "<0.30", "<=0.30", "0.30", ">=0.30", ">0.30",
+              "<0.3", "<=0.3", "0.3", ">=0.3", ">0.3",
              "<0.32", "<=0.32", "0.32", ">=0.32", ">0.32",
              "<0.36", "<=0.36", "0.36", ">=0.36", ">0.36",
              "<0.38", "<=0.38", "0.38", ">=0.38", ">0.38",
--- a/R/freq.R
+++ b/R/freq.R
@@ -152,22 +152,39 @@ frequency_tbl <- function(x,

  mult.columns <- 0

+  x.name <- NULL
+  cols <- NULL
+  if (any(class(x) == 'list')) {
+    cols <- names(x)
+    x <- as.data.frame(x, stringsAsFactors = FALSE)
+    x.name <- "a list"
+  } else if (any(class(x) == 'matrix')) {
+    x <- as.data.frame(x, stringsAsFactors = FALSE)
+    x.name <- "a matrix"
+    cols <- colnames(x)
+    if (all(cols %like% 'V[0-9]')) {
+      cols <- NULL
+    }
+  }
+
  if (any(class(x) == 'data.frame')) {
-    x.name <- deparse(substitute(x))
+    if (is.null(x.name)) {
+      x.name <- deparse(substitute(x))
+    }
    if (x.name == ".") {
      x.name <- NULL
    }
    dots <- base::eval(base::substitute(base::alist(...)))
    ndots <- length(dots)

-    if (NROW(x) == 0) {
-      x <- NA
-    } else if (ndots > 0 & ndots < 10) {
+    if (ndots < 10) {
      cols <- as.character(dots)
      if (!all(cols %in% colnames(x))) {
        stop("one or more columns not found: `", paste(cols, collapse = "`, `"), '`', call. = FALSE)
      }
-      x <- x[, cols]
+      if (length(cols) > 0) {
+        x <- x[, cols]
+      }
    } else if (ndots >= 10) {
      stop('A maximum of 9 columns can be analysed at the same time.', call. = FALSE)
    } else {
@@ -298,10 +315,6 @@ frequency_tbl <- function(x,
    header <- header %>% paste0(markdown_line, 'Class:     ', class(x) %>% rev() %>% paste(collapse = " > "))
  }

-  if (is.list(x) | is.matrix(x) | is.environment(x) | is.function(x)) {
-    stop('frequency tables do not support lists, matrices, environments and functions.', call. = FALSE)
-  }
-
  header <- header %>% paste0(markdown_line, '\nLength:    ', (NAs %>% length() + x %>% length()) %>% format(),
                              ' (of which NA: ', NAs %>% length() %>% format(),
                              ' = ', (NAs %>% length() / (NAs %>% length() + x %>% length())) %>% percent(force_zero = TRUE, round = digits) %>% sub('NaN', '0', ., fixed = TRUE), ')')
@@ -485,6 +498,10 @@ print.frequency_tbl <- function(x, nmax = getOption("max.print.freq", default =

  opt <- attr(x, 'opt')

+  if (length(opt$vars) == 0) {
+    opt$vars <- NULL
+  }
+
  if (!is.null(opt$data) & !is.null(opt$vars)) {
    title <- paste0("of `", paste0(opt$vars, collapse = "` and `"), "` from ", opt$data)
  } else if (!is.null(opt$data) & is.null(opt$vars)) {
--- a/R/g.test.R
+++ b/R/g.test.R
@@ -79,11 +79,7 @@
 #' # by a single gene with two co-dominant alleles, you would expect a 1:2:1
 #' # ratio.
 #'
-#' x <- c(772, 1611, 737)
-#' E <- ratio(x, "1:2:1")
-#' E
-#' # 780 1560 780
-#'
+#' x <- c(772, 1611, 737)#'
 #' G <- g.test(x, p = c(1, 2, 1) / 4)
 #' # G$p.value = 0.12574.
 #'
@@ -228,30 +224,3 @@ g.test <- function(x,
                 observed = x, expected = E, residuals = (x - E)/sqrt(E),
                 stdres = (x - E)/sqrt(V)), class = "htest")
 }
-
-#' Transform vector to ratio
-#' @param x vector of values
-#' @param ratio vector with ratios of \code{x} and with same length (like \code{ratio = c(1, 2, 1)}) or a text with characters \code{":"}, \code{"-"} or \code{","} (like \code{ratio = "1:2:1"} or even \code{ratio = "1:2:1.25"})
-#' @export
-#' @seealso \code{\link{g.test}}
-#' @references McDonald, J.H. 2014. \strong{Handbook of Biological Statistics (3rd ed.)}. Sparky House Publishing, Baltimore, Maryland.
-#' @importFrom dplyr %>%
-#' @inherit g.test examples
-ratio <- function(x, ratio) {
-  if (!all(is.numeric(x))) {
-    stop('`x` must be a vector of numeric values.')
-  }
-  if (length(ratio) == 1) {
-    if (ratio %like% '^([0-9]+([.][0-9]+)?[-,:])+[0-9]+([.][0-9]+)?$') {
-      # support for "1:2:1", "1-2-1", "1,2,1" and even "1.75:2:1.5"
-      ratio <- ratio %>% base::strsplit("[-,:]") %>% base::unlist() %>% base::as.double()
-    } else {
-      stop('Invalid `ratio`: ', ratio, '.')
-    }
-  }
-  if (length(x) != length(ratio)) {
-    stop('`x` and `ratio` must be of same size.')
-  }
-  base::sum(x, na.rm = TRUE) * (ratio / base::sum(ratio, na.rm = TRUE))
-}
-
--- a/R/rsi_calc.R
+++ b/R/rsi_calc.R
@@ -16,7 +16,7 @@
 # GNU General Public License for more details.                         #
 # ==================================================================== #

-#' @importFrom dplyr %>% bind_cols pull
+#' @importFrom dplyr %>% pull
 rsi_calc <- function(...,
                     type,
                     include_I,
@@ -34,33 +34,36 @@ rsi_calc <- function(...,
    stop('`as_percent` must be logical', call. = FALSE)
  }

-  dots_length <- ...length()
-  dots <- ...elt(1) # it needs this evaluation
-  dots <- rlang::exprs(...) # or this will be a list without actual values
+  dots_df <- ...elt(1) # it needs this evaluation
+  dots <- base::eval(base::substitute(base::alist(...)))
+  ndots <- length(dots)

-  if ("data.frame" %in% class(dots[[1]]) & dots_length > 1) {
-    # data.frame passed with other columns, like:
-    #   septic_patients %>% portion_S(amcl, gent)
-    df <- dots[[1]]
-    dots_df <- data.frame(col1 = df[,1])
-    for (i in 2:dots_length) {
-      dots_col <- as.character(dots[[i]])
-      if (!dots_col %in% colnames(df)) {
-        stop("variable not found: ", dots_col)
-      }
-      dots_df <- dots_df %>% bind_cols(data.frame(df %>% pull(dots_col)))
+ if ("data.frame" %in% class(dots_df)) {
+   # data.frame passed with other columns, like:
+   #   septic_patients %>% portion_S(amcl, gent)
+   dots <- as.character(dots)
+   dots <- dots[dots != "."]
+    if (length(dots) == 0 | all(dots == "df")) {
+      # for complete data.frames, like septic_patients %>% select(amcl, gent) %>% portion_S()
+      # and the old rsi function, that has "df" as name of the first parameter
+      x <- dots_df
+    } else {
+      x <- dots_df[, dots]
    }
-    x <- dots_df[, -1]
-  } else if (dots_length == 1) {
-    # only 1 variable passed (count also be data.frame), like:
+  } else if (ndots == 1) {
+    # only 1 variable passed (can also be data.frame), like:
    #   portion_S(septic_patients$amcl)
    #   septic_patients$amcl %>% portion_S()
-    x <- dots[[1]]
+    x <- dots_df
  } else {
    # multiple variables passed without pipe, like:
    #   portion_S(septic_patients$amcl, septic_patients$gent)
-    #   with(septic_patients, portion_S(amcl, gent))
-    x <- as.data.frame(rlang::list2(...))
+    x <- NULL
+    try(x <- as.data.frame(dots), silent = TRUE)
+    if (is.null(x)) {
+      # support for: with(septic_patients, portion_S(amcl, gent))
+      x <- as.data.frame(rlang::list2(...))
+    }
  }

  print_warning <- FALSE
--- a/man/g.test.Rd
+++ b/man/g.test.Rd
@@ -110,11 +110,7 @@ If there are more than two categories and you want to find out which ones are si
 # by a single gene with two co-dominant alleles, you would expect a 1:2:1
 # ratio.

-x <- c(772, 1611, 737)
-E <- ratio(x, "1:2:1")
-E
-# 780 1560 780
-
+x <- c(772, 1611, 737)#'
 G <- g.test(x, p = c(1, 2, 1) / 4)
 # G$p.value = 0.12574.

--- a/man/ratio.Rd
+++ b/man/ratio.Rd
@@ -1,60 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/g.test.R
-\name{ratio}
-\alias{ratio}
-\title{Transform vector to ratio}
-\usage{
-ratio(x, ratio)
-}
-\arguments{
-\item{x}{vector of values}
-
-\item{ratio}{vector with ratios of \code{x} and with same length (like \code{ratio = c(1, 2, 1)}) or a text with characters \code{":"}, \code{"-"} or \code{","} (like \code{ratio = "1:2:1"} or even \code{ratio = "1:2:1.25"})}
-}
-\description{
-Transform vector to ratio
-}
-\examples{
-# = EXAMPLE 1 =
-# Shivrain et al. (2006) crossed clearfield rice (which are resistant
-# to the herbicide imazethapyr) with red rice (which are susceptible to
-# imazethapyr). They then crossed the hybrid offspring and examined the
-# F2 generation, where they found 772 resistant plants, 1611 moderately
-# resistant plants, and 737 susceptible plants. If resistance is controlled
-# by a single gene with two co-dominant alleles, you would expect a 1:2:1
-# ratio.
-
-x <- c(772, 1611, 737)
-E <- ratio(x, "1:2:1")
-E
-# 780 1560 780
-
-G <- g.test(x, p = c(1, 2, 1) / 4)
-# G$p.value = 0.12574.
-
-# There is no significant difference from a 1:2:1 ratio.
-# Meaning: resistance controlled by a single gene with two co-dominant
-# alleles, is plausible.
-
-
-# = EXAMPLE 2 =
-# Red crossbills (Loxia curvirostra) have the tip of the upper bill either
-# right or left of the lower bill, which helps them extract seeds from pine
-# cones. Some have hypothesized that frequency-dependent selection would
-# keep the number of right and left-billed birds at a 1:1 ratio. Groth (1992)
-# observed 1752 right-billed and 1895 left-billed crossbills.
-
-x <- c(1752, 1895)
-g.test(x)
-# p = 0.01787343
-
-# There is a significant difference from a 1:1 ratio.
-# Meaning: there are significantly more left-billed birds.
-
-}
-\references{
-McDonald, J.H. 2014. \strong{Handbook of Biological Statistics (3rd ed.)}. Sparky House Publishing, Baltimore, Maryland.
-}
-\seealso{
-\code{\link{g.test}}
-}
--- a/tests/testthat/test-classes.R
+++ b/tests/testthat/test-classes.R
@@ -28,9 +28,15 @@ test_that("mic works", {
  expect_true(is.mic(as.mic(8)))

  expect_equal(as.double(as.mic(">=32")), 32)
+  expect_equal(as.numeric(as.mic(">=32")), 32)
  expect_equal(as.integer(as.mic(">=32")), 32)
  expect_equal(suppressWarnings(as.logical(as.mic("INVALID VALUE"))), NA)

+  # all levels should be valid MICs
+  expect_silent(as.mic(levels(as.mic(1))))
+
+  expect_warning(as.mic("INVALID VALUE"))
+
  # print plots, should not raise errors
  barplot(as.mic(c(1, 2, 4, 8)))
  plot(as.mic(c(1, 2, 4, 8)))
--- a/tests/testthat/test-freq.R
+++ b/tests/testthat/test-freq.R
@@ -1,6 +1,8 @@
 context("freq.R")

 test_that("frequency table works", {
+  library(dplyr)
+
  expect_equal(nrow(freq(c(1, 1, 2, 2, 3, 3, 4, 4, 5, 5))), 5)
  expect_equal(nrow(frequency_tbl(c(1, 1, 2, 2, 3, 3, 4, 4, 5, 5))), 5)

@@ -9,6 +11,7 @@ test_that("frequency table works", {
  expect_equal(nrow(freq(septic_patients$date)),
               length(unique(septic_patients$date)))

+  expect_output(print(septic_patients %>% freq(age, nmax = Inf)))
  expect_output(print(freq(septic_patients$age, nmax = Inf)))
  expect_output(print(freq(septic_patients$age, nmax = NA)))
  expect_output(print(freq(septic_patients$age, nmax = NULL)))
@@ -31,7 +34,13 @@ test_that("frequency table works", {
  # rsi
  expect_output(print(freq(septic_patients$amcl)))
  # hms
-  expect_output(print(freq(hms::as.hms(sample(c(0:86399), 50)))))
+  expect_output(suppressWarnings(print(freq(hms::as.hms(sample(c(0:86399), 50))))))
+  # matrix
+  expect_output(print(freq(as.matrix(septic_patients$age))))
+  expect_output(print(freq(as.matrix(septic_patients[, c("age", "sex")]))))
+  # list
+  expect_output(print(freq(list(age = septic_patients$age))))
+  expect_output(print(freq(list(age = septic_patients$age, sex = septic_patients$sex))))

  library(dplyr)
  expect_output(septic_patients %>% select(1:2) %>% freq() %>% print())
@@ -94,5 +103,11 @@ test_that("frequency table works", {
                     class() %>%
                     .[1],
                   "tbl_df")
+
+  expect_error(septic_patients %>% freq(nonexisting))
+  expect_error(septic_patients %>% select(1:10) %>% freq())
+  expect_error(septic_patients %>% freq(peni, oxac, clox, amox, amcl,
+                                        ampi, pita, czol, cfep, cfur))
+
 })

--- a/tests/testthat/test-g.test.R
+++ b/tests/testthat/test-g.test.R
@@ -6,24 +6,30 @@ test_that("G-test works", {

  # example 1: clearfield rice vs. red rice
  x <- c(772, 1611, 737)
-  x.expected <- ratio(x, ratio = "1:2:1")
  expect_equal(g.test(x, p = c(0.25, 0.50, 0.25))$p.value,
               expected = 0.12574,
               tolerance = 0.00001)

  # example 2: red crossbills
  x <- c(1752, 1895)
-  x.expected <- ratio(x, c(1, 1))
  expect_equal(g.test(x)$p.value,
               expected = 0.01787343,
               tolerance = 0.00000001)

+  expect_error(g.test(0))
+  expect_error(g.test(c(0, 1), 0))
+  expect_error(g.test(c(1, 2, 3, 4), p = c(0.25, 0.25)))
+  expect_error(g.test(c(1, 2, 3, 4), p = c(0.25, 0.25, 0.25, 0.24)))
+  expect_warning(g.test(c(1, 2, 3, 4), p = c(0.25, 0.25, 0.25, 0.24), rescale.p = TRUE))

  # INDEPENDENCE

-  x <- matrix(data = round(runif(4) * 100000, 0),
-              ncol = 2,
-              byrow = TRUE)
+  x <- as.data.frame(
+    matrix(data = round(runif(4) * 100000, 0),
+           ncol = 2,
+
+           byrow = TRUE)
+  )
  expect_lt(g.test(x)$p.value,
            1)

@@ -31,4 +37,7 @@ test_that("G-test works", {
                        y = c(780, 1560, 780),
                        rescale.p = TRUE))

+  expect_error(g.test(matrix(data = c(-1, -2, -3 , -4), ncol = 2, byrow = TRUE)))
+  expect_error(g.test(matrix(data = c(0, 0, 0, 0), ncol = 2, byrow = TRUE)))
+
 })
--- a/tests/testthat/test-mdro.R
+++ b/tests/testthat/test-mdro.R
@@ -4,11 +4,12 @@ context("mdro.R")
 test_that("MDRO works", {
  library(dplyr)

-  outcome <- MDRO(septic_patients, "EUCAST", info = FALSE)
+  outcome <- suppressWarnings(MDRO(septic_patients, "EUCAST", info = TRUE))
+  outcome <- suppressWarnings(EUCAST_exceptional_phenotypes(septic_patients, info = TRUE))
  # check class
  expect_equal(outcome %>% class(), c('ordered', 'factor'))

-  outcome <- MDRO(septic_patients, "nl", info = FALSE)
+  outcome <- suppressWarnings(MDRO(septic_patients, "nl", info = TRUE))
  # check class
  expect_equal(outcome %>% class(), c('ordered', 'factor'))

@@ -18,4 +19,7 @@ test_that("MDRO works", {

  expect_equal(BRMO(septic_patients, info = FALSE), MDRO(septic_patients, "nl", info = FALSE))

+  # still working on German guidelines
+  expect_error(suppressWarnings(MRGN(septic_patients, info = TRUE)))
+
 })