From a100d07da6fe72431c0b84ae58f48642369ee4c2 Mon Sep 17 00:00:00 2001 From: "Matthijs S. Berends" Date: Fri, 24 Aug 2018 11:08:20 +0200 Subject: [PATCH] removed ratio, better rsi_calc, update for freq --- DESCRIPTION | 2 +- NAMESPACE | 2 -- NEWS.md | 16 ++++++++++ R/classes.R | 7 ++-- R/freq.R | 35 ++++++++++++++------ R/g.test.R | 33 +------------------ R/rsi_calc.R | 45 ++++++++++++++------------ man/g.test.Rd | 6 +--- man/ratio.Rd | 60 ----------------------------------- tests/testthat/test-classes.R | 6 ++++ tests/testthat/test-freq.R | 17 +++++++++- tests/testthat/test-g.test.R | 19 ++++++++--- tests/testthat/test-mdro.R | 8 +++-- 13 files changed, 116 insertions(+), 140 deletions(-) delete mode 100644 man/ratio.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 2889c2b7..389458ea 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: AMR -Version: 0.3.0.9002 +Version: 0.3.0.9003 Date: 2018-08-23 Title: Antimicrobial Resistance Analysis Authors@R: c( diff --git a/NAMESPACE b/NAMESPACE index 89462f30..32499f4a 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -79,7 +79,6 @@ export(portion_R) export(portion_S) export(portion_SI) export(portion_df) -export(ratio) export(resistance_predict) export(right_join_microorganisms) export(rsi) @@ -127,7 +126,6 @@ importFrom(dplyr,arrange) importFrom(dplyr,arrange_at) importFrom(dplyr,as_tibble) importFrom(dplyr,between) -importFrom(dplyr,bind_cols) importFrom(dplyr,bind_rows) importFrom(dplyr,case_when) importFrom(dplyr,desc) diff --git a/NEWS.md b/NEWS.md index ede6b632..24176ff9 100755 --- a/NEWS.md +++ b/NEWS.md @@ -2,9 +2,12 @@ #### New * Functions `count_R`, `count_IR`, `count_I`, `count_SI` and `count_S` to selectively count resistant or susceptible isolates + * New function `count_df` to get all counts of S, I and R of a data set with antibiotic columns, with support for grouped variables * Function `is.rsi.eligible` to check for columns that have valid antimicrobial results, but do not have the `rsi` class yet. Transform the columns of your raw data with: `data %>% mutate_if(is.rsi.eligible, as.rsi)` #### Changed +* Removed function `ratio` +* Fix in `as.mic` for values ending in zeroes after a real number * Added parameters `minimum` and `as_percent` to `portion_df` * Support for quasiquotation in the functions series `count_*` and `portions_*`, and `n_rsi`. This allows to check for more than 2 vectors or columns. * `septic_patients %>% select(amox, cipr) %>% count_R()` @@ -14,6 +17,19 @@ * Edited `ggplot_rsi` and `geom_rsi` so they can cope with `count_df`. The new `fun` parameter has value `portion_df` at default, but can be set to `count_df`. * Fix for `ggplot_rsi` when the `ggplot2` package was not loaded * Added possibility to set any parameter to `geom_rsi` (and `ggplot_rsi`) so you can set your own preferences +* Support for types list and matrix for `freq` + ```r + my_matrix = with(septic_patients, matrix(c(age, sex), ncol = 2)) + freq(my_matrix) + ``` + * Subsetting also possible for lists: + ```r + my_list = list(age = septic_patients$age, sex = septic_patients$sex) + my_list %>% freq(age) + ``` + +#### Other +* More unit tests to ensure better integrity of functions # 0.3.0 (latest stable version) **Published on CRAN: 2018-08-14** diff --git a/R/classes.R b/R/classes.R index 301e91df..3fc42974 100755 --- a/R/classes.R +++ b/R/classes.R @@ -240,10 +240,13 @@ as.mic <- function(x, na.rm = FALSE) { # remove all after last digit x <- gsub('[^0-9]+$', '', x) # remove last zeroes - x <- gsub('[.]?0+$', '', x) + x <- gsub('([.].?)0+$', '\\1', x) # force to be character x <- as.character(x) + # previously unempty values now empty - should return a warning later on + x[x.bak != "" & x == ""] <- "invalid" + # these are alllowed MIC values and will become factor levels lvls <- c("<0.002", "<=0.002", "0.002", ">=0.002", ">0.002", "<0.003", "<=0.003", "0.003", ">=0.003", ">0.003", @@ -275,7 +278,7 @@ as.mic <- function(x, na.rm = FALSE) { "<0.25", "<=0.25", "0.25", ">=0.25", ">0.25", "<0.256", "<=0.256", "0.256", ">=0.256", ">0.256", "<0.28", "<=0.28", "0.28", ">=0.28", ">0.28", - "<0.30", "<=0.30", "0.30", ">=0.30", ">0.30", + "<0.3", "<=0.3", "0.3", ">=0.3", ">0.3", "<0.32", "<=0.32", "0.32", ">=0.32", ">0.32", "<0.36", "<=0.36", "0.36", ">=0.36", ">0.36", "<0.38", "<=0.38", "0.38", ">=0.38", ">0.38", diff --git a/R/freq.R b/R/freq.R index 6f31e632..15010cc0 100755 --- a/R/freq.R +++ b/R/freq.R @@ -152,22 +152,39 @@ frequency_tbl <- function(x, mult.columns <- 0 + x.name <- NULL + cols <- NULL + if (any(class(x) == 'list')) { + cols <- names(x) + x <- as.data.frame(x, stringsAsFactors = FALSE) + x.name <- "a list" + } else if (any(class(x) == 'matrix')) { + x <- as.data.frame(x, stringsAsFactors = FALSE) + x.name <- "a matrix" + cols <- colnames(x) + if (all(cols %like% 'V[0-9]')) { + cols <- NULL + } + } + if (any(class(x) == 'data.frame')) { - x.name <- deparse(substitute(x)) + if (is.null(x.name)) { + x.name <- deparse(substitute(x)) + } if (x.name == ".") { x.name <- NULL } dots <- base::eval(base::substitute(base::alist(...))) ndots <- length(dots) - if (NROW(x) == 0) { - x <- NA - } else if (ndots > 0 & ndots < 10) { + if (ndots < 10) { cols <- as.character(dots) if (!all(cols %in% colnames(x))) { stop("one or more columns not found: `", paste(cols, collapse = "`, `"), '`', call. = FALSE) } - x <- x[, cols] + if (length(cols) > 0) { + x <- x[, cols] + } } else if (ndots >= 10) { stop('A maximum of 9 columns can be analysed at the same time.', call. = FALSE) } else { @@ -298,10 +315,6 @@ frequency_tbl <- function(x, header <- header %>% paste0(markdown_line, 'Class: ', class(x) %>% rev() %>% paste(collapse = " > ")) } - if (is.list(x) | is.matrix(x) | is.environment(x) | is.function(x)) { - stop('frequency tables do not support lists, matrices, environments and functions.', call. = FALSE) - } - header <- header %>% paste0(markdown_line, '\nLength: ', (NAs %>% length() + x %>% length()) %>% format(), ' (of which NA: ', NAs %>% length() %>% format(), ' = ', (NAs %>% length() / (NAs %>% length() + x %>% length())) %>% percent(force_zero = TRUE, round = digits) %>% sub('NaN', '0', ., fixed = TRUE), ')') @@ -485,6 +498,10 @@ print.frequency_tbl <- function(x, nmax = getOption("max.print.freq", default = opt <- attr(x, 'opt') + if (length(opt$vars) == 0) { + opt$vars <- NULL + } + if (!is.null(opt$data) & !is.null(opt$vars)) { title <- paste0("of `", paste0(opt$vars, collapse = "` and `"), "` from ", opt$data) } else if (!is.null(opt$data) & is.null(opt$vars)) { diff --git a/R/g.test.R b/R/g.test.R index bd696846..8cb1ebfd 100644 --- a/R/g.test.R +++ b/R/g.test.R @@ -79,11 +79,7 @@ #' # by a single gene with two co-dominant alleles, you would expect a 1:2:1 #' # ratio. #' -#' x <- c(772, 1611, 737) -#' E <- ratio(x, "1:2:1") -#' E -#' # 780 1560 780 -#' +#' x <- c(772, 1611, 737)#' #' G <- g.test(x, p = c(1, 2, 1) / 4) #' # G$p.value = 0.12574. #' @@ -228,30 +224,3 @@ g.test <- function(x, observed = x, expected = E, residuals = (x - E)/sqrt(E), stdres = (x - E)/sqrt(V)), class = "htest") } - -#' Transform vector to ratio -#' @param x vector of values -#' @param ratio vector with ratios of \code{x} and with same length (like \code{ratio = c(1, 2, 1)}) or a text with characters \code{":"}, \code{"-"} or \code{","} (like \code{ratio = "1:2:1"} or even \code{ratio = "1:2:1.25"}) -#' @export -#' @seealso \code{\link{g.test}} -#' @references McDonald, J.H. 2014. \strong{Handbook of Biological Statistics (3rd ed.)}. Sparky House Publishing, Baltimore, Maryland. -#' @importFrom dplyr %>% -#' @inherit g.test examples -ratio <- function(x, ratio) { - if (!all(is.numeric(x))) { - stop('`x` must be a vector of numeric values.') - } - if (length(ratio) == 1) { - if (ratio %like% '^([0-9]+([.][0-9]+)?[-,:])+[0-9]+([.][0-9]+)?$') { - # support for "1:2:1", "1-2-1", "1,2,1" and even "1.75:2:1.5" - ratio <- ratio %>% base::strsplit("[-,:]") %>% base::unlist() %>% base::as.double() - } else { - stop('Invalid `ratio`: ', ratio, '.') - } - } - if (length(x) != length(ratio)) { - stop('`x` and `ratio` must be of same size.') - } - base::sum(x, na.rm = TRUE) * (ratio / base::sum(ratio, na.rm = TRUE)) -} - diff --git a/R/rsi_calc.R b/R/rsi_calc.R index ff6d79db..a89a31ba 100644 --- a/R/rsi_calc.R +++ b/R/rsi_calc.R @@ -16,7 +16,7 @@ # GNU General Public License for more details. # # ==================================================================== # -#' @importFrom dplyr %>% bind_cols pull +#' @importFrom dplyr %>% pull rsi_calc <- function(..., type, include_I, @@ -34,33 +34,36 @@ rsi_calc <- function(..., stop('`as_percent` must be logical', call. = FALSE) } - dots_length <- ...length() - dots <- ...elt(1) # it needs this evaluation - dots <- rlang::exprs(...) # or this will be a list without actual values + dots_df <- ...elt(1) # it needs this evaluation + dots <- base::eval(base::substitute(base::alist(...))) + ndots <- length(dots) - if ("data.frame" %in% class(dots[[1]]) & dots_length > 1) { - # data.frame passed with other columns, like: - # septic_patients %>% portion_S(amcl, gent) - df <- dots[[1]] - dots_df <- data.frame(col1 = df[,1]) - for (i in 2:dots_length) { - dots_col <- as.character(dots[[i]]) - if (!dots_col %in% colnames(df)) { - stop("variable not found: ", dots_col) - } - dots_df <- dots_df %>% bind_cols(data.frame(df %>% pull(dots_col))) + if ("data.frame" %in% class(dots_df)) { + # data.frame passed with other columns, like: + # septic_patients %>% portion_S(amcl, gent) + dots <- as.character(dots) + dots <- dots[dots != "."] + if (length(dots) == 0 | all(dots == "df")) { + # for complete data.frames, like septic_patients %>% select(amcl, gent) %>% portion_S() + # and the old rsi function, that has "df" as name of the first parameter + x <- dots_df + } else { + x <- dots_df[, dots] } - x <- dots_df[, -1] - } else if (dots_length == 1) { - # only 1 variable passed (count also be data.frame), like: + } else if (ndots == 1) { + # only 1 variable passed (can also be data.frame), like: # portion_S(septic_patients$amcl) # septic_patients$amcl %>% portion_S() - x <- dots[[1]] + x <- dots_df } else { # multiple variables passed without pipe, like: # portion_S(septic_patients$amcl, septic_patients$gent) - # with(septic_patients, portion_S(amcl, gent)) - x <- as.data.frame(rlang::list2(...)) + x <- NULL + try(x <- as.data.frame(dots), silent = TRUE) + if (is.null(x)) { + # support for: with(septic_patients, portion_S(amcl, gent)) + x <- as.data.frame(rlang::list2(...)) + } } print_warning <- FALSE diff --git a/man/g.test.Rd b/man/g.test.Rd index b2db1035..c3a5cf9e 100644 --- a/man/g.test.Rd +++ b/man/g.test.Rd @@ -110,11 +110,7 @@ If there are more than two categories and you want to find out which ones are si # by a single gene with two co-dominant alleles, you would expect a 1:2:1 # ratio. -x <- c(772, 1611, 737) -E <- ratio(x, "1:2:1") -E -# 780 1560 780 - +x <- c(772, 1611, 737)#' G <- g.test(x, p = c(1, 2, 1) / 4) # G$p.value = 0.12574. diff --git a/man/ratio.Rd b/man/ratio.Rd deleted file mode 100644 index eae7c8b9..00000000 --- a/man/ratio.Rd +++ /dev/null @@ -1,60 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/g.test.R -\name{ratio} -\alias{ratio} -\title{Transform vector to ratio} -\usage{ -ratio(x, ratio) -} -\arguments{ -\item{x}{vector of values} - -\item{ratio}{vector with ratios of \code{x} and with same length (like \code{ratio = c(1, 2, 1)}) or a text with characters \code{":"}, \code{"-"} or \code{","} (like \code{ratio = "1:2:1"} or even \code{ratio = "1:2:1.25"})} -} -\description{ -Transform vector to ratio -} -\examples{ -# = EXAMPLE 1 = -# Shivrain et al. (2006) crossed clearfield rice (which are resistant -# to the herbicide imazethapyr) with red rice (which are susceptible to -# imazethapyr). They then crossed the hybrid offspring and examined the -# F2 generation, where they found 772 resistant plants, 1611 moderately -# resistant plants, and 737 susceptible plants. If resistance is controlled -# by a single gene with two co-dominant alleles, you would expect a 1:2:1 -# ratio. - -x <- c(772, 1611, 737) -E <- ratio(x, "1:2:1") -E -# 780 1560 780 - -G <- g.test(x, p = c(1, 2, 1) / 4) -# G$p.value = 0.12574. - -# There is no significant difference from a 1:2:1 ratio. -# Meaning: resistance controlled by a single gene with two co-dominant -# alleles, is plausible. - - -# = EXAMPLE 2 = -# Red crossbills (Loxia curvirostra) have the tip of the upper bill either -# right or left of the lower bill, which helps them extract seeds from pine -# cones. Some have hypothesized that frequency-dependent selection would -# keep the number of right and left-billed birds at a 1:1 ratio. Groth (1992) -# observed 1752 right-billed and 1895 left-billed crossbills. - -x <- c(1752, 1895) -g.test(x) -# p = 0.01787343 - -# There is a significant difference from a 1:1 ratio. -# Meaning: there are significantly more left-billed birds. - -} -\references{ -McDonald, J.H. 2014. \strong{Handbook of Biological Statistics (3rd ed.)}. Sparky House Publishing, Baltimore, Maryland. -} -\seealso{ -\code{\link{g.test}} -} diff --git a/tests/testthat/test-classes.R b/tests/testthat/test-classes.R index 08eb2ba1..a0bfe504 100755 --- a/tests/testthat/test-classes.R +++ b/tests/testthat/test-classes.R @@ -28,9 +28,15 @@ test_that("mic works", { expect_true(is.mic(as.mic(8))) expect_equal(as.double(as.mic(">=32")), 32) + expect_equal(as.numeric(as.mic(">=32")), 32) expect_equal(as.integer(as.mic(">=32")), 32) expect_equal(suppressWarnings(as.logical(as.mic("INVALID VALUE"))), NA) + # all levels should be valid MICs + expect_silent(as.mic(levels(as.mic(1)))) + + expect_warning(as.mic("INVALID VALUE")) + # print plots, should not raise errors barplot(as.mic(c(1, 2, 4, 8))) plot(as.mic(c(1, 2, 4, 8))) diff --git a/tests/testthat/test-freq.R b/tests/testthat/test-freq.R index 3df48f24..8e6a0338 100755 --- a/tests/testthat/test-freq.R +++ b/tests/testthat/test-freq.R @@ -1,6 +1,8 @@ context("freq.R") test_that("frequency table works", { + library(dplyr) + expect_equal(nrow(freq(c(1, 1, 2, 2, 3, 3, 4, 4, 5, 5))), 5) expect_equal(nrow(frequency_tbl(c(1, 1, 2, 2, 3, 3, 4, 4, 5, 5))), 5) @@ -9,6 +11,7 @@ test_that("frequency table works", { expect_equal(nrow(freq(septic_patients$date)), length(unique(septic_patients$date))) + expect_output(print(septic_patients %>% freq(age, nmax = Inf))) expect_output(print(freq(septic_patients$age, nmax = Inf))) expect_output(print(freq(septic_patients$age, nmax = NA))) expect_output(print(freq(septic_patients$age, nmax = NULL))) @@ -31,7 +34,13 @@ test_that("frequency table works", { # rsi expect_output(print(freq(septic_patients$amcl))) # hms - expect_output(print(freq(hms::as.hms(sample(c(0:86399), 50))))) + expect_output(suppressWarnings(print(freq(hms::as.hms(sample(c(0:86399), 50)))))) + # matrix + expect_output(print(freq(as.matrix(septic_patients$age)))) + expect_output(print(freq(as.matrix(septic_patients[, c("age", "sex")])))) + # list + expect_output(print(freq(list(age = septic_patients$age)))) + expect_output(print(freq(list(age = septic_patients$age, sex = septic_patients$sex)))) library(dplyr) expect_output(septic_patients %>% select(1:2) %>% freq() %>% print()) @@ -94,5 +103,11 @@ test_that("frequency table works", { class() %>% .[1], "tbl_df") + + expect_error(septic_patients %>% freq(nonexisting)) + expect_error(septic_patients %>% select(1:10) %>% freq()) + expect_error(septic_patients %>% freq(peni, oxac, clox, amox, amcl, + ampi, pita, czol, cfep, cfur)) + }) diff --git a/tests/testthat/test-g.test.R b/tests/testthat/test-g.test.R index bf46f2a3..e89ab809 100644 --- a/tests/testthat/test-g.test.R +++ b/tests/testthat/test-g.test.R @@ -6,24 +6,30 @@ test_that("G-test works", { # example 1: clearfield rice vs. red rice x <- c(772, 1611, 737) - x.expected <- ratio(x, ratio = "1:2:1") expect_equal(g.test(x, p = c(0.25, 0.50, 0.25))$p.value, expected = 0.12574, tolerance = 0.00001) # example 2: red crossbills x <- c(1752, 1895) - x.expected <- ratio(x, c(1, 1)) expect_equal(g.test(x)$p.value, expected = 0.01787343, tolerance = 0.00000001) + expect_error(g.test(0)) + expect_error(g.test(c(0, 1), 0)) + expect_error(g.test(c(1, 2, 3, 4), p = c(0.25, 0.25))) + expect_error(g.test(c(1, 2, 3, 4), p = c(0.25, 0.25, 0.25, 0.24))) + expect_warning(g.test(c(1, 2, 3, 4), p = c(0.25, 0.25, 0.25, 0.24), rescale.p = TRUE)) # INDEPENDENCE - x <- matrix(data = round(runif(4) * 100000, 0), - ncol = 2, - byrow = TRUE) + x <- as.data.frame( + matrix(data = round(runif(4) * 100000, 0), + ncol = 2, + + byrow = TRUE) + ) expect_lt(g.test(x)$p.value, 1) @@ -31,4 +37,7 @@ test_that("G-test works", { y = c(780, 1560, 780), rescale.p = TRUE)) + expect_error(g.test(matrix(data = c(-1, -2, -3 , -4), ncol = 2, byrow = TRUE))) + expect_error(g.test(matrix(data = c(0, 0, 0, 0), ncol = 2, byrow = TRUE))) + }) diff --git a/tests/testthat/test-mdro.R b/tests/testthat/test-mdro.R index f0982e3b..68214b17 100755 --- a/tests/testthat/test-mdro.R +++ b/tests/testthat/test-mdro.R @@ -4,11 +4,12 @@ context("mdro.R") test_that("MDRO works", { library(dplyr) - outcome <- MDRO(septic_patients, "EUCAST", info = FALSE) + outcome <- suppressWarnings(MDRO(septic_patients, "EUCAST", info = TRUE)) + outcome <- suppressWarnings(EUCAST_exceptional_phenotypes(septic_patients, info = TRUE)) # check class expect_equal(outcome %>% class(), c('ordered', 'factor')) - outcome <- MDRO(septic_patients, "nl", info = FALSE) + outcome <- suppressWarnings(MDRO(septic_patients, "nl", info = TRUE)) # check class expect_equal(outcome %>% class(), c('ordered', 'factor')) @@ -18,4 +19,7 @@ test_that("MDRO works", { expect_equal(BRMO(septic_patients, info = FALSE), MDRO(septic_patients, "nl", info = FALSE)) + # still working on German guidelines + expect_error(suppressWarnings(MRGN(septic_patients, info = TRUE))) + })