removed ratio, better rsi_calc, update for freq

This commit is contained in:
dr. M.S. (Matthijs) Berends 2018-08-24 11:08:20 +02:00
parent 0c0e538ef4
commit a100d07da6
13 changed files with 116 additions and 140 deletions

View File

@ -1,5 +1,5 @@
Package: AMR
Version: 0.3.0.9002
Version: 0.3.0.9003
Date: 2018-08-23
Title: Antimicrobial Resistance Analysis
Authors@R: c(

View File

@ -79,7 +79,6 @@ export(portion_R)
export(portion_S)
export(portion_SI)
export(portion_df)
export(ratio)
export(resistance_predict)
export(right_join_microorganisms)
export(rsi)
@ -127,7 +126,6 @@ importFrom(dplyr,arrange)
importFrom(dplyr,arrange_at)
importFrom(dplyr,as_tibble)
importFrom(dplyr,between)
importFrom(dplyr,bind_cols)
importFrom(dplyr,bind_rows)
importFrom(dplyr,case_when)
importFrom(dplyr,desc)

16
NEWS.md
View File

@ -2,9 +2,12 @@
#### New
* Functions `count_R`, `count_IR`, `count_I`, `count_SI` and `count_S` to selectively count resistant or susceptible isolates
* New function `count_df` to get all counts of S, I and R of a data set with antibiotic columns, with support for grouped variables
* Function `is.rsi.eligible` to check for columns that have valid antimicrobial results, but do not have the `rsi` class yet. Transform the columns of your raw data with: `data %>% mutate_if(is.rsi.eligible, as.rsi)`
#### Changed
* Removed function `ratio`
* Fix in `as.mic` for values ending in zeroes after a real number
* Added parameters `minimum` and `as_percent` to `portion_df`
* Support for quasiquotation in the functions series `count_*` and `portions_*`, and `n_rsi`. This allows to check for more than 2 vectors or columns.
* `septic_patients %>% select(amox, cipr) %>% count_R()`
@ -14,6 +17,19 @@
* Edited `ggplot_rsi` and `geom_rsi` so they can cope with `count_df`. The new `fun` parameter has value `portion_df` at default, but can be set to `count_df`.
* Fix for `ggplot_rsi` when the `ggplot2` package was not loaded
* Added possibility to set any parameter to `geom_rsi` (and `ggplot_rsi`) so you can set your own preferences
* Support for types list and matrix for `freq`
```r
my_matrix = with(septic_patients, matrix(c(age, sex), ncol = 2))
freq(my_matrix)
```
* Subsetting also possible for lists:
```r
my_list = list(age = septic_patients$age, sex = septic_patients$sex)
my_list %>% freq(age)
```
#### Other
* More unit tests to ensure better integrity of functions
# 0.3.0 (latest stable version)
**Published on CRAN: 2018-08-14**

View File

@ -240,10 +240,13 @@ as.mic <- function(x, na.rm = FALSE) {
# remove all after last digit
x <- gsub('[^0-9]+$', '', x)
# remove last zeroes
x <- gsub('[.]?0+$', '', x)
x <- gsub('([.].?)0+$', '\\1', x)
# force to be character
x <- as.character(x)
# previously unempty values now empty - should return a warning later on
x[x.bak != "" & x == ""] <- "invalid"
# these are alllowed MIC values and will become factor levels
lvls <- c("<0.002", "<=0.002", "0.002", ">=0.002", ">0.002",
"<0.003", "<=0.003", "0.003", ">=0.003", ">0.003",
@ -275,7 +278,7 @@ as.mic <- function(x, na.rm = FALSE) {
"<0.25", "<=0.25", "0.25", ">=0.25", ">0.25",
"<0.256", "<=0.256", "0.256", ">=0.256", ">0.256",
"<0.28", "<=0.28", "0.28", ">=0.28", ">0.28",
"<0.30", "<=0.30", "0.30", ">=0.30", ">0.30",
"<0.3", "<=0.3", "0.3", ">=0.3", ">0.3",
"<0.32", "<=0.32", "0.32", ">=0.32", ">0.32",
"<0.36", "<=0.36", "0.36", ">=0.36", ">0.36",
"<0.38", "<=0.38", "0.38", ">=0.38", ">0.38",

View File

@ -152,22 +152,39 @@ frequency_tbl <- function(x,
mult.columns <- 0
x.name <- NULL
cols <- NULL
if (any(class(x) == 'list')) {
cols <- names(x)
x <- as.data.frame(x, stringsAsFactors = FALSE)
x.name <- "a list"
} else if (any(class(x) == 'matrix')) {
x <- as.data.frame(x, stringsAsFactors = FALSE)
x.name <- "a matrix"
cols <- colnames(x)
if (all(cols %like% 'V[0-9]')) {
cols <- NULL
}
}
if (any(class(x) == 'data.frame')) {
x.name <- deparse(substitute(x))
if (is.null(x.name)) {
x.name <- deparse(substitute(x))
}
if (x.name == ".") {
x.name <- NULL
}
dots <- base::eval(base::substitute(base::alist(...)))
ndots <- length(dots)
if (NROW(x) == 0) {
x <- NA
} else if (ndots > 0 & ndots < 10) {
if (ndots < 10) {
cols <- as.character(dots)
if (!all(cols %in% colnames(x))) {
stop("one or more columns not found: `", paste(cols, collapse = "`, `"), '`', call. = FALSE)
}
x <- x[, cols]
if (length(cols) > 0) {
x <- x[, cols]
}
} else if (ndots >= 10) {
stop('A maximum of 9 columns can be analysed at the same time.', call. = FALSE)
} else {
@ -298,10 +315,6 @@ frequency_tbl <- function(x,
header <- header %>% paste0(markdown_line, 'Class: ', class(x) %>% rev() %>% paste(collapse = " > "))
}
if (is.list(x) | is.matrix(x) | is.environment(x) | is.function(x)) {
stop('frequency tables do not support lists, matrices, environments and functions.', call. = FALSE)
}
header <- header %>% paste0(markdown_line, '\nLength: ', (NAs %>% length() + x %>% length()) %>% format(),
' (of which NA: ', NAs %>% length() %>% format(),
' = ', (NAs %>% length() / (NAs %>% length() + x %>% length())) %>% percent(force_zero = TRUE, round = digits) %>% sub('NaN', '0', ., fixed = TRUE), ')')
@ -485,6 +498,10 @@ print.frequency_tbl <- function(x, nmax = getOption("max.print.freq", default =
opt <- attr(x, 'opt')
if (length(opt$vars) == 0) {
opt$vars <- NULL
}
if (!is.null(opt$data) & !is.null(opt$vars)) {
title <- paste0("of `", paste0(opt$vars, collapse = "` and `"), "` from ", opt$data)
} else if (!is.null(opt$data) & is.null(opt$vars)) {

View File

@ -79,11 +79,7 @@
#' # by a single gene with two co-dominant alleles, you would expect a 1:2:1
#' # ratio.
#'
#' x <- c(772, 1611, 737)
#' E <- ratio(x, "1:2:1")
#' E
#' # 780 1560 780
#'
#' x <- c(772, 1611, 737)#'
#' G <- g.test(x, p = c(1, 2, 1) / 4)
#' # G$p.value = 0.12574.
#'
@ -228,30 +224,3 @@ g.test <- function(x,
observed = x, expected = E, residuals = (x - E)/sqrt(E),
stdres = (x - E)/sqrt(V)), class = "htest")
}
#' Transform vector to ratio
#' @param x vector of values
#' @param ratio vector with ratios of \code{x} and with same length (like \code{ratio = c(1, 2, 1)}) or a text with characters \code{":"}, \code{"-"} or \code{","} (like \code{ratio = "1:2:1"} or even \code{ratio = "1:2:1.25"})
#' @export
#' @seealso \code{\link{g.test}}
#' @references McDonald, J.H. 2014. \strong{Handbook of Biological Statistics (3rd ed.)}. Sparky House Publishing, Baltimore, Maryland.
#' @importFrom dplyr %>%
#' @inherit g.test examples
ratio <- function(x, ratio) {
if (!all(is.numeric(x))) {
stop('`x` must be a vector of numeric values.')
}
if (length(ratio) == 1) {
if (ratio %like% '^([0-9]+([.][0-9]+)?[-,:])+[0-9]+([.][0-9]+)?$') {
# support for "1:2:1", "1-2-1", "1,2,1" and even "1.75:2:1.5"
ratio <- ratio %>% base::strsplit("[-,:]") %>% base::unlist() %>% base::as.double()
} else {
stop('Invalid `ratio`: ', ratio, '.')
}
}
if (length(x) != length(ratio)) {
stop('`x` and `ratio` must be of same size.')
}
base::sum(x, na.rm = TRUE) * (ratio / base::sum(ratio, na.rm = TRUE))
}

View File

@ -16,7 +16,7 @@
# GNU General Public License for more details. #
# ==================================================================== #
#' @importFrom dplyr %>% bind_cols pull
#' @importFrom dplyr %>% pull
rsi_calc <- function(...,
type,
include_I,
@ -34,33 +34,36 @@ rsi_calc <- function(...,
stop('`as_percent` must be logical', call. = FALSE)
}
dots_length <- ...length()
dots <- ...elt(1) # it needs this evaluation
dots <- rlang::exprs(...) # or this will be a list without actual values
dots_df <- ...elt(1) # it needs this evaluation
dots <- base::eval(base::substitute(base::alist(...)))
ndots <- length(dots)
if ("data.frame" %in% class(dots[[1]]) & dots_length > 1) {
# data.frame passed with other columns, like:
# septic_patients %>% portion_S(amcl, gent)
df <- dots[[1]]
dots_df <- data.frame(col1 = df[,1])
for (i in 2:dots_length) {
dots_col <- as.character(dots[[i]])
if (!dots_col %in% colnames(df)) {
stop("variable not found: ", dots_col)
}
dots_df <- dots_df %>% bind_cols(data.frame(df %>% pull(dots_col)))
if ("data.frame" %in% class(dots_df)) {
# data.frame passed with other columns, like:
# septic_patients %>% portion_S(amcl, gent)
dots <- as.character(dots)
dots <- dots[dots != "."]
if (length(dots) == 0 | all(dots == "df")) {
# for complete data.frames, like septic_patients %>% select(amcl, gent) %>% portion_S()
# and the old rsi function, that has "df" as name of the first parameter
x <- dots_df
} else {
x <- dots_df[, dots]
}
x <- dots_df[, -1]
} else if (dots_length == 1) {
# only 1 variable passed (count also be data.frame), like:
} else if (ndots == 1) {
# only 1 variable passed (can also be data.frame), like:
# portion_S(septic_patients$amcl)
# septic_patients$amcl %>% portion_S()
x <- dots[[1]]
x <- dots_df
} else {
# multiple variables passed without pipe, like:
# portion_S(septic_patients$amcl, septic_patients$gent)
# with(septic_patients, portion_S(amcl, gent))
x <- as.data.frame(rlang::list2(...))
x <- NULL
try(x <- as.data.frame(dots), silent = TRUE)
if (is.null(x)) {
# support for: with(septic_patients, portion_S(amcl, gent))
x <- as.data.frame(rlang::list2(...))
}
}
print_warning <- FALSE

View File

@ -110,11 +110,7 @@ If there are more than two categories and you want to find out which ones are si
# by a single gene with two co-dominant alleles, you would expect a 1:2:1
# ratio.
x <- c(772, 1611, 737)
E <- ratio(x, "1:2:1")
E
# 780 1560 780
x <- c(772, 1611, 737)#'
G <- g.test(x, p = c(1, 2, 1) / 4)
# G$p.value = 0.12574.

View File

@ -1,60 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/g.test.R
\name{ratio}
\alias{ratio}
\title{Transform vector to ratio}
\usage{
ratio(x, ratio)
}
\arguments{
\item{x}{vector of values}
\item{ratio}{vector with ratios of \code{x} and with same length (like \code{ratio = c(1, 2, 1)}) or a text with characters \code{":"}, \code{"-"} or \code{","} (like \code{ratio = "1:2:1"} or even \code{ratio = "1:2:1.25"})}
}
\description{
Transform vector to ratio
}
\examples{
# = EXAMPLE 1 =
# Shivrain et al. (2006) crossed clearfield rice (which are resistant
# to the herbicide imazethapyr) with red rice (which are susceptible to
# imazethapyr). They then crossed the hybrid offspring and examined the
# F2 generation, where they found 772 resistant plants, 1611 moderately
# resistant plants, and 737 susceptible plants. If resistance is controlled
# by a single gene with two co-dominant alleles, you would expect a 1:2:1
# ratio.
x <- c(772, 1611, 737)
E <- ratio(x, "1:2:1")
E
# 780 1560 780
G <- g.test(x, p = c(1, 2, 1) / 4)
# G$p.value = 0.12574.
# There is no significant difference from a 1:2:1 ratio.
# Meaning: resistance controlled by a single gene with two co-dominant
# alleles, is plausible.
# = EXAMPLE 2 =
# Red crossbills (Loxia curvirostra) have the tip of the upper bill either
# right or left of the lower bill, which helps them extract seeds from pine
# cones. Some have hypothesized that frequency-dependent selection would
# keep the number of right and left-billed birds at a 1:1 ratio. Groth (1992)
# observed 1752 right-billed and 1895 left-billed crossbills.
x <- c(1752, 1895)
g.test(x)
# p = 0.01787343
# There is a significant difference from a 1:1 ratio.
# Meaning: there are significantly more left-billed birds.
}
\references{
McDonald, J.H. 2014. \strong{Handbook of Biological Statistics (3rd ed.)}. Sparky House Publishing, Baltimore, Maryland.
}
\seealso{
\code{\link{g.test}}
}

View File

@ -28,9 +28,15 @@ test_that("mic works", {
expect_true(is.mic(as.mic(8)))
expect_equal(as.double(as.mic(">=32")), 32)
expect_equal(as.numeric(as.mic(">=32")), 32)
expect_equal(as.integer(as.mic(">=32")), 32)
expect_equal(suppressWarnings(as.logical(as.mic("INVALID VALUE"))), NA)
# all levels should be valid MICs
expect_silent(as.mic(levels(as.mic(1))))
expect_warning(as.mic("INVALID VALUE"))
# print plots, should not raise errors
barplot(as.mic(c(1, 2, 4, 8)))
plot(as.mic(c(1, 2, 4, 8)))

View File

@ -1,6 +1,8 @@
context("freq.R")
test_that("frequency table works", {
library(dplyr)
expect_equal(nrow(freq(c(1, 1, 2, 2, 3, 3, 4, 4, 5, 5))), 5)
expect_equal(nrow(frequency_tbl(c(1, 1, 2, 2, 3, 3, 4, 4, 5, 5))), 5)
@ -9,6 +11,7 @@ test_that("frequency table works", {
expect_equal(nrow(freq(septic_patients$date)),
length(unique(septic_patients$date)))
expect_output(print(septic_patients %>% freq(age, nmax = Inf)))
expect_output(print(freq(septic_patients$age, nmax = Inf)))
expect_output(print(freq(septic_patients$age, nmax = NA)))
expect_output(print(freq(septic_patients$age, nmax = NULL)))
@ -31,7 +34,13 @@ test_that("frequency table works", {
# rsi
expect_output(print(freq(septic_patients$amcl)))
# hms
expect_output(print(freq(hms::as.hms(sample(c(0:86399), 50)))))
expect_output(suppressWarnings(print(freq(hms::as.hms(sample(c(0:86399), 50))))))
# matrix
expect_output(print(freq(as.matrix(septic_patients$age))))
expect_output(print(freq(as.matrix(septic_patients[, c("age", "sex")]))))
# list
expect_output(print(freq(list(age = septic_patients$age))))
expect_output(print(freq(list(age = septic_patients$age, sex = septic_patients$sex))))
library(dplyr)
expect_output(septic_patients %>% select(1:2) %>% freq() %>% print())
@ -94,5 +103,11 @@ test_that("frequency table works", {
class() %>%
.[1],
"tbl_df")
expect_error(septic_patients %>% freq(nonexisting))
expect_error(septic_patients %>% select(1:10) %>% freq())
expect_error(septic_patients %>% freq(peni, oxac, clox, amox, amcl,
ampi, pita, czol, cfep, cfur))
})

View File

@ -6,24 +6,30 @@ test_that("G-test works", {
# example 1: clearfield rice vs. red rice
x <- c(772, 1611, 737)
x.expected <- ratio(x, ratio = "1:2:1")
expect_equal(g.test(x, p = c(0.25, 0.50, 0.25))$p.value,
expected = 0.12574,
tolerance = 0.00001)
# example 2: red crossbills
x <- c(1752, 1895)
x.expected <- ratio(x, c(1, 1))
expect_equal(g.test(x)$p.value,
expected = 0.01787343,
tolerance = 0.00000001)
expect_error(g.test(0))
expect_error(g.test(c(0, 1), 0))
expect_error(g.test(c(1, 2, 3, 4), p = c(0.25, 0.25)))
expect_error(g.test(c(1, 2, 3, 4), p = c(0.25, 0.25, 0.25, 0.24)))
expect_warning(g.test(c(1, 2, 3, 4), p = c(0.25, 0.25, 0.25, 0.24), rescale.p = TRUE))
# INDEPENDENCE
x <- matrix(data = round(runif(4) * 100000, 0),
ncol = 2,
byrow = TRUE)
x <- as.data.frame(
matrix(data = round(runif(4) * 100000, 0),
ncol = 2,
byrow = TRUE)
)
expect_lt(g.test(x)$p.value,
1)
@ -31,4 +37,7 @@ test_that("G-test works", {
y = c(780, 1560, 780),
rescale.p = TRUE))
expect_error(g.test(matrix(data = c(-1, -2, -3 , -4), ncol = 2, byrow = TRUE)))
expect_error(g.test(matrix(data = c(0, 0, 0, 0), ncol = 2, byrow = TRUE)))
})

View File

@ -4,11 +4,12 @@ context("mdro.R")
test_that("MDRO works", {
library(dplyr)
outcome <- MDRO(septic_patients, "EUCAST", info = FALSE)
outcome <- suppressWarnings(MDRO(septic_patients, "EUCAST", info = TRUE))
outcome <- suppressWarnings(EUCAST_exceptional_phenotypes(septic_patients, info = TRUE))
# check class
expect_equal(outcome %>% class(), c('ordered', 'factor'))
outcome <- MDRO(septic_patients, "nl", info = FALSE)
outcome <- suppressWarnings(MDRO(septic_patients, "nl", info = TRUE))
# check class
expect_equal(outcome %>% class(), c('ordered', 'factor'))
@ -18,4 +19,7 @@ test_that("MDRO works", {
expect_equal(BRMO(septic_patients, info = FALSE), MDRO(septic_patients, "nl", info = FALSE))
# still working on German guidelines
expect_error(suppressWarnings(MRGN(septic_patients, info = TRUE)))
})