From b6655f6454f6c2f6d2674dec8442aa929175db5a Mon Sep 17 00:00:00 2001 From: "Matthijs S. Berends" Date: Fri, 13 Jul 2018 17:23:46 +0200 Subject: [PATCH] Welcome C++! --- .gitignore | 7 + DESCRIPTION | 6 +- NAMESPACE | 10 +- NEWS.md | 9 +- R/RcppExports.R | 15 + R/classes.R | 2 + R/g.test.R | 75 ++-- R/{rsi_analysis.R => resistance.R} | 408 +++++++----------- R/zzz.R | 4 + README.md | 4 +- man/as.mic.Rd | 1 + man/as.rsi.Rd | 1 + man/g.test.Rd | 18 +- man/resistance.Rd | 108 +++++ man/{rsi_predict.Rd => resistance_predict.Rd} | 12 +- man/rsi.Rd | 96 ----- src/RcppExports.cpp | 54 +++ src/rsi_calc.cpp | 29 ++ ...{test-rsi_analysis.R => test-resistance.R} | 45 +- 19 files changed, 469 insertions(+), 435 deletions(-) create mode 100644 R/RcppExports.R rename R/{rsi_analysis.R => resistance.R} (52%) create mode 100644 man/resistance.Rd rename man/{rsi_predict.Rd => resistance_predict.Rd} (88%) mode change 100755 => 100644 delete mode 100755 man/rsi.Rd create mode 100644 src/RcppExports.cpp create mode 100644 src/rsi_calc.cpp rename tests/testthat/{test-rsi_analysis.R => test-resistance.R} (61%) diff --git a/.gitignore b/.gitignore index d7381655..a431e20d 100755 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,10 @@ AMR.Rproj tests/testthat/Rplots.pdf inst/doc +/src/*.o +/src/*.o-* +/src/*.d +/src/*.so +*.dll +vignettes/*.R +.DS_Store diff --git a/DESCRIPTION b/DESCRIPTION index e665f88a..bad4438d 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR -Version: 0.2.0.9010 -Date: 2018-07-11 +Version: 0.2.0.9011 +Date: 2018-07-13 Title: Antimicrobial Resistance Analysis Authors@R: c( person( @@ -35,6 +35,7 @@ Imports: reshape2 (>= 1.4.0), xml2 (>= 1.0.0), knitr (>= 1.0.0), + Rcpp (>= 0.12.17), readr, rvest (>= 0.3.2), tibble @@ -44,6 +45,7 @@ Suggests: rmarkdown, rstudioapi, tidyr +LinkingTo: Rcpp VignetteBuilder: knitr URL: https://github.com/msberends/AMR BugReports: https://github.com/msberends/AMR/issues diff --git a/NAMESPACE b/NAMESPACE index af4a241a..5787295f 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -57,12 +57,14 @@ export(mo_property) export(n_rsi) export(p.symbol) export(ratio) +export(resistance) +export(resistance_predict) export(right_join_microorganisms) export(rsi) -export(rsi_df) export(rsi_predict) export(semi_join_microorganisms) export(skewness) +export(susceptibility) export(top_freq) exportMethods(as.data.frame.frequency_tbl) exportMethods(as.double.mic) @@ -89,20 +91,18 @@ exportMethods(skewness.default) exportMethods(skewness.matrix) exportMethods(summary.mic) exportMethods(summary.rsi) +importFrom(Rcpp,evalCpp) importFrom(broom,tidy) importFrom(clipr,read_clip_tbl) importFrom(clipr,write_clip) importFrom(curl,nslookup) importFrom(dplyr,"%>%") -importFrom(dplyr,all_vars) -importFrom(dplyr,any_vars) importFrom(dplyr,arrange) importFrom(dplyr,arrange_at) importFrom(dplyr,as_tibble) importFrom(dplyr,between) importFrom(dplyr,desc) importFrom(dplyr,filter) -importFrom(dplyr,filter_at) importFrom(dplyr,group_by) importFrom(dplyr,group_by_at) importFrom(dplyr,if_else) @@ -118,7 +118,6 @@ importFrom(dplyr,slice) importFrom(dplyr,summarise) importFrom(dplyr,tibble) importFrom(dplyr,top_n) -importFrom(dplyr,vars) importFrom(grDevices,boxplot.stats) importFrom(graphics,axis) importFrom(graphics,barplot) @@ -148,3 +147,4 @@ importFrom(utils,packageDescription) importFrom(utils,read.delim) importFrom(utils,write.table) importFrom(xml2,read_html) +useDynLib(AMR, .registration = TRUE) diff --git a/NEWS.md b/NEWS.md index 65c72b55..297e79a2 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,14 +1,15 @@ # 0.2.0.90xx (development version) #### New +* **BREAKING**: `rsi_df` was removed in favour of new functions `resistance` and `susceptibility`. Now, all functions used to calculate resistance (`resistance` and `susceptibility`) or count isolates (`n_rsi`) use **hybrid evaluation**. This means calculations are not done in R directly but rather in C++ using the `Rcpp` package, making them 60 to 65 times faster. The function `rsi` still works, but is deprecated. * Support for Addins menu in RStudio to quickly insert `%in%` or `%like%` (and give them keyboard shortcuts), or to view the datasets that come with this package -* For convience, descriptive statistical functions `kurtosis` and `skewness` that are lacking in base R - they are generic functions and have support for vectors, data.frames and matrices -* Function `g.test` to perform the Χ2 distributed [*G*-test](https://en.wikipedia.org/wiki/G-test), which use is the same as `chisq.test` -* Function `ratio` to transform a vector of values to a preset ratio. For example: +* For convience, new descriptive statistical functions `kurtosis` and `skewness` that are lacking in base R - they are generic functions and have support for vectors, data.frames and matrices +* Function `g.test` as added to perform the Χ2 distributed [*G*-test](https://en.wikipedia.org/wiki/G-test), which use is the same as `chisq.test` +* Function `ratio` was added to transform a vector of values to a preset ratio. For example: ```r ratio(c(772, 1611, 737), ratio = "1:2:1") # [1] 780 1560 780 ``` -* Function `p.symbol` to transform p value to their related symbol: `0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1` +* Function `p.symbol` was added to transform p values to their related symbols: `0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1` * New for frequency tables (function `freq`): * A vignette to explain its usage * Support for `table` to use as input: `freq(table(x, y))` diff --git a/R/RcppExports.R b/R/RcppExports.R new file mode 100644 index 00000000..a8668f34 --- /dev/null +++ b/R/RcppExports.R @@ -0,0 +1,15 @@ +# Generated by using Rcpp::compileAttributes() -> do not edit by hand +# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 + +rsi_calc_S <- function(x, include_I) { + .Call(`_AMR_rsi_calc_S`, x, include_I) +} + +rsi_calc_R <- function(x, include_I) { + .Call(`_AMR_rsi_calc_R`, x, include_I) +} + +rsi_calc_total <- function(x) { + .Call(`_AMR_rsi_calc_total`, x) +} + diff --git a/R/classes.R b/R/classes.R index fa353cc1..84db671b 100755 --- a/R/classes.R +++ b/R/classes.R @@ -22,6 +22,7 @@ #' @rdname as.rsi #' @param x vector #' @return Ordered factor with new class \code{rsi} and new attributes \code{package} and \code{package.version} +#' @keywords rsi #' @export #' @importFrom dplyr %>% #' @importFrom utils packageDescription @@ -200,6 +201,7 @@ barplot.rsi <- function(height, ...) { #' @param x vector #' @param na.rm a logical indicating whether missing values should be removed #' @return Ordered factor with new class \code{mic} and new attributes \code{package} and \code{package.version} +#' @keywords mic #' @export #' @importFrom dplyr %>% #' @importFrom utils packageDescription diff --git a/R/g.test.R b/R/g.test.R index 3e2366f5..962f383b 100644 --- a/R/g.test.R +++ b/R/g.test.R @@ -24,7 +24,7 @@ #' #' If \code{x} is a matrix with at least two rows and columns, it is taken as a two-dimensional contingency table: the entries of \code{x} must be non-negative integers. Otherwise, \code{x} and \code{y} must be vectors or factors of the same length; cases with missing values are removed, the objects are coerced to factors, and the contingency table is computed from these. Then Pearson's chi-squared test is performed of the null hypothesis that the joint distribution of the cell counts in a 2-dimensional contingency table is the product of the row and column marginals. #' -#' If \code{simulate.p.value} is \code{FALSE}, the p-value is computed from the asymptotic chi-squared distribution of the test statistic. Otherwise the p-value is computed for a Monte Carlo test (Hope, 1968) with \code{B} replicates. +#' The p-value is computed from the asymptotic chi-squared distribution of the test statistic. #' #' In the contingency table case simulation is done by random sampling from the set of all contingency tables with given marginals, and works only if the marginals are strictly positive. Note that this is not the usual sampling situation assumed for a chi-squared test (like the \emph{G}-test) but rather that for Fisher's exact test. #' @@ -61,7 +61,12 @@ #' @keywords chi #' @seealso \code{\link{chisq.test}} #' @references [1] McDonald, J.H. 2014. \strong{Handbook of Biological Statistics (3rd ed.)}. Sparky House Publishing, Baltimore, Maryland. \url{http://www.biostathandbook.com/gtestgof.html}. -#' @source This code is almost perfectly equal to \code{\link{chisq.test}}; only the calculation of the statistic was changed to \code{2 * sum(x * log(x / E))} and Yates' continuity correction was deleted. +#' @source This code is almost identical to \code{\link{chisq.test}}, except that: +#' \itemize{ +#' \item{The calculation of the statistic was changed to \code{2 * sum(x * log(x / E))}} +#' \item{Yates' continuity correction was removed as it does not apply to a \emph{G}-test} +#' \item{The possibility to simulate p values with \code{simulate.p.value} was removed} +#' } #' @export #' @importFrom stats pchisq complete.cases #' @examples @@ -105,9 +110,7 @@ g.test <- function(x, y = NULL, # correct = TRUE, p = rep(1/length(x), length(x)), - rescale.p = FALSE, - simulate.p.value = FALSE, - B = 2000) { + rescale.p = FALSE) { DNAME <- deparse(substitute(x)) if (is.data.frame(x)) x <- as.matrix(x) @@ -141,11 +144,11 @@ g.test <- function(x, stop("all entries of 'x' must be nonnegative and finite") if ((n <- sum(x)) == 0) stop("at least one entry of 'x' must be positive") - if (simulate.p.value) { - setMETH <- function() METHOD <<- paste(METHOD, "with simulated p-value\n\t (based on", - B, "replicates)") - almost.1 <- 1 - 64 * .Machine$double.eps - } + # if (simulate.p.value) { + # setMETH <- function() METHOD <<- paste(METHOD, "with simulated p-value\n\t (based on", + # B, "replicates)") + # almost.1 <- 1 - 64 * .Machine$double.eps + # } if (is.matrix(x)) { METHOD <- "G-test of independence" nr <- as.integer(nrow(x)) @@ -158,17 +161,17 @@ g.test <- function(x, v <- function(r, c, n) c * r * (n - r) * (n - c)/n^3 V <- outer(sr, sc, v, n) dimnames(E) <- dimnames(x) - if (simulate.p.value && all(sr > 0) && all(sc > 0)) { - setMETH() - tmp <- .Call(C_chisq_sim, sr, sc, B, E) - STATISTIC <- 2 * sum(x * log(x / E)) # sum(sort((x - E)^2/E, decreasing = TRUE)) for chisq.test - PARAMETER <- NA - PVAL <- (1 + sum(tmp >= almost.1 * STATISTIC))/(B + - 1) - } - else { - if (simulate.p.value) - warning("cannot compute simulated p-value with zero marginals") + # if (simulate.p.value && all(sr > 0) && all(sc > 0)) { + # setMETH() + # tmp <- .Call(chisq_sim, sr, sc, B, E, PACKAGE = "stats") + # STATISTIC <- 2 * sum(x * log(x / E)) # sum(sort((x - E)^2/E, decreasing = TRUE)) for chisq.test + # PARAMETER <- NA + # PVAL <- (1 + sum(tmp >= almost.1 * STATISTIC))/(B + + # 1) + # } + # else { + # if (simulate.p.value) + # warning("cannot compute simulated p-value with zero marginals") # if (correct && nrow(x) == 2L && ncol(x) == 2L) { # YATES <- min(0.5, abs(x - E)) # if (YATES > 0) @@ -178,7 +181,7 @@ g.test <- function(x, STATISTIC <- 2 * sum(x * log(x / E)) # sum((abs(x - E) - YATES)^2/E) for chisq.test PARAMETER <- (nr - 1L) * (nc - 1L) PVAL <- pchisq(STATISTIC, PARAMETER, lower.tail = FALSE) - } + # } } else { if (length(dim(x)) > 2L) @@ -199,22 +202,22 @@ g.test <- function(x, V <- n * p * (1 - p) STATISTIC <- 2 * sum(x * log(x / E)) # sum((x - E)^2/E) for chisq.test names(E) <- names(x) - if (simulate.p.value) { - setMETH() - nx <- length(x) - sm <- matrix(sample.int(nx, B * n, TRUE, prob = p), - nrow = n) - ss <- apply(sm, 2L, function(x, E, k) { - sum((table(factor(x, levels = 1L:k)) - E)^2/E) - }, E = E, k = nx) - PARAMETER <- NA - PVAL <- (1 + sum(ss >= almost.1 * STATISTIC))/(B + - 1) - } - else { + # if (simulate.p.value) { + # setMETH() + # nx <- length(x) + # sm <- matrix(sample.int(nx, B * n, TRUE, prob = p), + # nrow = n) + # ss <- apply(sm, 2L, function(x, E, k) { + # sum((table(factor(x, levels = 1L:k)) - E)^2/E) + # }, E = E, k = nx) + # PARAMETER <- NA + # PVAL <- (1 + sum(ss >= almost.1 * STATISTIC))/(B + + # 1) + # } + # else { PARAMETER <- length(x) - 1 PVAL <- pchisq(STATISTIC, PARAMETER, lower.tail = FALSE) - } + # } } names(STATISTIC) <- "X-squared" names(PARAMETER) <- "df" diff --git a/R/rsi_analysis.R b/R/resistance.R similarity index 52% rename from R/rsi_analysis.R rename to R/resistance.R index 52f61812..6dfdda56 100755 --- a/R/rsi_analysis.R +++ b/R/resistance.R @@ -16,287 +16,198 @@ # GNU General Public License for more details. # # ==================================================================== # -#' Resistance of isolates +#' Calculate resistance of isolates #' -#' This functions can be used to calculate the (co-)resistance of isolates (i.e. percentage S, SI, I, IR or R [of a vector] of isolates). The functions \code{rsi} and \code{n_rsi} can be used in \code{dplyr}s \code{\link[dplyr]{summarise}} and support grouped variables, see \emph{Examples}. -#' @param tbl \code{data.frame} containing columns with antibiotic interpretations. -#' @param ab character vector with 1, 2 or 3 antibiotics that occur as column names in \code{tbl}, like \code{ab = c("amox", "amcl")} -#' @param ab1,ab2 vector of antibiotic interpretations, they will be transformed internally with \code{\link{as.rsi}} -#' @param interpretation antimicrobial interpretation of which the portion must be calculated. Valid values are \code{"S"}, \code{"SI"}, \code{"I"}, \code{"IR"} or \code{"R"}. -#' @param minimum minimal amount of available isolates. Any number lower than \code{minimum} will return \code{NA} with a warning (when \code{warning = TRUE}). -#' @param as_percent return output as percent (text), will else (at default) be a double -#' @param info calculate the amount of available isolates and print it, like \code{n = 423} -#' @param warning show a warning when the available amount of isolates is below \code{minimum} -#' @details Remember that you should filter your table to let it contain \strong{only first isolates}! +#' These functions can be used to calculate the (co-)resistance of microbial isolates (i.e. percentage S, SI, I, IR or R). All functions can be used in \code{dplyr}s \code{\link[dplyr]{summarise}} and support grouped variables, see \emph{Examples}. +#' @param ab,ab1,ab2 vector of antibiotic interpretations, they will be transformed internally with \code{\link{as.rsi}} +#' @param include_I logical to indicate whether antimicrobial interpretations of "I" should be included +#' @param minimum minimal amount of available isolates. Any number lower than \code{minimum} will return \code{NA}. +#' @param as_percent logical to indicate whether the output must be returned as percent (text), will else be a double +#' @param interpretation antimicrobial interpretation +#' @details \strong{Remember that you should filter your table to let it contain only first isolates!} Use \code{\link{first_isolate}} to determine them in your data set. +#' +#' All return values are calculated using hybrid evaluation (i.e. using C++), which makes these functions 60-65 times faster than in \code{AMR} v0.2.0 and below. The \code{rsi} function is available for backwards compatibility and deprecated. It now uses the \code{resistance} and \code{susceptibility} functions internally, based on the \code{interpretation} parameter. #' \if{html}{ -#' \cr \cr +#' \cr #' To calculate the probability (\emph{p}) of susceptibility of one antibiotic, we use this formula: #' \out{
}\figure{mono_therapy.png}\out{
} #' To calculate the probability (\emph{p}) of susceptibility of more antibiotics (i.e. combination therapy), we need to check whether one of them has a susceptible result (as numerator) and count all cases where all antibiotics were tested (as denominator). \cr +#' \cr #' For two antibiotics: #' \out{
}\figure{combi_therapy_2.png}\out{
} #' \cr -#' For three antibiotics: +#' Theoretically for three antibiotics: #' \out{
}\figure{combi_therapy_3.png}\out{
} #' } -#' @keywords rsi antibiotics isolate isolates +#' @keywords resistance susceptibility rsi_df antibiotics isolate isolates #' @return Double or, when \code{as_percent = TRUE}, a character. -#' @rdname rsi +#' @rdname resistance #' @export -#' @importFrom dplyr %>% n_distinct filter filter_at pull vars all_vars any_vars #' @examples #' library(dplyr) #' #' septic_patients %>% #' group_by(hospital_id) %>% -#' summarise(cipro_susceptibility = rsi(cipr, interpretation = "S"), +#' summarise(p = susceptibility(cipr), #' n = n_rsi(cipr)) # n_rsi works like n_distinct in dplyr #' #' septic_patients %>% #' group_by(hospital_id) %>% -#' summarise(cipro_S = rsi(cipr, interpretation = "S", -#' as_percent = TRUE, warning = FALSE), +#' summarise(cipro_p = susceptibility(cipr, as_percent = TRUE), #' cipro_n = n_rsi(cipr), -#' genta_S = rsi(gent, interpretation = "S", -#' as_percent = TRUE, warning = FALSE), +#' genta_p = susceptibility(gent, as_percent = TRUE), #' genta_n = n_rsi(gent), -#' combination_S = rsi(cipr, gent, interpretation = "S", -#' as_percent = TRUE, warning = FALSE), +#' combination_p = susceptibility(cipr, gent, as_percent = TRUE), #' combination_n = n_rsi(cipr, gent)) #' -#' # calculate resistance -#' rsi(septic_patients$amox) -#' # or susceptibility -#' rsi(septic_patients$amox, interpretation = "S") #' -#' # calculate co-resistance between amoxicillin/clav acid and gentamicin, -#' # so we can review that combination therapy does a lot more than mono therapy: -#' septic_patients %>% rsi_df(ab = "amcl", interpretation = "S") # = 67.8% -#' septic_patients %>% rsi_df(ab = "gent", interpretation = "S") # = 69.1% -#' septic_patients %>% rsi_df(ab = c("amcl", "gent"), interpretation = "S") # = 90.6% +#' # Calculate resistance +#' resistance(septic_patients$amox) +#' rsi(septic_patients$amox, interpretation = "IR") # deprecated +#' +#' # Or susceptibility +#' susceptibility(septic_patients$amox) +#' rsi(septic_patients$amox, interpretation = "S") # deprecated +#' +#' +#' # Calculate co-resistance between amoxicillin/clav acid and gentamicin, +#' # so we can see that combination therapy does a lot more than mono therapy: +#' susceptibility(septic_patients$amcl) # p = 67.8% +#' n_rsi(septic_patients$amcl) # n = 1641 +#' +#' susceptibility(septic_patients$gent) # p = 69.1% +#' n_rsi(septic_patients$gent) # n = 1863 +#' +#' with(septic_patients, +#' susceptibility(amcl, gent)) # p = 90.6% +#' with(septic_patients, +#' n_rsi(amcl, gent)) # n = 1580 #' #' \dontrun{ #' # calculate current empiric combination therapy of Helicobacter gastritis: #' my_table %>% #' filter(first_isolate == TRUE, #' genus == "Helicobacter") %>% -#' rsi_df(ab = c("amox", "metr")) # amoxicillin with metronidazole +#' summarise(p = susceptibility(amox, metr), # amoxicillin with metronidazole +#' n = n_rsi(amox, metr)) #' } +resistance <- function(ab, + include_I = TRUE, + minimum = 30, + as_percent = FALSE) { + + if (NCOL(ab) > 1) { + stop('`ab` must be a vector of antimicrobial interpretations', call. = FALSE) + } + if (!is.logical(include_I)) { + stop('`include_I` must be logical', call. = FALSE) + } + if (!is.numeric(minimum)) { + stop('`minimum` must be numeric', call. = FALSE) + } + if (!is.logical(as_percent)) { + stop('`as_percent` must be logical', call. = FALSE) + } + + x <- as.integer(as.rsi(ab)) + total <- .Call(`_AMR_rsi_calc_total`, x) + if (total < minimum) { + return(NA) + } + found <- .Call(`_AMR_rsi_calc_R`, x, include_I) + + if (as_percent == TRUE) { + percent(found / total, force_zero = TRUE) + } else { + found / total + } +} + +#' @rdname resistance +#' @export +susceptibility <- function(ab1, + ab2 = NULL, + include_I = FALSE, + minimum = 30, + as_percent = FALSE) { + + if (NCOL(ab1) > 1) { + stop('`ab1` must be a vector of antimicrobial interpretations', call. = FALSE) + } + if (!is.logical(include_I)) { + stop('`include_I` must be logical', call. = FALSE) + } + if (!is.numeric(minimum)) { + stop('`minimum` must be numeric', call. = FALSE) + } + if (!is.logical(as_percent)) { + stop('`as_percent` must be logical', call. = FALSE) + } + + if (!is.null(ab2)) { + if (NCOL(ab2) > 1) { + stop('`ab2` must be a vector of antimicrobial interpretations', call. = FALSE) + } + x <- apply(X = data.frame(ab1 = as.integer(as.rsi(ab1)), + ab2 = as.integer(as.rsi(ab2))), + MARGIN = 1, + FUN = min) + } else { + x <- as.integer(as.rsi(ab1)) + } + total <- .Call(`_AMR_rsi_calc_total`, x) + if (total < minimum) { + return(NA) + } + found <- .Call(`_AMR_rsi_calc_S`, x, include_I) + + if (as_percent == TRUE) { + percent(found / total, force_zero = TRUE) + } else { + found / total + } +} + +#' @rdname resistance +#' @export +n_rsi <- function(ab1, ab2 = NULL) { + if (NCOL(ab1) > 1) { + stop('`ab1` must be a vector of antimicrobial interpretations', call. = FALSE) + } + if (!is.null(ab2)) { + if (NCOL(ab2) > 1) { + stop('`ab2` must be a vector of antimicrobial interpretations', call. = FALSE) + } + x <- apply(X = data.frame(ab1 = as.integer(as.rsi(ab1)), + ab2 = as.integer(as.rsi(ab2))), + MARGIN = 1, + FUN = min) + } else { + x <- as.integer(as.rsi(ab1)) + } + .Call(`_AMR_rsi_calc_total`, x) +} + + +#' @rdname resistance +#' @export rsi <- function(ab1, - ab2 = NA, - interpretation = 'IR', + ab2 = NULL, + interpretation = "IR", minimum = 30, - as_percent = FALSE, - info = FALSE, - warning = TRUE) { - ab1.name <- deparse(substitute(ab1)) - if (ab1.name %like% '.[$].') { - ab1.name <- unlist(strsplit(ab1.name, "$", fixed = TRUE)) - ab1.name <- ab1.name[length(ab1.name)] - } - if (!ab1.name %like% '^[a-z]{3,4}$') { - ab1.name <- 'rsi1' - } - if (length(ab1) == 1 & is.character(ab1)) { - stop('`ab1` must be a vector of antibiotic interpretations.', - '\n Try rsi(', ab1, ', ...) instead of rsi("', ab1, '", ...)', call. = FALSE) - } - ab2.name <- deparse(substitute(ab2)) - if (ab2.name %like% '.[$].') { - ab2.name <- unlist(strsplit(ab2.name, "$", fixed = TRUE)) - ab2.name <- ab2.name[length(ab2.name)] - } - if (!ab2.name %like% '^[a-z]{3,4}$') { - ab2.name <- 'rsi2' - } - if (length(ab2) == 1 & is.character(ab2)) { - stop('`ab2` must be a vector of antibiotic interpretations.', - '\n Try rsi(', ab2, ', ...) instead of rsi("', ab2, '", ...)', call. = FALSE) - } - - interpretation <- paste(interpretation, collapse = "") - - ab1 <- as.rsi(ab1) - ab2 <- as.rsi(ab2) - - tbl <- tibble(rsi1 = ab1, rsi2 = ab2) - colnames(tbl) <- c(ab1.name, ab2.name) - - if (length(ab2) == 1) { - r <- rsi_df(tbl = tbl, - ab = ab1.name, - interpretation = interpretation, - minimum = minimum, - as_percent = FALSE, - info = info, - warning = warning) + as_percent = FALSE) { + warning("'rsi' is deprecated. Use 'resistance' or 'susceptibility' instead.", call. = FALSE) + if (interpretation %in% c('IR', 'RI')) { + resistance(ab = ab1, include_I = TRUE, minimum = minimum, as_percent = as_percent) + } else if (interpretation == 'R') { + resistance(ab = ab1, include_I = FALSE, minimum = minimum, as_percent = as_percent) + } else if (interpretation %in% c('IS', 'SI')) { + susceptibility(ab1 = ab1, ab2 = ab2, include_I = TRUE, minimum = minimum, as_percent = as_percent) + } else if (interpretation == 'S') { + susceptibility(ab1 = ab1, ab2 = ab2, include_I = FALSE, minimum = minimum, as_percent = as_percent) } else { - if (length(ab1) != length(ab2)) { - stop('`ab1` (n = ', length(ab1), ') and `ab2` (n = ', length(ab2), ') must be of same length.', call. = FALSE) - } - if (!interpretation %in% c('S', 'IS', 'SI')) { - warning('`interpretation` not set to S or I/S, albeit analysing a combination therapy.', call. = FALSE) - } - r <- rsi_df(tbl = tbl, - ab = c(ab1.name, ab2.name), - interpretation = interpretation, - minimum = minimum, - as_percent = FALSE, - info = info, - warning = warning) + stop('invalid `interpretation`') } - if (as_percent == TRUE) { - percent(r, force_zero = TRUE) - } else { - r - } -} - -#' @export -#' @rdname rsi -rsi_df <- function(tbl, - ab, - interpretation = 'IR', - minimum = 30, - as_percent = FALSE, - info = TRUE, - warning = TRUE) { - - # in case tbl$interpretation already exists: - interpretations_to_check <- paste(interpretation, collapse = "") - - # validate: - if (min(grepl('^[a-z]{3,4}$', ab)) == 0 & - min(grepl('^rsi[1-2]$', ab)) == 0) { - for (i in 1:length(ab)) { - ab[i] <- paste0('rsi', i) - } - } - if (!grepl('^(S|SI|IS|I|IR|RI|R){1}$', interpretations_to_check)) { - stop('Invalid `interpretation`; must be "S", "SI", "I", "IR", or "R".') - } - if ('is_ic' %in% colnames(tbl)) { - if (n_distinct(tbl$is_ic) > 1 & warning == TRUE) { - warning('Dataset contains isolates from the Intensive Care. Exclude them from proper epidemiological analysis.') - } - } - - # transform when checking for different results - if (interpretations_to_check %in% c('SI', 'IS')) { - for (i in 1:length(ab)) { - tbl[which(tbl[, ab[i]] == 'I'), ab[i]] <- 'S' - } - interpretations_to_check <- 'S' - } - if (interpretations_to_check %in% c('RI', 'IR')) { - for (i in 1:length(ab)) { - tbl[which(tbl[, ab[i]] == 'I'), ab[i]] <- 'R' - } - interpretations_to_check <- 'R' - } - - # get fraction - if (length(ab) == 1) { - numerator <- tbl %>% - filter(pull(., ab[1]) == interpretations_to_check) %>% - nrow() - - denominator <- tbl %>% - filter(pull(., ab[1]) %in% c("S", "I", "R")) %>% - nrow() - - } else if (length(ab) == 2) { - if (interpretations_to_check != 'S') { - warning('`interpretation` not set to S or I/S, albeit analysing a combination therapy.', call. = FALSE) - } - numerator <- tbl %>% - filter_at(vars(ab[1], ab[2]), - any_vars(. == interpretations_to_check)) %>% - filter_at(vars(ab[1], ab[2]), - all_vars(. %in% c("S", "R", "I"))) %>% - nrow() - - denominator <- tbl %>% - filter_at(vars(ab[1], ab[2]), - all_vars(. %in% c("S", "R", "I"))) %>% - nrow() - - } else if (length(ab) == 3) { - if (interpretations_to_check != 'S') { - warning('`interpretation` not set to S or I/S, albeit analysing a combination therapy.', call. = FALSE) - } - numerator <- tbl %>% - filter_at(vars(ab[1], ab[2], ab[3]), - any_vars(. == interpretations_to_check)) %>% - filter_at(vars(ab[1], ab[2], ab[3]), - all_vars(. %in% c("S", "R", "I"))) %>% - nrow() - - denominator <- tbl %>% - filter_at(vars(ab[1], ab[2], ab[3]), - all_vars(. %in% c("S", "R", "I"))) %>% - nrow() - - } else { - stop('Maximum of 3 drugs allowed.') - } - - # build text part - if (info == TRUE) { - cat('n =', denominator) - info.txt1 <- percent(denominator / nrow(tbl)) - if (denominator == 0) { - info.txt1 <- 'none' - } - info.txt2 <- gsub(',', ' and', - ab %>% - abname(tolower = TRUE) %>% - toString(), fixed = TRUE) - info.txt2 <- gsub('rsi1 and rsi2', 'these two drugs', info.txt2, fixed = TRUE) - info.txt2 <- gsub('rsi1', 'this drug', info.txt2, fixed = TRUE) - cat(paste0(' (of ', nrow(tbl), ' in total; ', info.txt1, ' tested on ', info.txt2, ')\n')) - } - - # calculate and format - y <- numerator / denominator - if (as_percent == TRUE) { - y <- percent(y, force_zero = TRUE) - } - - if (denominator < minimum) { - if (warning == TRUE) { - warning(paste0('TOO FEW ISOLATES OF ', toString(ab), ' (n = ', denominator, ', n < ', minimum, '); NO RESULT.')) - } - y <- NA - } - - # output - y -} - -#' @export -#' @rdname rsi -n_rsi <- function(ab1, ab2 = NA) { - - if (length(ab1) == 1 & is.character(ab1)) { - stop('`ab1` must be a vector of antibiotic interpretations.', - '\n Try n_rsi(', ab1, ', ...) instead of n_rsi("', ab1, '", ...)', call. = FALSE) - } - ab1 <- as.rsi(ab1) - - if (length(ab2) == 1 & all(is.na(ab2))) { - # only 1 antibiotic - length(ab1[!is.na(ab1)]) - } else { - if (length(ab2) == 1 & is.character(ab2)) { - stop('`ab2` must be a vector of antibiotic interpretations.', - '\n Try n_rsi(', ab2, ', ...) instead of n_rsi("', ab2, '", ...)', call. = FALSE) - } - ab2 <- as.rsi(ab2) - tbl <- tibble(ab1, ab2) - tbl %>% filter(!is.na(ab1) & !is.na(ab2)) %>% nrow() - } - } #' Predict antimicrobial resistance @@ -312,7 +223,8 @@ n_rsi <- function(ab1, ab2 = NA) { #' @param preserve_measurements overwrite predictions of years that are actually available in the data, with the original data. The standard errors of those years will be \code{NA}. #' @param info print textual analysis with the name and \code{\link{summary}} of the model. #' @return \code{data.frame} with columns \code{year}, \code{probR}, \code{se_min} and \code{se_max}. -#' @seealso \code{\link{lm}} \cr \code{\link{glm}} +#' @seealso \code{\link{resistance}} \cr \code{\link{lm}} \code{\link{glm}} +#' @rdname resistance_predict #' @export #' @importFrom dplyr %>% pull mutate group_by_at summarise filter #' @importFrom reshape2 dcast @@ -354,7 +266,7 @@ n_rsi <- function(ab1, ab2 = NA) { #' year_max = 2025, #' preserve_measurements = FALSE) #' -rsi_predict <- function(tbl, +resistance_predict <- function(tbl, col_ab, col_date, year_max = as.integer(format(as.Date(Sys.Date()), '%Y')) + 15, @@ -491,3 +403,7 @@ rsi_predict <- function(tbl, total } + +#' @rdname resistance_predict +#' @export +rsi_predict <- resistance_predict diff --git a/R/zzz.R b/R/zzz.R index 1460bf58..a04fedce 100755 --- a/R/zzz.R +++ b/R/zzz.R @@ -1,3 +1,7 @@ .onLoad <- function(libname, pkgname) { backports::import(pkgname) } + +#' @importFrom Rcpp evalCpp +#' @useDynLib AMR, .registration = TRUE +NULL diff --git a/README.md b/README.md index c3e31208..d49fb07b 100755 --- a/README.md +++ b/README.md @@ -24,8 +24,8 @@ This R package was created for academic research by PhD students of the Faculty ## Why this package? This R package contains functions to make **microbiological, epidemiological data analysis easier**. It allows the use of some new classes to work with MIC values and antimicrobial interpretations (i.e. values S, I and R). -With `AMR` you can also: -* Conduct AMR analysis with the `rsi` function, that can also be used with the `dplyr` package (e.g. in conjunction with `summarise`) to calculate the resistance percentages (and even co-resistance) of different antibiotic columns of a table +With `AMR` you can: +* Calculate the resistance (and even co-resistance) of microbial isolates with the `resistance` and `susceptibility` functions, that can also be used with the `dplyr` package (e.g. in conjunction with `summarise`). Our functions use expressions that are not evaluated by R, but by alternative C++ code that is dramatically faster and uses less memory. This is called *hybrid evaluation*. * Predict antimicrobial resistance for the nextcoming years with the `rsi_predict` function * Apply [EUCAST rules to isolates](http://www.eucast.org/expert_rules_and_intrinsic_resistance/) with the `EUCAST_rules` function * Identify first isolates of every patient [using guidelines from the CLSI](https://clsi.org/standards/products/microbiology/documents/m39/) (Clinical and Laboratory Standards Institute) with the `first_isolate` function diff --git a/man/as.mic.Rd b/man/as.mic.Rd index 448c0aa2..4cf77e34 100755 --- a/man/as.mic.Rd +++ b/man/as.mic.Rd @@ -30,3 +30,4 @@ as.mic("<=0.002; R") # will return <=0.002 plot(mic_data) barplot(mic_data) } +\keyword{mic} diff --git a/man/as.rsi.Rd b/man/as.rsi.Rd index d9d9a641..61aab991 100755 --- a/man/as.rsi.Rd +++ b/man/as.rsi.Rd @@ -29,3 +29,4 @@ as.rsi("<= 0.002; R") # will return R plot(rsi_data) # for percentages barplot(rsi_data) # for frequencies } +\keyword{rsi} diff --git a/man/g.test.Rd b/man/g.test.Rd index a21d8abe..1a425e87 100644 --- a/man/g.test.Rd +++ b/man/g.test.Rd @@ -4,11 +4,15 @@ \alias{g.test} \title{\emph{G}-test for Count Data} \source{ -This code is almost perfectly equal to \code{\link{chisq.test}}; only the calculation of the statistic was changed to \code{2 * sum(x * log(x / E))} and Yates' continuity correction was deleted. +This code is almost identical to \code{\link{chisq.test}}, except that: +\itemize{ + \item{The calculation of the statistic was changed to \code{2 * sum(x * log(x / E))}} + \item{Yates' continuity correction was removed as it does not apply to a \emph{G}-test} + \item{The possibility to simulate p values with \code{simulate.p.value} was removed} +} } \usage{ -g.test(x, y = NULL, p = rep(1/length(x), length(x)), rescale.p = FALSE, - simulate.p.value = FALSE, B = 2000) +g.test(x, y = NULL, p = rep(1/length(x), length(x)), rescale.p = FALSE) } \arguments{ \item{x}{a numeric vector or matrix. \code{x} and \code{y} can also @@ -23,12 +27,6 @@ g.test(x, y = NULL, p = rep(1/length(x), length(x)), rescale.p = FALSE, \item{rescale.p}{a logical scalar; if TRUE then \code{p} is rescaled (if necessary) to sum to 1. If \code{rescale.p} is FALSE, and \code{p} does not sum to 1, an error is given.} - -\item{simulate.p.value}{a logical indicating whether to compute - p-values by Monte Carlo simulation.} - -\item{B}{an integer specifying the number of replicates used in the - Monte Carlo test.} } \value{ A list with class \code{"htest"} containing the following @@ -58,7 +56,7 @@ If \code{x} is a matrix with one row or column, or if \code{x} is a vector and \ If \code{x} is a matrix with at least two rows and columns, it is taken as a two-dimensional contingency table: the entries of \code{x} must be non-negative integers. Otherwise, \code{x} and \code{y} must be vectors or factors of the same length; cases with missing values are removed, the objects are coerced to factors, and the contingency table is computed from these. Then Pearson's chi-squared test is performed of the null hypothesis that the joint distribution of the cell counts in a 2-dimensional contingency table is the product of the row and column marginals. - If \code{simulate.p.value} is \code{FALSE}, the p-value is computed from the asymptotic chi-squared distribution of the test statistic. Otherwise the p-value is computed for a Monte Carlo test (Hope, 1968) with \code{B} replicates. + The p-value is computed from the asymptotic chi-squared distribution of the test statistic. In the contingency table case simulation is done by random sampling from the set of all contingency tables with given marginals, and works only if the marginals are strictly positive. Note that this is not the usual sampling situation assumed for a chi-squared test (like the \emph{G}-test) but rather that for Fisher's exact test. diff --git a/man/resistance.Rd b/man/resistance.Rd new file mode 100644 index 00000000..2dd77648 --- /dev/null +++ b/man/resistance.Rd @@ -0,0 +1,108 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/resistance.R +\name{resistance} +\alias{resistance} +\alias{susceptibility} +\alias{n_rsi} +\alias{rsi} +\title{Calculate resistance of isolates} +\usage{ +resistance(ab, include_I = TRUE, minimum = 30, as_percent = FALSE) + +susceptibility(ab1, ab2 = NULL, include_I = FALSE, minimum = 30, + as_percent = FALSE) + +n_rsi(ab1, ab2 = NULL) + +rsi(ab1, ab2 = NULL, interpretation = "IR", minimum = 30, + as_percent = FALSE) +} +\arguments{ +\item{ab, ab1, ab2}{vector of antibiotic interpretations, they will be transformed internally with \code{\link{as.rsi}}} + +\item{include_I}{logical to indicate whether antimicrobial interpretations of "I" should be included} + +\item{minimum}{minimal amount of available isolates. Any number lower than \code{minimum} will return \code{NA}.} + +\item{as_percent}{logical to indicate whether the output must be returned as percent (text), will else be a double} + +\item{interpretation}{antimicrobial interpretation} +} +\value{ +Double or, when \code{as_percent = TRUE}, a character. +} +\description{ +These functions can be used to calculate the (co-)resistance of microbial isolates (i.e. percentage S, SI, I, IR or R). All functions can be used in \code{dplyr}s \code{\link[dplyr]{summarise}} and support grouped variables, see \emph{Examples}. +} +\details{ +\strong{Remember that you should filter your table to let it contain only first isolates!} Use \code{\link{first_isolate}} to determine them in your data set. + +All return values are calculated using hybrid evaluation (i.e. using C++), which makes these functions 60-65 times faster than in \code{AMR} v0.2.0 and below. The \code{rsi} function is available for backwards compatibility and deprecated. It now uses the \code{resistance} and \code{susceptibility} functions internally, based on the \code{interpretation} parameter. +\if{html}{ + \cr + To calculate the probability (\emph{p}) of susceptibility of one antibiotic, we use this formula: + \out{
}\figure{mono_therapy.png}\out{
} + To calculate the probability (\emph{p}) of susceptibility of more antibiotics (i.e. combination therapy), we need to check whether one of them has a susceptible result (as numerator) and count all cases where all antibiotics were tested (as denominator). \cr + \cr + For two antibiotics: + \out{
}\figure{combi_therapy_2.png}\out{
} + \cr + Theoretically for three antibiotics: + \out{
}\figure{combi_therapy_3.png}\out{
} +} +} +\examples{ +library(dplyr) + +septic_patients \%>\% + group_by(hospital_id) \%>\% + summarise(p = susceptibility(cipr), + n = n_rsi(cipr)) # n_rsi works like n_distinct in dplyr + +septic_patients \%>\% + group_by(hospital_id) \%>\% + summarise(cipro_p = susceptibility(cipr, as_percent = TRUE), + cipro_n = n_rsi(cipr), + genta_p = susceptibility(gent, as_percent = TRUE), + genta_n = n_rsi(gent), + combination_p = susceptibility(cipr, gent, as_percent = TRUE), + combination_n = n_rsi(cipr, gent)) + + +# Calculate resistance +resistance(septic_patients$amox) +rsi(septic_patients$amox, interpretation = "IR") # deprecated + +# Or susceptibility +susceptibility(septic_patients$amox) +rsi(septic_patients$amox, interpretation = "S") # deprecated + + +# Calculate co-resistance between amoxicillin/clav acid and gentamicin, +# so we can see that combination therapy does a lot more than mono therapy: +susceptibility(septic_patients$amcl) # p = 67.8\% +n_rsi(septic_patients$amcl) # n = 1641 + +susceptibility(septic_patients$gent) # p = 69.1\% +n_rsi(septic_patients$gent) # n = 1863 + +with(septic_patients, + susceptibility(amcl, gent)) # p = 90.6\% +with(septic_patients, + n_rsi(amcl, gent)) # n = 1580 + +\dontrun{ +# calculate current empiric combination therapy of Helicobacter gastritis: +my_table \%>\% + filter(first_isolate == TRUE, + genus == "Helicobacter") \%>\% + summarise(p = susceptibility(amox, metr), # amoxicillin with metronidazole + n = n_rsi(amox, metr)) +} +} +\keyword{antibiotics} +\keyword{isolate} +\keyword{isolates} +\keyword{resistance} +\keyword{rsi_df} +\keyword{susceptibility} diff --git a/man/rsi_predict.Rd b/man/resistance_predict.Rd old mode 100755 new mode 100644 similarity index 88% rename from man/rsi_predict.Rd rename to man/resistance_predict.Rd index b48f9790..064d4c05 --- a/man/rsi_predict.Rd +++ b/man/resistance_predict.Rd @@ -1,9 +1,15 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/rsi_analysis.R -\name{rsi_predict} +% Please edit documentation in R/resistance.R +\name{resistance_predict} +\alias{resistance_predict} \alias{rsi_predict} \title{Predict antimicrobial resistance} \usage{ +resistance_predict(tbl, col_ab, col_date, + year_max = as.integer(format(as.Date(Sys.Date()), "\%Y")) + 15, + year_every = 1, model = "binomial", I_as_R = TRUE, + preserve_measurements = TRUE, info = TRUE) + rsi_predict(tbl, col_ab, col_date, year_max = as.integer(format(as.Date(Sys.Date()), "\%Y")) + 15, year_every = 1, model = "binomial", I_as_R = TRUE, @@ -74,5 +80,5 @@ septic_patients \%>\% } \seealso{ -\code{\link{lm}} \cr \code{\link{glm}} +\code{\link{resistance}} \cr \code{\link{lm}} \code{\link{glm}} } diff --git a/man/rsi.Rd b/man/rsi.Rd deleted file mode 100755 index 6815e651..00000000 --- a/man/rsi.Rd +++ /dev/null @@ -1,96 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/rsi_analysis.R -\name{rsi} -\alias{rsi} -\alias{rsi_df} -\alias{n_rsi} -\title{Resistance of isolates} -\usage{ -rsi(ab1, ab2 = NA, interpretation = "IR", minimum = 30, - as_percent = FALSE, info = FALSE, warning = TRUE) - -rsi_df(tbl, ab, interpretation = "IR", minimum = 30, as_percent = FALSE, - info = TRUE, warning = TRUE) - -n_rsi(ab1, ab2 = NA) -} -\arguments{ -\item{ab1, ab2}{vector of antibiotic interpretations, they will be transformed internally with \code{\link{as.rsi}}} - -\item{interpretation}{antimicrobial interpretation of which the portion must be calculated. Valid values are \code{"S"}, \code{"SI"}, \code{"I"}, \code{"IR"} or \code{"R"}.} - -\item{minimum}{minimal amount of available isolates. Any number lower than \code{minimum} will return \code{NA} with a warning (when \code{warning = TRUE}).} - -\item{as_percent}{return output as percent (text), will else (at default) be a double} - -\item{info}{calculate the amount of available isolates and print it, like \code{n = 423}} - -\item{warning}{show a warning when the available amount of isolates is below \code{minimum}} - -\item{tbl}{\code{data.frame} containing columns with antibiotic interpretations.} - -\item{ab}{character vector with 1, 2 or 3 antibiotics that occur as column names in \code{tbl}, like \code{ab = c("amox", "amcl")}} -} -\value{ -Double or, when \code{as_percent = TRUE}, a character. -} -\description{ -This functions can be used to calculate the (co-)resistance of isolates (i.e. percentage S, SI, I, IR or R [of a vector] of isolates). The functions \code{rsi} and \code{n_rsi} can be used in \code{dplyr}s \code{\link[dplyr]{summarise}} and support grouped variables, see \emph{Examples}. -} -\details{ -Remember that you should filter your table to let it contain \strong{only first isolates}! -\if{html}{ - \cr \cr - To calculate the probability (\emph{p}) of susceptibility of one antibiotic, we use this formula: - \out{
}\figure{mono_therapy.png}\out{
} - To calculate the probability (\emph{p}) of susceptibility of more antibiotics (i.e. combination therapy), we need to check whether one of them has a susceptible result (as numerator) and count all cases where all antibiotics were tested (as denominator). \cr - For two antibiotics: - \out{
}\figure{combi_therapy_2.png}\out{
} - \cr - For three antibiotics: - \out{
}\figure{combi_therapy_3.png}\out{
} -} -} -\examples{ -library(dplyr) - -septic_patients \%>\% - group_by(hospital_id) \%>\% - summarise(cipro_susceptibility = rsi(cipr, interpretation = "S"), - n = n_rsi(cipr)) # n_rsi works like n_distinct in dplyr - -septic_patients \%>\% - group_by(hospital_id) \%>\% - summarise(cipro_S = rsi(cipr, interpretation = "S", - as_percent = TRUE, warning = FALSE), - cipro_n = n_rsi(cipr), - genta_S = rsi(gent, interpretation = "S", - as_percent = TRUE, warning = FALSE), - genta_n = n_rsi(gent), - combination_S = rsi(cipr, gent, interpretation = "S", - as_percent = TRUE, warning = FALSE), - combination_n = n_rsi(cipr, gent)) - -# calculate resistance -rsi(septic_patients$amox) -# or susceptibility -rsi(septic_patients$amox, interpretation = "S") - -# calculate co-resistance between amoxicillin/clav acid and gentamicin, -# so we can review that combination therapy does a lot more than mono therapy: -septic_patients \%>\% rsi_df(ab = "amcl", interpretation = "S") # = 67.8\% -septic_patients \%>\% rsi_df(ab = "gent", interpretation = "S") # = 69.1\% -septic_patients \%>\% rsi_df(ab = c("amcl", "gent"), interpretation = "S") # = 90.6\% - -\dontrun{ -# calculate current empiric combination therapy of Helicobacter gastritis: -my_table \%>\% - filter(first_isolate == TRUE, - genus == "Helicobacter") \%>\% - rsi_df(ab = c("amox", "metr")) # amoxicillin with metronidazole -} -} -\keyword{antibiotics} -\keyword{isolate} -\keyword{isolates} -\keyword{rsi} diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp new file mode 100644 index 00000000..f3ff4a2f --- /dev/null +++ b/src/RcppExports.cpp @@ -0,0 +1,54 @@ +// Generated by using Rcpp::compileAttributes() -> do not edit by hand +// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 + +#include + +using namespace Rcpp; + +// rsi_calc_S +int rsi_calc_S(std::vector x, bool include_I); +RcppExport SEXP _AMR_rsi_calc_S(SEXP xSEXP, SEXP include_ISEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< std::vector >::type x(xSEXP); + Rcpp::traits::input_parameter< bool >::type include_I(include_ISEXP); + rcpp_result_gen = Rcpp::wrap(rsi_calc_S(x, include_I)); + return rcpp_result_gen; +END_RCPP +} +// rsi_calc_R +int rsi_calc_R(std::vector x, bool include_I); +RcppExport SEXP _AMR_rsi_calc_R(SEXP xSEXP, SEXP include_ISEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< std::vector >::type x(xSEXP); + Rcpp::traits::input_parameter< bool >::type include_I(include_ISEXP); + rcpp_result_gen = Rcpp::wrap(rsi_calc_R(x, include_I)); + return rcpp_result_gen; +END_RCPP +} +// rsi_calc_total +int rsi_calc_total(std::vector x); +RcppExport SEXP _AMR_rsi_calc_total(SEXP xSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< std::vector >::type x(xSEXP); + rcpp_result_gen = Rcpp::wrap(rsi_calc_total(x)); + return rcpp_result_gen; +END_RCPP +} + +static const R_CallMethodDef CallEntries[] = { + {"_AMR_rsi_calc_S", (DL_FUNC) &_AMR_rsi_calc_S, 2}, + {"_AMR_rsi_calc_R", (DL_FUNC) &_AMR_rsi_calc_R, 2}, + {"_AMR_rsi_calc_total", (DL_FUNC) &_AMR_rsi_calc_total, 1}, + {NULL, NULL, 0} +}; + +RcppExport void R_init_AMR(DllInfo *dll) { + R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); + R_useDynamicSymbols(dll, FALSE); +} diff --git a/src/rsi_calc.cpp b/src/rsi_calc.cpp new file mode 100644 index 00000000..9b3f8e0d --- /dev/null +++ b/src/rsi_calc.cpp @@ -0,0 +1,29 @@ +#include +#include // for std::vector +#include // for std::less, etc +#include // for count_if + +using namespace Rcpp ; + +// [[Rcpp::export]] +int rsi_calc_S(std::vector x, bool include_I) { + if (include_I == TRUE) { + return count_if(x.begin(), x.end(), bind2nd(std::less_equal(), 2)); + } else { + return count_if(x.begin(), x.end(), bind2nd(std::less(), 2)); + } +} + +// [[Rcpp::export]] +int rsi_calc_R(std::vector x, bool include_I) { + if (include_I == TRUE) { + return count_if(x.begin(), x.end(), bind2nd(std::greater_equal(), 2)); + } else { + return count_if(x.begin(), x.end(), bind2nd(std::greater(), 2)); + } +} + +// [[Rcpp::export]] +int rsi_calc_total(std::vector x) { + return count_if(x.begin(), x.end(), bind2nd(std::less_equal(), 3)); +} diff --git a/tests/testthat/test-rsi_analysis.R b/tests/testthat/test-resistance.R similarity index 61% rename from tests/testthat/test-rsi_analysis.R rename to tests/testthat/test-resistance.R index 215dfccb..d6862d76 100755 --- a/tests/testthat/test-rsi_analysis.R +++ b/tests/testthat/test-resistance.R @@ -1,46 +1,29 @@ -context("rsi_analysis.R") +context("resistance.R") + +test_that("resistance works", { + # amox resistance in `septic_patients` should be around 57.56% + expect_equal(resistance(septic_patients$amox), 0.5756, tolerance = 0.0001) + expect_equal(susceptibility(septic_patients$amox), 1 - 0.5756, tolerance = 0.0001) -test_that("rsi works", { - # amox resistance in `septic_patients` should be around 53.86% - expect_equal(rsi(septic_patients$amox), 0.5756, tolerance = 0.0001) - expect_equal(rsi(septic_patients$amox), 0.5756, tolerance = 0.0001) - expect_equal(rsi_df(septic_patients, - ab = "amox", - info = FALSE), - 0.5756, - tolerance = 0.0001) # pita+genta susceptibility around 98.09% - expect_equal(rsi(septic_patients$pita, - septic_patients$gent, - interpretation = "S", - info = TRUE), + expect_equal(susceptibility(septic_patients$pita, + septic_patients$gent), 0.9809, tolerance = 0.0001) - expect_equal(rsi_df(septic_patients, - ab = c("pita", "gent"), - interpretation = "S", - info = FALSE), + expect_equal(suppressWarnings(rsi(septic_patients$pita, + septic_patients$gent, + interpretation = "S")), 0.9809, tolerance = 0.0001) - # mero+pita+genta susceptibility around 98.58% - expect_equal(rsi_df(septic_patients, - ab = c("mero", "pita", "gent"), - interpretation = "IS", - info = FALSE), - 0.9858, - tolerance = 0.0001) # count of cases expect_equal(septic_patients %>% group_by(hospital_id) %>% - summarise(cipro_S = rsi(cipr, interpretation = "S", - as_percent = TRUE, warning = FALSE), + summarise(cipro_p = susceptibility(cipr, as_percent = TRUE), cipro_n = n_rsi(cipr), - genta_S = rsi(gent, interpretation = "S", - as_percent = TRUE, warning = FALSE), + genta_p = susceptibility(gent, as_percent = TRUE), genta_n = n_rsi(gent), - combination_S = rsi(cipr, gent, interpretation = "S", - as_percent = TRUE, warning = FALSE), + combination_p = susceptibility(cipr, gent, as_percent = TRUE), combination_n = n_rsi(cipr, gent)) %>% pull(combination_n), c(138, 474, 170, 464, 183))