mirror of
https://github.com/msberends/AMR.git
synced 2025-01-13 12:51:38 +01:00
speed improvements
This commit is contained in:
parent
715a7630ca
commit
a5a4354651
@ -1,6 +1,6 @@
|
||||
Package: AMR
|
||||
Version: 0.2.0.9012
|
||||
Date: 2018-07-16
|
||||
Date: 2018-07-17
|
||||
Title: Antimicrobial Resistance Analysis
|
||||
Authors@R: c(
|
||||
person(
|
||||
|
@ -94,7 +94,6 @@ exportMethods(skewness.matrix)
|
||||
exportMethods(summary.mic)
|
||||
exportMethods(summary.rsi)
|
||||
importFrom(Rcpp,evalCpp)
|
||||
importFrom(broom,tidy)
|
||||
importFrom(clipr,read_clip_tbl)
|
||||
importFrom(clipr,write_clip)
|
||||
importFrom(curl,nslookup)
|
||||
@ -141,7 +140,6 @@ importFrom(rvest,html_table)
|
||||
importFrom(stats,complete.cases)
|
||||
importFrom(stats,fivenum)
|
||||
importFrom(stats,mad)
|
||||
importFrom(stats,na.omit)
|
||||
importFrom(stats,pchisq)
|
||||
importFrom(stats,sd)
|
||||
importFrom(tibble,tibble)
|
||||
|
2
NEWS.md
2
NEWS.md
@ -19,7 +19,7 @@ ratio(c(772, 1611, 737), ratio = "1:2:1")
|
||||
* Function `top_freq` function to return the top/below *n* items as vector
|
||||
* Header of frequency tables now also show Mean Absolute Deviaton (MAD) and Interquartile Range (IQR)
|
||||
* Possibility to globally set the default for the amount of items to print, with `options(max.print.freq = n)` where *n* is your preset value
|
||||
* Functions `clipboard_import` and `clipboard_export` as helper functions to quickly copy and paste from/to software like Excel and SPSS
|
||||
* Functions `clipboard_import` and `clipboard_export` as helper functions to quickly copy and paste from/to software like Excel and SPSS. These functions use the `clipr` package, but are a little altered to also support headless Linux servers (so you can use it in RStudio Server).
|
||||
|
||||
#### Changed
|
||||
* Pretty printing for tibbles removed as it is not really the scope of this package
|
||||
|
@ -9,7 +9,3 @@ rsi_calc_R <- function(x, include_I) {
|
||||
.Call(`_AMR_rsi_calc_R`, x, include_I)
|
||||
}
|
||||
|
||||
rsi_calc_total <- function(x) {
|
||||
.Call(`_AMR_rsi_calc_total`, x)
|
||||
}
|
||||
|
||||
|
@ -72,7 +72,12 @@ clipboard_import <- function(sep = '\t',
|
||||
encoding = "UTF-8",
|
||||
info = TRUE) {
|
||||
|
||||
# this will fail when clipr is not available
|
||||
if (!clipr::clipr_available() & Sys.info()['sysname'] == "Linux") {
|
||||
# try to support on X11, by setting the R variable DISPLAY
|
||||
Sys.setenv(DISPLAY = "localhost:10.0")
|
||||
}
|
||||
|
||||
# this will fail when clipr is (still) not available
|
||||
import_tbl <- clipr::read_clip_tbl(file = file,
|
||||
sep = sep,
|
||||
header = header,
|
||||
@ -134,6 +139,11 @@ clipboard_export <- function(x,
|
||||
header = TRUE,
|
||||
info = TRUE) {
|
||||
|
||||
if (!clipr::clipr_available() & Sys.info()['sysname'] == "Linux") {
|
||||
# try to support on X11, by setting the R variable DISPLAY
|
||||
Sys.setenv(DISPLAY = "localhost:10.0")
|
||||
}
|
||||
|
||||
clipr::write_clip(content = x,
|
||||
na = na,
|
||||
sep = sep,
|
||||
|
@ -136,10 +136,11 @@ resistance <- function(ab,
|
||||
|
||||
if (!is.rsi(ab)) {
|
||||
x <- as.rsi(ab)
|
||||
warning("Increase speed by transforming to class `rsi` on beforehand: df %>% mutate_at(vars(col10:col20), as.rsi)")
|
||||
} else {
|
||||
x <- ab
|
||||
}
|
||||
total <- .Call(`_AMR_rsi_calc_total`, x)
|
||||
total <- length(x) - sum(is.na(x)) # faster than C++
|
||||
if (total < minimum) {
|
||||
return(NA)
|
||||
}
|
||||
@ -173,8 +174,10 @@ susceptibility <- function(ab1,
|
||||
stop('`as_percent` must be logical', call. = FALSE)
|
||||
}
|
||||
|
||||
print_warning <- FALSE
|
||||
if (!is.rsi(ab1)) {
|
||||
ab1 <- as.rsi(ab1)
|
||||
print_warning <- TRUE
|
||||
}
|
||||
if (!is.null(ab2)) {
|
||||
if (NCOL(ab2) > 1) {
|
||||
@ -182,6 +185,7 @@ susceptibility <- function(ab1,
|
||||
}
|
||||
if (!is.rsi(ab2)) {
|
||||
ab2 <- as.rsi(ab2)
|
||||
print_warning <- TRUE
|
||||
}
|
||||
x <- apply(X = data.frame(ab1 = as.integer(ab1),
|
||||
ab2 = as.integer(ab2)),
|
||||
@ -190,12 +194,16 @@ susceptibility <- function(ab1,
|
||||
} else {
|
||||
x <- ab1
|
||||
}
|
||||
total <- .Call(`_AMR_rsi_calc_total`, x)
|
||||
total <- length(x) - sum(is.na(x))
|
||||
if (total < minimum) {
|
||||
return(NA)
|
||||
}
|
||||
found <- .Call(`_AMR_rsi_calc_S`, x, include_I)
|
||||
|
||||
if (print_warning == TRUE) {
|
||||
warning("Increase speed by transforming to class `rsi` on beforehand: df %>% mutate_at(vars(col10:col20), as.rsi)")
|
||||
}
|
||||
|
||||
if (as_percent == TRUE) {
|
||||
percent(found / total, force_zero = TRUE)
|
||||
} else {
|
||||
@ -219,14 +227,10 @@ n_rsi <- function(ab1, ab2 = NULL) {
|
||||
if (!is.rsi(ab2)) {
|
||||
ab2 <- as.rsi(ab2)
|
||||
}
|
||||
x <- apply(X = data.frame(ab1 = as.integer(ab1),
|
||||
ab2 = as.integer(ab2)),
|
||||
MARGIN = 1,
|
||||
FUN = min)
|
||||
sum(!is.na(ab1) & !is.na(ab2))
|
||||
} else {
|
||||
x <- ab1
|
||||
sum(!is.na(ab1))
|
||||
}
|
||||
.Call(`_AMR_rsi_calc_total`, x)
|
||||
}
|
||||
|
||||
#' @rdname resistance
|
||||
@ -370,24 +374,8 @@ rsi_df <- function(tbl,
|
||||
all_vars(. %in% c("S", "R", "I"))) %>%
|
||||
nrow()
|
||||
|
||||
} else if (length(ab) == 3) {
|
||||
if (interpretations_to_check != 'S') {
|
||||
warning('`interpretation` not set to S or I/S, albeit analysing a combination therapy.', call. = FALSE)
|
||||
}
|
||||
numerator <- tbl %>%
|
||||
filter_at(vars(ab[1], ab[2], ab[3]),
|
||||
any_vars(. == interpretations_to_check)) %>%
|
||||
filter_at(vars(ab[1], ab[2], ab[3]),
|
||||
all_vars(. %in% c("S", "R", "I"))) %>%
|
||||
nrow()
|
||||
|
||||
denominator <- tbl %>%
|
||||
filter_at(vars(ab[1], ab[2], ab[3]),
|
||||
all_vars(. %in% c("S", "R", "I"))) %>%
|
||||
nrow()
|
||||
|
||||
} else {
|
||||
stop('Maximum of 3 drugs allowed.')
|
||||
stop('Maximum of 2 drugs allowed.')
|
||||
}
|
||||
|
||||
# build text part
|
||||
|
123
R/trends.R
123
R/trends.R
@ -1,123 +0,0 @@
|
||||
#' Detect trends using Machine Learning
|
||||
#'
|
||||
#' Test text
|
||||
#' @param data a \code{data.frame}
|
||||
#' @param threshold_unique do not analyse more unique \code{threshold_unique} items per variable
|
||||
#' @param na.rm a logical value indicating whether \code{NA} values should be stripped before the computation proceeds.
|
||||
#' @param info print relevant combinations to console
|
||||
#' @return A \code{list} with class \code{"trends"}
|
||||
#' @importFrom stats na.omit
|
||||
#' @importFrom broom tidy
|
||||
# @export
|
||||
trends <- function(data, threshold_unique = 30, na.rm = TRUE, info = TRUE) {
|
||||
|
||||
cols <- colnames(data)
|
||||
relevant <- list()
|
||||
count <- 0
|
||||
for (x in 1:length(cols)) {
|
||||
for (y in 1:length(cols)) {
|
||||
if (x == y) {
|
||||
next
|
||||
}
|
||||
if (n_distinct(data[, x]) > threshold_unique | n_distinct(data[, y]) > threshold_unique) {
|
||||
next
|
||||
}
|
||||
count <- count + 1
|
||||
df <- data %>%
|
||||
group_by_at(c(cols[x], cols[y])) %>%
|
||||
summarise(n = n())
|
||||
n <- df %>% pull(n)
|
||||
# linear regression model
|
||||
lin <- stats::lm(1:length(n) ~ n, na.action = ifelse(na.rm == TRUE, na.omit, NULL))
|
||||
|
||||
res <- list(
|
||||
df = df,
|
||||
x = cols[x],
|
||||
y = cols[y],
|
||||
m = base::mean(n, na.rm = na.rm),
|
||||
sd = stats::sd(n, na.rm = na.rm),
|
||||
cv = cv(n, na.rm = na.rm),
|
||||
cqv = cqv(n, na.rm = na.rm),
|
||||
kurtosis = kurtosis(n, na.rm = na.rm),
|
||||
skewness = skewness(n, na.rm = na.rm),
|
||||
lin.p = broom::tidy(lin)[2, 'p.value']
|
||||
#binom.p <- broom::tidy(binom)[2, 'p.value']
|
||||
)
|
||||
|
||||
include <- TRUE
|
||||
# ML part
|
||||
if (res$cv > 0.25) {
|
||||
res$reason <- "cv > 0.25"
|
||||
} else if (res$cqv > 0.75) {
|
||||
res$reason <- "cqv > 0.75"
|
||||
} else {
|
||||
include <- FALSE
|
||||
}
|
||||
|
||||
if (include == TRUE) {
|
||||
relevant <- c(relevant, list(res))
|
||||
if (info == TRUE) {
|
||||
# minus one because the whole data will be added later
|
||||
cat(paste0("[", length(relevant), "]"), "Relevant:", cols[x], "vs.", cols[y], "\n")
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
cat("Total of", count, "combinations analysed;", length(relevant), "seem relevant.\n")
|
||||
class(relevant) <- 'trends'
|
||||
relevant <- c(relevant, list(data = data))
|
||||
relevant
|
||||
|
||||
}
|
||||
|
||||
# @exportMethod print.trends
|
||||
# @export
|
||||
#' @noRd
|
||||
print.trends <- function(x, ...) {
|
||||
cat(length(x) - 1, "relevant trends, out of", length(x$data)^2, "\n")
|
||||
}
|
||||
|
||||
# @exportMethod plot.trends
|
||||
# @export
|
||||
#' @noRd
|
||||
# plot.trends <- function(x, n = NULL, ...) {
|
||||
# if (is.null(n)) {
|
||||
# oask <- devAskNewPage(TRUE)
|
||||
# on.exit(devAskNewPage(oask))
|
||||
# n <- c(1:(length(x) - 1))
|
||||
# } else {
|
||||
# if (n > length(x) - 1) {
|
||||
# stop('trend unavailable, max is ', length(x) - 1, call. = FALSE)
|
||||
# }
|
||||
# oask <- NULL
|
||||
# }
|
||||
# for (i in n) {
|
||||
# data <- x[[i]]$df
|
||||
# if (as.character(i) %like% '1$') {
|
||||
# suffix <- "st"
|
||||
# } else if (as.character(i) %like% '2$') {
|
||||
# suffix <- "nd"
|
||||
# } else if (as.character(i) %like% '3$') {
|
||||
# suffix <- "rd"
|
||||
# } else {
|
||||
# suffix <- "th"
|
||||
# }
|
||||
# if (!is.null(oask)) {
|
||||
# cat(paste("Coming up:", colnames(data)[1], "vs.", colnames(data)[2]), "\n")
|
||||
# }
|
||||
# print(
|
||||
# ggplot(
|
||||
# data,
|
||||
# aes_string(x = colnames(data)[1],
|
||||
# y = colnames(data)[3],
|
||||
# group = colnames(data)[2],
|
||||
# fill = colnames(data)[2])) +
|
||||
# geom_col(position = "dodge") +
|
||||
# theme_minimal() +
|
||||
# labs(title = paste(colnames(data)[1], "vs.", colnames(data)[2]),
|
||||
# subtitle = paste0(i, suffix, " trend"))
|
||||
# )
|
||||
# }
|
||||
# }
|
@ -1,23 +0,0 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/trends.R
|
||||
\name{trends}
|
||||
\alias{trends}
|
||||
\title{Detect trends using Machine Learning}
|
||||
\usage{
|
||||
trends(data, threshold_unique = 30, na.rm = TRUE, info = TRUE)
|
||||
}
|
||||
\arguments{
|
||||
\item{data}{a \code{data.frame}}
|
||||
|
||||
\item{threshold_unique}{do not analyse more unique \code{threshold_unique} items per variable}
|
||||
|
||||
\item{na.rm}{a logical value indicating whether \code{NA} values should be stripped before the computation proceeds.}
|
||||
|
||||
\item{info}{print relevant combinations to console}
|
||||
}
|
||||
\value{
|
||||
A \code{list} with class \code{"trends"}
|
||||
}
|
||||
\description{
|
||||
Test text
|
||||
}
|
@ -29,22 +29,10 @@ BEGIN_RCPP
|
||||
return rcpp_result_gen;
|
||||
END_RCPP
|
||||
}
|
||||
// rsi_calc_total
|
||||
int rsi_calc_total(DoubleVector x);
|
||||
RcppExport SEXP _AMR_rsi_calc_total(SEXP xSEXP) {
|
||||
BEGIN_RCPP
|
||||
Rcpp::RObject rcpp_result_gen;
|
||||
Rcpp::RNGScope rcpp_rngScope_gen;
|
||||
Rcpp::traits::input_parameter< DoubleVector >::type x(xSEXP);
|
||||
rcpp_result_gen = Rcpp::wrap(rsi_calc_total(x));
|
||||
return rcpp_result_gen;
|
||||
END_RCPP
|
||||
}
|
||||
|
||||
static const R_CallMethodDef CallEntries[] = {
|
||||
{"_AMR_rsi_calc_S", (DL_FUNC) &_AMR_rsi_calc_S, 2},
|
||||
{"_AMR_rsi_calc_R", (DL_FUNC) &_AMR_rsi_calc_R, 2},
|
||||
{"_AMR_rsi_calc_total", (DL_FUNC) &_AMR_rsi_calc_total, 1},
|
||||
{NULL, NULL, 0}
|
||||
};
|
||||
|
||||
|
@ -1,28 +1,21 @@
|
||||
#include <Rcpp.h>
|
||||
#include <functional> // for std::less, etc
|
||||
#include <algorithm> // for count_if
|
||||
// #include <functional> // for std::less_equal and std::greater_equal
|
||||
// #include <algorithm> // for count_if
|
||||
|
||||
using namespace Rcpp;
|
||||
|
||||
// [[Rcpp::export]]
|
||||
int rsi_calc_S(DoubleVector x, bool include_I) {
|
||||
if (include_I == TRUE) {
|
||||
return count_if(x.begin(), x.end(), bind2nd(std::less_equal<double>(), 2));
|
||||
} else {
|
||||
return count_if(x.begin(), x.end(), bind2nd(std::less<double>(), 2));
|
||||
}
|
||||
return count_if(x.begin(),
|
||||
x.end(),
|
||||
bind2nd(std::less_equal<double>(),
|
||||
1 + include_I));
|
||||
}
|
||||
|
||||
// [[Rcpp::export]]
|
||||
int rsi_calc_R(DoubleVector x, bool include_I) {
|
||||
if (include_I == TRUE) {
|
||||
return count_if(x.begin(), x.end(), bind2nd(std::greater_equal<double>(), 2));
|
||||
} else {
|
||||
return count_if(x.begin(), x.end(), bind2nd(std::greater<double>(), 2));
|
||||
}
|
||||
}
|
||||
|
||||
// [[Rcpp::export]]
|
||||
int rsi_calc_total(DoubleVector x) {
|
||||
return count_if(x.begin(), x.end(), bind2nd(std::less_equal<double>(), 3));
|
||||
return count_if(x.begin(),
|
||||
x.end(),
|
||||
bind2nd(std::greater_equal<double>(),
|
||||
3 - include_I));
|
||||
}
|
||||
|
@ -1,13 +1,19 @@
|
||||
context("clipboard.R")
|
||||
|
||||
test_that("clipboard works", {
|
||||
|
||||
if (!clipr::clipr_available() & Sys.info()['sysname'] == "Linux") {
|
||||
# try to support on X11, by setting the R variable DISPLAY
|
||||
Sys.setenv(DISPLAY = "localhost:10.0")
|
||||
}
|
||||
|
||||
skip_if_not(clipr::clipr_available())
|
||||
|
||||
clipboard_export(antibiotics)
|
||||
expect_identical(antibiotics,
|
||||
clipboard_import(date_format = "yyyy-mm-dd"))
|
||||
expect_identical(as.data.frame(antibiotics, stringsAsFactors = FALSE),
|
||||
clipboard_import())
|
||||
|
||||
clipboard_export(septic_patients[1:100,])
|
||||
expect_identical(tbl_parse_guess(septic_patients[1:100,]),
|
||||
clipboard_import(guess_col_types = TRUE))
|
||||
expect_identical(as.data.frame(tbl_parse_guess(septic_patients[1:100,]), stringsAsFactors = FALSE),
|
||||
clipboard_import(guess_col_types = TRUE, stringsAsFactors = FALSE))
|
||||
})
|
||||
|
@ -27,6 +27,12 @@ test_that("resistance works", {
|
||||
combination_n = n_rsi(cipr, gent)) %>%
|
||||
pull(combination_n),
|
||||
c(138, 474, 170, 464, 183))
|
||||
|
||||
expect_warning(resistance(as.character(septic_patients$amcl)))
|
||||
expect_warning(susceptibility(as.character(septic_patients$amcl)))
|
||||
expect_warning(susceptibility(as.character(septic_patients$amcl,
|
||||
septic_patients$gent)))
|
||||
|
||||
})
|
||||
|
||||
test_that("prediction of rsi works", {
|
||||
|
Loading…
Reference in New Issue
Block a user