speed improvements

2026-02-24 08:29:09 +01:00 · 2018-07-17 10:32:26 +02:00
parent 715a7630ca
commit a5a4354651
12 changed files with 52 additions and 213 deletions
--- a/2
+++ b/2
@@ -1,6 +1,6 @@
 Package: AMR
 Version: 0.2.0.9012
-Date: 2018-07-16
+Date: 2018-07-17
 Title: Antimicrobial Resistance Analysis
 Authors@R: c(
    person(
--- a/2
+++ b/2
@@ -94,7 +94,6 @@ exportMethods(skewness.matrix)
 exportMethods(summary.mic)
 exportMethods(summary.rsi)
 importFrom(Rcpp,evalCpp)
 importFrom(broom,tidy)
 importFrom(clipr,read_clip_tbl)
 importFrom(clipr,write_clip)
 importFrom(curl,nslookup)
@@ -141,7 +140,6 @@ importFrom(rvest,html_table)
 importFrom(stats,complete.cases)
 importFrom(stats,fivenum)
 importFrom(stats,mad)
 importFrom(stats,na.omit)
 importFrom(stats,pchisq)
 importFrom(stats,sd)
 importFrom(tibble,tibble)
--- a/NEWS.md
+++ b/NEWS.md
@@ -19,7 +19,7 @@ ratio(c(772, 1611, 737), ratio = "1:2:1")
  * Function `top_freq` to return the top/bottom *n* items as a vector
  * Header of frequency tables now also shows Mean Absolute Deviation (MAD) and Interquartile Range (IQR)
  * Possibility to globally set the default for the amount of items to print, with `options(max.print.freq = n)` where *n* is your preset value
-* Functions `clipboard_import` and `clipboard_export` as helper functions to quickly copy and paste from/to software like Excel and SPSS
+* Functions `clipboard_import` and `clipboard_export` as helper functions to quickly copy and paste from/to software like Excel and SPSS. These functions use the `clipr` package, but are slightly altered to also support headless Linux servers (so you can use them in RStudio Server).
 #### Changed
 * Pretty printing for tibbles removed as it is not really the scope of this package
--- a/R/RcppExports.R
+++ b/R/RcppExports.R
@@ -9,7 +9,3 @@ rsi_calc_R <- function(x, include_I) {
    .Call(`_AMR_rsi_calc_R`, x, include_I)
 }
 rsi_calc_total <- function(x) {
    .Call(`_AMR_rsi_calc_total`, x)
 }
--- a/R/clipboard.R
+++ b/R/clipboard.R
@@ -72,7 +72,12 @@ clipboard_import <- function(sep = '\t',
                             encoding = "UTF-8",
                             info = TRUE) {
-  # this will fail when clipr is not available
+  if (!clipr::clipr_available() & Sys.info()['sysname'] == "Linux") {
    # try to support on X11, by setting the R variable DISPLAY
    Sys.setenv(DISPLAY = "localhost:10.0")
  }
  # this will fail when clipr is (still) not available
  import_tbl <- clipr::read_clip_tbl(file = file,
                                     sep = sep,
                                     header = header,
@@ -134,6 +139,11 @@ clipboard_export <- function(x,
                             header = TRUE,
                             info = TRUE) {
  if (!clipr::clipr_available() & Sys.info()['sysname'] == "Linux") {
    # try to support on X11, by setting the R variable DISPLAY
    Sys.setenv(DISPLAY = "localhost:10.0")
  }
  clipr::write_clip(content = x,
                    na = na,
                    sep = sep,
--- a/R/resistance.R
+++ b/R/resistance.R
@@ -136,10 +136,11 @@ resistance <- function(ab,
  if (!is.rsi(ab)) {
    x <- as.rsi(ab)
    warning("Increase speed by transforming to class `rsi` on beforehand: df %>% mutate_at(vars(col10:col20), as.rsi)")
  } else {
    x <- ab
  }
-  total <-  .Call(`_AMR_rsi_calc_total`, x)
+  total <- length(x) - sum(is.na(x)) # faster than C++
  if (total < minimum) {
    return(NA)
  }
@@ -173,8 +174,10 @@ susceptibility <- function(ab1,
    stop('`as_percent` must be logical', call. = FALSE)
  }
  print_warning <- FALSE
  if (!is.rsi(ab1)) {
    ab1 <- as.rsi(ab1)
    print_warning <- TRUE
  }
  if (!is.null(ab2)) {
    if (NCOL(ab2) > 1) {
@@ -182,6 +185,7 @@ susceptibility <- function(ab1,
    }
    if (!is.rsi(ab2)) {
      ab2 <- as.rsi(ab2)
      print_warning <- TRUE
    }
    x <- apply(X = data.frame(ab1 = as.integer(ab1),
                              ab2 = as.integer(ab2)),
@@ -190,12 +194,16 @@ susceptibility <- function(ab1,
  } else {
    x <- ab1
  }
-  total <-  .Call(`_AMR_rsi_calc_total`, x)
+  total <- length(x) - sum(is.na(x))
  if (total < minimum) {
    return(NA)
  }
  found <- .Call(`_AMR_rsi_calc_S`, x, include_I)
  if (print_warning == TRUE) {
    warning("Increase speed by transforming to class `rsi` on beforehand: df %>% mutate_at(vars(col10:col20), as.rsi)")
  }
  if (as_percent == TRUE) {
    percent(found / total, force_zero = TRUE)
  } else {
@@ -219,14 +227,10 @@ n_rsi <- function(ab1, ab2 = NULL) {
    if (!is.rsi(ab2)) {
      ab2 <- as.rsi(ab2)
    }
-    x <- apply(X = data.frame(ab1 = as.integer(ab1),
+    sum(!is.na(ab1) & !is.na(ab2))
                              ab2 = as.integer(ab2)),
               MARGIN = 1,
               FUN = min)
  } else {
-    x <- ab1
+    sum(!is.na(ab1))
  }
  .Call(`_AMR_rsi_calc_total`, x)
 }
 #' @rdname resistance
@@ -370,24 +374,8 @@ rsi_df <- function(tbl,
                all_vars(. %in% c("S", "R", "I"))) %>%
      nrow()
  } else if (length(ab) == 3) {
    if (interpretations_to_check != 'S') {
      warning('`interpretation` not set to S or I/S, albeit analysing a combination therapy.', call. = FALSE)
    }
    numerator <- tbl %>%
      filter_at(vars(ab[1], ab[2], ab[3]),
                any_vars(. == interpretations_to_check)) %>%
      filter_at(vars(ab[1], ab[2], ab[3]),
                all_vars(. %in% c("S", "R", "I"))) %>%
      nrow()
    denominator <- tbl %>%
      filter_at(vars(ab[1], ab[2], ab[3]),
                all_vars(. %in% c("S", "R", "I"))) %>%
      nrow()
  } else {
-    stop('Maximum of 3 drugs allowed.')
+    stop('Maximum of 2 drugs allowed.')
  }
  # build text part
--- a/R/trends.R
+++ b/R/trends.R
@@ -1,123 +0,0 @@
 #' Detect trends using Machine Learning
 #'
 #' For every ordered pair of columns in \code{data}, the rows are counted per
 #' combination of values. A pair is reported as relevant when \code{cv()} of
 #' these counts exceeds 0.25 or, otherwise, when \code{cqv()} exceeds 0.75.
 #' @param data a \code{data.frame}
 #' @param threshold_unique do not analyse more unique \code{threshold_unique} items per variable
 #' @param na.rm a logical value indicating whether \code{NA} values should be stripped before the computation proceeds.
 #' @param info print relevant combinations to console
 #' @return A \code{list} with class \code{"trends"}: one element per relevant combination, followed by the element \code{data} holding the input
 #' @importFrom stats na.omit
 #' @importFrom broom tidy
 # @export
 trends <- function(data, threshold_unique = 30, na.rm = TRUE, info = TRUE) {
  cols <- colnames(data)
  relevant <- list()
  count <- 0
  # ifelse() cannot choose between a function and NULL (it fails for
  # na.rm = FALSE), so the NA handler for lm() is picked with a plain if/else
  na_action <- if (isTRUE(na.rm)) stats::na.omit else NULL
  # the number of unique values per column is the same for every pair, so it
  # is determined once instead of inside the nested loops
  too_many_unique <- vapply(seq_along(cols),
                            function(i) n_distinct(data[, i]) > threshold_unique,
                            logical(1))
  # seq_along() keeps the loops empty when `data` has no columns
  for (x in seq_along(cols)) {
    for (y in seq_along(cols)) {
      if (x == y || too_many_unique[x] || too_many_unique[y]) {
        next
      }
      count <- count + 1
      df <- data %>%
        group_by_at(c(cols[x], cols[y])) %>%
        summarise(n = n())
      counts <- df %>% pull(n)
      # linear regression model
      lin <- stats::lm(seq_along(counts) ~ counts, na.action = na_action)
      res <- list(
        df = df,
        x = cols[x],
        y = cols[y],
        m = base::mean(counts, na.rm = na.rm),
        sd = stats::sd(counts, na.rm = na.rm),
        cv = cv(counts, na.rm = na.rm),
        cqv = cqv(counts, na.rm = na.rm),
        kurtosis = kurtosis(counts, na.rm = na.rm),
        skewness = skewness(counts, na.rm = na.rm),
        lin.p = broom::tidy(lin)[2, 'p.value']
      )
      # decide whether this combination is relevant; isTRUE() treats a
      # missing statistic as "not relevant" instead of failing in `if`
      reason <- if (isTRUE(res$cv > 0.25)) {
        "cv > 0.25"
      } else if (isTRUE(res$cqv > 0.75)) {
        "cqv > 0.75"
      } else {
        NULL
      }
      if (is.null(reason)) {
        next
      }
      res$reason <- reason
      relevant <- c(relevant, list(res))
      if (isTRUE(info)) {
        cat(paste0("[", length(relevant), "]"), "Relevant:", cols[x], "vs.", cols[y], "\n")
      }
    }
  }
  cat("Total of", count, "combinations analysed;", length(relevant), "seem relevant.\n")
  # c() drops the class attribute, so the input is appended first and the
  # class is set afterwards
  relevant <- c(relevant, list(data = data))
  class(relevant) <- 'trends'
  relevant
 }
 # @exportMethod print.trends
 # @export
 #' @noRd
 # Print a one-line summary of a `trends` object: the number of elements
 # besides `data`, and the squared number of columns of `data`.
 # Returns `x` invisibly, as is conventional for print methods.
 print.trends <- function(x, ...) {
  # `[[` matches the element name exactly, whereas `$` allows partial matching
  cat(length(x) - 1, "relevant trends, out of", length(x[["data"]])^2, "\n")
  invisible(x)
 }
 # @exportMethod plot.trends
 # @export
 #' @noRd
 # plot.trends <- function(x, n = NULL, ...) {
 #   if (is.null(n)) {
 #     oask <- devAskNewPage(TRUE)
 #     on.exit(devAskNewPage(oask))
 #     n <- c(1:(length(x) - 1))
 #   } else {
 #     if (n > length(x) - 1) {
 #       stop('trend unavailable, max is ', length(x) - 1, call. = FALSE)
 #     }
 #     oask <- NULL
 #   }
 #   for (i in n) {
 #     data <- x[[i]]$df
 #     if (as.character(i) %like% '1$') {
 #       suffix <- "st"
 #     } else if (as.character(i) %like% '2$') {
 #       suffix <- "nd"
 #     } else if (as.character(i) %like% '3$') {
 #       suffix <- "rd"
 #     } else {
 #       suffix <- "th"
 #     }
 #     if (!is.null(oask)) {
 #       cat(paste("Coming up:", colnames(data)[1], "vs.", colnames(data)[2]), "\n")
 #     }
 #     print(
 #       ggplot(
 #         data,
 #         aes_string(x = colnames(data)[1],
 #                    y = colnames(data)[3],
 #                    group = colnames(data)[2],
 #                    fill = colnames(data)[2])) +
 #         geom_col(position = "dodge") +
 #         theme_minimal() +
 #         labs(title = paste(colnames(data)[1], "vs.", colnames(data)[2]),
 #              subtitle = paste0(i, suffix, " trend"))
 #     )
 #   }
 # }
--- a/man/trends.Rd
+++ b/man/trends.Rd
@@ -1,23 +0,0 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/trends.R
 \name{trends}
 \alias{trends}
 \title{Detect trends using Machine Learning}
 \usage{
 trends(data, threshold_unique = 30, na.rm = TRUE, info = TRUE)
 }
 \arguments{
 \item{data}{a \code{data.frame}}
 \item{threshold_unique}{do not analyse more unique \code{threshold_unique} items per variable}
 \item{na.rm}{a logical value indicating whether \code{NA} values should be stripped before the computation proceeds.}
 \item{info}{print relevant combinations to console}
 }
 \value{
 A \code{list} with class \code{"trends"}
 }
 \description{
 Test text
 }
--- a/src/RcppExports.cpp
+++ b/src/RcppExports.cpp
@@ -29,22 +29,10 @@ BEGIN_RCPP
    return rcpp_result_gen;
 END_RCPP
 }
 // rsi_calc_total
 int rsi_calc_total(DoubleVector x);
 RcppExport SEXP _AMR_rsi_calc_total(SEXP xSEXP) {
 BEGIN_RCPP
    Rcpp::RObject rcpp_result_gen;
    Rcpp::RNGScope rcpp_rngScope_gen;
    Rcpp::traits::input_parameter< DoubleVector >::type x(xSEXP);
    rcpp_result_gen = Rcpp::wrap(rsi_calc_total(x));
    return rcpp_result_gen;
 END_RCPP
 }
 static const R_CallMethodDef CallEntries[] = {
    {"_AMR_rsi_calc_S", (DL_FUNC) &_AMR_rsi_calc_S, 2},
    {"_AMR_rsi_calc_R", (DL_FUNC) &_AMR_rsi_calc_R, 2},
    {"_AMR_rsi_calc_total", (DL_FUNC) &_AMR_rsi_calc_total, 1},
    {NULL, NULL, 0}
 };
--- a/src/rsi_calc.cpp
+++ b/src/rsi_calc.cpp
@@ -1,28 +1,21 @@
 #include <Rcpp.h>
-#include <functional>    // for std::less, etc
+// #include <functional> // for std::less_equal and std::greater_equal
-#include <algorithm>     // for count_if
+// #include <algorithm>  // for count_if
 using namespace Rcpp;
 // [[Rcpp::export]]
 int rsi_calc_S(DoubleVector x, bool include_I) {
-  if (include_I == TRUE) {
+  return count_if(x.begin(),
-    return count_if(x.begin(), x.end(), bind2nd(std::less_equal<double>(), 2));
+                  x.end(),
-  } else {
+                  bind2nd(std::less_equal<double>(),
-    return count_if(x.begin(), x.end(), bind2nd(std::less<double>(), 2));
+                          1 + include_I));
  }
 }
 // [[Rcpp::export]]
 int rsi_calc_R(DoubleVector x, bool include_I) {
-  if (include_I == TRUE) {
+  return count_if(x.begin(),
-    return count_if(x.begin(), x.end(), bind2nd(std::greater_equal<double>(), 2));
+                  x.end(),
-  } else {
+                  bind2nd(std::greater_equal<double>(),
-    return count_if(x.begin(), x.end(), bind2nd(std::greater<double>(), 2));
+                          3 - include_I));
  }
 }
 // Count the elements of x that are less than or equal to 3.
 // NOTE(review): presumably x holds the numeric codes of an `rsi` vector, so
 // this equals the number of non-missing values - confirm against the R callers.
 // NOTE(review): std::bind2nd is deprecated since C++11 and removed in C++17.
 // [[Rcpp::export]]
 int rsi_calc_total(DoubleVector x) {
  return count_if(x.begin(), x.end(), bind2nd(std::less_equal<double>(), 3));
 }
--- a/tests/testthat/test-clipboard.R
+++ b/tests/testthat/test-clipboard.R
@@ -1,13 +1,19 @@
 context("clipboard.R")
 test_that("clipboard works", {
  if (!clipr::clipr_available() & Sys.info()['sysname'] == "Linux") {
    # try to support on X11, by setting the R variable DISPLAY
    Sys.setenv(DISPLAY = "localhost:10.0")
  }
  skip_if_not(clipr::clipr_available())
  clipboard_export(antibiotics)
-  expect_identical(antibiotics,
+  expect_identical(as.data.frame(antibiotics, stringsAsFactors = FALSE),
-                   clipboard_import(date_format = "yyyy-mm-dd"))
+                   clipboard_import())
  clipboard_export(septic_patients[1:100,])
-  expect_identical(tbl_parse_guess(septic_patients[1:100,]),
+  expect_identical(as.data.frame(tbl_parse_guess(septic_patients[1:100,]), stringsAsFactors = FALSE),
-                   clipboard_import(guess_col_types = TRUE))
+                   clipboard_import(guess_col_types = TRUE, stringsAsFactors = FALSE))
 })
--- a/tests/testthat/test-resistance.R
+++ b/tests/testthat/test-resistance.R
@@ -27,6 +27,12 @@ test_that("resistance works", {
                           combination_n = n_rsi(cipr, gent)) %>%
                 pull(combination_n),
               c(138, 474, 170, 464, 183))
  expect_warning(resistance(as.character(septic_patients$amcl)))
  expect_warning(susceptibility(as.character(septic_patients$amcl)))
  expect_warning(susceptibility(as.character(septic_patients$amcl,
                                             septic_patients$gent)))
 })
 test_that("prediction of rsi works", {