speed improvements

2025-08-24 16:22:10 +02:00 · 2018-07-17 10:32:26 +02:00
parent 715a7630ca
commit a5a4354651
12 changed files with 52 additions and 213 deletions
--- a/R/RcppExports.R
+++ b/R/RcppExports.R
@@ -9,7 +9,3 @@ rsi_calc_R <- function(x, include_I) {
    .Call(`_AMR_rsi_calc_R`, x, include_I)
 }

-rsi_calc_total <- function(x) {
-    .Call(`_AMR_rsi_calc_total`, x)
-}
-
--- a/R/clipboard.R
+++ b/R/clipboard.R
@@ -72,7 +72,12 @@ clipboard_import <- function(sep = '\t',
                             encoding = "UTF-8",
                             info = TRUE) {

-  # this will fail when clipr is not available
+  if (!clipr::clipr_available() & Sys.info()['sysname'] == "Linux") {
+    # try to support on X11, by setting the R variable DISPLAY
+    Sys.setenv(DISPLAY = "localhost:10.0")
+  }
+
+  # this will fail when clipr is (still) not available
  import_tbl <- clipr::read_clip_tbl(file = file,
                                     sep = sep,
                                     header = header,
@@ -134,6 +139,11 @@ clipboard_export <- function(x,
                             header = TRUE,
                             info = TRUE) {

+  if (!clipr::clipr_available() & Sys.info()['sysname'] == "Linux") {
+    # try to support on X11, by setting the R variable DISPLAY
+    Sys.setenv(DISPLAY = "localhost:10.0")
+  }
+
  clipr::write_clip(content = x,
                    na = na,
                    sep = sep,
--- a/R/resistance.R
+++ b/R/resistance.R
@@ -136,10 +136,11 @@ resistance <- function(ab,

  if (!is.rsi(ab)) {
    x <- as.rsi(ab)
+    warning("Increase speed by transforming to class `rsi` on beforehand: df %>% mutate_at(vars(col10:col20), as.rsi)")
  } else {
    x <- ab
  }
-  total <-  .Call(`_AMR_rsi_calc_total`, x)
+  total <- length(x) - sum(is.na(x)) # faster than C++
  if (total < minimum) {
    return(NA)
  }
@@ -173,8 +174,10 @@ susceptibility <- function(ab1,
    stop('`as_percent` must be logical', call. = FALSE)
  }

+  print_warning <- FALSE
  if (!is.rsi(ab1)) {
    ab1 <- as.rsi(ab1)
+    print_warning <- TRUE
  }
  if (!is.null(ab2)) {
    if (NCOL(ab2) > 1) {
@@ -182,6 +185,7 @@ susceptibility <- function(ab1,
    }
    if (!is.rsi(ab2)) {
      ab2 <- as.rsi(ab2)
+      print_warning <- TRUE
    }
    x <- apply(X = data.frame(ab1 = as.integer(ab1),
                              ab2 = as.integer(ab2)),
@@ -190,12 +194,16 @@ susceptibility <- function(ab1,
  } else {
    x <- ab1
  }
-  total <-  .Call(`_AMR_rsi_calc_total`, x)
+  total <- length(x) - sum(is.na(x))
  if (total < minimum) {
    return(NA)
  }
  found <- .Call(`_AMR_rsi_calc_S`, x, include_I)

+  if (print_warning == TRUE) {
+    warning("Increase speed by transforming to class `rsi` on beforehand: df %>% mutate_at(vars(col10:col20), as.rsi)")
+  }
+
  if (as_percent == TRUE) {
    percent(found / total, force_zero = TRUE)
  } else {
@@ -219,14 +227,10 @@ n_rsi <- function(ab1, ab2 = NULL) {
    if (!is.rsi(ab2)) {
      ab2 <- as.rsi(ab2)
    }
-    x <- apply(X = data.frame(ab1 = as.integer(ab1),
-                              ab2 = as.integer(ab2)),
-               MARGIN = 1,
-               FUN = min)
+    sum(!is.na(ab1) & !is.na(ab2))
  } else {
-    x <- ab1
+    sum(!is.na(ab1))
  }
-  .Call(`_AMR_rsi_calc_total`, x)
 }

 #' @rdname resistance
@@ -370,24 +374,8 @@ rsi_df <- function(tbl,
                all_vars(. %in% c("S", "R", "I"))) %>%
      nrow()

-  } else if (length(ab) == 3) {
-    if (interpretations_to_check != 'S') {
-      warning('`interpretation` not set to S or I/S, albeit analysing a combination therapy.', call. = FALSE)
-    }
-    numerator <- tbl %>%
-      filter_at(vars(ab[1], ab[2], ab[3]),
-                any_vars(. == interpretations_to_check)) %>%
-      filter_at(vars(ab[1], ab[2], ab[3]),
-                all_vars(. %in% c("S", "R", "I"))) %>%
-      nrow()
-
-    denominator <- tbl %>%
-      filter_at(vars(ab[1], ab[2], ab[3]),
-                all_vars(. %in% c("S", "R", "I"))) %>%
-      nrow()
-
  } else {
-    stop('Maximum of 3 drugs allowed.')
+    stop('Maximum of 2 drugs allowed.')
  }

  # build text part
--- a/R/trends.R
+++ b/R/trends.R
@@ -1,123 +0,0 @@
-#' Detect trends using Machine Learning
-#'
-#' Test text
-#' @param data a \code{data.frame}
-#' @param threshold_unique do not analyse more unique \code{threshold_unique} items per variable
-#' @param na.rm a logical value indicating whether \code{NA} values should be stripped before the computation proceeds.
-#' @param info print relevant combinations to console
-#' @return A \code{list} with class \code{"trends"}
-#' @importFrom stats na.omit
-#' @importFrom broom tidy
-# @export
-trends <- function(data, threshold_unique = 30, na.rm = TRUE, info = TRUE) {
-
-  cols <- colnames(data)
-  relevant <- list()
-  count <- 0
-  for (x in 1:length(cols)) {
-    for (y in 1:length(cols)) {
-      if (x == y) {
-        next
-      }
-      if (n_distinct(data[, x]) > threshold_unique | n_distinct(data[, y]) > threshold_unique) {
-        next
-      }
-      count <- count + 1
-      df <- data %>%
-        group_by_at(c(cols[x], cols[y])) %>%
-        summarise(n = n())
-      n <- df %>% pull(n)
-      # linear regression model
-      lin <- stats::lm(1:length(n) ~ n, na.action = ifelse(na.rm == TRUE, na.omit, NULL))
-
-      res <- list(
-        df = df,
-        x = cols[x],
-        y = cols[y],
-        m = base::mean(n, na.rm = na.rm),
-        sd = stats::sd(n, na.rm = na.rm),
-        cv = cv(n, na.rm = na.rm),
-        cqv = cqv(n, na.rm = na.rm),
-        kurtosis = kurtosis(n, na.rm = na.rm),
-        skewness = skewness(n, na.rm = na.rm),
-        lin.p = broom::tidy(lin)[2, 'p.value']
-        #binom.p <- broom::tidy(binom)[2, 'p.value']
-      )
-
-      include <- TRUE
-      # ML part
-      if (res$cv > 0.25) {
-        res$reason <- "cv > 0.25"
-      } else if (res$cqv > 0.75) {
-        res$reason <- "cqv > 0.75"
-      } else {
-        include <- FALSE
-      }
-
-      if (include == TRUE) {
-        relevant <- c(relevant, list(res))
-        if (info == TRUE) {
-          # minus one because the whole data will be added later
-          cat(paste0("[", length(relevant), "]"), "Relevant:", cols[x], "vs.", cols[y], "\n")
-        }
-      }
-
-    }
-  }
-
-  cat("Total of", count, "combinations analysed;", length(relevant), "seem relevant.\n")
-  class(relevant) <- 'trends'
-  relevant <- c(relevant, list(data = data))
-  relevant
-
-}
-
-# @exportMethod print.trends
-# @export
-#' @noRd
-print.trends <- function(x, ...) {
-  cat(length(x) - 1, "relevant trends, out of", length(x$data)^2, "\n")
-}
-
-# @exportMethod plot.trends
-# @export
-#' @noRd
-# plot.trends <- function(x, n = NULL, ...) {
-#   if (is.null(n)) {
-#     oask <- devAskNewPage(TRUE)
-#     on.exit(devAskNewPage(oask))
-#     n <- c(1:(length(x) - 1))
-#   } else {
-#     if (n > length(x) - 1) {
-#       stop('trend unavailable, max is ', length(x) - 1, call. = FALSE)
-#     }
-#     oask <- NULL
-#   }
-#   for (i in n) {
-#     data <- x[[i]]$df
-#     if (as.character(i) %like% '1$') {
-#       suffix <- "st"
-#     } else if (as.character(i) %like% '2$') {
-#       suffix <- "nd"
-#     } else if (as.character(i) %like% '3$') {
-#       suffix <- "rd"
-#     } else {
-#       suffix <- "th"
-#     }
-#     if (!is.null(oask)) {
-#       cat(paste("Coming up:", colnames(data)[1], "vs.", colnames(data)[2]), "\n")
-#     }
-#     print(
-#       ggplot(
-#         data,
-#         aes_string(x = colnames(data)[1],
-#                    y = colnames(data)[3],
-#                    group = colnames(data)[2],
-#                    fill = colnames(data)[2])) +
-#         geom_col(position = "dodge") +
-#         theme_minimal() +
-#         labs(title = paste(colnames(data)[1], "vs.", colnames(data)[2]),
-#              subtitle = paste0(i, suffix, " trend"))
-#     )
-#   }
-# }