From 46cf423eab39b9cd5892496e19308f54879ca713 Mon Sep 17 00:00:00 2001 From: "Matthijs S. Berends" Date: Fri, 3 Jul 2020 10:51:55 +0200 Subject: [PATCH] (v1.2.0.9024) AMR calculation speed improvement --- DESCRIPTION | 4 ++-- NEWS.md | 9 ++++++--- R/rsi_calc.R | 24 +++++++++--------------- docs/404.html | 2 +- docs/LICENSE-text.html | 2 +- docs/articles/index.html | 2 +- docs/authors.html | 2 +- docs/index.html | 2 +- docs/news/index.html | 20 +++++++++++++------- docs/pkgdown.yml | 2 +- docs/reference/ab_from_text.html | 2 +- docs/reference/index.html | 2 +- 12 files changed, 38 insertions(+), 35 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index ea60bc38..6a844944 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR -Version: 1.2.0.9023 -Date: 2020-07-02 +Version: 1.2.0.9024 +Date: 2020-07-03 Title: Antimicrobial Resistance Analysis Authors@R: c( person(role = c("aut", "cre"), diff --git a/NEWS.md b/NEWS.md index 8546527a..c9d5a484 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,5 @@ -# AMR 1.2.0.9023 -## Last updated: 02-Jul-2020 +# AMR 1.2.0.9024 +## Last updated: 03-Jul-2020 ### New * Function `ab_from_text()` to retrieve antimicrobial drug names, doses and forms of administration from clinical texts in e.g. health care records, which also corrects for misspelling since it uses `as.ab()` internally @@ -19,7 +19,10 @@ * Added Monuril as trade name for fosfomycin ### Changed -* Using unexisting columns in all `count_*()`, `proportion_*()`, `susceptibility()` and `resistance()` functions wil now return an error instead of dropping them silently. Using variables for column names (as well as `dplyr::all_of()`) now works again. +* Improvements for `susceptibility()` and `resistance()` and all `count_*()`, `proportion_*()` functions: + * 95% speed improvement (!) 
by using other base R functions for calculation + * Using unexisting columns wil now return an error instead of dropping them silently + * Using variables for column names (as well as `dplyr::all_of()`) now works again * Improvements for `as.ab()`: * Dramatic improvement of the algorithm behind `as.ab()`, making many more input errors translatable, such as digitalised health care records, using too few or too many vowels or consonants and many more * Added progress bar diff --git a/R/rsi_calc.R b/R/rsi_calc.R index 16cd8cba..343b08c7 100755 --- a/R/rsi_calc.R +++ b/R/rsi_calc.R @@ -101,7 +101,7 @@ rsi_calc <- function(..., if (is.data.frame(x)) { rsi_integrity_check <- character(0) for (i in seq_len(ncol(x))) { - # check integrity of columns: force rsi class + # check integrity of columns: force class if (!is.rsi(x[, i, drop = TRUE])) { rsi_integrity_check <- c(rsi_integrity_check, as.character(x[, i, drop = TRUE])) x[, i] <- suppressWarnings(as.rsi(x[, i, drop = TRUE])) # warning will be given later @@ -113,22 +113,16 @@ rsi_calc <- function(..., rsi_integrity_check <- as.rsi(rsi_integrity_check) } + x_transposed <- as.list(as.data.frame(t(x))) if (only_all_tested == TRUE) { - # THE NUMBER OF ISOLATES WHERE *ALL* ABx ARE S/I/R - x <- apply(X = as.data.frame(lapply(x, as.integer), stringsAsFactors = FALSE), - MARGIN = 1, - FUN = base::min) - numerator <- sum(as.integer(x) %in% as.integer(ab_result), na.rm = TRUE) - denominator <- length(x) - sum(is.na(x)) - + # no NAs in any column + numerator <- sum(sapply(x_transposed, function(y) !any(is.na(y)) & any(y %in% ab_result, na.rm = TRUE))) + denominator <- sum(sapply(x_transposed, function(y) !(any(is.na(y))))) } else { - # THE NUMBER OF ISOLATES WHERE *ANY* ABx IS S/I/R + # may contain NAs in any column other_values <- base::setdiff(c(NA, levels(ab_result)), ab_result) - other_values_filter <- base::apply(x, 1, function(y) { - base::all(y %in% other_values) & base::any(is.na(y)) - }) - numerator <- 
sum(as.logical(by(x, seq_len(nrow(x)), function(row) any(unlist(row) %in% ab_result, na.rm = TRUE)))) - denominator <- nrow(x[!other_values_filter, , drop = FALSE]) + numerator <- sum(sapply(x_transposed, function(y) any(y %in% ab_result, na.rm = TRUE))) + denominator <- sum(sapply(x_transposed, function(y) !(all(y %in% other_values) & any(is.na(y))))) } } else { # x is not a data.frame @@ -153,7 +147,7 @@ rsi_calc <- function(..., if (data_vars != "") { data_vars <- paste(" for", data_vars) } - warning("Introducing NA: only ", denominator, " results available", data_vars, " (`minimum` was set to ", minimum, ").", call. = FALSE) + warning("Introducing NA: only ", denominator, " results available", data_vars, " (`minimum` = ", minimum, ").", call. = FALSE) fraction <- NA } else { fraction <- numerator / denominator diff --git a/docs/404.html b/docs/404.html index df2ee468..bd952060 100644 --- a/docs/404.html +++ b/docs/404.html @@ -81,7 +81,7 @@ AMR (for R) - 1.2.0.9023 + 1.2.0.9024 diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index 083cf74c..dcbcdb94 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -81,7 +81,7 @@ AMR (for R) - 1.2.0.9023 + 1.2.0.9024 diff --git a/docs/articles/index.html b/docs/articles/index.html index 136d3715..81a34fab 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -81,7 +81,7 @@ AMR (for R) - 1.2.0.9023 + 1.2.0.9024 diff --git a/docs/authors.html b/docs/authors.html index c451f3a9..bd4f36bd 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -81,7 +81,7 @@ AMR (for R) - 1.2.0.9023 + 1.2.0.9024 diff --git a/docs/index.html b/docs/index.html index 2fe3fc71..296fbceb 100644 --- a/docs/index.html +++ b/docs/index.html @@ -43,7 +43,7 @@ AMR (for R) - 1.2.0.9023 + 1.2.0.9024 diff --git a/docs/news/index.html b/docs/news/index.html index f57035df..e8c386b5 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -81,7 +81,7 @@ AMR (for R) - 1.2.0.9023 + 1.2.0.9024 @@ -229,13 
+229,13 @@ Source: NEWS.md -
-

-AMR 1.2.0.9023 Unreleased +
+

+AMR 1.2.0.9024 Unreleased

-
+

-Last updated: 02-Jul-2020 +Last updated: 03-Jul-2020

@@ -262,7 +262,13 @@

Changed

    -
  • Using unexisting columns in all count_*(), proportion_*(), susceptibility() and resistance() functions wil now return an error instead of dropping them silently. Using variables for column names (as well as dplyr::all_of()) now works again.
  • +
  • Improvements for susceptibility() and resistance() and all count_*(), proportion_*() functions: +
      +
    • 95% speed improvement (!) by using other base R functions for calculation
    • +
    • Using non-existing columns will now return an error instead of dropping them silently
    • +
    • Using variables for column names (as well as dplyr::all_of()) now works again
    • +
    +
  • Improvements for as.ab():
    • Dramatic improvement of the algorithm behind as.ab(), making many more input errors translatable, such as digitalised health care records, using too few or too many vowels or consonants and many more
    • diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index e688ebdc..51de5147 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -10,7 +10,7 @@ articles: WHONET: WHONET.html benchmarks: benchmarks.html resistance_predict: resistance_predict.html -last_built: 2020-07-02T19:12Z +last_built: 2020-07-03T08:50Z urls: reference: https://msberends.gitlab.io/AMR/reference article: https://msberends.gitlab.io/AMR/articles diff --git a/docs/reference/ab_from_text.html b/docs/reference/ab_from_text.html index d8b64430..b2d2fdc2 100644 --- a/docs/reference/ab_from_text.html +++ b/docs/reference/ab_from_text.html @@ -82,7 +82,7 @@ AMR (for R) - 1.2.0.9023 + 1.2.0.9024
diff --git a/docs/reference/index.html b/docs/reference/index.html index f12ee94f..0b3754f4 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -81,7 +81,7 @@ AMR (for R) - 1.2.0.9023 + 1.2.0.9024