keyab fixes

2025-07-19 01:43:13 +02:00 · 2018-07-17 19:51:09 +02:00
parent 2a4d759fbc
commit 0d64c166f0
5 changed files with 42 additions and 39 deletions
--- a/NEWS.md
+++ b/NEWS.md
@ -1,18 +1,16 @@
 # 0.2.0.90xx (development version)
 #### New
-* **BREAKING**: `rsi_df` was removed in favour of new functions `resistance` and `susceptibility`. Now, all functions used to calculate resistance (`resistance` and `susceptibility`) or count isolates (`n_rsi`) use **hybrid evaluation**. This means calculations are not done in R directly but rather in C++ using the `Rcpp` package, making them 25 to 30 times faster. The function `rsi` still works, but is deprecated.
-* **BREAKING**: the methodology for determining first weighted isolates was changed. The antibiotics (call *key antibiotics*) that are compared between isolated to include more first isolates (called first *weighted* isolates) are now as follows:
+* **BREAKING**: `rsi_df` was removed in favour of new functions `resistance` and `susceptibility`. Now, all functions used to calculate resistance (`resistance` and `susceptibility`) use **hybrid evaluation**. This means calculations are not done in R directly but rather in C++ using the `Rcpp` package, making them 25 to 30 times faster. The function `rsi` still works, but is deprecated.
+* **BREAKING**: the methodology for determining first weighted isolates was changed. The antibiotics that are compared between isolates (called *key antibiotics*) to include more first isolates (afterwards called first *weighted* isolates) are now as follows:
  * Gram-positive: amoxicillin, amoxicillin/clavulanic acid, cefuroxime, piperacillin/tazobactam, ciprofloxacin, trimethoprim/sulfamethoxazole, vancomycin, teicoplanin, tetracycline, erythromycin, oxacillin, rifampicin
  * Gram-negative: amoxicillin, amoxicillin/clavulanic acid, cefuroxime, piperacillin/tazobactam, ciprofloxacin, trimethoprim/sulfamethoxazole, gentamicin, tobramycin, colistin, cefotaxime, ceftazidime, meropenem
-* Support for Addins menu in RStudio to quickly insert `%in%` or `%like%` (and give them keyboard shortcuts), or to view the datasets that come with this package
 * For convenience, new descriptive statistical functions `kurtosis` and `skewness` that are lacking in base R - they are generic functions and have support for vectors, data.frames and matrices
-* Function `g.test` as added to perform the Χ<sup>2</sup> distributed [*G*-test](https://en.wikipedia.org/wiki/G-test), which use is the same as `chisq.test`
-* Function `ratio` was added to transform a vector of values to a preset ratio. For example:
-```r
-ratio(c(772, 1611, 737), ratio = "1:2:1")
-# [1]  780 1560  780
-```
-* Function `p.symbol` was added to transform p values to their related symbols: `0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1`
+* Function `g.test` to perform the Χ<sup>2</sup> distributed [*G*-test](https://en.wikipedia.org/wiki/G-test), whose usage is the same as `chisq.test`
+* Function `ratio` to transform a vector of values to a preset ratio
+  * For example: `ratio(c(10, 500, 10), ratio = "1:2:1")` would return `130, 260, 130`
+* Support for Addins menu in RStudio to quickly insert `%in%` or `%like%` (and give them keyboard shortcuts), or to view the datasets that come with this package
+* Function `p.symbol` to transform p values to their related symbols: `0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1`
+* Functions `clipboard_import` and `clipboard_export` as helper functions to quickly copy and paste from/to software like Excel and SPSS. These functions use the `clipr` package, but are a little altered to also support headless Linux servers (so you can use it in RStudio Server)
 * New for frequency tables (function `freq`):
  * A vignette to explain its usage
  * Support for `table` to use as input: `freq(table(x, y))`
@ -22,7 +20,6 @@ ratio(c(772, 1611, 737), ratio = "1:2:1")
  * Function `top_freq` to return the top/below *n* items as vector
  * Header of frequency tables now also shows Mean Absolute Deviation (MAD) and Interquartile Range (IQR)
  * Possibility to globally set the default for the amount of items to print, with `options(max.print.freq = n)` where *n* is your preset value
-* Functions `clipboard_import` and `clipboard_export` as helper functions to quickly copy and paste from/to software like Excel and SPSS. These functions use the `clipr` package, but are a little altered to also support headless Linux servers (so you can use it in RStudio Server).

 #### Changed
 * Pretty printing for tibbles removed as it is not really the scope of this package
--- a/R/key_antibiotics.R
+++ b/R/key_antibiotics.R
@ -136,7 +136,8 @@ key_antibiotics <- function(tbl,
  # format
  key_abs <- tbl %>%
    pull(key_ab) %>%
-    gsub('(NA|NULL)', '-', .)
+    gsub('(NA|NULL)', '.', .) %>%
+    gsub('[^SIR]', '.', ., ignore.case = TRUE)

  key_abs

@ -162,22 +163,24 @@ key_antibiotics_equal <- function(x,

  if (type == "keyantibiotics") {
    if (ignore_I == TRUE) {
-      # evaluation using regular expression will treat '?' as any character
+      # evaluation using regular expression will treat '.' as any character
      # so I is actually ignored then
-      x <- gsub('I', '?', x, ignore.case = TRUE)
-      y <- gsub('I', '?', y, ignore.case = TRUE)
+      x <- gsub('I', '.', x, ignore.case = TRUE)
+      y <- gsub('I', '.', y, ignore.case = TRUE)
    }
    for (i in 1:length(x)) {
-      result[i] <- grepl(x = x[i],
-                         pattern = y[i],
-                         ignore.case = TRUE) |
-        grepl(x = y[i],
-              pattern = x[i],
-              ignore.case = TRUE)
+      result[i] <- nchar(x[i]) == nchar(y[i]) &
+        (x[i] %like% paste0("^", y[i], "$") |
+           y[i] %like% paste0("^", x[i], "$"))
    }
    return(result)
+
  } else {

+    if (type != 'points') {
+      stop('`', type, '` is not a valid value for type, must be "points" or "keyantibiotics". See ?first_isolate.')
+    }
+
    if (info == TRUE) {
      p <- dplyr::progress_estimated(length(x))
    }
@ -208,22 +211,17 @@ key_antibiotics_equal <- function(x,
        x2 <- strsplit(x[i], "")[[1]]
        y2 <- strsplit(y[i], "")[[1]]

-        if (type == 'points') {
-          # count points for every single character:
-          # - no change is 0 points
-          # - I <-> S|R is 0.5 point
-          # - S|R <-> R|S is 1 point
-          # use the levels of as.rsi (S = 1, I = 2, R = 3)
+        # count points for every single character:
+        # - no change is 0 points
+        # - I <-> S|R is 0.5 point
+        # - S|R <-> R|S is 1 point
+        # use the levels of as.rsi (S = 1, I = 2, R = 3)

-          suppressWarnings(x2 <- x2 %>% as.rsi() %>% as.double())
-          suppressWarnings(y2 <- y2 %>% as.rsi() %>% as.double())
+        suppressWarnings(x2 <- x2 %>% as.rsi() %>% as.double())
+        suppressWarnings(y2 <- y2 %>% as.rsi() %>% as.double())

-          points <- (x2 - y2) %>% abs() %>% sum(na.rm = TRUE)
-          result[i] <- ((points / 2) >= points_threshold)
-
-        } else {
-          stop('`', type, '` is not a valid value for type, must be "points" or "keyantibiotics". See ?first_isolate.')
-        }
+        points <- (x2 - y2) %>% abs() %>% sum(na.rm = TRUE)
+        result[i] <- ((points / 2) >= points_threshold)
      }
    }
    if (info == TRUE) {
--- a/tests/testthat/test-clipboard.R
+++ b/tests/testthat/test-clipboard.R
@ -14,6 +14,8 @@ test_that("clipboard works", {
                   clipboard_import())

  clipboard_export(septic_patients[1:100,])
-  expect_identical(as.data.frame(tbl_parse_guess(septic_patients[1:100,]), stringsAsFactors = FALSE),
-                   clipboard_import(guess_col_types = TRUE, stringsAsFactors = FALSE))
+  expect_identical(as.data.frame(tbl_parse_guess(septic_patients[1:100,]),
+                                 stringsAsFactors = FALSE),
+                   clipboard_import(guess_col_types = TRUE,
+                                    stringsAsFactors = FALSE))
 })
--- a/tests/testthat/test-first_isolates.R
+++ b/tests/testthat/test-first_isolates.R
@ -3,6 +3,7 @@ context("first_isolates.R")
 test_that("keyantibiotics work", {
  expect_equal(length(key_antibiotics(septic_patients, info = FALSE)), nrow(septic_patients))
  expect_true(key_antibiotics_equal("SSS", "SSS"))
+  expect_false(key_antibiotics_equal("SSS", "SRS"))
  expect_true(key_antibiotics_equal("SSS", "SIS", ignore_I = TRUE))
  expect_false(key_antibiotics_equal("SSS", "SIS", ignore_I = FALSE))
 })
@ -19,7 +20,7 @@ test_that("first isolates work", {
      na.rm = TRUE),
    1959)

-  # septic_patients contains 1963 out of 2000 first *weighted* isolates
+  # septic_patients contains 1962 out of 2000 first *weighted* isolates
  expect_equal(
    suppressWarnings(
      sum(
@ -31,7 +32,7 @@ test_that("first isolates work", {
                      type = "keyantibiotics",
                      info = TRUE),
        na.rm = TRUE)),
-    1963)
+    1962)
  # and 1997 when using points
  expect_equal(
    suppressWarnings(
--- a/tests/testthat/test-misc.R
+++ b/tests/testthat/test-misc.R
@ -18,3 +18,8 @@ test_that("functions missing in older R versions work", {
  expect_equal(trimws(" test ", "l"), "test ")
  expect_equal(trimws(" test ", "r"), " test")
 })
+
+test_that("generic dates work", {
+  expect_equal(date_generic("yyyy-mm-dd"), "%Y-%m-%d")
+  expect_equal(date_generic("dddd d mmmm yyyy"), "%A %e %B %Y")
+})