From 0d64c166f06d10c4d93d5b9ad2f0c61391506b66 Mon Sep 17 00:00:00 2001
From: "Matthijs S. Berends" <m.s.berends@umcg.nl>
Date: Tue, 17 Jul 2018 19:51:09 +0200
Subject: [PATCH] keyab fixes

---
 NEWS.md                              | 19 +++++-------
 R/key_antibiotics.R                  | 46 +++++++++++++---------------
 tests/testthat/test-clipboard.R      |  6 ++--
 tests/testthat/test-first_isolates.R |  5 +--
 tests/testthat/test-misc.R           |  5 +++
 5 files changed, 42 insertions(+), 39 deletions(-)
diff --git a/NEWS.md b/NEWS.md
index 085e81e4..dba7cb5d 100755
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,18 +1,16 @@
 # 0.2.0.90xx (development version)
 #### New
-* **BREAKING**: `rsi_df` was removed in favour of new functions `resistance` and `susceptibility`. Now, all functions used to calculate resistance (`resistance` and `susceptibility`) or count isolates (`n_rsi`) use **hybrid evaluation**. This means calculations are not done in R directly but rather in C++ using the `Rcpp` package, making them 25 to 30 times faster. The function `rsi` still works, but is deprecated.
-* **BREAKING**: the methodology for determining first weighted isolates was changed. The antibiotics (call *key antibiotics*) that are compared between isolated to include more first isolates (called first *weighted* isolates) are now as follows:
+* **BREAKING**: `rsi_df` was removed in favour of new functions `resistance` and `susceptibility`. Now, all functions used to calculate resistance (`resistance` and `susceptibility`) use **hybrid evaluation**. This means calculations are not done in R directly but rather in C++ using the `Rcpp` package, making them 25 to 30 times faster. The function `rsi` still works, but is deprecated.
+* **BREAKING**: the methodology for determining first weighted isolates was changed. The antibiotics that are compared between isolates (call *key antibiotics*) to include more first isolates (afterwards called first *weighted* isolates) are now as follows:
   * Gram-positive: amoxicillin, amoxicillin/clavlanic acid, cefuroxime, piperacillin/tazobactam, ciprofloxacin, trimethoprim/sulfamethoxazole, vancomycin, teicoplanin, tetracycline, erythromycin, oxacillin, rifampicin
   * Gram-negative: amoxicillin, amoxicillin/clavlanic acid, cefuroxime, piperacillin/tazobactam, ciprofloxacin, trimethoprim/sulfamethoxazole, gentamicin, tobramycin, colistin, cefotaxime, ceftazidime, meropenem
-* Support for Addins menu in RStudio to quickly insert `%in%` or `%like%` (and give them keyboard shortcuts), or to view the datasets that come with this package
 * For convience, new descriptive statistical functions `kurtosis` and `skewness` that are lacking in base R - they are generic functions and have support for vectors, data.frames and matrices
-* Function `g.test` as added to perform the Χ<sup>2</sup> distributed [*G*-test](https://en.wikipedia.org/wiki/G-test), which use is the same as `chisq.test`
-* Function `ratio` was added to transform a vector of values to a preset ratio. For example:
-```r
-ratio(c(772, 1611, 737), ratio = "1:2:1")
-# [1]  780 1560  780
-```
-* Function `p.symbol` was added to transform p values to their related symbols: `0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1`
+* Function `g.test` to perform the Χ<sup>2</sup> distributed [*G*-test](https://en.wikipedia.org/wiki/G-test), which use is the same as `chisq.test`
+* Function `ratio` to transform a vector of values to a preset ratio
+  * For example: `ratio(c(10, 500, 10), ratio = "1:2:1")` would return `130, 260, 130`
+* Support for Addins menu in RStudio to quickly insert `%in%` or `%like%` (and give them keyboard shortcuts), or to view the datasets that come with this package
+* Function `p.symbol` to transform p values to their related symbols: `0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1`
+* Functions `clipboard_import` and `clipboard_export` as helper functions to quickly copy and paste from/to software like Excel and SPSS. These functions use the `clipr` package, but are a little altered to also support headless Linux servers (so you can use it in RStudio Server)
 * New for frequency tables (function `freq`):
   * A vignette to explain its usage
   * Support for `table` to use as input: `freq(table(x, y))`
@@ -22,7 +20,6 @@ ratio(c(772, 1611, 737), ratio = "1:2:1")
   * Function `top_freq` function to return the top/below *n* items as vector
   * Header of frequency tables now also show Mean Absolute Deviaton (MAD) and Interquartile Range (IQR)
   * Possibility to globally set the default for the amount of items to print, with `options(max.print.freq = n)` where *n* is your preset value
-* Functions `clipboard_import` and `clipboard_export` as helper functions to quickly copy and paste from/to software like Excel and SPSS. These functions use the `clipr` package, but are a little altered to also support headless Linux servers (so you can use it in RStudio Server).
 
 #### Changed
 * Pretty printing for tibbles removed as it is not really the scope of this package
diff --git a/R/key_antibiotics.R b/R/key_antibiotics.R
index ad1edf98..b560dd3c 100644
--- a/R/key_antibiotics.R
+++ b/R/key_antibiotics.R
@@ -136,7 +136,8 @@ key_antibiotics <- function(tbl,
   # format
   key_abs <- tbl %>%
     pull(key_ab) %>%
-    gsub('(NA|NULL)', '-', .)
+    gsub('(NA|NULL)', '.', .) %>%
+    gsub('[^SIR]', '.', ., ignore.case = TRUE)
 
   key_abs
 
@@ -162,22 +163,24 @@ key_antibiotics_equal <- function(x,
 
   if (type == "keyantibiotics") {
     if (ignore_I == TRUE) {
-      # evaluation using regular expression will treat '?' as any character
+      # evaluation using regular expression will treat '.' as any character
       # so I is actually ignored then
-      x <- gsub('I', '?', x, ignore.case = TRUE)
-      y <- gsub('I', '?', y, ignore.case = TRUE)
+      x <- gsub('I', '.', x, ignore.case = TRUE)
+      y <- gsub('I', '.', y, ignore.case = TRUE)
     }
     for (i in 1:length(x)) {
-      result[i] <- grepl(x = x[i],
-                         pattern = y[i],
-                         ignore.case = TRUE) |
-        grepl(x = y[i],
-              pattern = x[i],
-              ignore.case = TRUE)
+      result[i] <- nchar(x[i]) == nchar(y[i]) &
+        (x[i] %like% paste0("^", y[i], "$") |
+           y[i] %like% paste0("^", x[i], "$"))
     }
     return(result)
+
   } else {
 
+    if (type != 'points') {
+      stop('`', type, '` is not a valid value for type, must be "points" or "keyantibiotics". See ?first_isolate.')
+    }
+
     if (info == TRUE) {
       p <- dplyr::progress_estimated(length(x))
     }
@@ -208,22 +211,17 @@ key_antibiotics_equal <- function(x,
         x2 <- strsplit(x[i], "")[[1]]
         y2 <- strsplit(y[i], "")[[1]]
 
-        if (type == 'points') {
-          # count points for every single character:
-          # - no change is 0 points
-          # - I <-> S|R is 0.5 point
-          # - S|R <-> R|S is 1 point
-          # use the levels of as.rsi (S = 1, I = 2, R = 3)
+        # count points for every single character:
+        # - no change is 0 points
+        # - I <-> S|R is 0.5 point
+        # - S|R <-> R|S is 1 point
+        # use the levels of as.rsi (S = 1, I = 2, R = 3)
 
-          suppressWarnings(x2 <- x2 %>% as.rsi() %>% as.double())
-          suppressWarnings(y2 <- y2 %>% as.rsi() %>% as.double())
+        suppressWarnings(x2 <- x2 %>% as.rsi() %>% as.double())
+        suppressWarnings(y2 <- y2 %>% as.rsi() %>% as.double())
 
-          points <- (x2 - y2) %>% abs() %>% sum(na.rm = TRUE)
-          result[i] <- ((points / 2) >= points_threshold)
-
-        } else {
-          stop('`', type, '` is not a valid value for type, must be "points" or "keyantibiotics". See ?first_isolate.')
-        }
+        points <- (x2 - y2) %>% abs() %>% sum(na.rm = TRUE)
+        result[i] <- ((points / 2) >= points_threshold)
       }
     }
     if (info == TRUE) {
diff --git a/tests/testthat/test-clipboard.R b/tests/testthat/test-clipboard.R
index 11bc3bc9..bb7c0a80 100644
--- a/tests/testthat/test-clipboard.R
+++ b/tests/testthat/test-clipboard.R
@@ -14,6 +14,8 @@ test_that("clipboard works", {
                    clipboard_import())
 
   clipboard_export(septic_patients[1:100,])
-  expect_identical(as.data.frame(tbl_parse_guess(septic_patients[1:100,]), stringsAsFactors = FALSE),
-                   clipboard_import(guess_col_types = TRUE, stringsAsFactors = FALSE))
+  expect_identical(as.data.frame(tbl_parse_guess(septic_patients[1:100,]),
+                                 stringsAsFactors = FALSE),
+                   clipboard_import(guess_col_types = TRUE,
+                                    stringsAsFactors = FALSE))
 })
diff --git a/tests/testthat/test-first_isolates.R b/tests/testthat/test-first_isolates.R
index af4fbdc5..54561bad 100755
--- a/tests/testthat/test-first_isolates.R
+++ b/tests/testthat/test-first_isolates.R
@@ -3,6 +3,7 @@ context("first_isolates.R")
 test_that("keyantibiotics work", {
   expect_equal(length(key_antibiotics(septic_patients, info = FALSE)), nrow(septic_patients))
   expect_true(key_antibiotics_equal("SSS", "SSS"))
+  expect_false(key_antibiotics_equal("SSS", "SRS"))
   expect_true(key_antibiotics_equal("SSS", "SIS", ignore_I = TRUE))
   expect_false(key_antibiotics_equal("SSS", "SIS", ignore_I = FALSE))
 })
@@ -19,7 +20,7 @@ test_that("first isolates work", {
       na.rm = TRUE),
     1959)
 
-  # septic_patients contains 1963 out of 2000 first *weighted* isolates
+  # septic_patients contains 1962 out of 2000 first *weighted* isolates
   expect_equal(
     suppressWarnings(
       sum(
@@ -31,7 +32,7 @@ test_that("first isolates work", {
                       type = "keyantibiotics",
                       info = TRUE),
         na.rm = TRUE)),
-    1963)
+    1962)
   # and 1997 when using points
   expect_equal(
     suppressWarnings(
diff --git a/tests/testthat/test-misc.R b/tests/testthat/test-misc.R
index 164a9c9f..150f3338 100755
--- a/tests/testthat/test-misc.R
+++ b/tests/testthat/test-misc.R
@@ -18,3 +18,8 @@ test_that("functions missing in older R versions work", {
   expect_equal(trimws(" test ", "l"), "test ")
   expect_equal(trimws(" test ", "r"), " test")
 })
+
+test_that("generic dates work", {
+  expect_equal(date_generic("yyyy-mm-dd"), "%Y-%m-%d")
+  expect_equal(date_generic("dddd d mmmm yyyy"), "%A %e %B %Y")
+})