From 0d64c166f06d10c4d93d5b9ad2f0c61391506b66 Mon Sep 17 00:00:00 2001 From: "Matthijs S. Berends" Date: Tue, 17 Jul 2018 19:51:09 +0200 Subject: [PATCH] keyab fixes --- NEWS.md | 19 +++++------- R/key_antibiotics.R | 46 +++++++++++++--------------- tests/testthat/test-clipboard.R | 6 ++-- tests/testthat/test-first_isolates.R | 5 +-- tests/testthat/test-misc.R | 5 +++ 5 files changed, 42 insertions(+), 39 deletions(-) diff --git a/NEWS.md b/NEWS.md index 085e81e4..dba7cb5d 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,18 +1,16 @@ # 0.2.0.90xx (development version) #### New -* **BREAKING**: `rsi_df` was removed in favour of new functions `resistance` and `susceptibility`. Now, all functions used to calculate resistance (`resistance` and `susceptibility`) or count isolates (`n_rsi`) use **hybrid evaluation**. This means calculations are not done in R directly but rather in C++ using the `Rcpp` package, making them 25 to 30 times faster. The function `rsi` still works, but is deprecated. -* **BREAKING**: the methodology for determining first weighted isolates was changed. The antibiotics (call *key antibiotics*) that are compared between isolated to include more first isolates (called first *weighted* isolates) are now as follows: +* **BREAKING**: `rsi_df` was removed in favour of new functions `resistance` and `susceptibility`. Now, all functions used to calculate resistance (`resistance` and `susceptibility`) use **hybrid evaluation**. This means calculations are not done in R directly but rather in C++ using the `Rcpp` package, making them 25 to 30 times faster. The function `rsi` still works, but is deprecated. +* **BREAKING**: the methodology for determining first weighted isolates was changed. The antibiotics that are compared between isolates (call *key antibiotics*) to include more first isolates (afterwards called first *weighted* isolates) are now as follows: * Gram-positive: amoxicillin, amoxicillin/clavlanic acid, cefuroxime, piperacillin/tazobactam, ciprofloxacin, trimethoprim/sulfamethoxazole, vancomycin, teicoplanin, tetracycline, erythromycin, oxacillin, rifampicin * Gram-negative: amoxicillin, amoxicillin/clavlanic acid, cefuroxime, piperacillin/tazobactam, ciprofloxacin, trimethoprim/sulfamethoxazole, gentamicin, tobramycin, colistin, cefotaxime, ceftazidime, meropenem -* Support for Addins menu in RStudio to quickly insert `%in%` or `%like%` (and give them keyboard shortcuts), or to view the datasets that come with this package * For convience, new descriptive statistical functions `kurtosis` and `skewness` that are lacking in base R - they are generic functions and have support for vectors, data.frames and matrices -* Function `g.test` as added to perform the Χ2 distributed [*G*-test](https://en.wikipedia.org/wiki/G-test), which use is the same as `chisq.test` -* Function `ratio` was added to transform a vector of values to a preset ratio. For example: -```r -ratio(c(772, 1611, 737), ratio = "1:2:1") -# [1] 780 1560 780 -``` -* Function `p.symbol` was added to transform p values to their related symbols: `0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1` +* Function `g.test` to perform the Χ2 distributed [*G*-test](https://en.wikipedia.org/wiki/G-test), which use is the same as `chisq.test` +* Function `ratio` to transform a vector of values to a preset ratio + * For example: `ratio(c(10, 500, 10), ratio = "1:2:1")` would return `130, 260, 130` +* Support for Addins menu in RStudio to quickly insert `%in%` or `%like%` (and give them keyboard shortcuts), or to view the datasets that come with this package +* Function `p.symbol` to transform p values to their related symbols: `0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1` +* Functions `clipboard_import` and `clipboard_export` as helper functions to quickly copy and paste from/to software like Excel and SPSS. These functions use the `clipr` package, but are a little altered to also support headless Linux servers (so you can use it in RStudio Server) * New for frequency tables (function `freq`): * A vignette to explain its usage * Support for `table` to use as input: `freq(table(x, y))` @@ -22,7 +20,6 @@ ratio(c(772, 1611, 737), ratio = "1:2:1") * Function `top_freq` function to return the top/below *n* items as vector * Header of frequency tables now also show Mean Absolute Deviaton (MAD) and Interquartile Range (IQR) * Possibility to globally set the default for the amount of items to print, with `options(max.print.freq = n)` where *n* is your preset value -* Functions `clipboard_import` and `clipboard_export` as helper functions to quickly copy and paste from/to software like Excel and SPSS. These functions use the `clipr` package, but are a little altered to also support headless Linux servers (so you can use it in RStudio Server). #### Changed * Pretty printing for tibbles removed as it is not really the scope of this package diff --git a/R/key_antibiotics.R b/R/key_antibiotics.R index ad1edf98..b560dd3c 100644 --- a/R/key_antibiotics.R +++ b/R/key_antibiotics.R @@ -136,7 +136,8 @@ key_antibiotics <- function(tbl, # format key_abs <- tbl %>% pull(key_ab) %>% - gsub('(NA|NULL)', '-', .) + gsub('(NA|NULL)', '.', .) %>% + gsub('[^SIR]', '.', ., ignore.case = TRUE) key_abs @@ -162,22 +163,24 @@ key_antibiotics_equal <- function(x, if (type == "keyantibiotics") { if (ignore_I == TRUE) { - # evaluation using regular expression will treat '?' as any character + # evaluation using regular expression will treat '.' as any character # so I is actually ignored then - x <- gsub('I', '?', x, ignore.case = TRUE) - y <- gsub('I', '?', y, ignore.case = TRUE) + x <- gsub('I', '.', x, ignore.case = TRUE) + y <- gsub('I', '.', y, ignore.case = TRUE) } for (i in 1:length(x)) { - result[i] <- grepl(x = x[i], - pattern = y[i], - ignore.case = TRUE) | - grepl(x = y[i], - pattern = x[i], - ignore.case = TRUE) + result[i] <- nchar(x[i]) == nchar(y[i]) & + (x[i] %like% paste0("^", y[i], "$") | + y[i] %like% paste0("^", x[i], "$")) } return(result) + } else { + if (type != 'points') { + stop('`', type, '` is not a valid value for type, must be "points" or "keyantibiotics". See ?first_isolate.') + } + if (info == TRUE) { p <- dplyr::progress_estimated(length(x)) } @@ -208,22 +211,17 @@ key_antibiotics_equal <- function(x, x2 <- strsplit(x[i], "")[[1]] y2 <- strsplit(y[i], "")[[1]] - if (type == 'points') { - # count points for every single character: - # - no change is 0 points - # - I <-> S|R is 0.5 point - # - S|R <-> R|S is 1 point - # use the levels of as.rsi (S = 1, I = 2, R = 3) + # count points for every single character: + # - no change is 0 points + # - I <-> S|R is 0.5 point + # - S|R <-> R|S is 1 point + # use the levels of as.rsi (S = 1, I = 2, R = 3) - suppressWarnings(x2 <- x2 %>% as.rsi() %>% as.double()) - suppressWarnings(y2 <- y2 %>% as.rsi() %>% as.double()) + suppressWarnings(x2 <- x2 %>% as.rsi() %>% as.double()) + suppressWarnings(y2 <- y2 %>% as.rsi() %>% as.double()) - points <- (x2 - y2) %>% abs() %>% sum(na.rm = TRUE) - result[i] <- ((points / 2) >= points_threshold) - - } else { - stop('`', type, '` is not a valid value for type, must be "points" or "keyantibiotics". See ?first_isolate.') - } + points <- (x2 - y2) %>% abs() %>% sum(na.rm = TRUE) + result[i] <- ((points / 2) >= points_threshold) } } if (info == TRUE) { diff --git a/tests/testthat/test-clipboard.R b/tests/testthat/test-clipboard.R index 11bc3bc9..bb7c0a80 100644 --- a/tests/testthat/test-clipboard.R +++ b/tests/testthat/test-clipboard.R @@ -14,6 +14,8 @@ test_that("clipboard works", { clipboard_import()) clipboard_export(septic_patients[1:100,]) - expect_identical(as.data.frame(tbl_parse_guess(septic_patients[1:100,]), stringsAsFactors = FALSE), - clipboard_import(guess_col_types = TRUE, stringsAsFactors = FALSE)) + expect_identical(as.data.frame(tbl_parse_guess(septic_patients[1:100,]), + stringsAsFactors = FALSE), + clipboard_import(guess_col_types = TRUE, + stringsAsFactors = FALSE)) }) diff --git a/tests/testthat/test-first_isolates.R b/tests/testthat/test-first_isolates.R index af4fbdc5..54561bad 100755 --- a/tests/testthat/test-first_isolates.R +++ b/tests/testthat/test-first_isolates.R @@ -3,6 +3,7 @@ context("first_isolates.R") test_that("keyantibiotics work", { expect_equal(length(key_antibiotics(septic_patients, info = FALSE)), nrow(septic_patients)) expect_true(key_antibiotics_equal("SSS", "SSS")) + expect_false(key_antibiotics_equal("SSS", "SRS")) expect_true(key_antibiotics_equal("SSS", "SIS", ignore_I = TRUE)) expect_false(key_antibiotics_equal("SSS", "SIS", ignore_I = FALSE)) }) @@ -19,7 +20,7 @@ test_that("first isolates work", { na.rm = TRUE), 1959) - # septic_patients contains 1963 out of 2000 first *weighted* isolates + # septic_patients contains 1962 out of 2000 first *weighted* isolates expect_equal( suppressWarnings( sum( @@ -31,7 +32,7 @@ test_that("first isolates work", { type = "keyantibiotics", info = TRUE), na.rm = TRUE)), - 1963) + 1962) # and 1997 when using points expect_equal( suppressWarnings( diff --git a/tests/testthat/test-misc.R b/tests/testthat/test-misc.R index 164a9c9f..150f3338 100755 --- a/tests/testthat/test-misc.R +++ b/tests/testthat/test-misc.R @@ -18,3 +18,8 @@ test_that("functions missing in older R versions work", { expect_equal(trimws(" test ", "l"), "test ") expect_equal(trimws(" test ", "r"), " test") }) + +test_that("generic dates work", { + expect_equal(date_generic("yyyy-mm-dd"), "%Y-%m-%d") + expect_equal(date_generic("dddd d mmmm yyyy"), "%A %e %B %Y") +})