keyab fixes

This commit is contained in:
dr. M.S. (Matthijs) Berends 2018-07-17 19:51:09 +02:00
parent 2a4d759fbc
commit 0d64c166f0
5 changed files with 42 additions and 39 deletions

19
NEWS.md
View File

@ -1,18 +1,16 @@
# 0.2.0.90xx (development version)
#### New
* **BREAKING**: `rsi_df` was removed in favour of new functions `resistance` and `susceptibility`. Now, all functions used to calculate resistance (`resistance` and `susceptibility`) or count isolates (`n_rsi`) use **hybrid evaluation**. This means calculations are not done in R directly but rather in C++ using the `Rcpp` package, making them 25 to 30 times faster. The function `rsi` still works, but is deprecated.
* **BREAKING**: the methodology for determining first weighted isolates was changed. The antibiotics (called *key antibiotics*) that are compared between isolates to include more first isolates (called first *weighted* isolates) are now as follows:
* **BREAKING**: `rsi_df` was removed in favour of new functions `resistance` and `susceptibility`. Now, all functions used to calculate resistance (`resistance` and `susceptibility`) use **hybrid evaluation**. This means calculations are not done in R directly but rather in C++ using the `Rcpp` package, making them 25 to 30 times faster. The function `rsi` still works, but is deprecated.
* **BREAKING**: the methodology for determining first weighted isolates was changed. The antibiotics that are compared between isolates (called *key antibiotics*) to include more first isolates (afterwards called first *weighted* isolates) are now as follows:
* Gram-positive: amoxicillin, amoxicillin/clavulanic acid, cefuroxime, piperacillin/tazobactam, ciprofloxacin, trimethoprim/sulfamethoxazole, vancomycin, teicoplanin, tetracycline, erythromycin, oxacillin, rifampicin
* Gram-negative: amoxicillin, amoxicillin/clavulanic acid, cefuroxime, piperacillin/tazobactam, ciprofloxacin, trimethoprim/sulfamethoxazole, gentamicin, tobramycin, colistin, cefotaxime, ceftazidime, meropenem
* Support for Addins menu in RStudio to quickly insert `%in%` or `%like%` (and give them keyboard shortcuts), or to view the datasets that come with this package
* For convenience, new descriptive statistical functions `kurtosis` and `skewness` that are lacking in base R - they are generic functions and have support for vectors, data.frames and matrices
* Function `g.test` was added to perform the Χ<sup>2</sup> distributed [*G*-test](https://en.wikipedia.org/wiki/G-test), whose use is the same as `chisq.test`
* Function `ratio` was added to transform a vector of values to a preset ratio. For example:
```r
ratio(c(772, 1611, 737), ratio = "1:2:1")
# [1] 780 1560 780
```
* Function `p.symbol` was added to transform p values to their related symbols: `0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1`
* Function `g.test` to perform the Χ<sup>2</sup> distributed [*G*-test](https://en.wikipedia.org/wiki/G-test), whose use is the same as `chisq.test`
* Function `ratio` to transform a vector of values to a preset ratio
* For example: `ratio(c(10, 500, 10), ratio = "1:2:1")` would return `130, 260, 130`
* Support for Addins menu in RStudio to quickly insert `%in%` or `%like%` (and give them keyboard shortcuts), or to view the datasets that come with this package
* Function `p.symbol` to transform p values to their related symbols: `0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1`
* Functions `clipboard_import` and `clipboard_export` as helper functions to quickly copy and paste from/to software like Excel and SPSS. These functions use the `clipr` package, but are a little altered to also support headless Linux servers (so you can use it in RStudio Server)
* New for frequency tables (function `freq`):
* A vignette to explain its usage
* Support for `table` to use as input: `freq(table(x, y))`
@ -22,7 +20,6 @@ ratio(c(772, 1611, 737), ratio = "1:2:1")
* Function `top_freq` to return the top/bottom *n* items as vector
* Header of frequency tables now also shows Mean Absolute Deviation (MAD) and Interquartile Range (IQR)
* Possibility to globally set the default for the amount of items to print, with `options(max.print.freq = n)` where *n* is your preset value
* Functions `clipboard_import` and `clipboard_export` as helper functions to quickly copy and paste from/to software like Excel and SPSS. These functions use the `clipr` package, but are a little altered to also support headless Linux servers (so you can use it in RStudio Server).
#### Changed
* Pretty printing for tibbles removed as it is not really the scope of this package

View File

@ -136,7 +136,8 @@ key_antibiotics <- function(tbl,
# format
key_abs <- tbl %>%
pull(key_ab) %>%
gsub('(NA|NULL)', '-', .)
gsub('(NA|NULL)', '.', .) %>%
gsub('[^SIR]', '.', ., ignore.case = TRUE)
key_abs
@ -162,22 +163,24 @@ key_antibiotics_equal <- function(x,
if (type == "keyantibiotics") {
if (ignore_I == TRUE) {
# evaluation using regular expression will treat '?' as any character
# evaluation using regular expression will treat '.' as any character
# so I is actually ignored then
x <- gsub('I', '?', x, ignore.case = TRUE)
y <- gsub('I', '?', y, ignore.case = TRUE)
x <- gsub('I', '.', x, ignore.case = TRUE)
y <- gsub('I', '.', y, ignore.case = TRUE)
}
for (i in 1:length(x)) {
result[i] <- grepl(x = x[i],
pattern = y[i],
ignore.case = TRUE) |
grepl(x = y[i],
pattern = x[i],
ignore.case = TRUE)
result[i] <- nchar(x[i]) == nchar(y[i]) &
(x[i] %like% paste0("^", y[i], "$") |
y[i] %like% paste0("^", x[i], "$"))
}
return(result)
} else {
if (type != 'points') {
stop('`', type, '` is not a valid value for type, must be "points" or "keyantibiotics". See ?first_isolate.')
}
if (info == TRUE) {
p <- dplyr::progress_estimated(length(x))
}
@ -208,22 +211,17 @@ key_antibiotics_equal <- function(x,
x2 <- strsplit(x[i], "")[[1]]
y2 <- strsplit(y[i], "")[[1]]
if (type == 'points') {
# count points for every single character:
# - no change is 0 points
# - I <-> S|R is 0.5 point
# - S|R <-> R|S is 1 point
# use the levels of as.rsi (S = 1, I = 2, R = 3)
# count points for every single character:
# - no change is 0 points
# - I <-> S|R is 0.5 point
# - S|R <-> R|S is 1 point
# use the levels of as.rsi (S = 1, I = 2, R = 3)
suppressWarnings(x2 <- x2 %>% as.rsi() %>% as.double())
suppressWarnings(y2 <- y2 %>% as.rsi() %>% as.double())
suppressWarnings(x2 <- x2 %>% as.rsi() %>% as.double())
suppressWarnings(y2 <- y2 %>% as.rsi() %>% as.double())
points <- (x2 - y2) %>% abs() %>% sum(na.rm = TRUE)
result[i] <- ((points / 2) >= points_threshold)
} else {
stop('`', type, '` is not a valid value for type, must be "points" or "keyantibiotics". See ?first_isolate.')
}
points <- (x2 - y2) %>% abs() %>% sum(na.rm = TRUE)
result[i] <- ((points / 2) >= points_threshold)
}
}
if (info == TRUE) {

View File

@ -14,6 +14,8 @@ test_that("clipboard works", {
clipboard_import())
clipboard_export(septic_patients[1:100,])
expect_identical(as.data.frame(tbl_parse_guess(septic_patients[1:100,]), stringsAsFactors = FALSE),
clipboard_import(guess_col_types = TRUE, stringsAsFactors = FALSE))
expect_identical(as.data.frame(tbl_parse_guess(septic_patients[1:100,]),
stringsAsFactors = FALSE),
clipboard_import(guess_col_types = TRUE,
stringsAsFactors = FALSE))
})

View File

@ -3,6 +3,7 @@ context("first_isolates.R")
# Checks key_antibiotics() output length and the comparison results of
# key_antibiotics_equal() for three-character S/I/R strings.
test_that("keyantibiotics work", {
# one key-antibiotics value is expected per row of the input data
expect_equal(length(key_antibiotics(septic_patients, info = FALSE)), nrow(septic_patients))
# identical strings compare as equal
expect_true(key_antibiotics_equal("SSS", "SSS"))
# an S versus R difference at one position compares as not equal
expect_false(key_antibiotics_equal("SSS", "SRS"))
# with ignore_I = TRUE an S versus I difference still compares as equal
expect_true(key_antibiotics_equal("SSS", "SIS", ignore_I = TRUE))
# with ignore_I = FALSE the same S versus I difference compares as not equal
expect_false(key_antibiotics_equal("SSS", "SIS", ignore_I = FALSE))
})
@ -19,7 +20,7 @@ test_that("first isolates work", {
na.rm = TRUE),
1959)
# septic_patients contains 1963 out of 2000 first *weighted* isolates
# septic_patients contains 1962 out of 2000 first *weighted* isolates
expect_equal(
suppressWarnings(
sum(
@ -31,7 +32,7 @@ test_that("first isolates work", {
type = "keyantibiotics",
info = TRUE),
na.rm = TRUE)),
1963)
1962)
# and 1997 when using points
expect_equal(
suppressWarnings(

View File

@ -18,3 +18,8 @@ test_that("functions missing in older R versions work", {
expect_equal(trimws(" test ", "l"), "test ")
expect_equal(trimws(" test ", "r"), " test")
})
# Checks that date_generic() returns the expected strptime-style format
# string for two spreadsheet-like date patterns.
test_that("generic dates work", {
# numeric year-month-day pattern
expect_equal(date_generic("yyyy-mm-dd"), "%Y-%m-%d")
# pattern with full weekday name, day of month, full month name and year
expect_equal(date_generic("dddd d mmmm yyyy"), "%A %e %B %Y")
})