small improvement for is.rsi.eligible, more unit tests

This commit is contained in:
dr. M.S. (Matthijs) Berends 2018-09-14 11:54:01 +02:00
parent cf5711fb0b
commit b0ca49d68d
6 changed files with 31 additions and 29 deletions

19
NEWS.md
View File

@ -18,8 +18,8 @@
thousands_of_E_colis <- rep("E. coli", 25000)
microbenchmark::microbenchmark(as.mo(thousands_of_E_colis), unit = "s")
# Unit: seconds
# min median max neval
# 0.00861352 0.008774335 0.01952958 100
# min median max neval
# 0.00861352 0.008774335 0.01952958 100
```
* Renamed all previous references to `bactid` to `mo`, like:
* Column names inputs of `EUCAST_rules`, `first_isolate` and `key_antibiotics`
@ -55,7 +55,8 @@
* For `first_isolate`, rows will be ignored when there's no species available
* Function `ratio` is now deprecated and will be removed in a future release, as it is not really the scope of this package
* Fix for `as.mic` for values ending in zeroes after a real number
* Tremendous speed improvement for `as.bactid` (now `as.mo`)
* Small fix where *B. fragilis* would not be found in the `microorganisms.umcg` data set
* Fix for `is.rsi.eligible`, now ignores reading marks
* Added parameters `minimum` and `as_percent` to `portion_df`
* Support for quasiquotation in the functions series `count_*` and `portions_*`, and `n_rsi`. This allows to check for more than 2 vectors or columns.
```r
@ -77,12 +78,12 @@
my_matrix = with(septic_patients, matrix(c(age, sex), ncol = 2))
freq(my_matrix)
```
* Subsetting also possible for lists:
```r
my_list = list(age = septic_patients$age, sex = septic_patients$sex)
my_list %>% freq(age)
my_list %>% freq(sex)
```
For lists, subsetting is possible:
```r
my_list = list(age = septic_patients$age, sex = septic_patients$sex)
my_list %>% freq(age)
my_list %>% freq(sex)
```
#### Other
* More unit tests to ensure better integrity of functions

21
R/mo.R
View File

@ -138,8 +138,6 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) {
# replace space by regex sign
x_withspaces <- gsub(" ", ".* ", x, fixed = TRUE)
x <- gsub(" ", ".*", x, fixed = TRUE)
# for species
x_species <- paste(x, 'species')
# add start en stop regex
x <- paste0('^', x, '$')
x_withspaces_all <- x_withspaces
@ -228,12 +226,14 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) {
x[i] <- found[1L]
next
}
# try the same, now based on genus + species ----
found <- MOs[which(paste(MOs$genus, MOs$species) %like% x_withspaces[i]),]$mo
if (length(found) > 0) {
x[i] <- found[1L]
next
}
# try any match keeping spaces, not ending with $ ----
found <- MOs[which(MOs$fullname %like% x_withspaces_start[i]),]$mo
if (length(found) > 0) {
@ -248,21 +248,6 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) {
next
}
# try exact match of only genus, with 'species' attached ----
# (this prevents Streptococcus from becoming Peptostreptococcus, since "p" < "s")
found <- MOs[which(MOs$fullname == x_species[i]),]$mo
if (length(found) > 0) {
x[i] <- found[1L]
next
}
# try any match of only genus, with 'species' attached ----
found <- MOs[which(MOs$fullname %like% x_species[i]),]$mo
if (length(found) > 0) {
x[i] <- found[1L]
next
}
# try fullname without start and stop regex, to also find subspecies ----
# like "K. pneu rhino" -> "Klebsiella pneumoniae (rhinoscleromatis)" = KLEPNERH
found <- MOs[which(gsub("[\\(\\)]", "", MOs$fullname) %like% x_withspaces_all[i]),]$mo
@ -322,6 +307,8 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) {
x[x == "PASAER" & toupper(x_backup) != "PASAER" & !(x_backup %like% '^pas?' | x_backup %like% 'aero')] <- "PSEAER"
# avoid detection of Legionella non pneumophila in case of Legionella pneumophila ----
x[x == "LEGNON" & toupper(x_backup) != "LEGNON" & !x_backup %like% 'non'] <- "LEGPNE"
# avoid detection of Streptobacillus in case of Streptococcus ----
x[x == "STB" & toupper(x_backup) != "STB" & !x_backup %like% 'streptob'] <- "STC"
failures <- failures[!failures %in% c(NA, NULL, NaN)]
if (length(failures) > 0) {

View File

@ -99,10 +99,14 @@ is.rsi <- function(x) {
#' @export
#' @importFrom dplyr %>%
is.rsi.eligible <- function(x) {
distinct_val <- x %>% unique() %>% sort() %>% as.character()
# remove all but a-z
distinct_val <- x %>% unique() %>% sort() %>% as.character() %>% gsub("(\\W|\\d)+", "", .)
# remove NAs and empty values
distinct_val <- distinct_val[!is.na(distinct_val) & trimws(distinct_val) != ""]
# get RSI class
distinct_val_rsi <- as.character(suppressWarnings(as.rsi(distinct_val)))
# is not empty and identical to new class
length(distinct_val) > 0 &
identical(distinct_val, distinct_val_rsi)
}

Binary file not shown.

View File

@ -4,7 +4,12 @@ context("mdro.R")
test_that("MDRO works", {
library(dplyr)
outcome <- suppressWarnings(MDRO(septic_patients, "EUCAST", info = TRUE))
expect_error(suppressWarnings(MDRO(septic_patients, "invalid", col_bactid = "mo", info = TRUE)))
expect_error(suppressWarnings(MDRO(septic_patients, "fr", col_bactid = "mo", info = TRUE)))
expect_error(suppressWarnings(MDRO(septic_patients, country = c("de", "nl"), info = TRUE)))
expect_error(suppressWarnings(MDRO(septic_patients, col_mo = "invalid", info = TRUE)))
outcome <- suppressWarnings(MDRO(septic_patients))
outcome <- suppressWarnings(EUCAST_exceptional_phenotypes(septic_patients, info = TRUE))
# check class
expect_equal(outcome %>% class(), c('ordered', 'factor'))

View File

@ -22,6 +22,11 @@ test_that("as.mo works", {
expect_equal(as.character(as.mo("L. pneumophila")), "LEGPNE")
expect_equal(as.character(as.mo("L. non pneumophila")), "LEGNON")
expect_equal(as.character(as.mo("S. beta-haemolytic")), "STCHAE")
expect_equal(as.character(as.mo("Strepto")), "STC") # not Streptobacillus
expect_equal(as.character(as.mo("Streptococcus")), "STC") # not Peptostreptoccus
expect_equal(as.character(as.mo(c("GAS", "GBS"))), c("STCGRA", "STCGRB"))
expect_equal(as.character(as.mo("S. pyo")), "STCPYO") # not Actinomyces pyogenes
@ -31,7 +36,7 @@ test_that("as.mo works", {
expect_equal(as.character(as.mo("Gram negative rods")), "GNR")
# GLIMS
expect_equal(as.character(as.mo("shiboy")), "SHIBOY")
expect_equal(as.character(as.mo("bctfgr")), "BACFRA")
expect_equal(as.character(as.mo("MRSE")), "STAEPI")
expect_equal(as.character(as.mo("VRE")), "ENC")