small improvement for is.rsi.eligible, more unit tests

2026-06-30 03:36:21 +02:00 · 2018-09-14 11:54:01 +02:00
parent cf5711fb0b
commit b0ca49d68d
6 changed files with 31 additions and 29 deletions
--- a/NEWS.md
+++ b/NEWS.md
@@ -18,8 +18,8 @@
  thousands_of_E_colis <- rep("E. coli", 25000)
  microbenchmark::microbenchmark(as.mo(thousands_of_E_colis), unit = "s")
  # Unit: seconds
-  #          min      median         max neval
+  #         min       median         max  neval
-  #  0.00861352  0.008774335  0.01952958   100
+  #  0.00861352  0.008774335  0.01952958    100
  ```
 * Renamed all previous references to `bactid` to `mo`, like:
  * Column names inputs of `EUCAST_rules`, `first_isolate` and `key_antibiotics`
@@ -55,7 +55,8 @@
 * For `first_isolate`, rows will be ignored when there's no species available
 * Function `ratio` is now deprecated and will be removed in a future release, as it is not really the scope of this package
 * Fix for `as.mic` for values ending in zeroes after a real number
-* Tremendous speed improvement for `as.bactid` (now `as.mo`)
+* Small fix where *B. fragilis* would not be found in the `microorganisms.umcg` data set
 * Fix for `is.rsi.eligible`, now ignores reading marks
 * Added parameters `minimum` and `as_percent` to `portion_df`
 * Support for quasiquotation in the functions series `count_*` and `portions_*`, and `n_rsi`. This allows to check for more than 2 vectors or columns.
  ```r
@@ -77,12 +78,12 @@
  my_matrix = with(septic_patients, matrix(c(age, sex), ncol = 2))
  freq(my_matrix)
  ```
-  * Subsetting also possible for lists:
+  For lists, subsetting is possible:
-    ```r
+  ```r
-    my_list = list(age = septic_patients$age, sex = septic_patients$sex)
+  my_list = list(age = septic_patients$age, sex = septic_patients$sex)
-    my_list %>% freq(age)
+  my_list %>% freq(age)
-    my_list %>% freq(sex)
+  my_list %>% freq(sex)
-    ```
+  ```
 #### Other
 * More unit tests to ensure better integrity of functions
--- a/R/mo.R
+++ b/R/mo.R
@@ -138,8 +138,6 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) {
  # replace space by regex sign
  x_withspaces <- gsub(" ", ".* ", x, fixed = TRUE)
  x <- gsub(" ", ".*", x, fixed = TRUE)
  # for species
  x_species <- paste(x, 'species')
  # add start en stop regex
  x <- paste0('^', x, '$')
  x_withspaces_all <- x_withspaces
@@ -228,12 +226,14 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) {
      x[i] <- found[1L]
      next
    }
    # try the same, now based on genus + species ----
    found <- MOs[which(paste(MOs$genus, MOs$species) %like% x_withspaces[i]),]$mo
    if (length(found) > 0) {
      x[i] <- found[1L]
      next
    }
    # try any match keeping spaces, not ending with $ ----
    found <- MOs[which(MOs$fullname %like% x_withspaces_start[i]),]$mo
    if (length(found) > 0) {
@@ -248,21 +248,6 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) {
      next
    }
    # try exact match of only genus, with 'species' attached ----
    # (this prevents Streptococcus from becoming Peptostreptococcus, since "p" < "s")
    found <- MOs[which(MOs$fullname == x_species[i]),]$mo
    if (length(found) > 0) {
      x[i] <- found[1L]
      next
    }
    # try any match of only genus, with 'species' attached ----
    found <- MOs[which(MOs$fullname %like% x_species[i]),]$mo
    if (length(found) > 0) {
      x[i] <- found[1L]
      next
    }
    # try fullname without start and stop regex, to also find subspecies ----
    # like "K. pneu rhino" -> "Klebsiella pneumoniae (rhinoscleromatis)" = KLEPNERH
    found <- MOs[which(gsub("[\\(\\)]", "", MOs$fullname) %like% x_withspaces_all[i]),]$mo
@@ -322,6 +307,8 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) {
  x[x == "PASAER" & toupper(x_backup) != "PASAER" & !(x_backup %like% '^pas?' | x_backup %like% 'aero')] <- "PSEAER"
  # avoid detection of Legionella non pneumophila in case of Legionella pneumophila ----
  x[x == "LEGNON" & toupper(x_backup) != "LEGNON" & !x_backup %like% 'non'] <- "LEGPNE"
  # avoid detection of Streptobacillus in case of Streptococcus ----
  x[x == "STB" & toupper(x_backup) != "STB" & !x_backup %like% 'streptob'] <- "STC"
  failures <- failures[!failures %in% c(NA, NULL, NaN)]
  if (length(failures) > 0) {
--- a/R/rsi.R
+++ b/R/rsi.R
@@ -99,10 +99,14 @@ is.rsi <- function(x) {
 #' @export
 #' @importFrom dplyr %>%
 is.rsi.eligible <- function(x) {
-  distinct_val <- x %>% unique() %>% sort() %>% as.character()
+  # remove all but a-z
  distinct_val <- x %>% unique() %>% sort() %>% as.character() %>% gsub("(\\W|\\d)+", "", .)
  # remove NAs and empty values
  distinct_val <- distinct_val[!is.na(distinct_val) & trimws(distinct_val) != ""]
  # get RSI class
  distinct_val_rsi <- as.character(suppressWarnings(as.rsi(distinct_val)))
  # is not empty and identical to new class
  length(distinct_val) > 0 &
    identical(distinct_val, distinct_val_rsi)
 }
--- a/data/microorganisms.umcg.rda
+++ b/data/microorganisms.umcg.rda
--- a/tests/testthat/test-mdro.R
+++ b/tests/testthat/test-mdro.R
@@ -4,7 +4,12 @@ context("mdro.R")
 test_that("MDRO works", {
  library(dplyr)
-  outcome <- suppressWarnings(MDRO(septic_patients, "EUCAST", info = TRUE))
+  expect_error(suppressWarnings(MDRO(septic_patients, "invalid", col_bactid = "mo", info = TRUE)))
  expect_error(suppressWarnings(MDRO(septic_patients, "fr", col_bactid = "mo", info = TRUE)))
  expect_error(suppressWarnings(MDRO(septic_patients, country = c("de", "nl"), info = TRUE)))
  expect_error(suppressWarnings(MDRO(septic_patients, col_mo = "invalid", info = TRUE)))
  outcome <- suppressWarnings(MDRO(septic_patients))
  outcome <- suppressWarnings(EUCAST_exceptional_phenotypes(septic_patients, info = TRUE))
  # check class
  expect_equal(outcome %>% class(), c('ordered', 'factor'))
--- a/tests/testthat/test-mo.R
+++ b/tests/testthat/test-mo.R
@@ -22,6 +22,11 @@ test_that("as.mo works", {
  expect_equal(as.character(as.mo("L. pneumophila")), "LEGPNE")
  expect_equal(as.character(as.mo("L. non pneumophila")), "LEGNON")
  expect_equal(as.character(as.mo("S. beta-haemolytic")), "STCHAE")
  expect_equal(as.character(as.mo("Strepto")), "STC") # not Streptobacillus
  expect_equal(as.character(as.mo("Streptococcus")), "STC") # not Peptostreptoccus
  expect_equal(as.character(as.mo(c("GAS", "GBS"))), c("STCGRA", "STCGRB"))
  expect_equal(as.character(as.mo("S. pyo")), "STCPYO") # not Actinomyces pyogenes
@@ -31,7 +36,7 @@ test_that("as.mo works", {
  expect_equal(as.character(as.mo("Gram negative rods")), "GNR")
  # GLIMS
-  expect_equal(as.character(as.mo("shiboy")), "SHIBOY")
+  expect_equal(as.character(as.mo("bctfgr")), "BACFRA")
  expect_equal(as.character(as.mo("MRSE")), "STAEPI")
  expect_equal(as.character(as.mo("VRE")), "ENC")