small improvement for is.rsi.eligible, more unit tests

2025-07-14 21:33:19 +02:00 · 2018-09-14 11:54:01 +02:00
parent cf5711fb0b
commit b0ca49d68d
6 changed files with 31 additions and 29 deletions
--- a/NEWS.md
+++ b/NEWS.md
@ -18,8 +18,8 @@
  thousands_of_E_colis <- rep("E. coli", 25000)
  microbenchmark::microbenchmark(as.mo(thousands_of_E_colis), unit = "s")
  # Unit: seconds
-  #          min      median         max neval
-  #  0.00861352  0.008774335  0.01952958   100
+  #         min       median         max  neval
+  #  0.00861352  0.008774335  0.01952958    100
  ```
 * Renamed all previous references to `bactid` to `mo`, like:
  * Column names inputs of `EUCAST_rules`, `first_isolate` and `key_antibiotics`
@ -55,7 +55,8 @@
 * For `first_isolate`, rows will be ignored when there's no species available
 * Function `ratio` is now deprecated and will be removed in a future release, as it is not really the scope of this package
 * Fix for `as.mic` for values ending in zeroes after a real number
-* Tremendous speed improvement for `as.bactid` (now `as.mo`)
+* Small fix where *B. fragilis* would not be found in the `microorganisms.umcg` data set
+* Fix for `is.rsi.eligible`, now ignores reading marks
 * Added parameters `minimum` and `as_percent` to `portion_df`
 * Support for quasiquotation in the functions series `count_*` and `portions_*`, and `n_rsi`. This allows to check for more than 2 vectors or columns.
  ```r
@ -77,12 +78,12 @@
  my_matrix = with(septic_patients, matrix(c(age, sex), ncol = 2))
  freq(my_matrix)
  ```
-  * Subsetting also possible for lists:
-    ```r
-    my_list = list(age = septic_patients$age, sex = septic_patients$sex)
-    my_list %>% freq(age)
-    my_list %>% freq(sex)
-    ```
+  For lists, subsetting is possible:
+  ```r
+  my_list = list(age = septic_patients$age, sex = septic_patients$sex)
+  my_list %>% freq(age)
+  my_list %>% freq(sex)
+  ```

 #### Other
 * More unit tests to ensure better integrity of functions
--- a/R/mo.R
+++ b/R/mo.R
@ -138,8 +138,6 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) {
  # replace space by regex sign
  x_withspaces <- gsub(" ", ".* ", x, fixed = TRUE)
  x <- gsub(" ", ".*", x, fixed = TRUE)
-  # for species
-  x_species <- paste(x, 'species')
  # add start en stop regex
  x <- paste0('^', x, '$')
  x_withspaces_all <- x_withspaces
@ -228,12 +226,14 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) {
      x[i] <- found[1L]
      next
    }
+
    # try the same, now based on genus + species ----
    found <- MOs[which(paste(MOs$genus, MOs$species) %like% x_withspaces[i]),]$mo
    if (length(found) > 0) {
      x[i] <- found[1L]
      next
    }
+
    # try any match keeping spaces, not ending with $ ----
    found <- MOs[which(MOs$fullname %like% x_withspaces_start[i]),]$mo
    if (length(found) > 0) {
@ -248,21 +248,6 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) {
      next
    }

-    # try exact match of only genus, with 'species' attached ----
-    # (this prevents Streptococcus from becoming Peptostreptococcus, since "p" < "s")
-    found <- MOs[which(MOs$fullname == x_species[i]),]$mo
-    if (length(found) > 0) {
-      x[i] <- found[1L]
-      next
-    }
-
-    # try any match of only genus, with 'species' attached ----
-    found <- MOs[which(MOs$fullname %like% x_species[i]),]$mo
-    if (length(found) > 0) {
-      x[i] <- found[1L]
-      next
-    }
-
    # try fullname without start and stop regex, to also find subspecies ----
    # like "K. pneu rhino" -> "Klebsiella pneumoniae (rhinoscleromatis)" = KLEPNERH
    found <- MOs[which(gsub("[\\(\\)]", "", MOs$fullname) %like% x_withspaces_all[i]),]$mo
@ -322,6 +307,8 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) {
  x[x == "PASAER" & toupper(x_backup) != "PASAER" & !(x_backup %like% '^pas?' | x_backup %like% 'aero')] <- "PSEAER"
  # avoid detection of Legionella non pneumophila in case of Legionella pneumophila ----
  x[x == "LEGNON" & toupper(x_backup) != "LEGNON" & !x_backup %like% 'non'] <- "LEGPNE"
+  # avoid detection of Streptobacillus in case of Streptococcus ----
+  x[x == "STB" & toupper(x_backup) != "STB" & !x_backup %like% 'streptob'] <- "STC"

  failures <- failures[!failures %in% c(NA, NULL, NaN)]
  if (length(failures) > 0) {
--- a/R/rsi.R
+++ b/R/rsi.R
@ -99,10 +99,14 @@ is.rsi <- function(x) {
 #' @export
 #' @importFrom dplyr %>%
 is.rsi.eligible <- function(x) {
-  distinct_val <- x %>% unique() %>% sort() %>% as.character()
+  # remove all but a-z
+  distinct_val <- x %>% unique() %>% sort() %>% as.character() %>% gsub("(\\W|\\d)+", "", .)
+  # remove NAs and empty values
  distinct_val <- distinct_val[!is.na(distinct_val) & trimws(distinct_val) != ""]
+  # get RSI class
  distinct_val_rsi <- as.character(suppressWarnings(as.rsi(distinct_val)))

+  # is not empty and identical to new class
  length(distinct_val) > 0 &
    identical(distinct_val, distinct_val_rsi)
 }
--- a/data/microorganisms.umcg.rda
+++ b/data/microorganisms.umcg.rda
--- a/tests/testthat/test-mdro.R
+++ b/tests/testthat/test-mdro.R
@ -4,7 +4,12 @@ context("mdro.R")
 test_that("MDRO works", {
  library(dplyr)

-  outcome <- suppressWarnings(MDRO(septic_patients, "EUCAST", info = TRUE))
+  expect_error(suppressWarnings(MDRO(septic_patients, "invalid", col_bactid = "mo", info = TRUE)))
+  expect_error(suppressWarnings(MDRO(septic_patients, "fr", col_bactid = "mo", info = TRUE)))
+  expect_error(suppressWarnings(MDRO(septic_patients, country = c("de", "nl"), info = TRUE)))
+  expect_error(suppressWarnings(MDRO(septic_patients, col_mo = "invalid", info = TRUE)))
+
+  outcome <- suppressWarnings(MDRO(septic_patients))
  outcome <- suppressWarnings(EUCAST_exceptional_phenotypes(septic_patients, info = TRUE))
  # check class
  expect_equal(outcome %>% class(), c('ordered', 'factor'))
--- a/tests/testthat/test-mo.R
+++ b/tests/testthat/test-mo.R
@ -22,6 +22,11 @@ test_that("as.mo works", {
  expect_equal(as.character(as.mo("L. pneumophila")), "LEGPNE")
  expect_equal(as.character(as.mo("L. non pneumophila")), "LEGNON")
  expect_equal(as.character(as.mo("S. beta-haemolytic")), "STCHAE")
+  expect_equal(as.character(as.mo("Strepto")), "STC") # not Streptobacillus
+  expect_equal(as.character(as.mo("Streptococcus")), "STC") # not Peptostreptoccus
+
+  expect_equal(as.character(as.mo(c("GAS", "GBS"))), c("STCGRA", "STCGRB"))
+

  expect_equal(as.character(as.mo("S. pyo")), "STCPYO") # not Actinomyces pyogenes

@ -31,7 +36,7 @@ test_that("as.mo works", {
  expect_equal(as.character(as.mo("Gram negative rods")), "GNR")

  # GLIMS
-  expect_equal(as.character(as.mo("shiboy")), "SHIBOY")
+  expect_equal(as.character(as.mo("bctfgr")), "BACFRA")

  expect_equal(as.character(as.mo("MRSE")), "STAEPI")
  expect_equal(as.character(as.mo("VRE")), "ENC")