more unit tests

2026-03-07 12:03:10 +01:00 · 2018-11-02 10:27:57 +01:00
parent e601fd259a
commit 40a159e78d
5 changed files with 87 additions and 89 deletions
--- a/R/freq.R
+++ b/R/freq.R
@@ -62,7 +62,7 @@
 #' @importFrom dplyr %>% select pull n_distinct group_by arrange desc mutate summarise n_distinct tibble
 #' @importFrom utils browseVignettes installed.packages
 #' @importFrom hms is.hms
-#' @importFrom crayon red silver
+#' @importFrom crayon red green silver
 #' @keywords summary summarise frequency freq
 #' @rdname freq
 #' @name freq
@@ -254,20 +254,16 @@ frequency_tbl <- function(x,
    }
  }

-  NAs_to_red <- function(x) {
-    if (!x %in% c("0", "0.00%")) {
-      red(x)
-    } else {
-      x
-    }
+  na_txt <- paste0(NAs %>% length() %>% format(), ' = ',
+                   (NAs %>% length() / (NAs %>% length() + x %>% length())) %>% percent(force_zero = TRUE, round = digits) %>%
+                     sub('NaN', '0', ., fixed = TRUE))
+  if (!na_txt %like% "^0 =") {
+    na_txt <- red(na_txt)
+  } else {
+    na_txt <- green(na_txt)
  }
-
  header_txt <- header_txt %>% paste0(markdown_line, '\nLength:    ', (NAs %>% length() + x %>% length()) %>% format(),
-                                      ' (of which NA: ', NAs %>% length() %>% format() %>% NAs_to_red(),
-                                      ' = ', (NAs %>% length() / (NAs %>% length() + x %>% length())) %>%
-                                        percent(force_zero = TRUE, round = digits) %>%
-                                        sub('NaN', '0', ., fixed = TRUE) %>%
-                                        NAs_to_red(), ')')
+                                      ' (of which NA: ', na_txt, ')')
  header_txt <- header_txt %>% paste0(markdown_line, '\nUnique:    ', x %>% n_distinct() %>% format())

  if (NROW(x) > 0 & any(class(x) == "character")) {
@@ -304,13 +300,13 @@ frequency_tbl <- function(x,
  if (NROW(x) > 0 & any(class(x) == "rsi")) {
    header_txt <- header_txt %>% paste0('\n')
    cnt_S <- sum(x == "S", na.rm = TRUE)
-    cnt_I <- sum(x == "I", na.rm = TRUE)
-    cnt_R <- sum(x == "R", na.rm = TRUE)
+    cnt_IR <- sum(x %in% c("I", "R"), na.rm = TRUE)
    header_txt <- header_txt %>% paste(markdown_line, '\n%IR:      ',
-                                       ((cnt_I + cnt_R) / sum(!is.na(x), na.rm = TRUE)) %>% percent(force_zero = TRUE, round = digits))
-    header_txt <- header_txt %>% paste0(markdown_line, '\nRatio SIR: 1.0 : ',
-                                        (cnt_I / cnt_S) %>% format(digits = 1, nsmall = 1), " : ",
-                                        (cnt_R / cnt_S) %>% format(digits = 1, nsmall = 1))
+                                       (cnt_IR / sum(!is.na(x), na.rm = TRUE)) %>% percent(force_zero = TRUE, round = digits),
+                                       paste0('(ratio S : IR = 1.0 : ', (cnt_IR / cnt_S) %>% format(digits = 1, nsmall = 1), ")"))
+    if (NROW(x) < 30) {
+      header_txt <- header_txt %>% paste(markdown_line, red('\nToo few isolates for reliable resistance interpretation.'))
+    }
  }

  formatdates <- "%e %B %Y" # = d mmmm yyyy
@@ -535,7 +531,7 @@ print.frequency_tbl <- function(x, nmax = getOption("max.print.freq", default =
  if (opt$tbl_format == "pandoc") {
    title <- bold(title)
  } else if (opt$tbl_format == "markdown") {
-    title <- paste0("**", title, "**")
+    title <- paste0("\n**", title, "**")
  }

  if (opt$header == TRUE) {
@@ -607,7 +603,7 @@ print.frequency_tbl <- function(x, nmax = getOption("max.print.freq", default =
  x$cum_percent <- percent(x$cum_percent, force_zero = TRUE)

  if (opt$tbl_format == "markdown") {
-    cat("\n\n")
+    cat("\n")
  }

  print(
--- a/R/mo.R
+++ b/R/mo.R
@@ -320,6 +320,18 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
          x[i] <- microorganismsDT[mo == 'B_STPHY_CPS', ..property][[1]][1L]
          next
        }
+        if (tolower(x[i]) %like% '^gram[ -]+nega.*'
+            | tolower(x_trimmed[i]) %like% '^gram[ -]+nega.*') {
+          # coerce S. coagulase positive
+          x[i] <- microorganismsDT[mo == 'B_GRAMN', ..property][[1]][1L]
+          next
+        }
+        if (tolower(x[i]) %like% '^gram[ -]+posi.*'
+            | tolower(x_trimmed[i]) %like% '^gram[ -]+posi.*') {
+          # coerce S. coagulase positive
+          x[i] <- microorganismsDT[mo == 'B_GRAMP', ..property][[1]][1L]
+          next
+        }
      }

      # FIRST TRY FULLNAMES AND CODES
@@ -404,6 +416,22 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
        next
      }

+
+      # try splitting of characters in the middle and then find ID ----
+      # only when text length is 6 or lower
+      # like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus, staaur = S. aureus
+      if (nchar(x_trimmed[i]) <= 6) {
+        x_length <- nchar(x_trimmed[i])
+        x[i] <- paste0(x_trimmed[i] %>% substr(1, x_length / 2),
+                       '.* ',
+                       x_trimmed[i] %>% substr((x_length / 2) + 1, x_length))
+        found <- microorganisms.prevDT[fullname %like% paste0('^', x[i]), ..property][[1]]
+        if (length(found) > 0) {
+          x[i] <- found[1L]
+          next
+        }
+      }
+
      # try fullname without start and stop regex, to also find subspecies ----
      # like "K. pneu rhino" -> "Klebsiella pneumoniae (rhinoscleromatis)" = KLEPNERH
      found <- microorganisms.prevDT[fullname %like% x_withspaces_start[i], ..property][[1]]
@@ -412,37 +440,6 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
        next
      }

-      # try splitting of characters in the middle and then find ID ----
-      # only when text length is 6 or lower
-      # like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus, staaur = S. aureus
-      if (nchar(x_trimmed[i]) <= 6) {
-        x_split <- x
-        x_length <- nchar(x_trimmed[i])
-        x_split[i] <- paste0(x_trimmed[i] %>% substr(1, x_length / 2) %>% trimws(),
-                             '.* ',
-                             x_trimmed[i] %>% substr((x_length / 2) + 1, x_length) %>% trimws())
-        found <- microorganisms.prevDT[fullname %like% paste0('^', x_split[i]), ..property][[1]]
-        if (length(found) > 0) {
-          x[i] <- found[1L]
-          next
-        }
-      }
-
-      # try any match with text before and after original search string ----
-      # so "negative rods" will be "GNR"
-      # if (x_trimmed[i] %like% "^Gram") {
-      #   x_trimmed[i] <- gsub("^Gram", "", x_trimmed[i], ignore.case = TRUE)
-      #   # remove leading and trailing spaces again
-      #   x_trimmed[i] <- trimws(x_trimmed[i], which = "both")
-      # }
-      # if (!is.na(x_trimmed[i])) {
-      #   found <- microorganisms.prevDT[fullname %like% x_trimmed[i], ..property][[1]]
-      #   if (length(found) > 0) {
-      #     x[i] <- found[1L]
-      #     next
-      #   }
-      # }
-
      # THEN TRY ALL OTHERS ----
      found <- microorganisms.unprevDT[tolower(fullname) == tolower(x_backup[i]), ..property][[1]]
      # most probable: is exact match in fullname
@@ -490,6 +487,21 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
        next
      }

+      # try splitting of characters in the middle and then find ID ----
+      # only when text length is 6 or lower
+      # like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus, staaur = S. aureus
+      if (nchar(x_trimmed[i]) <= 6) {
+        x_length <- nchar(x_trimmed[i])
+        x[i] <- paste0(x_trimmed[i] %>% substr(1, x_length / 2),
+                       '.* ',
+                       x_trimmed[i] %>% substr((x_length / 2) + 1, x_length))
+        found <- microorganisms.unprevDT[fullname %like% paste0('^', x[i]), ..property][[1]]
+        if (length(found) > 0) {
+          x[i] <- found[1L]
+          next
+        }
+      }
+
      # try fullname without start and stop regex, to also find subspecies ----
      # like "K. pneu rhino" -> "Klebsiella pneumoniae (rhinoscleromatis)" = KLEPNERH
      found <- microorganisms.unprevDT[fullname %like% x_withspaces_start[i], ..property][[1]]
@@ -498,37 +510,6 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
        next
      }

-      # try splitting of characters in the middle and then find ID ----
-      # only when text length is 6 or lower
-      # like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus, staaur = S. aureus
-      if (nchar(x_trimmed[i]) <= 6) {
-        x_split <- x
-        x_length <- nchar(x_trimmed[i])
-        x_split[i] <- paste0(x_trimmed[i] %>% substr(1, x_length / 2) %>% trimws(),
-                             '.* ',
-                             x_trimmed[i] %>% substr((x_length / 2) + 1, x_length) %>% trimws())
-        found <- microorganisms.unprevDT[fullname %like% paste0('^', x_split[i]), ..property][[1]]
-        if (length(found) > 0) {
-          x[i] <- found[1L]
-          next
-        }
-      }
-
-      # # try any match with text before and after original search string ----
-      # # so "negative rods" will be "GNR"
-      # if (x_trimmed[i] %like% "^Gram") {
-      #   x_trimmed[i] <- gsub("^Gram", "", x_trimmed[i], ignore.case = TRUE)
-      #   # remove leading and trailing spaces again
-      #   x_trimmed[i] <- trimws(x_trimmed[i], which = "both")
-      # }
-      # if (!is.na(x_trimmed[i])) {
-      #   found <- microorganisms.unprevDT[fullname %like% x_trimmed[i], ..property][[1]]
-      #   if (length(found) > 0) {
-      #     x[i] <- found[1L]
-      #     next
-      #   }
-      # }
-
      # MISCELLANEOUS ----

      # look for old taxonomic names ----
--- a/README.md
+++ b/README.md
@@ -153,7 +153,7 @@ Percentage of syntax lines checked | [![Code_Coverage](https://codecov.io/gl/msb
 If so, try it with:
 ```r
 install.packages("devtools") 
-devtools::install_git("https://gitlab.com/msberends/AMR")
+devtools::install_gitlab("msberends/AMR")
 ```

 ## How to use it?
--- a/tests/testthat/test-atc.R
+++ b/tests/testthat/test-atc.R
@@ -10,8 +10,8 @@ test_that("atc_property works", {
    expect_equal(atc_property("J01CA04", property = "DDD"),
                 atc_ddd("J01CA04"))

-    # expect_identical(atc_property("J01CA04", property = "Groups"),
-    #                  atc_groups("J01CA04"))
+    expect_identical(atc_property("J01CA04", property = "Groups"),
+                     atc_groups("J01CA04"))

    expect_warning(atc_property("ABCDEFG", property = "DDD"))

--- a/tests/testthat/test-mo.R
+++ b/tests/testthat/test-mo.R
@@ -22,14 +22,11 @@ test_that("as.mo works", {
  expect_equal(as.character(as.mo("Bartonella")), "B_BRTNL")
  expect_equal(as.character(as.mo("C. difficile")), "B_CTRDM_DIF")
  expect_equal(as.character(as.mo("L. pneumophila")), "B_LGNLL_PNE")
-  # expect_equal(as.character(as.mo("L. non pneumophila")), "LEGNON")
-  # expect_equal(as.character(as.mo("S. beta-haemolytic")), "STCHAE")
  expect_equal(as.character(as.mo("Strepto")), "B_STRPTC")
  expect_equal(as.character(as.mo("Streptococcus")), "B_STRPTC") # not Peptostreptoccus

  expect_equal(as.character(as.mo(c("GAS", "GBS"))), c("B_STRPTC_GRA", "B_STRPTC_GRB"))

-
  expect_equal(as.character(as.mo("S. pyo")), "B_STRPTC_PYO") # not Actinomyces pyogenes

  expect_equal(as.character(as.mo("P. aer")), "B_PDMNS_AER") # not Pasteurella aerogenes
@@ -50,6 +47,9 @@ test_that("as.mo works", {
  expect_equal(as.character(as.mo("CPS")), "B_STPHY_CPS")
  expect_equal(as.character(as.mo("CoPS")), "B_STPHY_CPS")

+  expect_equal(as.character(as.mo(c("Gram negative", "Gram positive"))), c("B_GRAMN", "B_GRAMP"))
+
+  # prevalent MO
  expect_identical(
    as.character(
      as.mo(c("stau",
@@ -61,6 +61,21 @@ test_that("as.mo works", {
              "MRSA",
              "VISA"))),
    rep("B_STPHY_AUR", 8))
+  # unprevalent MO
+  expect_identical(
+    as.character(
+      as.mo(c("buno",
+              "BUNO",
+              "burnod",
+              "B. nodosa",
+              "B nodosa",
+              "Burkholderia nodosa"))),
+    rep("B_BRKHL_NOD", 6))
+
+  # empty values
+  expect_identical(as.character(as.mo(c("", NA, NaN))), rep(NA_character_, 3))
+  # too few characters
+  expect_warning(as.mo("ab"))

  # check for Becker classification
  expect_identical(as.character(guess_mo("S. epidermidis", Becker = FALSE)), "B_STPHY_EPI")
@@ -143,6 +158,7 @@ test_that("as.mo works", {

  # check less prevalent MOs
  expect_equal(as.character(as.mo("Gomphosphaeria aponina delicatula")), "B_GMPHS_APO_DEL")
+  expect_equal(as.character(as.mo("Gomphosphaeria apo del")), "B_GMPHS_APO_DEL")
  expect_equal(as.character(as.mo("G apo deli")), "B_GMPHS_APO_DEL")
  expect_equal(as.character(as.mo("Gomphosphaeria  aponina")), "B_GMPHS_APO")
  expect_equal(as.character(as.mo("Gomphosphaeria  species")), "B_GMPHS")
@@ -152,6 +168,11 @@ test_that("as.mo works", {

  # check old names
  expect_equal(suppressMessages(as.character(as.mo("Escherichia blattae"))), "B_SHMWL_BLA")
+  # - Didymosphaeria spartinae (unprevalent)
+  expect_warning(suppressMessages(as.mo("D spartin", allow_uncertain = TRUE)))
+  # - was renames to Leptosphaeria obiones
+  expect_equal(suppressWarnings(suppressMessages(as.character(as.mo("D spartin", allow_uncertain = TRUE)))),
+                                "F_LPTSP_OBI")

  # check uncertain names
  expect_equal(suppressWarnings(as.character(as.mo("esco extra_text", allow_uncertain = FALSE))), NA_character_)