diff --git a/R/freq.R b/R/freq.R index d0f8c164..90ed50d5 100755 --- a/R/freq.R +++ b/R/freq.R @@ -62,7 +62,7 @@ #' @importFrom dplyr %>% select pull n_distinct group_by arrange desc mutate summarise n_distinct tibble #' @importFrom utils browseVignettes installed.packages #' @importFrom hms is.hms -#' @importFrom crayon red silver +#' @importFrom crayon red green silver #' @keywords summary summarise frequency freq #' @rdname freq #' @name freq @@ -254,20 +254,16 @@ frequency_tbl <- function(x, } } - NAs_to_red <- function(x) { - if (!x %in% c("0", "0.00%")) { - red(x) - } else { - x - } + na_txt <- paste0(NAs %>% length() %>% format(), ' = ', + (NAs %>% length() / (NAs %>% length() + x %>% length())) %>% percent(force_zero = TRUE, round = digits) %>% + sub('NaN', '0', ., fixed = TRUE)) + if (!na_txt %like% "^0 =") { + na_txt <- red(na_txt) + } else { + na_txt <- green(na_txt) } - header_txt <- header_txt %>% paste0(markdown_line, '\nLength: ', (NAs %>% length() + x %>% length()) %>% format(), - ' (of which NA: ', NAs %>% length() %>% format() %>% NAs_to_red(), - ' = ', (NAs %>% length() / (NAs %>% length() + x %>% length())) %>% - percent(force_zero = TRUE, round = digits) %>% - sub('NaN', '0', ., fixed = TRUE) %>% - NAs_to_red(), ')') + ' (of which NA: ', na_txt, ')') header_txt <- header_txt %>% paste0(markdown_line, '\nUnique: ', x %>% n_distinct() %>% format()) if (NROW(x) > 0 & any(class(x) == "character")) { @@ -304,13 +300,13 @@ frequency_tbl <- function(x, if (NROW(x) > 0 & any(class(x) == "rsi")) { header_txt <- header_txt %>% paste0('\n') cnt_S <- sum(x == "S", na.rm = TRUE) - cnt_I <- sum(x == "I", na.rm = TRUE) - cnt_R <- sum(x == "R", na.rm = TRUE) + cnt_IR <- sum(x %in% c("I", "R"), na.rm = TRUE) header_txt <- header_txt %>% paste(markdown_line, '\n%IR: ', - ((cnt_I + cnt_R) / sum(!is.na(x), na.rm = TRUE)) %>% percent(force_zero = TRUE, round = digits)) - header_txt <- header_txt %>% paste0(markdown_line, '\nRatio SIR: 1.0 : ', - (cnt_I / cnt_S) %>% format(digits = 1, nsmall = 1), " : ", - (cnt_R / cnt_S) %>% format(digits = 1, nsmall = 1)) + (cnt_IR / sum(!is.na(x), na.rm = TRUE)) %>% percent(force_zero = TRUE, round = digits), + paste0('(ratio S : IR = 1.0 : ', (cnt_IR / cnt_S) %>% format(digits = 1, nsmall = 1), ")")) + if (NROW(x) < 30) { + header_txt <- header_txt %>% paste(markdown_line, red('\nToo few isolates for reliable resistance interpretation.')) + } } formatdates <- "%e %B %Y" # = d mmmm yyyy @@ -535,7 +531,7 @@ print.frequency_tbl <- function(x, nmax = getOption("max.print.freq", default = if (opt$tbl_format == "pandoc") { title <- bold(title) } else if (opt$tbl_format == "markdown") { - title <- paste0("**", title, "**") + title <- paste0("\n**", title, "**") } if (opt$header == TRUE) { @@ -607,7 +603,7 @@ print.frequency_tbl <- function(x, nmax = getOption("max.print.freq", default = x$cum_percent <- percent(x$cum_percent, force_zero = TRUE) if (opt$tbl_format == "markdown") { - cat("\n\n") + cat("\n") } print( diff --git a/R/mo.R b/R/mo.R index c06f6f54..74089b36 100644 --- a/R/mo.R +++ b/R/mo.R @@ -320,6 +320,18 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = x[i] <- microorganismsDT[mo == 'B_STPHY_CPS', ..property][[1]][1L] next } + if (tolower(x[i]) %like% '^gram[ -]+nega.*' + | tolower(x_trimmed[i]) %like% '^gram[ -]+nega.*') { + # coerce S. coagulase positive + x[i] <- microorganismsDT[mo == 'B_GRAMN', ..property][[1]][1L] + next + } + if (tolower(x[i]) %like% '^gram[ -]+posi.*' + | tolower(x_trimmed[i]) %like% '^gram[ -]+posi.*') { + # coerce S. coagulase positive + x[i] <- microorganismsDT[mo == 'B_GRAMP', ..property][[1]][1L] + next + } } # FIRST TRY FULLNAMES AND CODES @@ -404,6 +416,22 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = next } + + # try splitting of characters in the middle and then find ID ---- + # only when text length is 6 or lower + # like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus, staaur = S. aureus + if (nchar(x_trimmed[i]) <= 6) { + x_length <- nchar(x_trimmed[i]) + x[i] <- paste0(x_trimmed[i] %>% substr(1, x_length / 2), + '.* ', + x_trimmed[i] %>% substr((x_length / 2) + 1, x_length)) + found <- microorganisms.prevDT[fullname %like% paste0('^', x[i]), ..property][[1]] + if (length(found) > 0) { + x[i] <- found[1L] + next + } + } + # try fullname without start and stop regex, to also find subspecies ---- # like "K. pneu rhino" -> "Klebsiella pneumoniae (rhinoscleromatis)" = KLEPNERH found <- microorganisms.prevDT[fullname %like% x_withspaces_start[i], ..property][[1]] @@ -412,37 +440,6 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = next } - # try splitting of characters in the middle and then find ID ---- - # only when text length is 6 or lower - # like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus, staaur = S. aureus - if (nchar(x_trimmed[i]) <= 6) { - x_split <- x - x_length <- nchar(x_trimmed[i]) - x_split[i] <- paste0(x_trimmed[i] %>% substr(1, x_length / 2) %>% trimws(), - '.* ', - x_trimmed[i] %>% substr((x_length / 2) + 1, x_length) %>% trimws()) - found <- microorganisms.prevDT[fullname %like% paste0('^', x_split[i]), ..property][[1]] - if (length(found) > 0) { - x[i] <- found[1L] - next - } - } - - # try any match with text before and after original search string ---- - # so "negative rods" will be "GNR" - # if (x_trimmed[i] %like% "^Gram") { - # x_trimmed[i] <- gsub("^Gram", "", x_trimmed[i], ignore.case = TRUE) - # # remove leading and trailing spaces again - # x_trimmed[i] <- trimws(x_trimmed[i], which = "both") - # } - # if (!is.na(x_trimmed[i])) { - # found <- microorganisms.prevDT[fullname %like% x_trimmed[i], ..property][[1]] - # if (length(found) > 0) { - # x[i] <- found[1L] - # next - # } - # } - # THEN TRY ALL OTHERS ---- found <- microorganisms.unprevDT[tolower(fullname) == tolower(x_backup[i]), ..property][[1]] # most probable: is exact match in fullname @@ -490,6 +487,21 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = next } + # try splitting of characters in the middle and then find ID ---- + # only when text length is 6 or lower + # like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus, staaur = S. aureus + if (nchar(x_trimmed[i]) <= 6) { + x_length <- nchar(x_trimmed[i]) + x[i] <- paste0(x_trimmed[i] %>% substr(1, x_length / 2), + '.* ', + x_trimmed[i] %>% substr((x_length / 2) + 1, x_length)) + found <- microorganisms.unprevDT[fullname %like% paste0('^', x[i]), ..property][[1]] + if (length(found) > 0) { + x[i] <- found[1L] + next + } + } + # try fullname without start and stop regex, to also find subspecies ---- # like "K. pneu rhino" -> "Klebsiella pneumoniae (rhinoscleromatis)" = KLEPNERH found <- microorganisms.unprevDT[fullname %like% x_withspaces_start[i], ..property][[1]] @@ -498,37 +510,6 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = next } - # try splitting of characters in the middle and then find ID ---- - # only when text length is 6 or lower - # like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus, staaur = S. aureus - if (nchar(x_trimmed[i]) <= 6) { - x_split <- x - x_length <- nchar(x_trimmed[i]) - x_split[i] <- paste0(x_trimmed[i] %>% substr(1, x_length / 2) %>% trimws(), - '.* ', - x_trimmed[i] %>% substr((x_length / 2) + 1, x_length) %>% trimws()) - found <- microorganisms.unprevDT[fullname %like% paste0('^', x_split[i]), ..property][[1]] - if (length(found) > 0) { - x[i] <- found[1L] - next - } - } - - # # try any match with text before and after original search string ---- - # # so "negative rods" will be "GNR" - # if (x_trimmed[i] %like% "^Gram") { - # x_trimmed[i] <- gsub("^Gram", "", x_trimmed[i], ignore.case = TRUE) - # # remove leading and trailing spaces again - # x_trimmed[i] <- trimws(x_trimmed[i], which = "both") - # } - # if (!is.na(x_trimmed[i])) { - # found <- microorganisms.unprevDT[fullname %like% x_trimmed[i], ..property][[1]] - # if (length(found) > 0) { - # x[i] <- found[1L] - # next - # } - # } - # MISCELLANEOUS ---- # look for old taxonomic names ---- diff --git a/README.md b/README.md index 7e441fb6..176e5da7 100755 --- a/README.md +++ b/README.md @@ -153,7 +153,7 @@ Percentage of syntax lines checked | [![Code_Coverage](https://codecov.io/gl/msb If so, try it with: ```r install.packages("devtools") -devtools::install_git("https://gitlab.com/msberends/AMR") +devtools::install_gitlab("msberends/AMR") ``` ## How to use it? diff --git a/tests/testthat/test-atc.R b/tests/testthat/test-atc.R index fe6f3de4..342aed7f 100755 --- a/tests/testthat/test-atc.R +++ b/tests/testthat/test-atc.R @@ -10,8 +10,8 @@ test_that("atc_property works", { expect_equal(atc_property("J01CA04", property = "DDD"), atc_ddd("J01CA04")) - # expect_identical(atc_property("J01CA04", property = "Groups"), - # atc_groups("J01CA04")) + expect_identical(atc_property("J01CA04", property = "Groups"), + atc_groups("J01CA04")) expect_warning(atc_property("ABCDEFG", property = "DDD")) diff --git a/tests/testthat/test-mo.R b/tests/testthat/test-mo.R index d12ee4e5..165c9182 100644 --- a/tests/testthat/test-mo.R +++ b/tests/testthat/test-mo.R @@ -22,14 +22,11 @@ test_that("as.mo works", { expect_equal(as.character(as.mo("Bartonella")), "B_BRTNL") expect_equal(as.character(as.mo("C. difficile")), "B_CTRDM_DIF") expect_equal(as.character(as.mo("L. pneumophila")), "B_LGNLL_PNE") - # expect_equal(as.character(as.mo("L. non pneumophila")), "LEGNON") - # expect_equal(as.character(as.mo("S. beta-haemolytic")), "STCHAE") expect_equal(as.character(as.mo("Strepto")), "B_STRPTC") expect_equal(as.character(as.mo("Streptococcus")), "B_STRPTC") # not Peptostreptoccus expect_equal(as.character(as.mo(c("GAS", "GBS"))), c("B_STRPTC_GRA", "B_STRPTC_GRB")) - expect_equal(as.character(as.mo("S. pyo")), "B_STRPTC_PYO") # not Actinomyces pyogenes expect_equal(as.character(as.mo("P. aer")), "B_PDMNS_AER") # not Pasteurella aerogenes @@ -50,6 +47,9 @@ test_that("as.mo works", { expect_equal(as.character(as.mo("CPS")), "B_STPHY_CPS") expect_equal(as.character(as.mo("CoPS")), "B_STPHY_CPS") + expect_equal(as.character(as.mo(c("Gram negative", "Gram positive"))), c("B_GRAMN", "B_GRAMP")) + + # prevalent MO expect_identical( as.character( as.mo(c("stau", @@ -61,6 +61,21 @@ test_that("as.mo works", { "MRSA", "VISA"))), rep("B_STPHY_AUR", 8)) + # unprevalent MO + expect_identical( + as.character( + as.mo(c("buno", + "BUNO", + "burnod", + "B. nodosa", + "B nodosa", + "Burkholderia nodosa"))), + rep("B_BRKHL_NOD", 6)) + + # empty values + expect_identical(as.character(as.mo(c("", NA, NaN))), rep(NA_character_, 3)) + # too few characters + expect_warning(as.mo("ab")) # check for Becker classification expect_identical(as.character(guess_mo("S. epidermidis", Becker = FALSE)), "B_STPHY_EPI") @@ -143,6 +158,7 @@ test_that("as.mo works", { # check less prevalent MOs expect_equal(as.character(as.mo("Gomphosphaeria aponina delicatula")), "B_GMPHS_APO_DEL") + expect_equal(as.character(as.mo("Gomphosphaeria apo del")), "B_GMPHS_APO_DEL") expect_equal(as.character(as.mo("G apo deli")), "B_GMPHS_APO_DEL") expect_equal(as.character(as.mo("Gomphosphaeria aponina")), "B_GMPHS_APO") expect_equal(as.character(as.mo("Gomphosphaeria species")), "B_GMPHS") @@ -152,6 +168,11 @@ test_that("as.mo works", { # check old names expect_equal(suppressMessages(as.character(as.mo("Escherichia blattae"))), "B_SHMWL_BLA") + # - Didymosphaeria spartinae (unprevalent) + expect_warning(suppressMessages(as.mo("D spartin", allow_uncertain = TRUE))) + # - was renames to Leptosphaeria obiones + expect_equal(suppressWarnings(suppressMessages(as.character(as.mo("D spartin", allow_uncertain = TRUE)))), + "F_LPTSP_OBI") # check uncertain names expect_equal(suppressWarnings(as.character(as.mo("esco extra_text", allow_uncertain = FALSE))), NA_character_)