diff --git a/.travis.yml b/.travis.yml index e9cab784..fec8dd4a 100755 --- a/.travis.yml +++ b/.travis.yml @@ -26,8 +26,12 @@ jobs: warnings_are_errors: false matrix: allow_failures: - - os: osx - # - r: devel + - r: 3.2 + os: osx + - r: 3.3 + os: osx + - r: 3.4 + os: osx r_packages: covr cache: packages cran: https://cran.rstudio.com diff --git a/NEWS.md b/NEWS.md index b0879032..16a66e62 100755 --- a/NEWS.md +++ b/NEWS.md @@ -4,7 +4,23 @@ * Functions `count_R`, `count_IR`, `count_I`, `count_SI` and `count_S` to selectively count resistant or susceptible isolates * Extra function `count_df` (which works like `portion_df`) to get all counts of S, I and R of a data set with antibiotic columns, with support for grouped variables * Function `is.rsi.eligible` to check for columns that have valid antimicrobial results, but do not have the `rsi` class yet. Transform the columns of your raw data with: `data %>% mutate_if(is.rsi.eligible, as.rsi)` -* Functions `as.mo` and `is.mo` as replacements for `as.bactid` and `is.bactid` (since the `microoganisms` data set not only contains bacteria). These last two functions are deprecated and will be removed in a future release. +* Functions `as.mo` and `is.mo` as replacements for `as.bactid` and `is.bactid` (since the `microoganisms` data set not only contains bacteria). These last two functions are deprecated and will be removed in a future release. The `as.mo` function determines microbial IDs using Artificial Intelligence (AI): + ```r + as.mo("E. coli") + # [1] ESCCOL + as.mo("MRSA") + # [1] STAAUR + as.mo("S group A") + # [1] STCGRA + ``` + And with great speed too - on a quite regular Linux server from 2007 it takes us 0.009 seconds to transform 25,000 items: + ```r + thousands_of_E_colis <- rep("E. coli", 25000) + microbenchmark::microbenchmark(as.mo(thousands_of_E_colis), unit = "s") + # Unit: seconds + # min median max neval + # 0.00861352 0.008774335 0.01952958 100 + ``` * Renamed all previous references to `bactid` to `mo`, like: * Column names inputs of `EUCAST_rules`, `first_isolate` and `key_antibiotics` * Column names of datasets `microorganisms` and `septic_patients` diff --git a/R/deprecated.R b/R/deprecated.R index ee0b250e..bdc940d3 100644 --- a/R/deprecated.R +++ b/R/deprecated.R @@ -42,6 +42,36 @@ guess_bactid <- function(...) { guess_mo(...) } +#' @exportMethod print.bactid +#' @export +#' @noRd +print.bactid <- function(x, ...) { + cat("Class 'bactid'\n") + print.default(as.character(x), quote = FALSE) +} + +#' @exportMethod as.data.frame.bactid +#' @export +#' @noRd +as.data.frame.bactid <- function (x, ...) { + # same as as.data.frame.character but with removed stringsAsFactors + nm <- paste(deparse(substitute(x), width.cutoff = 500L), + collapse = " ") + if (!"nm" %in% names(list(...))) { + as.data.frame.vector(x, ..., nm = nm) + } else { + as.data.frame.vector(x, ...) + } +} + +#' @exportMethod pull.bactid +#' @export +#' @importFrom dplyr pull +#' @noRd +pull.bactid <- function(.data, ...) { + pull(as.data.frame(.data), ...) +} + #' @rdname AMR-deprecated #' @export ratio <- function(x, ratio) { @@ -53,13 +83,13 @@ ratio <- function(x, ratio) { if (length(ratio) == 1) { if (ratio %like% '^([0-9]+([.][0-9]+)?[-,:])+[0-9]+([.][0-9]+)?$') { # support for "1:2:1", "1-2-1", "1,2,1" and even "1.75:2:1.5" - ratio <- ratio %>% base::strsplit("[-,:]") %>% base::unlist() %>% base::as.double() + ratio <- ratio %>% strsplit("[-,:]") %>% unlist() %>% as.double() } else { stop('Invalid `ratio`: ', ratio, '.') } } - if (length(x) != length(ratio)) { + if (length(x) != 1 & length(x) != length(ratio)) { stop('`x` and `ratio` must be of same size.') } - base::sum(x, na.rm = TRUE) * (ratio / base::sum(ratio, na.rm = TRUE)) + sum(x, na.rm = TRUE) * (ratio / sum(ratio, na.rm = TRUE)) } diff --git a/R/mo.R b/R/mo.R index ddf2a6dd..f4489d40 100644 --- a/R/mo.R +++ b/R/mo.R @@ -158,56 +158,23 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) { x[i] <- NA next } - if (x_backup[i] %in% AMR::microorganisms$mo) { + if (toupper(x_backup[i]) %in% AMR::microorganisms$mo) { # is already a valid MO code - x[i] <- x_backup[i] + x[i] <- toupper(x_backup[i]) next } - if (x_trimmed[i] %in% AMR::microorganisms$mo) { + if (toupper(x_trimmed[i]) %in% AMR::microorganisms$mo) { # is already a valid MO code - x[i] <- x_trimmed[i] + x[i] <- toupper(x_trimmed[i]) next } - if (x_backup[i] %in% AMR::microorganisms$fullname) { + if (tolower(x_backup[i]) %in% tolower(AMR::microorganisms$fullname)) { # is exact match in fullname - x[i] <- AMR::microorganisms[which(AMR::microorganisms$fullname == x_backup[i]), ]$mo[1] + x[i] <- AMR::microorganisms[which(AMR::microorganisms$fullname == x_backup[i]), ]$mo[1L] next } - if (tolower(x[i]) == '^e.*coli$') { - # avoid detection of Entamoeba coli in case of E. coli - x[i] <- 'ESCCOL' - next - } - if (tolower(x[i]) == '^h.*influenzae$') { - # avoid detection of Haematobacter influenzae in case of H. influenzae - x[i] <- 'HAEINF' - next - } - if (tolower(x[i]) == '^c.*difficile$') { - # avoid detection of Catabacter difficile in case of C. difficile - x[i] <- 'CLODIF' - next - } - if (tolower(x[i]) == '^st.*au$' - | tolower(x[i]) == '^stau$' - | tolower(x[i]) == '^staaur$') { - # avoid detection of Staphylococcus auricularis in case of S. aureus - x[i] <- 'STAAUR' - next - } - if (tolower(x[i]) == '^p.*aer$') { - # avoid detection of Pasteurella aerogenes in case of Pseudomonas aeruginosa - x[i] <- 'PSEAER' - next - } - if (x_backup[i] %like% '^l.*pneum.*' & !x_backup[i] %like% '^l.*non.*pneum.*') { - # avoid detection of Legionella non pneumophila in case of Legionella pneumophila - x[i] <- 'LEGPNE' - next - } - - # CoNS and CoPS in different languages (support for German, Dutch, Spanish, Portuguese) + # CoNS/CoPS in different languages (support for German, Dutch, Spanish, Portuguese) ---- if (tolower(x[i]) %like% '[ck]oagulas[ea] negatie?[vf]' | tolower(x_trimmed[i]) %like% '[ck]oagulas[ea] negatie?[vf]' | tolower(x[i]) %like% '[ck]o?ns[^a-z]?$') { @@ -223,7 +190,7 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) { next } - # translate known trivial abbreviations to genus+species + # translate known trivial abbreviations to genus + species ---- if (!is.na(x_trimmed[i])) { if (toupper(x_trimmed[i]) == 'MRSA' | toupper(x_trimmed[i]) == 'VISA' @@ -255,33 +222,33 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) { } } - # try any match keeping spaces + # try any match keeping spaces ---- found <- MOs[which(MOs$fullname %like% x_withspaces[i]),]$mo if (length(found) > 0) { x[i] <- found[1L] next } - # try the same, now based on genus + species + # try the same, now based on genus + species ---- found <- MOs[which(paste(MOs$genus, MOs$species) %like% x_withspaces[i]),]$mo if (length(found) > 0) { x[i] <- found[1L] next } - # try any match keeping spaces, not ending with $ + # try any match keeping spaces, not ending with $ ---- found <- MOs[which(MOs$fullname %like% x_withspaces_start[i]),]$mo if (length(found) > 0) { x[i] <- found[1L] next } - # try any match diregarding spaces + # try any match diregarding spaces ---- found <- MOs[which(MOs$fullname %like% x[i]),]$mo if (length(found) > 0) { x[i] <- found[1L] next } - # try exact match of only genus, with 'species' attached + # try exact match of only genus, with 'species' attached ---- # (this prevents Streptococcus from becoming Peptostreptococcus, since "p" < "s") found <- MOs[which(MOs$fullname == x_species[i]),]$mo if (length(found) > 0) { @@ -289,28 +256,29 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) { next } - # try any match of only genus, with 'species' attached + # try any match of only genus, with 'species' attached ---- found <- MOs[which(MOs$fullname %like% x_species[i]),]$mo if (length(found) > 0) { x[i] <- found[1L] next } - # try fullname without start and stop regex, to also find subspecies, like "K. pneu rhino" + # try fullname without start and stop regex, to also find subspecies ---- + # like "K. pneu rhino" -> "Klebsiella pneumoniae (rhinoscleromatis)" = KLEPNERH found <- MOs[which(gsub("[\\(\\)]", "", MOs$fullname) %like% x_withspaces_all[i]),]$mo if (length(found) > 0) { x[i] <- found[1L] next } - # search for GLIMS code + # search for GLIMS code ---- found <- AMR::microorganisms.umcg[which(toupper(AMR::microorganisms.umcg$umcg) == toupper(x_trimmed[i])),]$mo if (length(found) > 0) { x[i] <- found[1L] next } - # try splitting of characters and then find ID + # try splitting of characters and then find ID ---- # like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus x_split <- x x_length <- nchar(x_trimmed[i]) @@ -323,7 +291,7 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) { next } - # try any match with text before and after original search string + # try any match with text before and after original search string ---- # so "negative rods" will be "GNR" if (x_trimmed[i] %like% "^Gram") { x_trimmed[i] <- gsub("^Gram", "", x_trimmed[i], ignore.case = TRUE) @@ -338,12 +306,23 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) { } } - # not found + # not found ---- x[i] <- NA_character_ failures <- c(failures, x_backup[i]) } + # avoid detection of Staphylococcus auricularis in case of S. aureus ---- + x[x == "STAAUC" & toupper(x_backup) != "STAAUC" & !x_backup %like% 'auri'] <- "STAAUR" + # avoid detection of Entamoeba coli in case of E. coli ---- + x[x == "ENMCOL" & toupper(x_backup) != "ENMCOL" & !x_backup %like% '^ent?'] <- "ESCCOL" + # avoid detection of Haematobacter influenzae in case of H. influenzae ---- + x[x == "HABINF" & toupper(x_backup) != "HABINF" & !x_backup %like% '^haema'] <- "HAEINF" + # avoid detection of Pasteurella aerogenes in case of P. aeruginosa ---- + x[x == "PASAER" & toupper(x_backup) != "PASAER" & !(x_backup %like% '^pas?' | x_backup %like% 'aero')] <- "PSEAER" + # avoid detection of Legionella non pneumophila in case of Legionella pneumophila ---- + x[x == "LEGNON" & toupper(x_backup) != "LEGNON" & !x_backup %like% 'non'] <- "LEGPNE" + failures <- failures[!failures %in% c(NA, NULL, NaN)] if (length(failures) > 0) { warning("These ", length(failures) , " values could not be coerced to a valid mo: ", @@ -352,6 +331,7 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) { call. = FALSE) } + # Becker ---- if (Becker == TRUE | Becker == "all") { # See Source. It's this figure: # https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4187637/figure/F3/ @@ -384,6 +364,7 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) { } } + # Lancefield ---- if (Lancefield == TRUE | Lancefield == "all") { # group A x[x == "STCPYO"] <- "STCGRA" # S. pyogenes @@ -406,7 +387,8 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) { x[x == "STCSAL"] <- "STCGRK" # S. salivarius } - # for the returned genera without species (like "ESC"), add species (like "ESCSPP") where the input contained it + # for the returned genera without species, add species ---- + # like "ESC" -> "ESCSPP", but only where the input contained it indices <- unique(x_input) %like% "[A-Z]{3}SPP" & !x %like% "[A-Z]{3}SPP" x[indices] <- paste0(x[indices], 'SPP') @@ -468,33 +450,3 @@ as.data.frame.mo <- function (x, ...) { pull.mo <- function(.data, ...) { pull(as.data.frame(.data), ...) } - -#' @exportMethod print.bactid -#' @export -#' @noRd -print.bactid <- function(x, ...) { - cat("Class 'bactid'\n") - print.default(as.character(x), quote = FALSE) -} - -#' @exportMethod as.data.frame.bactid -#' @export -#' @noRd -as.data.frame.bactid <- function (x, ...) { - # same as as.data.frame.character but with removed stringsAsFactors - nm <- paste(deparse(substitute(x), width.cutoff = 500L), - collapse = " ") - if (!"nm" %in% names(list(...))) { - as.data.frame.vector(x, ..., nm = nm) - } else { - as.data.frame.vector(x, ...) - } -} - -#' @exportMethod pull.bactid -#' @export -#' @importFrom dplyr pull -#' @noRd -pull.bactid <- function(.data, ...) { - pull(as.data.frame(.data), ...) -}