diff --git a/DESCRIPTION b/DESCRIPTION index 5f5043e0..c9648b26 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR Version: 0.5.0.9009 -Date: 2019-01-08 +Date: 2019-01-11 Title: Antimicrobial Resistance Analysis Authors@R: c( person( diff --git a/NAMESPACE b/NAMESPACE index aee04dee..3ce45c98 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -76,7 +76,7 @@ export(g.test) export(geom_rsi) export(get_locale) export(ggplot_rsi) -export(guess_ab) +export(guess_ab_col) export(guess_atc) export(guess_mo) export(header) diff --git a/NEWS.md b/NEWS.md index 254bcc06..ab842da3 100755 --- a/NEWS.md +++ b/NEWS.md @@ -7,7 +7,7 @@ * Contains the complete manual of this package and all of its functions with an explanation of their parameters * Contains a comprehensive tutorial about how to conduct antimicrobial resistance analysis * Support for [`dplyr`](https://dplyr.tidyverse.org) version 0.8.0 -* Function `guess_ab` to find an antibiotic column in a table +* Function `guess_ab_col` to find an antibiotic column in a table * Function `mo_failures()` to review values that could not be coerced to a valid MO code, using `as.mo()`. This latter function will now only show a maximum of 25 uncoerced values. * Function `mo_renamed()` to get a list of all returned values from `as.mo()` that have had taxonomic renaming * Function `age()` to calculate the (patients) age in years diff --git a/R/age.R b/R/age.R index 881d5fb0..399e7ea0 100644 --- a/R/age.R +++ b/R/age.R @@ -48,7 +48,7 @@ age <- function(x, reference = Sys.Date()) { as.integer(years_gap - 1), as.integer(years_gap)) if (any(ages > 120)) { - warning("Some ages are >120.") + warning("Some ages are > 120.") } ages } diff --git a/R/eucast_rules.R b/R/eucast_rules.R index 9cc57146..6ae03d7d 100755 --- a/R/eucast_rules.R +++ b/R/eucast_rules.R @@ -30,7 +30,7 @@ #' @param ... 
parameters that are passed on to \code{eucast_rules} #' @inheritParams first_isolate #' @section Antibiotics: -#' To define antibiotics column names, leave as it is to determine it automatically with \code{\link{guess_ab}} or input a text (case-insensitive) or use \code{NULL} to skip a column (e.g. \code{tica = NULL}). Non-existing columns will anyway be skipped with a warning. +#' To define antibiotics column names, leave as it is to determine it automatically with \code{\link{guess_ab_col}} or input a text (case-insensitive) or use \code{NULL} to skip a column (e.g. \code{tica = NULL}). Non-existing columns will anyway be skipped with a warning. #' #' Abbrevations of the column containing antibiotics in the form: \strong{abbreviation}: generic name (\emph{ATC code}) #' @@ -158,69 +158,69 @@ eucast_rules <- function(tbl, info = TRUE, rules = c("breakpoints", "expert", "other", "all"), verbose = FALSE, - amcl = guess_ab(), - amik = guess_ab(), - amox = guess_ab(), - ampi = guess_ab(), - azit = guess_ab(), - azlo = guess_ab(), - aztr = guess_ab(), - cefa = guess_ab(), - cfep = guess_ab(), - cfot = guess_ab(), - cfox = guess_ab(), - cfra = guess_ab(), - cfta = guess_ab(), - cftr = guess_ab(), - cfur = guess_ab(), - chlo = guess_ab(), - cipr = guess_ab(), - clar = guess_ab(), - clin = guess_ab(), - clox = guess_ab(), - coli = guess_ab(), - czol = guess_ab(), - dapt = guess_ab(), - doxy = guess_ab(), - erta = guess_ab(), - eryt = guess_ab(), - fosf = guess_ab(), - fusi = guess_ab(), - gent = guess_ab(), - imip = guess_ab(), - kana = guess_ab(), - levo = guess_ab(), - linc = guess_ab(), - line = guess_ab(), - mero = guess_ab(), - mezl = guess_ab(), - mino = guess_ab(), - moxi = guess_ab(), - nali = guess_ab(), - neom = guess_ab(), - neti = guess_ab(), - nitr = guess_ab(), - norf = guess_ab(), - novo = guess_ab(), - oflo = guess_ab(), - oxac = guess_ab(), - peni = guess_ab(), - pipe = guess_ab(), - pita = guess_ab(), - poly = guess_ab(), - pris = guess_ab(), - qida = 
guess_ab(), - rifa = guess_ab(), - roxi = guess_ab(), - siso = guess_ab(), - teic = guess_ab(), - tetr = guess_ab(), - tica = guess_ab(), - tige = guess_ab(), - tobr = guess_ab(), - trim = guess_ab(), - trsu = guess_ab(), - vanc = guess_ab()) { + amcl = guess_ab_col(), + amik = guess_ab_col(), + amox = guess_ab_col(), + ampi = guess_ab_col(), + azit = guess_ab_col(), + azlo = guess_ab_col(), + aztr = guess_ab_col(), + cefa = guess_ab_col(), + cfep = guess_ab_col(), + cfot = guess_ab_col(), + cfox = guess_ab_col(), + cfra = guess_ab_col(), + cfta = guess_ab_col(), + cftr = guess_ab_col(), + cfur = guess_ab_col(), + chlo = guess_ab_col(), + cipr = guess_ab_col(), + clar = guess_ab_col(), + clin = guess_ab_col(), + clox = guess_ab_col(), + coli = guess_ab_col(), + czol = guess_ab_col(), + dapt = guess_ab_col(), + doxy = guess_ab_col(), + erta = guess_ab_col(), + eryt = guess_ab_col(), + fosf = guess_ab_col(), + fusi = guess_ab_col(), + gent = guess_ab_col(), + imip = guess_ab_col(), + kana = guess_ab_col(), + levo = guess_ab_col(), + linc = guess_ab_col(), + line = guess_ab_col(), + mero = guess_ab_col(), + mezl = guess_ab_col(), + mino = guess_ab_col(), + moxi = guess_ab_col(), + nali = guess_ab_col(), + neom = guess_ab_col(), + neti = guess_ab_col(), + nitr = guess_ab_col(), + norf = guess_ab_col(), + novo = guess_ab_col(), + oflo = guess_ab_col(), + oxac = guess_ab_col(), + peni = guess_ab_col(), + pipe = guess_ab_col(), + pita = guess_ab_col(), + poly = guess_ab_col(), + pris = guess_ab_col(), + qida = guess_ab_col(), + rifa = guess_ab_col(), + roxi = guess_ab_col(), + siso = guess_ab_col(), + teic = guess_ab_col(), + tetr = guess_ab_col(), + tica = guess_ab_col(), + tige = guess_ab_col(), + tobr = guess_ab_col(), + trim = guess_ab_col(), + trsu = guess_ab_col(), + vanc = guess_ab_col()) { EUCAST_VERSION_BREAKPOINTS <- "8.1, 2018" EUCAST_VERSION_EXPERT_RULES <- "3.1, 2016" @@ -268,69 +268,69 @@ eucast_rules <- function(tbl, } # check columns - if (identical(amcl, 
as.name("guess_ab"))) { amcl <- guess_ab(tbl, "amcl", verbose = verbose) } - if (identical(amik, as.name("guess_ab"))) { amik <- guess_ab(tbl, "amik", verbose = verbose) } - if (identical(amox, as.name("guess_ab"))) { amox <- guess_ab(tbl, "amox", verbose = verbose) } - if (identical(ampi, as.name("guess_ab"))) { ampi <- guess_ab(tbl, "ampi", verbose = verbose) } - if (identical(azit, as.name("guess_ab"))) { azit <- guess_ab(tbl, "azit", verbose = verbose) } - if (identical(azlo, as.name("guess_ab"))) { azlo <- guess_ab(tbl, "azlo", verbose = verbose) } - if (identical(aztr, as.name("guess_ab"))) { aztr <- guess_ab(tbl, "aztr", verbose = verbose) } - if (identical(cefa, as.name("guess_ab"))) { cefa <- guess_ab(tbl, "cefa", verbose = verbose) } - if (identical(cfep, as.name("guess_ab"))) { cfep <- guess_ab(tbl, "cfep", verbose = verbose) } - if (identical(cfot, as.name("guess_ab"))) { cfot <- guess_ab(tbl, "cfot", verbose = verbose) } - if (identical(cfox, as.name("guess_ab"))) { cfox <- guess_ab(tbl, "cfox", verbose = verbose) } - if (identical(cfra, as.name("guess_ab"))) { cfra <- guess_ab(tbl, "cfra", verbose = verbose) } - if (identical(cfta, as.name("guess_ab"))) { cfta <- guess_ab(tbl, "cfta", verbose = verbose) } - if (identical(cftr, as.name("guess_ab"))) { cftr <- guess_ab(tbl, "cftr", verbose = verbose) } - if (identical(cfur, as.name("guess_ab"))) { cfur <- guess_ab(tbl, "cfur", verbose = verbose) } - if (identical(chlo, as.name("guess_ab"))) { chlo <- guess_ab(tbl, "chlo", verbose = verbose) } - if (identical(cipr, as.name("guess_ab"))) { cipr <- guess_ab(tbl, "cipr", verbose = verbose) } - if (identical(clar, as.name("guess_ab"))) { clar <- guess_ab(tbl, "clar", verbose = verbose) } - if (identical(clin, as.name("guess_ab"))) { clin <- guess_ab(tbl, "clin", verbose = verbose) } - if (identical(clox, as.name("guess_ab"))) { clox <- guess_ab(tbl, "clox", verbose = verbose) } - if (identical(coli, as.name("guess_ab"))) { coli <- guess_ab(tbl, "coli", 
verbose = verbose) } - if (identical(czol, as.name("guess_ab"))) { czol <- guess_ab(tbl, "czol", verbose = verbose) } - if (identical(dapt, as.name("guess_ab"))) { dapt <- guess_ab(tbl, "dapt", verbose = verbose) } - if (identical(doxy, as.name("guess_ab"))) { doxy <- guess_ab(tbl, "doxy", verbose = verbose) } - if (identical(erta, as.name("guess_ab"))) { erta <- guess_ab(tbl, "erta", verbose = verbose) } - if (identical(eryt, as.name("guess_ab"))) { eryt <- guess_ab(tbl, "eryt", verbose = verbose) } - if (identical(fosf, as.name("guess_ab"))) { fosf <- guess_ab(tbl, "fosf", verbose = verbose) } - if (identical(fusi, as.name("guess_ab"))) { fusi <- guess_ab(tbl, "fusi", verbose = verbose) } - if (identical(gent, as.name("guess_ab"))) { gent <- guess_ab(tbl, "gent", verbose = verbose) } - if (identical(imip, as.name("guess_ab"))) { imip <- guess_ab(tbl, "imip", verbose = verbose) } - if (identical(kana, as.name("guess_ab"))) { kana <- guess_ab(tbl, "kana", verbose = verbose) } - if (identical(levo, as.name("guess_ab"))) { levo <- guess_ab(tbl, "levo", verbose = verbose) } - if (identical(linc, as.name("guess_ab"))) { linc <- guess_ab(tbl, "linc", verbose = verbose) } - if (identical(line, as.name("guess_ab"))) { line <- guess_ab(tbl, "line", verbose = verbose) } - if (identical(mero, as.name("guess_ab"))) { mero <- guess_ab(tbl, "mero", verbose = verbose) } - if (identical(mezl, as.name("guess_ab"))) { mezl <- guess_ab(tbl, "mezl", verbose = verbose) } - if (identical(mino, as.name("guess_ab"))) { mino <- guess_ab(tbl, "mino", verbose = verbose) } - if (identical(moxi, as.name("guess_ab"))) { moxi <- guess_ab(tbl, "moxi", verbose = verbose) } - if (identical(nali, as.name("guess_ab"))) { nali <- guess_ab(tbl, "nali", verbose = verbose) } - if (identical(neom, as.name("guess_ab"))) { neom <- guess_ab(tbl, "neom", verbose = verbose) } - if (identical(neti, as.name("guess_ab"))) { neti <- guess_ab(tbl, "neti", verbose = verbose) } - if (identical(nitr, 
as.name("guess_ab"))) { nitr <- guess_ab(tbl, "nitr", verbose = verbose) } - if (identical(norf, as.name("guess_ab"))) { norf <- guess_ab(tbl, "norf", verbose = verbose) } - if (identical(novo, as.name("guess_ab"))) { novo <- guess_ab(tbl, "novo", verbose = verbose) } - if (identical(oflo, as.name("guess_ab"))) { oflo <- guess_ab(tbl, "oflo", verbose = verbose) } - if (identical(oxac, as.name("guess_ab"))) { oxac <- guess_ab(tbl, "oxac", verbose = verbose) } - if (identical(peni, as.name("guess_ab"))) { peni <- guess_ab(tbl, "peni", verbose = verbose) } - if (identical(pipe, as.name("guess_ab"))) { pipe <- guess_ab(tbl, "pipe", verbose = verbose) } - if (identical(pita, as.name("guess_ab"))) { pita <- guess_ab(tbl, "pita", verbose = verbose) } - if (identical(poly, as.name("guess_ab"))) { poly <- guess_ab(tbl, "poly", verbose = verbose) } - if (identical(pris, as.name("guess_ab"))) { pris <- guess_ab(tbl, "pris", verbose = verbose) } - if (identical(qida, as.name("guess_ab"))) { qida <- guess_ab(tbl, "qida", verbose = verbose) } - if (identical(rifa, as.name("guess_ab"))) { rifa <- guess_ab(tbl, "rifa", verbose = verbose) } - if (identical(roxi, as.name("guess_ab"))) { roxi <- guess_ab(tbl, "roxi", verbose = verbose) } - if (identical(siso, as.name("guess_ab"))) { siso <- guess_ab(tbl, "siso", verbose = verbose) } - if (identical(teic, as.name("guess_ab"))) { teic <- guess_ab(tbl, "teic", verbose = verbose) } - if (identical(tetr, as.name("guess_ab"))) { tetr <- guess_ab(tbl, "tetr", verbose = verbose) } - if (identical(tica, as.name("guess_ab"))) { tica <- guess_ab(tbl, "tica", verbose = verbose) } - if (identical(tige, as.name("guess_ab"))) { tige <- guess_ab(tbl, "tige", verbose = verbose) } - if (identical(tobr, as.name("guess_ab"))) { tobr <- guess_ab(tbl, "tobr", verbose = verbose) } - if (identical(trim, as.name("guess_ab"))) { trim <- guess_ab(tbl, "trim", verbose = verbose) } - if (identical(trsu, as.name("guess_ab"))) { trsu <- guess_ab(tbl, "trsu", 
verbose = verbose) } - if (identical(vanc, as.name("guess_ab"))) { vanc <- guess_ab(tbl, "vanc", verbose = verbose) } + if (identical(amcl, as.name("guess_ab_col"))) { amcl <- guess_ab_col(tbl, "amcl", verbose = verbose) } + if (identical(amik, as.name("guess_ab_col"))) { amik <- guess_ab_col(tbl, "amik", verbose = verbose) } + if (identical(amox, as.name("guess_ab_col"))) { amox <- guess_ab_col(tbl, "amox", verbose = verbose) } + if (identical(ampi, as.name("guess_ab_col"))) { ampi <- guess_ab_col(tbl, "ampi", verbose = verbose) } + if (identical(azit, as.name("guess_ab_col"))) { azit <- guess_ab_col(tbl, "azit", verbose = verbose) } + if (identical(azlo, as.name("guess_ab_col"))) { azlo <- guess_ab_col(tbl, "azlo", verbose = verbose) } + if (identical(aztr, as.name("guess_ab_col"))) { aztr <- guess_ab_col(tbl, "aztr", verbose = verbose) } + if (identical(cefa, as.name("guess_ab_col"))) { cefa <- guess_ab_col(tbl, "cefa", verbose = verbose) } + if (identical(cfep, as.name("guess_ab_col"))) { cfep <- guess_ab_col(tbl, "cfep", verbose = verbose) } + if (identical(cfot, as.name("guess_ab_col"))) { cfot <- guess_ab_col(tbl, "cfot", verbose = verbose) } + if (identical(cfox, as.name("guess_ab_col"))) { cfox <- guess_ab_col(tbl, "cfox", verbose = verbose) } + if (identical(cfra, as.name("guess_ab_col"))) { cfra <- guess_ab_col(tbl, "cfra", verbose = verbose) } + if (identical(cfta, as.name("guess_ab_col"))) { cfta <- guess_ab_col(tbl, "cfta", verbose = verbose) } + if (identical(cftr, as.name("guess_ab_col"))) { cftr <- guess_ab_col(tbl, "cftr", verbose = verbose) } + if (identical(cfur, as.name("guess_ab_col"))) { cfur <- guess_ab_col(tbl, "cfur", verbose = verbose) } + if (identical(chlo, as.name("guess_ab_col"))) { chlo <- guess_ab_col(tbl, "chlo", verbose = verbose) } + if (identical(cipr, as.name("guess_ab_col"))) { cipr <- guess_ab_col(tbl, "cipr", verbose = verbose) } + if (identical(clar, as.name("guess_ab_col"))) { clar <- guess_ab_col(tbl, "clar", verbose = 
verbose) } + if (identical(clin, as.name("guess_ab_col"))) { clin <- guess_ab_col(tbl, "clin", verbose = verbose) } + if (identical(clox, as.name("guess_ab_col"))) { clox <- guess_ab_col(tbl, "clox", verbose = verbose) } + if (identical(coli, as.name("guess_ab_col"))) { coli <- guess_ab_col(tbl, "coli", verbose = verbose) } + if (identical(czol, as.name("guess_ab_col"))) { czol <- guess_ab_col(tbl, "czol", verbose = verbose) } + if (identical(dapt, as.name("guess_ab_col"))) { dapt <- guess_ab_col(tbl, "dapt", verbose = verbose) } + if (identical(doxy, as.name("guess_ab_col"))) { doxy <- guess_ab_col(tbl, "doxy", verbose = verbose) } + if (identical(erta, as.name("guess_ab_col"))) { erta <- guess_ab_col(tbl, "erta", verbose = verbose) } + if (identical(eryt, as.name("guess_ab_col"))) { eryt <- guess_ab_col(tbl, "eryt", verbose = verbose) } + if (identical(fosf, as.name("guess_ab_col"))) { fosf <- guess_ab_col(tbl, "fosf", verbose = verbose) } + if (identical(fusi, as.name("guess_ab_col"))) { fusi <- guess_ab_col(tbl, "fusi", verbose = verbose) } + if (identical(gent, as.name("guess_ab_col"))) { gent <- guess_ab_col(tbl, "gent", verbose = verbose) } + if (identical(imip, as.name("guess_ab_col"))) { imip <- guess_ab_col(tbl, "imip", verbose = verbose) } + if (identical(kana, as.name("guess_ab_col"))) { kana <- guess_ab_col(tbl, "kana", verbose = verbose) } + if (identical(levo, as.name("guess_ab_col"))) { levo <- guess_ab_col(tbl, "levo", verbose = verbose) } + if (identical(linc, as.name("guess_ab_col"))) { linc <- guess_ab_col(tbl, "linc", verbose = verbose) } + if (identical(line, as.name("guess_ab_col"))) { line <- guess_ab_col(tbl, "line", verbose = verbose) } + if (identical(mero, as.name("guess_ab_col"))) { mero <- guess_ab_col(tbl, "mero", verbose = verbose) } + if (identical(mezl, as.name("guess_ab_col"))) { mezl <- guess_ab_col(tbl, "mezl", verbose = verbose) } + if (identical(mino, as.name("guess_ab_col"))) { mino <- guess_ab_col(tbl, "mino", verbose = 
verbose) } + if (identical(moxi, as.name("guess_ab_col"))) { moxi <- guess_ab_col(tbl, "moxi", verbose = verbose) } + if (identical(nali, as.name("guess_ab_col"))) { nali <- guess_ab_col(tbl, "nali", verbose = verbose) } + if (identical(neom, as.name("guess_ab_col"))) { neom <- guess_ab_col(tbl, "neom", verbose = verbose) } + if (identical(neti, as.name("guess_ab_col"))) { neti <- guess_ab_col(tbl, "neti", verbose = verbose) } + if (identical(nitr, as.name("guess_ab_col"))) { nitr <- guess_ab_col(tbl, "nitr", verbose = verbose) } + if (identical(norf, as.name("guess_ab_col"))) { norf <- guess_ab_col(tbl, "norf", verbose = verbose) } + if (identical(novo, as.name("guess_ab_col"))) { novo <- guess_ab_col(tbl, "novo", verbose = verbose) } + if (identical(oflo, as.name("guess_ab_col"))) { oflo <- guess_ab_col(tbl, "oflo", verbose = verbose) } + if (identical(oxac, as.name("guess_ab_col"))) { oxac <- guess_ab_col(tbl, "oxac", verbose = verbose) } + if (identical(peni, as.name("guess_ab_col"))) { peni <- guess_ab_col(tbl, "peni", verbose = verbose) } + if (identical(pipe, as.name("guess_ab_col"))) { pipe <- guess_ab_col(tbl, "pipe", verbose = verbose) } + if (identical(pita, as.name("guess_ab_col"))) { pita <- guess_ab_col(tbl, "pita", verbose = verbose) } + if (identical(poly, as.name("guess_ab_col"))) { poly <- guess_ab_col(tbl, "poly", verbose = verbose) } + if (identical(pris, as.name("guess_ab_col"))) { pris <- guess_ab_col(tbl, "pris", verbose = verbose) } + if (identical(qida, as.name("guess_ab_col"))) { qida <- guess_ab_col(tbl, "qida", verbose = verbose) } + if (identical(rifa, as.name("guess_ab_col"))) { rifa <- guess_ab_col(tbl, "rifa", verbose = verbose) } + if (identical(roxi, as.name("guess_ab_col"))) { roxi <- guess_ab_col(tbl, "roxi", verbose = verbose) } + if (identical(siso, as.name("guess_ab_col"))) { siso <- guess_ab_col(tbl, "siso", verbose = verbose) } + if (identical(teic, as.name("guess_ab_col"))) { teic <- guess_ab_col(tbl, "teic", verbose = 
verbose) } + if (identical(tetr, as.name("guess_ab_col"))) { tetr <- guess_ab_col(tbl, "tetr", verbose = verbose) } + if (identical(tica, as.name("guess_ab_col"))) { tica <- guess_ab_col(tbl, "tica", verbose = verbose) } + if (identical(tige, as.name("guess_ab_col"))) { tige <- guess_ab_col(tbl, "tige", verbose = verbose) } + if (identical(tobr, as.name("guess_ab_col"))) { tobr <- guess_ab_col(tbl, "tobr", verbose = verbose) } + if (identical(trim, as.name("guess_ab_col"))) { trim <- guess_ab_col(tbl, "trim", verbose = verbose) } + if (identical(trsu, as.name("guess_ab_col"))) { trsu <- guess_ab_col(tbl, "trsu", verbose = verbose) } + if (identical(vanc, as.name("guess_ab_col"))) { vanc <- guess_ab_col(tbl, "vanc", verbose = verbose) } col.list <- c(amcl, amik, amox, ampi, azit, azlo, aztr, cefa, cfra, cfep, cfot, cfox, cfta, cftr, cfur, chlo, cipr, clar, clin, clox, coli, czol, dapt, doxy, erta, eryt, fosf, fusi, gent, imip, kana, diff --git a/R/guess_ab.R b/R/guess_ab_col.R similarity index 54% rename from R/guess_ab.R rename to R/guess_ab_col.R index 70fd6dab..c5b90f56 100644 --- a/R/guess_ab.R +++ b/R/guess_ab_col.R @@ -21,18 +21,28 @@ #' Guess antibiotic column #' -#' This tries to find a column name in a data set based on information from the \code{\link{antibiotics}} data set. +#' This tries to find a column name in a data set based on information from the \code{\link{antibiotics}} data set. You can look for an antibiotic (trade) name or abbreviation and it will search the data for any column containing a name or ATC code of that antibiotic. #' @param tbl a \code{data.frame} #' @param col a character to look for #' @param verbose a logical to indicate whether additional info should be printed #' @importFrom dplyr %>% select filter_all any_vars #' @export #' @inheritSection AMR Read more on our website! 
-# @examples -# -guess_ab <- function(tbl = NULL, col = NULL, verbose = FALSE) { +#' @examples +#' df <- data.frame(amox = "S", +#' tetr = "R") +#' +#' guess_ab_col(df, "amoxicillin") +#' # [1] "amox" +#' guess_ab_col(df, "J01AA07") # ATC code of Tetracycline +#' # [1] "tetr" +#' +#' guess_ab_col(df, "J01AA07", verbose = TRUE) +#' # using column `tetr` for col "J01AA07" +#' # [1] "tetr" +guess_ab_col <- function(tbl = NULL, col = NULL, verbose = FALSE) { if (is.null(tbl) & is.null(col)) { - return(as.name("guess_ab")) + return(as.name("guess_ab_col")) } #stop("This function should not be called directly.") if (length(col) > 1) { @@ -42,35 +52,58 @@ guess_ab <- function(tbl = NULL, col = NULL, verbose = FALSE) { if (!is.data.frame(tbl)) { stop("`tbl` must be a data.frame") } + tbl_names <- colnames(tbl) + if (col %in% tbl_names) { + return(col) + } ab_result <- antibiotics %>% select(atc:trade_name) %>% - filter_all(any_vars(tolower(.) == tolower(col))) - if (nrow(ab_result) > 1) { - # get most likely one - if (col %in% ab_result$atc) { - ab_result <- ab_result %>% filter(atc == col) - } else if (col %in% ab_result$certe) { - ab_result <- ab_result %>% filter(certe == col) - } else if (col %in% ab_result$umcg) { - ab_result <- ab_result %>% filter(umcg == col) - } else if (col %in% ab_result$umcg) { - ab_result <- ab_result %>% filter(official == col) - } else { - ab_result <- ab_result[1,] - } + filter_all(any_vars(tolower(.) == tolower(col))) %>% + filter_all(any_vars(. %in% tbl_names)) + + if (nrow(ab_result) == 0 & nchar(col) > 4) { + # use like when col >= 5 characters + ab_result <- antibiotics %>% + select(atc:trade_name) %>% + filter_all(any_vars(tolower(.) %like% tolower(col))) %>% + filter_all(any_vars(. %in% tbl_names)) } - tbl_result <- tbl_names[tbl_names %in% ab_result] - if (length(tbl_result) > 1) { - tbl_result <- tbl_result[1] - warning('using column `', tbl_result, '` for col "', col, '"', call. 
= FALSE) - } else if (length(tbl_result) == 0) { + + if (nrow(ab_result) > 1) { + # looking more and more for reliable hit + ab_result_1 <- ab_result %>% filter(tolower(atc) == tolower(col)) + if (nrow(ab_result_1) == 0) { + ab_result_1 <- ab_result %>% filter(tolower(certe) == tolower(col)) + } + if (nrow(ab_result_1) == 0) { + ab_result_1 <- ab_result %>% filter(tolower(umcg) == tolower(col)) + } + if (nrow(ab_result_1) == 0) { + ab_result_1 <- ab_result %>% filter(tolower(official) == tolower(col)) + } + if (nrow(ab_result_1) == 0) { + ab_result_1 <- ab_result[1, ] + } + ab_result <- ab_result_1 + } + + if (length(ab_result) == 0) { if (verbose == TRUE) { message('no result found for col "', col, '"') } return(NULL) - } else if (verbose == TRUE) { - message('using column `', tbl_result, '` for col "', col, '"') + } else { + result <- tbl_names[tbl_names %in% ab_result] + if (length(result) == 0) { + if (verbose == TRUE) { + message('no result found for col "', col, '"') + } + return(NULL) + } + if (verbose == TRUE) { + message('using column `', result, '` for col "', col, '"') + } + return(result) } - tbl_result } diff --git a/R/key_antibiotics.R b/R/key_antibiotics.R index 9b37438e..f654c01f 100644 --- a/R/key_antibiotics.R +++ b/R/key_antibiotics.R @@ -25,12 +25,12 @@ #' @param tbl table with antibiotics coloms, like \code{amox} and \code{amcl}. 
#' @param x,y characters to compare #' @inheritParams first_isolate -#' @param universal_1,universal_2,universal_3,universal_4,universal_5,universal_6 column names of \strong{broad-spectrum} antibiotics, case-insensitive -#' @param GramPos_1,GramPos_2,GramPos_3,GramPos_4,GramPos_5,GramPos_6 column names of antibiotics for \strong{Gram positives}, case-insensitive -#' @param GramNeg_1,GramNeg_2,GramNeg_3,GramNeg_4,GramNeg_5,GramNeg_6 column names of antibiotics for \strong{Gram negatives}, case-insensitive +#' @param universal_1,universal_2,universal_3,universal_4,universal_5,universal_6 column names of \strong{broad-spectrum} antibiotics, case-insensitive. At default, the columns containing these antibiotics will be guessed with \code{\link{guess_ab_col}}. +#' @param GramPos_1,GramPos_2,GramPos_3,GramPos_4,GramPos_5,GramPos_6 column names of antibiotics for \strong{Gram positives}, case-insensitive. At default, the columns containing these antibiotics will be guessed with \code{\link{guess_ab_col}}. +#' @param GramNeg_1,GramNeg_2,GramNeg_3,GramNeg_4,GramNeg_5,GramNeg_6 column names of antibiotics for \strong{Gram negatives}, case-insensitive. At default, the columns containing these antibiotics will be guessed with \code{\link{guess_ab_col}}. #' @param warnings give warning about missing antibiotic columns, they will anyway be ignored #' @param ... other parameters passed on to function -#' @details The function \code{key_antibiotics} returns a character vector with 12 antibiotic results for every isolate. These isolates can then be compared using \code{key_antibiotics_equal}, to check if two isolates have generally the same antibiogram. Missing and invalid values are replaced with a dot (\code{"."}). The \code{\link{first_isolate}} function only uses this function on the same microbial species from the same patient. Using this, an MRSA will be included after a susceptible \emph{S. 
aureus} (MSSA) found within the same episode (see \code{episode} parameter of \code{\link{first_isolate}}). Without key antibiotic comparison it wouldn't. +#' @details The function \code{key_antibiotics} returns a character vector with 12 antibiotic results for every isolate. These isolates can then be compared using \code{key_antibiotics_equal}, to check if two isolates have generally the same antibiogram. Missing and invalid values are replaced with a dot (\code{"."}). The \code{\link{first_isolate}} function only uses this function on the same microbial species from the same patient. Using this, an MRSA will be included after a susceptible \emph{S. aureus} (MSSA) found within the same episode (see \code{episode} parameter of \code{\link{first_isolate}}). Without key antibiotic comparison it would not. #' #' At default, the antibiotics that are used for \strong{Gram positive bacteria} are (colum names): \cr #' \code{"amox"}, \code{"amcl"}, \code{"cfur"}, \code{"pita"}, \code{"cipr"}, \code{"trsu"} (until here is universal), \code{"vanc"}, \code{"teic"}, \code{"tetr"}, \code{"eryt"}, \code{"oxac"}, \code{"rifa"}. 
@@ -78,24 +78,24 @@ #' # FALSE, because I is not ignored and so the 4th value differs key_antibiotics <- function(tbl, col_mo = NULL, - universal_1 = guess_ab(tbl, "amox"), - universal_2 = guess_ab(tbl, "amcl"), - universal_3 = guess_ab(tbl, "cfur"), - universal_4 = guess_ab(tbl, "pita"), - universal_5 = guess_ab(tbl, "cipr"), - universal_6 = guess_ab(tbl, "trsu"), - GramPos_1 = guess_ab(tbl, "vanc"), - GramPos_2 = guess_ab(tbl, "teic"), - GramPos_3 = guess_ab(tbl, "tetr"), - GramPos_4 = guess_ab(tbl, "eryt"), - GramPos_5 = guess_ab(tbl, "oxac"), - GramPos_6 = guess_ab(tbl, "rifa"), - GramNeg_1 = guess_ab(tbl, "gent"), - GramNeg_2 = guess_ab(tbl, "tobr"), - GramNeg_3 = guess_ab(tbl, "coli"), - GramNeg_4 = guess_ab(tbl, "cfot"), - GramNeg_5 = guess_ab(tbl, "cfta"), - GramNeg_6 = guess_ab(tbl, "mero"), + universal_1 = guess_ab_col(tbl, "amox"), + universal_2 = guess_ab_col(tbl, "amcl"), + universal_3 = guess_ab_col(tbl, "cfur"), + universal_4 = guess_ab_col(tbl, "pita"), + universal_5 = guess_ab_col(tbl, "cipr"), + universal_6 = guess_ab_col(tbl, "trsu"), + GramPos_1 = guess_ab_col(tbl, "vanc"), + GramPos_2 = guess_ab_col(tbl, "teic"), + GramPos_3 = guess_ab_col(tbl, "tetr"), + GramPos_4 = guess_ab_col(tbl, "eryt"), + GramPos_5 = guess_ab_col(tbl, "oxac"), + GramPos_6 = guess_ab_col(tbl, "rifa"), + GramNeg_1 = guess_ab_col(tbl, "gent"), + GramNeg_2 = guess_ab_col(tbl, "tobr"), + GramNeg_3 = guess_ab_col(tbl, "coli"), + GramNeg_4 = guess_ab_col(tbl, "cfot"), + GramNeg_5 = guess_ab_col(tbl, "cfta"), + GramNeg_6 = guess_ab_col(tbl, "mero"), warnings = TRUE, ...) 
{ @@ -153,9 +153,6 @@ key_antibiotics <- function(tbl, left_join_microorganisms(by = col_mo) %>% mutate(key_ab = NA_character_) - print(as.character(gram_positive)) - print(gram_negative) - # Gram + tbl <- tbl %>% mutate(key_ab = if_else(gramstain == "Gram positive", diff --git a/R/mdro.R b/R/mdro.R index 09deedcd..ae139bc8 100755 --- a/R/mdro.R +++ b/R/mdro.R @@ -46,66 +46,66 @@ mdro <- function(tbl, country = NULL, col_mo = NULL, info = TRUE, - amcl = guess_ab(), - amik = guess_ab(), - amox = guess_ab(), - ampi = guess_ab(), - azit = guess_ab(), - aztr = guess_ab(), - cefa = guess_ab(), - cfra = guess_ab(), - cfep = guess_ab(), - cfot = guess_ab(), - cfox = guess_ab(), - cfta = guess_ab(), - cftr = guess_ab(), - cfur = guess_ab(), - chlo = guess_ab(), - cipr = guess_ab(), - clar = guess_ab(), - clin = guess_ab(), - clox = guess_ab(), - coli = guess_ab(), - czol = guess_ab(), - dapt = guess_ab(), - doxy = guess_ab(), - erta = guess_ab(), - eryt = guess_ab(), - fosf = guess_ab(), - fusi = guess_ab(), - gent = guess_ab(), - imip = guess_ab(), - kana = guess_ab(), - levo = guess_ab(), - linc = guess_ab(), - line = guess_ab(), - mero = guess_ab(), - metr = guess_ab(), - mino = guess_ab(), - moxi = guess_ab(), - nali = guess_ab(), - neom = guess_ab(), - neti = guess_ab(), - nitr = guess_ab(), - novo = guess_ab(), - norf = guess_ab(), - oflo = guess_ab(), - peni = guess_ab(), - pipe = guess_ab(), - pita = guess_ab(), - poly = guess_ab(), - qida = guess_ab(), - rifa = guess_ab(), - roxi = guess_ab(), - siso = guess_ab(), - teic = guess_ab(), - tetr = guess_ab(), - tica = guess_ab(), - tige = guess_ab(), - tobr = guess_ab(), - trim = guess_ab(), - trsu = guess_ab(), - vanc = guess_ab()) { + amcl = guess_ab_col(), + amik = guess_ab_col(), + amox = guess_ab_col(), + ampi = guess_ab_col(), + azit = guess_ab_col(), + aztr = guess_ab_col(), + cefa = guess_ab_col(), + cfra = guess_ab_col(), + cfep = guess_ab_col(), + cfot = guess_ab_col(), + cfox = guess_ab_col(), + cfta = 
guess_ab_col(), + cftr = guess_ab_col(), + cfur = guess_ab_col(), + chlo = guess_ab_col(), + cipr = guess_ab_col(), + clar = guess_ab_col(), + clin = guess_ab_col(), + clox = guess_ab_col(), + coli = guess_ab_col(), + czol = guess_ab_col(), + dapt = guess_ab_col(), + doxy = guess_ab_col(), + erta = guess_ab_col(), + eryt = guess_ab_col(), + fosf = guess_ab_col(), + fusi = guess_ab_col(), + gent = guess_ab_col(), + imip = guess_ab_col(), + kana = guess_ab_col(), + levo = guess_ab_col(), + linc = guess_ab_col(), + line = guess_ab_col(), + mero = guess_ab_col(), + metr = guess_ab_col(), + mino = guess_ab_col(), + moxi = guess_ab_col(), + nali = guess_ab_col(), + neom = guess_ab_col(), + neti = guess_ab_col(), + nitr = guess_ab_col(), + novo = guess_ab_col(), + norf = guess_ab_col(), + oflo = guess_ab_col(), + peni = guess_ab_col(), + pipe = guess_ab_col(), + pita = guess_ab_col(), + poly = guess_ab_col(), + qida = guess_ab_col(), + rifa = guess_ab_col(), + roxi = guess_ab_col(), + siso = guess_ab_col(), + teic = guess_ab_col(), + tetr = guess_ab_col(), + tica = guess_ab_col(), + tige = guess_ab_col(), + tobr = guess_ab_col(), + trim = guess_ab_col(), + trsu = guess_ab_col(), + vanc = guess_ab_col()) { if (!is.data.frame(tbl)) { stop("`tbl` must be a data frame.", call. 
= FALSE) @@ -169,66 +169,66 @@ mdro <- function(tbl, } # check columns - if (identical(amcl, as.name("guess_ab"))) { amcl <- guess_ab(tbl, "amcl", verbose = info) } - if (identical(amik, as.name("guess_ab"))) { amik <- guess_ab(tbl, "amik", verbose = info) } - if (identical(amox, as.name("guess_ab"))) { amox <- guess_ab(tbl, "amox", verbose = info) } - if (identical(ampi, as.name("guess_ab"))) { ampi <- guess_ab(tbl, "ampi", verbose = info) } - if (identical(azit, as.name("guess_ab"))) { azit <- guess_ab(tbl, "azit", verbose = info) } - if (identical(aztr, as.name("guess_ab"))) { aztr <- guess_ab(tbl, "aztr", verbose = info) } - if (identical(cefa, as.name("guess_ab"))) { cefa <- guess_ab(tbl, "cefa", verbose = info) } - if (identical(cfra, as.name("guess_ab"))) { cfra <- guess_ab(tbl, "cfra", verbose = info) } - if (identical(cfep, as.name("guess_ab"))) { cfep <- guess_ab(tbl, "cfep", verbose = info) } - if (identical(cfot, as.name("guess_ab"))) { cfot <- guess_ab(tbl, "cfot", verbose = info) } - if (identical(cfox, as.name("guess_ab"))) { cfox <- guess_ab(tbl, "cfox", verbose = info) } - if (identical(cfta, as.name("guess_ab"))) { cfta <- guess_ab(tbl, "cfta", verbose = info) } - if (identical(cftr, as.name("guess_ab"))) { cftr <- guess_ab(tbl, "cftr", verbose = info) } - if (identical(cfur, as.name("guess_ab"))) { cfur <- guess_ab(tbl, "cfur", verbose = info) } - if (identical(chlo, as.name("guess_ab"))) { chlo <- guess_ab(tbl, "chlo", verbose = info) } - if (identical(cipr, as.name("guess_ab"))) { cipr <- guess_ab(tbl, "cipr", verbose = info) } - if (identical(clar, as.name("guess_ab"))) { clar <- guess_ab(tbl, "clar", verbose = info) } - if (identical(clin, as.name("guess_ab"))) { clin <- guess_ab(tbl, "clin", verbose = info) } - if (identical(clox, as.name("guess_ab"))) { clox <- guess_ab(tbl, "clox", verbose = info) } - if (identical(coli, as.name("guess_ab"))) { coli <- guess_ab(tbl, "coli", verbose = info) } - if (identical(czol, as.name("guess_ab"))) { 
czol <- guess_ab(tbl, "czol", verbose = info) } - if (identical(dapt, as.name("guess_ab"))) { dapt <- guess_ab(tbl, "dapt", verbose = info) } - if (identical(doxy, as.name("guess_ab"))) { doxy <- guess_ab(tbl, "doxy", verbose = info) } - if (identical(erta, as.name("guess_ab"))) { erta <- guess_ab(tbl, "erta", verbose = info) } - if (identical(eryt, as.name("guess_ab"))) { eryt <- guess_ab(tbl, "eryt", verbose = info) } - if (identical(fosf, as.name("guess_ab"))) { fosf <- guess_ab(tbl, "fosf", verbose = info) } - if (identical(fusi, as.name("guess_ab"))) { fusi <- guess_ab(tbl, "fusi", verbose = info) } - if (identical(gent, as.name("guess_ab"))) { gent <- guess_ab(tbl, "gent", verbose = info) } - if (identical(imip, as.name("guess_ab"))) { imip <- guess_ab(tbl, "imip", verbose = info) } - if (identical(kana, as.name("guess_ab"))) { kana <- guess_ab(tbl, "kana", verbose = info) } - if (identical(levo, as.name("guess_ab"))) { levo <- guess_ab(tbl, "levo", verbose = info) } - if (identical(linc, as.name("guess_ab"))) { linc <- guess_ab(tbl, "linc", verbose = info) } - if (identical(line, as.name("guess_ab"))) { line <- guess_ab(tbl, "line", verbose = info) } - if (identical(mero, as.name("guess_ab"))) { mero <- guess_ab(tbl, "mero", verbose = info) } - if (identical(metr, as.name("guess_ab"))) { metr <- guess_ab(tbl, "metr", verbose = info) } - if (identical(mino, as.name("guess_ab"))) { mino <- guess_ab(tbl, "mino", verbose = info) } - if (identical(moxi, as.name("guess_ab"))) { moxi <- guess_ab(tbl, "moxi", verbose = info) } - if (identical(nali, as.name("guess_ab"))) { nali <- guess_ab(tbl, "nali", verbose = info) } - if (identical(neom, as.name("guess_ab"))) { neom <- guess_ab(tbl, "neom", verbose = info) } - if (identical(neti, as.name("guess_ab"))) { neti <- guess_ab(tbl, "neti", verbose = info) } - if (identical(nitr, as.name("guess_ab"))) { nitr <- guess_ab(tbl, "nitr", verbose = info) } - if (identical(novo, as.name("guess_ab"))) { novo <- guess_ab(tbl, 
"novo", verbose = info) } - if (identical(norf, as.name("guess_ab"))) { norf <- guess_ab(tbl, "norf", verbose = info) } - if (identical(oflo, as.name("guess_ab"))) { oflo <- guess_ab(tbl, "oflo", verbose = info) } - if (identical(peni, as.name("guess_ab"))) { peni <- guess_ab(tbl, "peni", verbose = info) } - if (identical(pipe, as.name("guess_ab"))) { pipe <- guess_ab(tbl, "pipe", verbose = info) } - if (identical(pita, as.name("guess_ab"))) { pita <- guess_ab(tbl, "pita", verbose = info) } - if (identical(poly, as.name("guess_ab"))) { poly <- guess_ab(tbl, "poly", verbose = info) } - if (identical(qida, as.name("guess_ab"))) { qida <- guess_ab(tbl, "qida", verbose = info) } - if (identical(rifa, as.name("guess_ab"))) { rifa <- guess_ab(tbl, "rifa", verbose = info) } - if (identical(roxi, as.name("guess_ab"))) { roxi <- guess_ab(tbl, "roxi", verbose = info) } - if (identical(siso, as.name("guess_ab"))) { siso <- guess_ab(tbl, "siso", verbose = info) } - if (identical(teic, as.name("guess_ab"))) { teic <- guess_ab(tbl, "teic", verbose = info) } - if (identical(tetr, as.name("guess_ab"))) { tetr <- guess_ab(tbl, "tetr", verbose = info) } - if (identical(tica, as.name("guess_ab"))) { tica <- guess_ab(tbl, "tica", verbose = info) } - if (identical(tige, as.name("guess_ab"))) { tige <- guess_ab(tbl, "tige", verbose = info) } - if (identical(tobr, as.name("guess_ab"))) { tobr <- guess_ab(tbl, "tobr", verbose = info) } - if (identical(trim, as.name("guess_ab"))) { trim <- guess_ab(tbl, "trim", verbose = info) } - if (identical(trsu, as.name("guess_ab"))) { trsu <- guess_ab(tbl, "trsu", verbose = info) } - if (identical(vanc, as.name("guess_ab"))) { vanc <- guess_ab(tbl, "vanc", verbose = info) } + if (identical(amcl, as.name("guess_ab_col"))) { amcl <- guess_ab_col(tbl, "amcl", verbose = info) } + if (identical(amik, as.name("guess_ab_col"))) { amik <- guess_ab_col(tbl, "amik", verbose = info) } + if (identical(amox, as.name("guess_ab_col"))) { amox <- guess_ab_col(tbl, 
"amox", verbose = info) } + if (identical(ampi, as.name("guess_ab_col"))) { ampi <- guess_ab_col(tbl, "ampi", verbose = info) } + if (identical(azit, as.name("guess_ab_col"))) { azit <- guess_ab_col(tbl, "azit", verbose = info) } + if (identical(aztr, as.name("guess_ab_col"))) { aztr <- guess_ab_col(tbl, "aztr", verbose = info) } + if (identical(cefa, as.name("guess_ab_col"))) { cefa <- guess_ab_col(tbl, "cefa", verbose = info) } + if (identical(cfra, as.name("guess_ab_col"))) { cfra <- guess_ab_col(tbl, "cfra", verbose = info) } + if (identical(cfep, as.name("guess_ab_col"))) { cfep <- guess_ab_col(tbl, "cfep", verbose = info) } + if (identical(cfot, as.name("guess_ab_col"))) { cfot <- guess_ab_col(tbl, "cfot", verbose = info) } + if (identical(cfox, as.name("guess_ab_col"))) { cfox <- guess_ab_col(tbl, "cfox", verbose = info) } + if (identical(cfta, as.name("guess_ab_col"))) { cfta <- guess_ab_col(tbl, "cfta", verbose = info) } + if (identical(cftr, as.name("guess_ab_col"))) { cftr <- guess_ab_col(tbl, "cftr", verbose = info) } + if (identical(cfur, as.name("guess_ab_col"))) { cfur <- guess_ab_col(tbl, "cfur", verbose = info) } + if (identical(chlo, as.name("guess_ab_col"))) { chlo <- guess_ab_col(tbl, "chlo", verbose = info) } + if (identical(cipr, as.name("guess_ab_col"))) { cipr <- guess_ab_col(tbl, "cipr", verbose = info) } + if (identical(clar, as.name("guess_ab_col"))) { clar <- guess_ab_col(tbl, "clar", verbose = info) } + if (identical(clin, as.name("guess_ab_col"))) { clin <- guess_ab_col(tbl, "clin", verbose = info) } + if (identical(clox, as.name("guess_ab_col"))) { clox <- guess_ab_col(tbl, "clox", verbose = info) } + if (identical(coli, as.name("guess_ab_col"))) { coli <- guess_ab_col(tbl, "coli", verbose = info) } + if (identical(czol, as.name("guess_ab_col"))) { czol <- guess_ab_col(tbl, "czol", verbose = info) } + if (identical(dapt, as.name("guess_ab_col"))) { dapt <- guess_ab_col(tbl, "dapt", verbose = info) } + if (identical(doxy, 
as.name("guess_ab_col"))) { doxy <- guess_ab_col(tbl, "doxy", verbose = info) } + if (identical(erta, as.name("guess_ab_col"))) { erta <- guess_ab_col(tbl, "erta", verbose = info) } + if (identical(eryt, as.name("guess_ab_col"))) { eryt <- guess_ab_col(tbl, "eryt", verbose = info) } + if (identical(fosf, as.name("guess_ab_col"))) { fosf <- guess_ab_col(tbl, "fosf", verbose = info) } + if (identical(fusi, as.name("guess_ab_col"))) { fusi <- guess_ab_col(tbl, "fusi", verbose = info) } + if (identical(gent, as.name("guess_ab_col"))) { gent <- guess_ab_col(tbl, "gent", verbose = info) } + if (identical(imip, as.name("guess_ab_col"))) { imip <- guess_ab_col(tbl, "imip", verbose = info) } + if (identical(kana, as.name("guess_ab_col"))) { kana <- guess_ab_col(tbl, "kana", verbose = info) } + if (identical(levo, as.name("guess_ab_col"))) { levo <- guess_ab_col(tbl, "levo", verbose = info) } + if (identical(linc, as.name("guess_ab_col"))) { linc <- guess_ab_col(tbl, "linc", verbose = info) } + if (identical(line, as.name("guess_ab_col"))) { line <- guess_ab_col(tbl, "line", verbose = info) } + if (identical(mero, as.name("guess_ab_col"))) { mero <- guess_ab_col(tbl, "mero", verbose = info) } + if (identical(metr, as.name("guess_ab_col"))) { metr <- guess_ab_col(tbl, "metr", verbose = info) } + if (identical(mino, as.name("guess_ab_col"))) { mino <- guess_ab_col(tbl, "mino", verbose = info) } + if (identical(moxi, as.name("guess_ab_col"))) { moxi <- guess_ab_col(tbl, "moxi", verbose = info) } + if (identical(nali, as.name("guess_ab_col"))) { nali <- guess_ab_col(tbl, "nali", verbose = info) } + if (identical(neom, as.name("guess_ab_col"))) { neom <- guess_ab_col(tbl, "neom", verbose = info) } + if (identical(neti, as.name("guess_ab_col"))) { neti <- guess_ab_col(tbl, "neti", verbose = info) } + if (identical(nitr, as.name("guess_ab_col"))) { nitr <- guess_ab_col(tbl, "nitr", verbose = info) } + if (identical(novo, as.name("guess_ab_col"))) { novo <- guess_ab_col(tbl, "novo", 
verbose = info) } + if (identical(norf, as.name("guess_ab_col"))) { norf <- guess_ab_col(tbl, "norf", verbose = info) } + if (identical(oflo, as.name("guess_ab_col"))) { oflo <- guess_ab_col(tbl, "oflo", verbose = info) } + if (identical(peni, as.name("guess_ab_col"))) { peni <- guess_ab_col(tbl, "peni", verbose = info) } + if (identical(pipe, as.name("guess_ab_col"))) { pipe <- guess_ab_col(tbl, "pipe", verbose = info) } + if (identical(pita, as.name("guess_ab_col"))) { pita <- guess_ab_col(tbl, "pita", verbose = info) } + if (identical(poly, as.name("guess_ab_col"))) { poly <- guess_ab_col(tbl, "poly", verbose = info) } + if (identical(qida, as.name("guess_ab_col"))) { qida <- guess_ab_col(tbl, "qida", verbose = info) } + if (identical(rifa, as.name("guess_ab_col"))) { rifa <- guess_ab_col(tbl, "rifa", verbose = info) } + if (identical(roxi, as.name("guess_ab_col"))) { roxi <- guess_ab_col(tbl, "roxi", verbose = info) } + if (identical(siso, as.name("guess_ab_col"))) { siso <- guess_ab_col(tbl, "siso", verbose = info) } + if (identical(teic, as.name("guess_ab_col"))) { teic <- guess_ab_col(tbl, "teic", verbose = info) } + if (identical(tetr, as.name("guess_ab_col"))) { tetr <- guess_ab_col(tbl, "tetr", verbose = info) } + if (identical(tica, as.name("guess_ab_col"))) { tica <- guess_ab_col(tbl, "tica", verbose = info) } + if (identical(tige, as.name("guess_ab_col"))) { tige <- guess_ab_col(tbl, "tige", verbose = info) } + if (identical(tobr, as.name("guess_ab_col"))) { tobr <- guess_ab_col(tbl, "tobr", verbose = info) } + if (identical(trim, as.name("guess_ab_col"))) { trim <- guess_ab_col(tbl, "trim", verbose = info) } + if (identical(trsu, as.name("guess_ab_col"))) { trsu <- guess_ab_col(tbl, "trsu", verbose = info) } + if (identical(vanc, as.name("guess_ab_col"))) { vanc <- guess_ab_col(tbl, "vanc", verbose = info) } col.list <- c(amcl, amik, amox, ampi, azit, aztr, cefa, cfra, cfep, cfot, cfox, cfta, cftr, cfur, chlo, cipr, clar, clin, clox, coli, czol, dapt, 
doxy, erta, eryt, fosf, fusi, gent, imip, kana, diff --git a/_pkgdown.yml b/_pkgdown.yml index b2197338..f44270cf 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -53,6 +53,9 @@ navbar: - text: 'Use the G-test' icon: 'fa-clipboard-check' href: 'articles/G_test.html' + - text: 'Other: benchmarks' + icon: 'fa-shipping-fast' + href: 'articles/benchmarks.html' - text: 'Manual' icon: 'fa-book-open' href: 'reference/' @@ -73,7 +76,8 @@ navbar: reference: - title: 'Background information' desc: > - Some pages about our package and its external sources. + Some pages about our package and its external sources. Be sure to read our [How To's](./../articles/index.html) + for more information about how to work with functions in this package. contents: - '`AMR`' - '`ITIS`' @@ -85,7 +89,7 @@ reference: contents: - starts_with("as.") - '`eucast_rules`' - - '`guess_ab`' + - '`guess_ab_col`' - '`read.4D`' - title: 'Adding variables to your data' desc: > @@ -129,7 +133,10 @@ reference: - '`microorganisms.umcg`' - '`supplementary_data`' - title: Other - desc: ~ + desc: > + These functions are mostly for internal use, but some of + them may also be suitable for your analysis. Especially the + 'like' function can be useful: `if (x %like% y) {...}`. contents: - '`get_locale`' - '`like`' diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index 9ed9ff8c..bcf568b9 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -149,6 +149,13 @@ Use the G-test +
  • + + + + Other: benchmarks + +
  • diff --git a/docs/articles/AMR.html b/docs/articles/AMR.html index 9e9b3092..d586dc73 100644 --- a/docs/articles/AMR.html +++ b/docs/articles/AMR.html @@ -111,6 +111,13 @@ Use the G-test
  • +
  • + + + + Other: benchmarks + +
  • @@ -171,7 +178,7 @@

    How to conduct AMR analysis

    Matthijs S. Berends

    -

    08 January 2019

    +

    11 January 2019

    @@ -180,7 +187,7 @@ -

    Note: values on this page will change with every website update since they are based on randomly created values and the page was written in RMarkdown. However, the methodology remains unchanged. This page was generated on 08 January 2019.

    +

    Note: values on this page will change with every website update since they are based on randomly created values and the page was written in RMarkdown. However, the methodology remains unchanged. This page was generated on 11 January 2019.

    Introduction

    @@ -196,21 +203,21 @@ -2019-01-08 +2019-01-11 abcd Escherichia coli S S -2019-01-08 +2019-01-11 abcd Escherichia coli S R -2019-01-08 +2019-01-11 efgh Escherichia coli R @@ -268,18 +275,18 @@

    Put everything together

    -

    Using the sample() function, we can randomly select items from all objects we defined earlier. To let our fake data reflect reality a bit, we will also approximately define the probabilities of bacteria and the antibiotic results with the prob parameter.

    -
    data <- data.frame(date = sample(dates, 5000, replace = TRUE),
    -                   patient_id = sample(patients, 5000, replace = TRUE),
    -                   hospital = sample(hospitals, 5000, replace = TRUE, prob = c(0.30, 0.35, 0.15, 0.20)),
    -                   bacteria = sample(bacteria, 5000, replace = TRUE, prob = c(0.50, 0.25, 0.15, 0.10)),
    -                   amox = sample(ab_interpretations, 5000, replace = TRUE, prob = c(0.60, 0.05, 0.35)),
    -                   amcl = sample(ab_interpretations, 5000, replace = TRUE, prob = c(0.75, 0.10, 0.15)),
    -                   cipr = sample(ab_interpretations, 5000, replace = TRUE, prob = c(0.80, 0.00, 0.20)),
    -                   gent = sample(ab_interpretations, 5000, replace = TRUE, prob = c(0.92, 0.00, 0.08))
    +

    Using the sample() function, we can randomly select items from all objects we defined earlier. To let our fake data reflect reality a bit, we will also approximately define the probabilities of bacteria and the antibiotic results with the prob parameter.

    +
    data <- data.frame(date = sample(dates, 5000, replace = TRUE),
    +                   patient_id = sample(patients, 5000, replace = TRUE),
    +                   hospital = sample(hospitals, 5000, replace = TRUE, prob = c(0.30, 0.35, 0.15, 0.20)),
    +                   bacteria = sample(bacteria, 5000, replace = TRUE, prob = c(0.50, 0.25, 0.15, 0.10)),
    +                   amox = sample(ab_interpretations, 5000, replace = TRUE, prob = c(0.60, 0.05, 0.35)),
    +                   amcl = sample(ab_interpretations, 5000, replace = TRUE, prob = c(0.75, 0.10, 0.15)),
    +                   cipr = sample(ab_interpretations, 5000, replace = TRUE, prob = c(0.80, 0.00, 0.20)),
    +                   gent = sample(ab_interpretations, 5000, replace = TRUE, prob = c(0.92, 0.00, 0.08))
                        )
    -

    Using the left_join() function from the dplyr package, we can ‘map’ the gender to the patient ID using the patients_table object we created earlier:

    - +

    Using the left_join() function from the dplyr package, we can ‘map’ the gender to the patient ID using the patients_table object we created earlier:

    +

    The resulting data set contains 5,000 blood culture isolates. With the head() function we can preview the first 6 values of this data set:

    head(data)
    @@ -296,70 +303,70 @@ - - - + + + - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - + + + + - + + - - - - + + + + - - + +
    2011-02-27M6Hospital C2014-02-02P8Hospital D Escherichia coli SSRSM
    2012-07-12C2Hospital BStreptococcus pneumoniaeSRSSM
    2016-09-13O7Hospital AEscherichia coliR R S S F
    2016-12-05E4Hospital A2013-10-26Q1Hospital B Escherichia coli IRR SF
    2017-06-12E5Hospital DStreptococcus pneumoniaeSRSSM
    2013-06-16K7Hospital BEscherichia coliSI S S M
    2017-10-05M1Hospital AEscherichia coliR2013-01-11M4Hospital BStaphylococcus aureus IRSS S M
    2012-02-22H9Hospital CStreptococcus pneumoniae2016-11-18W10Hospital AStaphylococcus aureus S SR SMSF
    @@ -379,15 +386,15 @@ # # Item Count Percent Cum. Count Cum. Percent # --- ----- ------ -------- ----------- ------------- -# 1 M 2,635 52.7% 2,635 52.7% -# 2 F 2,365 47.3% 5,000 100.0%
    +# 1 M 2,598 52.0% 2,598 52.0% +# 2 F 2,402 48.0% 5,000 100.0%

    So, we can draw at least two conclusions immediately. From a data scientist perspective, the data looks clean: only values M and F. From a researcher perspective: there are slightly more men. Nothing we didn’t already know.

    -

    The data is already quite clean, but we still need to transform some variables. The bacteria column now consists of text, and we want to add more variables based on microbial IDs later on. So, we will transform this column to valid IDs. The mutate() function of the dplyr package makes this really easy:

    +

    The data is already quite clean, but we still need to transform some variables. The bacteria column now consists of text, and we want to add more variables based on microbial IDs later on. So, we will transform this column to valid IDs. The mutate() function of the dplyr package makes this really easy:

    data <- data %>%
    -  mutate(bacteria = as.mo(bacteria))
    -

    We also want to transform the antibiotics, because in real life data we don’t know if they are really clean. The as.rsi() function ensures reliability and reproducibility in these kind of variables. The mutate_at() will run the as.rsi() function on defined variables:

    + mutate(bacteria = as.mo(bacteria))
    +

    We also want to transform the antibiotics, because in real life data we don’t know if they are really clean. The as.rsi() function ensures reliability and reproducibility in these kinds of variables. The mutate_at() will run the as.rsi() function on defined variables:

    data <- data %>%
    -  mutate_at(vars(amox:gent), as.rsi)
    + mutate_at(vars(amox:gent), as.rsi)

    Finally, we will apply EUCAST rules on our antimicrobial results. In Europe, most medical microbiological laboratories already apply these rules. Our package features their latest insights on intrinsic resistance and exceptional phenotypes. Moreover, the eucast_rules() function can also apply additional rules, like forcing ampicillin = R when amoxicillin/clavulanic acid = R.

    Because the amoxicillin (column amox) and amoxicillin/clavulanic acid (column amcl) in our data were generated randomly, some rows will undoubtedly contain amox = S and amcl = R, which is technically impossible. The eucast_rules() fixes this:

    +# => EUCAST rules affected 1,830 out of 5,000 rows -> changed 1,025 test results.

    Adding new variables

    Now that we have the microbial ID, we can add some taxonomic properties:

    data <- data %>% 
    -  mutate(gramstain = mo_gramstain(bacteria),
    +  mutate(gramstain = mo_gramstain(bacteria),
              genus = mo_genus(bacteria),
              species = mo_species(bacteria))
    @@ -451,14 +458,14 @@

    This AMR package includes this methodology with the first_isolate() function. It adopts the episode of a year (can be changed by user) and it starts counting days after every selected isolate. This new variable can easily be added to our data:

    -

    So only 58.6% is suitable for resistance analysis! We can now filter on is with the filter() function, also from the dplyr package:

    +# => Found 2,962 first isolates (59.2% of total)
    +

    So only 59.2% is suitable for resistance analysis! We can now filter on it with the filter() function, also from the dplyr package:

    + filter(first == TRUE)

    For future use, the above two syntaxes can be shortened with the filter_first_isolate() function:

    data_1st <- data %>% 
       filter_first_isolate()
    @@ -482,21 +489,21 @@ 1 -2010-01-31 -L4 +2010-05-23 +E7 B_ESCHR_COL R S -S +R S TRUE 2 -2010-08-01 -L4 +2010-08-03 +E7 B_ESCHR_COL -R +S S S S @@ -504,52 +511,52 @@ 3 -2010-12-29 -L4 +2011-01-20 +E7 B_ESCHR_COL -I -S R +R +S S FALSE 4 -2011-01-21 -L4 +2011-02-21 +E7 B_ESCHR_COL -R -I S +R +R S FALSE 5 -2011-02-06 -L4 +2011-08-04 +E7 B_ESCHR_COL -I S -R +S +S S TRUE 6 -2011-05-30 -L4 +2011-11-15 +E7 B_ESCHR_COL -S -I R S +S +S FALSE 7 -2011-08-16 -L4 +2012-01-13 +E7 B_ESCHR_COL S S @@ -559,8 +566,8 @@ 8 -2012-01-15 -L4 +2012-03-10 +E7 B_ESCHR_COL S S @@ -570,22 +577,22 @@ 9 -2014-07-16 -L4 +2012-11-09 +E7 B_ESCHR_COL S S -S +R S TRUE 10 -2014-09-19 -L4 +2013-04-06 +E7 B_ESCHR_COL S -R +S S S FALSE @@ -595,8 +602,8 @@

    Only 3 isolates are marked as ‘first’ according to the CLSI guideline. But when reviewing the antibiogram, it is obvious that some isolates are absolutely different strains and should be included too. This is why we weigh isolates, based on their antibiogram. The key_antibiotics() function adds a vector with 18 key antibiotics: 6 broad spectrum ones, 6 narrow spectrum for Gram negatives and 6 narrow spectrum for Gram positives. These can be defined by the user.

    If a column exists with a name like ‘key(…)ab’ the first_isolate() function will automatically use it and determine the first weighted isolates. Mind the NOTEs in below output:

    data <- data %>% 
    -  mutate(keyab = key_antibiotics(.)) %>% 
    -  mutate(first_weighted = first_isolate(.))
    +  mutate(keyab = key_antibiotics(.)) %>% 
    +  mutate(first_weighted = first_isolate(.))
     # NOTE: Using column `bacteria` as input for `col_mo`.
     #   amox   amcl   cipr   gent 
     # "amox" "amcl" "cipr" "gent" 
    @@ -608,7 +615,7 @@
     # NOTE: Using column `patient_id` as input for `col_patient_id`.
     # NOTE: Using column `keyab` as input for `col_keyantibiotics`. Use col_keyantibiotics = FALSE to prevent this.
     # [Criterion] Inclusion based on key antibiotics, ignoring I.
    -# => Found 4,435 first weighted isolates (88.7% of total)
    +# => Found 4,399 first weighted isolates (88.0% of total) @@ -625,80 +632,80 @@ - - + + - + - - + + - + - + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - + - - + + @@ -709,8 +716,8 @@ - - + + @@ -721,23 +728,23 @@ - - + + - + - - + + - + @@ -745,14 +752,14 @@
    isolate
    12010-01-31L42010-05-23E7 B_ESCHR_COL R SSR S TRUE TRUE
    22010-08-01L42010-08-03E7 B_ESCHR_COLRS S S S FALSEFALSETRUE
    32010-12-29L42011-01-20E7 B_ESCHR_COLIS RRS S FALSE TRUE
    42011-01-21L42011-02-21E7 B_ESCHR_COLRI SRR S FALSE TRUE
    52011-02-06L42011-08-04E7 B_ESCHR_COLI SRSS S TRUE TRUE
    62011-05-30L42011-11-15E7 B_ESCHR_COLSI R SSS FALSEFALSETRUE
    72011-08-16L42012-01-13E7 B_ESCHR_COL S S
    82012-01-15L42012-03-10E7 B_ESCHR_COL S S
    92014-07-16L42012-11-09E7 B_ESCHR_COL S SSR S TRUE TRUE
    102014-09-19L42013-04-06E7 B_ESCHR_COL SRS S S FALSE
    -

    Instead of 3, now 8 isolates are flagged. In total, 88.7% of all isolates are marked ‘first weighted’ - 147.3% more than when using the CLSI guideline. In real life, this novel algorithm will yield 5-10% more isolates than the classic CLSI guideline.

    +

    Instead of 3, now 10 isolates are flagged. In total, 88% of all isolates are marked ‘first weighted’ - 147.2% more than when using the CLSI guideline. In real life, this novel algorithm will yield 5-10% more isolates than the classic CLSI guideline.

    As with filter_first_isolate(), there’s a shortcut for this new algorithm too:

    data_1st <- data %>% 
       filter_first_weighted_isolate()
    -

    So we end up with 4,435 isolates for analysis.

    +

    So we end up with 4,399 isolates for analysis.

    We can remove unneeded columns:

    data_1st <- data_1st %>% 
    -  select(-c(first, keyab))
    + select(-c(first, keyab))

    Now our data looks like:

    head(data_1st)
    @@ -775,43 +782,11 @@ - - - + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -822,30 +797,30 @@ - - - - + + + + - + - + - - - - + + + + - + @@ -854,15 +829,47 @@ - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -884,7 +891,7 @@
    data_1st %>% freq(genus, species)

    Frequency table of genus and species
    Columns: 2
    -Length: 4,435 (of which NA: 0 = 0.00%)
    +Length: 4,399 (of which NA: 0 = 0.00%)
    Unique: 4

    Shortest: 16
    Longest: 24

    @@ -901,33 +908,33 @@ Longest: 24

    - - - - + + + + - - - - + + + + - - - - + + + + - - - + + + @@ -937,11 +944,11 @@ Longest: 24

    Resistance percentages

    The functions portion_R, portion_RI, portion_I, portion_IS and portion_S can be used to determine the portion of a specific antimicrobial outcome. They can be used on their own:

    data_1st %>% portion_IR(amox)
    -# [1] 0.4617813
    -

    Or can be used in conjuction with group_by() and summarise(), both from the dplyr package:

    +# [1] 0.4819277 +

    Or can be used in conjunction with group_by() and summarise(), both from the dplyr package:

    data_1st %>% 
    -  group_by(hospital) %>% 
    -  summarise(amoxicillin = portion_IR(amox))
    + group_by(hospital) %>% + summarise(amoxicillin =portion_IR(amox))
    12011-02-27M6Hospital C2014-02-02P8Hospital D B_ESCHR_COL SSRSMGram negativeEscherichiacoliTRUE
    22012-07-12C2Hospital BB_STRPTC_PNESRSRMGram positiveStreptococcuspneumoniaeTRUE
    32016-09-13O7Hospital AB_ESCHR_COLR R S STRUE
    52017-10-05M1Hospital A22013-10-26Q1Hospital B B_ESCHR_COLR I RR SMF Gram negative Escherichia coli TRUE
    62012-02-22H9Hospital C32017-06-12E5Hospital D B_STRPTC_PNE SS RS R M Gram positiveTRUE
    72011-01-13S6Hospital D42013-06-16K7Hospital BB_ESCHR_COLSISSMGram negativeEscherichiacoliTRUE
    52013-01-11M4Hospital B B_STPHY_AUR I S S SMGram positiveStaphylococcusaureusTRUE
    72012-01-05S10Hospital DB_STPHY_AURRIRS F Gram positive Staphylococcus
    1 Escherichia coli2,20349.7%2,20349.7%2,13848.6%2,13848.6%
    2 Staphylococcus aureus1,07224.2%3,27573.8%1,07024.3%3,20872.9%
    3 Streptococcus pneumoniae68915.5%3,96489.4%69715.8%3,90588.8%
    4 Klebsiella pneumoniae47110.6%4,43549411.2%4,399 100.0%
    @@ -950,26 +957,26 @@ Longest: 24

    - + - + - + - +
    hospital
    Hospital A0.45041640.4591382
    Hospital B0.47820340.5000000
    Hospital C0.46705710.4682171
    Hospital D0.44711010.4953380
    -

    Of course it would be very convenient to know the number of isolates responsible for the percentages. For that purpose the n_rsi() can be used, which works exactly like n_distinct() from the dplyr package. It counts all isolates available for every group (i.e. values S, I or R):

    +

    Of course it would be very convenient to know the number of isolates responsible for the percentages. For that purpose the n_rsi() function can be used, which works exactly like n_distinct() from the dplyr package. It counts all isolates available for every group (i.e. values S, I or R):

    data_1st %>% 
    -  group_by(hospital) %>% 
    -  summarise(amoxicillin = portion_IR(amox),
    +  group_by(hospital) %>% 
    +  summarise(amoxicillin = portion_IR(amox),
                 available = n_rsi(amox))
    @@ -980,30 +987,30 @@ Longest: 24

    - - + + - - + + - - + + - - + +
    Hospital A0.450416413210.45913821346
    Hospital B0.478203415140.50000001550
    Hospital C0.46705716830.4682171645
    Hospital D0.44711019170.4953380858

    These functions can also be used to get the portion of multiple antibiotics, to calculate co-resistance very easily:

    data_1st %>% 
    -  group_by(genus) %>% 
    -  summarise(amoxicillin = portion_S(amcl),
    +  group_by(genus) %>% 
    +  summarise(amoxicillin = portion_S(amcl),
                 gentamicin = portion_S(gent),
                 "amox + gent" = portion_S(amcl, gent))
    @@ -1016,37 +1023,37 @@ Longest: 24

    - - - + + + - - - + + + - - - + + + - + - +
    Escherichia0.71629600.90512940.97866550.71936390.91113190.9742750
    Klebsiella0.74946920.91295120.97664540.72267210.90283400.9777328
    Staphylococcus0.73414180.92444030.97761190.73925230.90841120.9831776
    Streptococcus0.75761970.7532281 0.00000000.75761970.7532281

    To make a transition to the next part, let’s see how this difference could be plotted:

    data_1st %>% 
    -  group_by(genus) %>% 
    -  summarise("1. Amoxicillin" = portion_S(amcl),
    +  group_by(genus) %>% 
    +  summarise("1. Amoxicillin" = portion_S(amcl),
                 "2. Gentamicin" = portion_S(gent),
                 "3. Amox + gent" = portion_S(amcl, gent)) %>% 
    -  tidyr::gather("Antibiotic", "S", -genus) %>%
    +  tidyr::gather("Antibiotic", "S", -genus) %>%
       ggplot(aes(x = genus,
                  y = S,
                  fill = Antibiotic)) +
    @@ -1076,7 +1083,7 @@ Longest: 24

    Omit the translate_ab = FALSE to have the antibiotic codes (amox, amcl, cipr, gent) translated to official WHO names (amoxicillin, amoxicillin and betalactamase inhibitor, ciprofloxacin, gentamicin).

    If we group on e.g. the genus column and add some additional functions from our package, we can create this:

    # group the data on `genus`
    -ggplot(data_1st %>% group_by(genus)) + 
    +ggplot(data_1st %>% group_by(genus)) + 
       # create bars with genus on x axis
       # it looks for variables with class `rsi`,
       # of which we have 4 (earlier created with `as.rsi`)
    @@ -1098,7 +1105,7 @@ Longest: 24

    To simplify this, we also created the ggplot_rsi() function, which combines almost all above functions:

    data_1st %>% 
    -  group_by(genus) %>%
    +  group_by(genus) %>%
       ggplot_rsi(x = "genus",
                  facet = "Antibiotic",
                  breaks = 0:4 * 25,
    @@ -1132,12 +1139,12 @@ Longest: 24

    We can transform the data and apply the test in only a couple of lines:

    septic_patients %>%
    -  filter(hospital_id %in% c("A", "D")) %>% # filter on only hospitals A and D
    -  select(hospital_id, fosf) %>%            # select the hospitals and fosfomycin
    -  group_by(hospital_id) %>%                # group on the hospitals
    +  filter(hospital_id %in% c("A", "D")) %>% # filter on only hospitals A and D
    +  select(hospital_id, fosf) %>%            # select the hospitals and fosfomycin
    +  group_by(hospital_id) %>%                # group on the hospitals
       count_df(combine_IR = TRUE) %>%          # count all isolates per group (hospital_id)
    -  tidyr::spread(hospital_id, Value) %>%    # transform output so A and D are columns
    -  select(A, D) %>%                         # and select these only
    +  tidyr::spread(hospital_id, Value) %>%    # transform output so A and D are columns
    +  select(A, D) %>%                         # and select these only
       as.matrix() %>%                          # transform to good old matrix for fisher.test()
       fisher.test()                            # do Fisher's Exact Test
     # 
    diff --git a/docs/articles/AMR_files/figure-html/plot 1-1.png b/docs/articles/AMR_files/figure-html/plot 1-1.png
    index f9b2d55e..c9ff2753 100644
    Binary files a/docs/articles/AMR_files/figure-html/plot 1-1.png and b/docs/articles/AMR_files/figure-html/plot 1-1.png differ
    diff --git a/docs/articles/AMR_files/figure-html/plot 3-1.png b/docs/articles/AMR_files/figure-html/plot 3-1.png
    index 265ee49b..6bc000b5 100644
    Binary files a/docs/articles/AMR_files/figure-html/plot 3-1.png and b/docs/articles/AMR_files/figure-html/plot 3-1.png differ
    diff --git a/docs/articles/AMR_files/figure-html/plot 4-1.png b/docs/articles/AMR_files/figure-html/plot 4-1.png
    index eecd78d2..53cfc234 100644
    Binary files a/docs/articles/AMR_files/figure-html/plot 4-1.png and b/docs/articles/AMR_files/figure-html/plot 4-1.png differ
    diff --git a/docs/articles/AMR_files/figure-html/plot 5-1.png b/docs/articles/AMR_files/figure-html/plot 5-1.png
    index 882d3802..5cc23525 100644
    Binary files a/docs/articles/AMR_files/figure-html/plot 5-1.png and b/docs/articles/AMR_files/figure-html/plot 5-1.png differ
    diff --git a/docs/articles/EUCAST.html b/docs/articles/EUCAST.html
    index 4f91ef91..48d7603c 100644
    --- a/docs/articles/EUCAST.html
    +++ b/docs/articles/EUCAST.html
    @@ -111,6 +111,13 @@
             Use the G-test
           
         
  • +
  • + + + + Other: benchmarks + +
  • @@ -171,7 +178,7 @@

    How to apply EUCAST rules

    Matthijs S. Berends

    -

    08 January 2019

    +

    11 January 2019

    diff --git a/docs/articles/G_test.html b/docs/articles/G_test.html index 184ca5e6..fb6b4173 100644 --- a/docs/articles/G_test.html +++ b/docs/articles/G_test.html @@ -111,6 +111,13 @@ Use the G-test
  • +
  • + + + + Other: benchmarks + +
  • @@ -171,7 +178,7 @@

    How to use the G-test

    Matthijs S. Berends

    -

    08 January 2019

    +

    11 January 2019

    diff --git a/docs/articles/Predict.html b/docs/articles/Predict.html index 26cd4754..7cbb6fbd 100644 --- a/docs/articles/Predict.html +++ b/docs/articles/Predict.html @@ -111,6 +111,13 @@ Use the G-test
  • +
  • + + + + Other: benchmarks + +
  • @@ -171,7 +178,7 @@

    How to predict antimicrobial resistance

    Matthijs S. Berends

    -

    08 January 2019

    +

    11 January 2019

    diff --git a/docs/articles/ab_property.html b/docs/articles/ab_property.html index e7d88cbe..85478e42 100644 --- a/docs/articles/ab_property.html +++ b/docs/articles/ab_property.html @@ -111,6 +111,13 @@ Use the G-test
  • +
  • + + + + Other: benchmarks + +
  • @@ -171,7 +178,7 @@

    How to get properties of an antibiotic

    Matthijs S. Berends

    -

    08 January 2019

    +

    11 January 2019

    diff --git a/docs/articles/benchmarks.html b/docs/articles/benchmarks.html new file mode 100644 index 00000000..a3249e69 --- /dev/null +++ b/docs/articles/benchmarks.html @@ -0,0 +1,371 @@ + + + + + + + +Benchmarks • AMR (for R) + + + + + + + + + + + + + + + + + + + +
    +
    + + + +
    +
    + + + + +

    One of the most important features of this package is the complete microbial taxonomic database, supplied by ITIS (https://www.itis.gov). We created a function as.mo() that transforms any user input value to a valid microbial ID by using AI (Artificial Intelligence) and based on the taxonomic tree of ITIS.

    +

    Using the microbenchmark package, we can review the calculation performance of this function.

    +
    library(microbenchmark)
    +

    In the next test, we try to ‘coerce’ different input values for Staphylococcus aureus. The actual result is the same every time: it returns its MO code B_STPHY_AUR (B stands for Bacteria, the taxonomic kingdom).

    +

    But the calculation time differs a lot. Here, the AI effect can be reviewed best:

    + +

    In the table above, all measurements are shown in milliseconds (thousandths of a second), tested on a quite regular Linux server from 2007 (Core 2 Duo 2.7 GHz, 2 GB DDR2 RAM). A value of 6.9 milliseconds means it will roughly determine 144 input values per second. In case of 39.2 milliseconds, this is only 26 input values per second. The more an input value resembles a full name (like C, D and F), the faster the result will be found. In case of G, the input is already a valid MO code, so it takes almost no time at all (0.0001 seconds on our server).

    +

    To achieve this speed, the as.mo function also takes into account the prevalence of human pathogenic microorganisms. The downside is of course that less prevalent microorganisms will be determined far more slowly. See this example for the ID of Burkholderia nodosa (B_BRKHL_NOD):

    + +

    That takes up to 11 times as much time! A value of 158.4 milliseconds means it can only determine ~6 different input values per second. We can conclude that looking up arbitrary codes of less prevalent microorganisms is the worst way to go, in terms of calculation performance.

    +

    To relieve this pitfall and further improve performance, two important calculations take almost no time at all: repetitive results and already precalculated results.

    +
    +

    +Repetitive results

    +

    Repetitive results mean that unique values are present more than once. Unique values will only be calculated once by as.mo(). We will use mo_fullname() for this test - a helper function that returns the full microbial name (genus, species and possibly subspecies) and uses as.mo() internally.

    + +

    So transforming 500,000 values (!) of 96 unique values only takes 0.12 seconds (120 ms). You only lose time on your unique input values.

    +

    Results for ten times as many values - 5,000,000:

    + +

    Even the full names of 5 million values are calculated within a second.

    +
    +
    +

    +Precalculated results

    +

    What about precalculated results? If the input is an already precalculated result of a helper function like mo_fullname(), it almost doesn’t take any time at all (see ‘C’ below):

    + +

    So going from mo_fullname("Staphylococcus aureus") to "Staphylococcus aureus" takes 0.0001 seconds - it doesn’t even start calculating if the result would be the same as the expected resulting value. That goes for all helper functions:

    + +

    Of course, when running mo_phylum("Firmicutes") the function has zero knowledge about the actual microorganism, namely S. aureus. But since the result would be "Firmicutes" too, there is no point in calculating the result. And because this package ‘knows’ all phyla of all known microorganisms (according to ITIS), it can just return the initial value immediately.

    +
    +
    +

    +Results in other languages

    +

    When the system language is non-English and supported by this AMR package, some functions take a little while longer:

    + +

    Currently supported are German, Dutch, Spanish, Italian, French and Portuguese.

    +
    +
    + + + +
    + + + +
    + + + + + diff --git a/docs/articles/freq.html b/docs/articles/freq.html index eb3ca3d8..656b19d0 100644 --- a/docs/articles/freq.html +++ b/docs/articles/freq.html @@ -111,6 +111,13 @@ Use the G-test
  • +
  • + + + + Other: benchmarks + +
  • @@ -171,7 +178,7 @@

    How to create frequency tables

    Matthijs S. Berends

    -

    08 January 2019

    +

    11 January 2019

    diff --git a/docs/articles/index.html b/docs/articles/index.html index 3742304a..329fd1bd 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -149,6 +149,13 @@ Use the G-test
  • +
  • + + + + Other: benchmarks + +
  • @@ -220,6 +227,7 @@
  • How to use the *G*-test
  • How to predict antimicrobial resistance
  • How to get properties of an antibiotic
  • +
  • Benchmarks
  • How to create frequency tables
  • How to get properties of a microorganism
  • diff --git a/docs/articles/mo_property.html b/docs/articles/mo_property.html index 4d794a18..8c22946d 100644 --- a/docs/articles/mo_property.html +++ b/docs/articles/mo_property.html @@ -111,6 +111,13 @@ Use the G-test +
  • + + + + Other: benchmarks + +
  • @@ -171,7 +178,7 @@

    How to get properties of a microorganism

    Matthijs S. Berends

    -

    08 January 2019

    +

    11 January 2019

    diff --git a/docs/authors.html b/docs/authors.html index 58b38cd1..eeddf4b3 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -149,6 +149,13 @@ Use the G-test
  • +
  • + + + + Other: benchmarks + +
  • diff --git a/docs/extra.css b/docs/extra.css index df38f6b5..2c0d74ab 100644 --- a/docs/extra.css +++ b/docs/extra.css @@ -50,6 +50,7 @@ a, a:hover, a:focus { } a[href], a[href]:hover, a[href]:focus, code a[href], code a[href]:hover, code a[href]:focus, +a[href] code, a[href] code:hover, a[href] code:focus, pre a[href], pre a[href]:hover, pre a[href]:focus, a code[href], a code[href]:hover, a code[href]:focus, a pre[href], a pre[href]:hover, a pre[href]:focus { @@ -139,3 +140,9 @@ thead { tbody { border-bottom: 2px solid black; } +table a:not(.btn), .table a:not(.btn) { + text-decoration: inherit; +} +table a:not(.btn):hover, .table a:not(.btn):hover { + text-decoration: underline; +} diff --git a/docs/index.html b/docs/index.html index b5501c45..66eca03e 100644 --- a/docs/index.html +++ b/docs/index.html @@ -113,6 +113,13 @@ Use the G-test
  • +
  • + + + + Other: benchmarks + +
  • @@ -228,7 +235,8 @@ Short introduction

    This package contains the complete microbial taxonomic data (with all nine taxonomic ranks - from kingdom to subspecies) from the publicly available Integrated Taxonomic Information System (ITIS, https://www.itis.gov).

    -

    All (sub)species from the taxonomic kingdoms Bacteria, Fungi and Protozoa are included in this package, as well as all previously accepted names known to ITIS. Furthermore, the responsible authors and year of publication are available. This allows users to use authoritative taxonomic information for their data analysis on any microorganism, not only human pathogens. It also helps to quickly determine the Gram stain of bacteria, since all bacteria are classified into subkingdom Negibacteria or Posibacteria. ITIS is a partnership of U.S., Canadian, and Mexican agencies and taxonomic specialists.

    +

    All (sub)species from the taxonomic kingdoms Bacteria, Fungi and Protozoa are included in this package, as well as all previously accepted names known to ITIS. Furthermore, the responsible authors and year of publication are available. This allows users to use authoritative taxonomic information for their data analysis on any microorganism, not only human pathogens. It also helps to quickly determine the Gram stain of bacteria, since all bacteria are classified into subkingdom Negibacteria or Posibacteria.

    +

    Read more about ITIS in our manual.

    The AMR package basically does four important things:

    1. diff --git a/docs/news/index.html b/docs/news/index.html index 87fcccc4..49aaec0b 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -149,6 +149,13 @@ Use the G-test
    2. +
    3. + + + + Other: benchmarks + +
    4. @@ -229,7 +236,7 @@
    5. Support for dplyr version 0.8.0
    6. -
    7. Function guess_ab to find an antibiotic column in a table
    8. +
    9. Function guess_ab_col to find an antibiotic column in a table
    10. Function mo_failures() to review values that could not be coerced to a valid MO code, using as.mo(). This latter function will now only show a maximum of 25 uncoerced values.
    11. Function mo_renamed() to get a list of all returned values from as.mo() that have had taxonomic renaming
    12. Function age() to calculate the (patients) age in years
    13. diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index 1f121089..31227eab 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -7,6 +7,7 @@ articles: G_test: G_test.html Predict: Predict.html ab_property: ab_property.html + benchmarks: benchmarks.html freq: freq.html mo_property: mo_property.html urls: diff --git a/docs/reference/AMR-deprecated.html b/docs/reference/AMR-deprecated.html index 2797d1a9..a3ad0f9a 100644 --- a/docs/reference/AMR-deprecated.html +++ b/docs/reference/AMR-deprecated.html @@ -151,6 +151,13 @@ Use the G-test +
    14. + + + + Other: benchmarks + +
    15. diff --git a/docs/reference/AMR.html b/docs/reference/AMR.html index f90f8804..6e5b904b 100644 --- a/docs/reference/AMR.html +++ b/docs/reference/AMR.html @@ -151,6 +151,13 @@ Use the G-test
    16. +
    17. + + + + Other: benchmarks + +
    18. diff --git a/docs/reference/ITIS.html b/docs/reference/ITIS.html index 67f4bc3b..8463d5f7 100644 --- a/docs/reference/ITIS.html +++ b/docs/reference/ITIS.html @@ -151,6 +151,13 @@ Use the G-test
    19. +
    20. + + + + Other: benchmarks + +
    21. diff --git a/docs/reference/ab_property.html b/docs/reference/ab_property.html index 881ae1ec..3b186ce0 100644 --- a/docs/reference/ab_property.html +++ b/docs/reference/ab_property.html @@ -151,6 +151,13 @@ Use the G-test
    22. +
    23. + + + + Other: benchmarks + +
    24. diff --git a/docs/reference/abname.html b/docs/reference/abname.html index 07508cdd..2cb1232f 100644 --- a/docs/reference/abname.html +++ b/docs/reference/abname.html @@ -151,6 +151,13 @@ Use the G-test
    25. +
    26. + + + + Other: benchmarks + +
    27. diff --git a/docs/reference/age.html b/docs/reference/age.html index b8f1053b..f3b014a1 100644 --- a/docs/reference/age.html +++ b/docs/reference/age.html @@ -151,6 +151,13 @@ Use the G-test
    28. +
    29. + + + + Other: benchmarks + +
    30. diff --git a/docs/reference/age_groups.html b/docs/reference/age_groups.html index 2e31aa90..7ab69c23 100644 --- a/docs/reference/age_groups.html +++ b/docs/reference/age_groups.html @@ -151,6 +151,13 @@ Use the G-test
    31. +
    32. + + + + Other: benchmarks + +
    33. @@ -289,11 +296,11 @@ On our website https://msberends.gitla # resistance of ciprofloxacine per age group library(dplyr) septic_patients %>% - mutate(first_isolate = first_isolate(.)) %>% - filter(first_isolate == TRUE, + mutate(first_isolate = first_isolate(.)) %>% + filter(first_isolate == TRUE, mo == as.mo("E. coli")) %>% - group_by(age_group = age_groups(age)) %>% - select(age_group, + group_by(age_group = age_groups(age)) %>% + select(age_group, cipr) %>% ggplot_rsi(x = "age_group") # } diff --git a/docs/reference/antibiotics.html b/docs/reference/antibiotics.html index 032b329e..6fdbc034 100644 --- a/docs/reference/antibiotics.html +++ b/docs/reference/antibiotics.html @@ -151,6 +151,13 @@ Use the G-test
    34. +
    35. + + + + Other: benchmarks + +
    36. diff --git a/docs/reference/as.atc.html b/docs/reference/as.atc.html index 4f7041d3..8b2bbe7f 100644 --- a/docs/reference/as.atc.html +++ b/docs/reference/as.atc.html @@ -151,6 +151,13 @@ Use the G-test
    37. +
    38. + + + + Other: benchmarks + +
    39. diff --git a/docs/reference/as.mic.html b/docs/reference/as.mic.html index 6b1fa77a..29a21aeb 100644 --- a/docs/reference/as.mic.html +++ b/docs/reference/as.mic.html @@ -151,6 +151,13 @@ Use the G-test
    40. +
    41. + + + + Other: benchmarks + +
    42. diff --git a/docs/reference/as.mo.html b/docs/reference/as.mo.html index 40722c4f..e3a5f22d 100644 --- a/docs/reference/as.mo.html +++ b/docs/reference/as.mo.html @@ -151,6 +151,13 @@ Use the G-test
    43. +
    44. + + + + Other: benchmarks + +
    45. @@ -357,16 +364,16 @@ The mo_property functions (like # the select function of tidyverse is also supported: library(dplyr) df$mo <- df %>% - select(microorganism_name) %>% + select(microorganism_name) %>% as.mo() # and can even contain 2 columns, which is convenient for genus/species combinations: df$mo <- df %>% - select(genus, species) %>% + select(genus, species) %>% as.mo() # although this works easier and does the same: df <- df %>% - mutate(mo = as.mo(paste(genus, species))) + mutate(mo = as.mo(paste(genus, species))) # }
    46. +
    47. + + + + Other: benchmarks + +
    48. @@ -270,11 +277,11 @@ On our website https://msberends.gitla # using dplyr's mutate library(dplyr) septic_patients %>% - mutate_at(vars(peni:rifa), as.rsi) + mutate_at(vars(peni:rifa), as.rsi) # fastest way to transform all columns with already valid AB results to class `rsi`: septic_patients %>% - mutate_if(is.rsi.eligible, + mutate_if(is.rsi.eligible, as.rsi) # } diff --git a/docs/reference/atc_property.html b/docs/reference/atc_property.html index 68bb0b71..1850ce54 100644 --- a/docs/reference/atc_property.html +++ b/docs/reference/atc_property.html @@ -151,6 +151,13 @@ Use the G-test
    49. +
    50. + + + + Other: benchmarks + +
    51. diff --git a/docs/reference/count.html b/docs/reference/count.html index be6540b8..0b1af0a6 100644 --- a/docs/reference/count.html +++ b/docs/reference/count.html @@ -152,6 +152,13 @@ count_R and count_IR can be used to count resistant isolates, count_S and count_ Use the G-test
    52. +
    53. + + + + Other: benchmarks + +
    54. @@ -275,7 +282,7 @@ count_R and count_IR can be used to count resistant isolates, count_S and count_

      Details

      These functions are meant to count isolates. Use the portion_* functions to calculate microbial resistance.

      -

      n_rsi is an alias of count_all. They can be used to count all available isolates, i.e. where all input antibiotics have an available result (S, I or R). Their use is equal to n_distinct. Their function is equal to count_S(...) + count_IR(...).

      +

      n_rsi is an alias of count_all. They can be used to count all available isolates, i.e. where all input antibiotics have an available result (S, I or R). Their use is equal to n_distinct. Their function is equal to count_S(...) + count_IR(...).

      count_df takes any variable from data that has an "rsi" class (created with as.rsi) and counts the amounts of R, I and S. The resulting tidy data (see Source) data.frame will have three rows (S/I/R) and a column for each variable with class "rsi".

      Read more on our website!

      @@ -314,13 +321,13 @@ On our website https://msberends.gitla library(dplyr) septic_patients %>% - group_by(hospital_id) %>% - summarise(R = count_R(cipr), + group_by(hospital_id) %>% + summarise(R = count_R(cipr), I = count_I(cipr), S = count_S(cipr), n1 = count_all(cipr), # the actual total; sum of all three n2 = n_rsi(cipr), # same - analogous to n_distinct - total = n()) # NOT the amount of tested isolates! + total = n()) # NOT the amount of tested isolates! # Count co-resistance between amoxicillin/clav acid and gentamicin, # so we can see that combination therapy does a lot more than mono therapy. @@ -338,13 +345,13 @@ On our website https://msberends.gitla # Get portions S/I/R immediately of all rsi columns septic_patients %>% - select(amox, cipr) %>% + select(amox, cipr) %>% count_df(translate = FALSE) # It also supports grouping variables septic_patients %>% - select(hospital_id, amox, cipr) %>% - group_by(hospital_id) %>% + select(hospital_id, amox, cipr) %>% + group_by(hospital_id) %>% count_df(translate = FALSE) # } diff --git a/docs/reference/eucast_rules.html b/docs/reference/eucast_rules.html index 1d287876..062f79e9 100644 --- a/docs/reference/eucast_rules.html +++ b/docs/reference/eucast_rules.html @@ -151,6 +151,13 @@ Use the G-test
    55. +
    56. + + + + Other: benchmarks + +
    57. @@ -222,27 +229,38 @@
      eucast_rules(tbl, col_mo = NULL, info = TRUE,
         rules = c("breakpoints", "expert", "other", "all"), verbose = FALSE,
      -  amcl = guess_ab(), amik = guess_ab(), amox = guess_ab(),
      -  ampi = guess_ab(), azit = guess_ab(), azlo = guess_ab(),
      -  aztr = guess_ab(), cefa = guess_ab(), cfep = guess_ab(),
      -  cfot = guess_ab(), cfox = guess_ab(), cfra = guess_ab(),
      -  cfta = guess_ab(), cftr = guess_ab(), cfur = guess_ab(),
      -  chlo = guess_ab(), cipr = guess_ab(), clar = guess_ab(),
      -  clin = guess_ab(), clox = guess_ab(), coli = guess_ab(),
      -  czol = guess_ab(), dapt = guess_ab(), doxy = guess_ab(),
      -  erta = guess_ab(), eryt = guess_ab(), fosf = guess_ab(),
      -  fusi = guess_ab(), gent = guess_ab(), imip = guess_ab(),
      -  kana = guess_ab(), levo = guess_ab(), linc = guess_ab(),
      -  line = guess_ab(), mero = guess_ab(), mezl = guess_ab(),
      -  mino = guess_ab(), moxi = guess_ab(), nali = guess_ab(),
      -  neom = guess_ab(), neti = guess_ab(), nitr = guess_ab(),
      -  norf = guess_ab(), novo = guess_ab(), oflo = guess_ab(),
      -  oxac = guess_ab(), peni = guess_ab(), pipe = guess_ab(),
      -  pita = guess_ab(), poly = guess_ab(), pris = guess_ab(),
      -  qida = guess_ab(), rifa = guess_ab(), roxi = guess_ab(),
      -  siso = guess_ab(), teic = guess_ab(), tetr = guess_ab(),
      -  tica = guess_ab(), tige = guess_ab(), tobr = guess_ab(),
      -  trim = guess_ab(), trsu = guess_ab(), vanc = guess_ab())
      +  amcl = guess_ab_col(), amik = guess_ab_col(),
      +  amox = guess_ab_col(), ampi = guess_ab_col(),
      +  azit = guess_ab_col(), azlo = guess_ab_col(),
      +  aztr = guess_ab_col(), cefa = guess_ab_col(),
      +  cfep = guess_ab_col(), cfot = guess_ab_col(),
      +  cfox = guess_ab_col(), cfra = guess_ab_col(),
      +  cfta = guess_ab_col(), cftr = guess_ab_col(),
      +  cfur = guess_ab_col(), chlo = guess_ab_col(),
      +  cipr = guess_ab_col(), clar = guess_ab_col(),
      +  clin = guess_ab_col(), clox = guess_ab_col(),
      +  coli = guess_ab_col(), czol = guess_ab_col(),
      +  dapt = guess_ab_col(), doxy = guess_ab_col(),
      +  erta = guess_ab_col(), eryt = guess_ab_col(),
      +  fosf = guess_ab_col(), fusi = guess_ab_col(),
      +  gent = guess_ab_col(), imip = guess_ab_col(),
      +  kana = guess_ab_col(), levo = guess_ab_col(),
      +  linc = guess_ab_col(), line = guess_ab_col(),
      +  mero = guess_ab_col(), mezl = guess_ab_col(),
      +  mino = guess_ab_col(), moxi = guess_ab_col(),
      +  nali = guess_ab_col(), neom = guess_ab_col(),
      +  neti = guess_ab_col(), nitr = guess_ab_col(),
      +  norf = guess_ab_col(), novo = guess_ab_col(),
      +  oflo = guess_ab_col(), oxac = guess_ab_col(),
      +  peni = guess_ab_col(), pipe = guess_ab_col(),
      +  pita = guess_ab_col(), poly = guess_ab_col(),
      +  pris = guess_ab_col(), qida = guess_ab_col(),
      +  rifa = guess_ab_col(), roxi = guess_ab_col(),
      +  siso = guess_ab_col(), teic = guess_ab_col(),
      +  tetr = guess_ab_col(), tica = guess_ab_col(),
      +  tige = guess_ab_col(), tobr = guess_ab_col(),
      +  trim = guess_ab_col(), trsu = guess_ab_col(),
      +  vanc = guess_ab_col())
       
       EUCAST_rules(...)
       
      @@ -300,7 +318,7 @@
           

      Antibiotics

      -

      To define antibiotics column names, leave as it is to determine it automatically with guess_ab or input a text (case-insensitive) or use NULL to skip a column (e.g. tica = NULL). Non-existing columns will anyway be skipped with a warning.

      +

      To define antibiotics column names, leave as it is to determine it automatically with guess_ab_col or input a text (case-insensitive) or use NULL to skip a column (e.g. tica = NULL). Non-existing columns will anyway be skipped with a warning.

      Abbreviations of the column containing antibiotics in the form: abbreviation: generic name (ATC code)

      amcl: amoxicillin+clavulanic acid (J01CR02), amik: amikacin (J01GB06), diff --git a/docs/reference/first_isolate.html b/docs/reference/first_isolate.html index 64174a59..679cb3a7 100644 --- a/docs/reference/first_isolate.html +++ b/docs/reference/first_isolate.html @@ -151,6 +151,13 @@ Use the G-test

    58. +
    59. + + + + Other: benchmarks + +
    60. @@ -361,11 +368,11 @@ On our website https://msberends.gitla library(dplyr) # Filter on first isolates: septic_patients %>% - mutate(first_isolate = first_isolate(., + mutate(first_isolate = first_isolate(., col_date = "date", col_patient_id = "patient_id", col_mo = "mo")) %>% - filter(first_isolate == TRUE) + filter(first_isolate == TRUE) # Which can be shortened to: septic_patients %>% @@ -376,14 +383,14 @@ On our website https://msberends.gitla # Now let's see if first isolates matter: A <- septic_patients %>% - group_by(hospital_id) %>% - summarise(count = n_rsi(gent), # gentamicin availability + group_by(hospital_id) %>% + summarise(count = n_rsi(gent), # gentamicin availability resistance = portion_IR(gent)) # gentamicin resistance B <- septic_patients %>% filter_first_weighted_isolate() %>% # the 1st isolate filter - group_by(hospital_id) %>% - summarise(count = n_rsi(gent), # gentamicin availability + group_by(hospital_id) %>% + summarise(count = n_rsi(gent), # gentamicin availability resistance = portion_IR(gent)) # gentamicin resistance # Have a look at A and B. diff --git a/docs/reference/freq.html b/docs/reference/freq.html index cec7b99f..5d8ea29b 100644 --- a/docs/reference/freq.html +++ b/docs/reference/freq.html @@ -152,6 +152,13 @@ top_freq can be used to get the top/bottom n items of a frequency table, with co Use the G-test
    61. +
    62. + + + + Other: benchmarks + +
    63. @@ -378,34 +385,34 @@ On our website https://msberends.gitla # you could also use `select` or `pull` to get your variables septic_patients %>% - filter(hospital_id == "A") %>% - select(mo) %>% + filter(hospital_id == "A") %>% + select(mo) %>% freq() # multiple selected variables will be pasted together septic_patients %>% left_join_microorganisms %>% - filter(hospital_id == "A") %>% + filter(hospital_id == "A") %>% freq(genus, species) # group a variable and analyse another septic_patients %>% - group_by(hospital_id) %>% + group_by(hospital_id) %>% freq(gender) # get top 10 bugs of hospital A as a vector septic_patients %>% - filter(hospital_id == "A") %>% + filter(hospital_id == "A") %>% freq(mo) %>% top_freq(10) # save frequency table to an object years <- septic_patients %>% - mutate(year = format(date, "%Y")) %>% + mutate(year = format(date, "%Y")) %>% freq(year) @@ -456,11 +463,11 @@ On our website https://msberends.gitla # only get selected columns septic_patients %>% freq(hospital_id) %>% - select(item, percent) + select(item, percent) septic_patients %>% freq(hospital_id) %>% - select(-count, -cum_count) + select(-count, -cum_count) # check differences between frequency tables diff --git a/docs/reference/g.test.html b/docs/reference/g.test.html index 47991858..3d59d670 100644 --- a/docs/reference/g.test.html +++ b/docs/reference/g.test.html @@ -151,6 +151,13 @@ Use the G-test
    64. +
    65. + + + + Other: benchmarks + +
    66. diff --git a/docs/reference/get_locale.html b/docs/reference/get_locale.html index 048803e8..0ae61eab 100644 --- a/docs/reference/get_locale.html +++ b/docs/reference/get_locale.html @@ -151,6 +151,13 @@ Use the G-test
    67. +
    68. + + + + Other: benchmarks + +
    69. diff --git a/docs/reference/ggplot_rsi.html b/docs/reference/ggplot_rsi.html index 3c78957e..b1fad1b3 100644 --- a/docs/reference/ggplot_rsi.html +++ b/docs/reference/ggplot_rsi.html @@ -151,6 +151,13 @@ Use the G-test
    70. +
    71. + + + + Other: benchmarks + +
    72. @@ -327,7 +334,7 @@ On our website https://msberends.gitla library(ggplot2) # get antimicrobial results for drugs against a UTI: -ggplot(septic_patients %>% select(amox, nitr, fosf, trim, cipr)) + +ggplot(septic_patients %>% select(amox, nitr, fosf, trim, cipr)) + geom_rsi() # prettify the plot using some additional functions: @@ -341,17 +348,17 @@ On our website https://msberends.gitla # or better yet, simplify this using the wrapper function - a single command: septic_patients %>% - select(amox, nitr, fosf, trim, cipr) %>% + select(amox, nitr, fosf, trim, cipr) %>% ggplot_rsi() # get only portions and no counts: septic_patients %>% - select(amox, nitr, fosf, trim, cipr) %>% + select(amox, nitr, fosf, trim, cipr) %>% ggplot_rsi(fun = portion_df) # add other ggplot2 parameters as you like: septic_patients %>% - select(amox, nitr, fosf, trim, cipr) %>% + select(amox, nitr, fosf, trim, cipr) %>% ggplot_rsi(width = 0.5, colour = "black", size = 1, @@ -360,25 +367,25 @@ On our website https://msberends.gitla # resistance of ciprofloxacine per age group septic_patients %>% - mutate(first_isolate = first_isolate(.)) %>% - filter(first_isolate == TRUE, + mutate(first_isolate = first_isolate(.)) %>% + filter(first_isolate == TRUE, mo == as.mo("E. 
coli")) %>% # `age_group` is also a function of this package: - group_by(age_group = age_groups(age)) %>% - select(age_group, + group_by(age_group = age_groups(age)) %>% + select(age_group, cipr) %>% ggplot_rsi(x = "age_group") # }# NOT RUN { # for colourblind mode, use divergent colours from the viridis package: septic_patients %>% - select(amox, nitr, fosf, trim, cipr) %>% + select(amox, nitr, fosf, trim, cipr) %>% ggplot_rsi() + scale_fill_viridis_d() # it also supports groups (don't forget to use the group var on `x` or `facet`): septic_patients %>% - select(hospital_id, amox, nitr, fosf, trim, cipr) %>% - group_by(hospital_id) %>% + select(hospital_id, amox, nitr, fosf, trim, cipr) %>% + group_by(hospital_id) %>% ggplot_rsi(x = hospital_id, facet = Antibiotic, nrow = 1) + @@ -388,22 +395,22 @@ On our website https://msberends.gitla # genuine analysis: check 2 most prevalent microorganisms septic_patients %>% # create new bacterial ID's, with all CoNS under the same group (Becker et al.) - mutate(mo = as.mo(mo, Becker = TRUE)) %>% + mutate(mo = as.mo(mo, Becker = TRUE)) %>% # filter on top three bacterial ID's - filter(mo %in% top_freq(freq(.$mo), 3)) %>% + filter(mo %in% top_freq(freq(.$mo), 3)) %>% # determine first isolates - mutate(first_isolate = first_isolate(., + mutate(first_isolate = first_isolate(., col_date = "date", col_patient_id = "patient_id", col_mo = "mo")) %>% # filter on first isolates - filter(first_isolate == TRUE) %>% + filter(first_isolate == TRUE) %>% # get short MO names (like "E. 
coli") - mutate(mo = mo_shortname(mo, Becker = TRUE)) %>% + mutate(mo = mo_shortname(mo, Becker = TRUE)) %>% # select this short name and some antiseptic drugs - select(mo, cfur, gent, cipr) %>% + select(mo, cfur, gent, cipr) %>% # group by MO - group_by(mo) %>% + group_by(mo) %>% # plot the thing, putting MOs on the facet ggplot_rsi(x = Antibiotic, facet = mo, diff --git a/docs/reference/guess_ab.html b/docs/reference/guess_ab.html index ced585eb..ddeb079a 100644 --- a/docs/reference/guess_ab.html +++ b/docs/reference/guess_ab.html @@ -151,6 +151,13 @@ Use the G-test
    73. +
    74. + + + + Other: benchmarks + +
    75. diff --git a/docs/reference/guess_ab_col.html b/docs/reference/guess_ab_col.html new file mode 100644 index 00000000..d604a006 --- /dev/null +++ b/docs/reference/guess_ab_col.html @@ -0,0 +1,316 @@ + + + + + + + + +Guess antibiotic column — guess_ab_col • AMR (for R) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      +
      + + + +
      + +
      +
      + + +
      + +

      This tries to find a column name in a data set based on information from the antibiotics data set. You can look for an antibiotic (trade) name or abbreviation and it will search the data for any column containing a name or ATC code of that antibiotic.

      + +
      + +
      guess_ab_col(tbl = NULL, col = NULL, verbose = FALSE)
      + +

      Arguments

      + + + + + + + + + + + + + + +
      tbl

      a data.frame

      col

      a character to look for

      verbose

      a logical to indicate whether additional info should be printed

      + +

      Read more on our website!

      + + +


      +On our website https://msberends.gitlab.io/AMR you can find a comprehensive tutorial about how to conduct AMR analysis and find the complete documentation of all functions, which reads a lot easier than in R.

      + + +

      Examples

      +
      # NOT RUN {
      +df <- data.frame(amox = "S",
      +                 tetr = "R")
      +
      +guess_ab_col(df, "amoxicillin")
      +# [1] "amox"
      +guess_ab_col(df, "J01AA07") # ATC code of Tetracycline
      +# [1] "tetr"
      +
      +guess_ab_col(df, "J01AA07", verbose = TRUE)
      +# using column `tetr` for col "J01AA07"
      +# [1] "tetr"
      +# }
      +
      + +
      + + +
      + + + + + + + + + diff --git a/docs/reference/index.html b/docs/reference/index.html index c298ed40..a85507b4 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -149,6 +149,13 @@ Use the G-test
    76. +
    77. + + + + Other: benchmarks + +
    78. @@ -222,7 +229,7 @@

      Background information

      -

      Some pages about our package and its external sources.

      +

      Some pages about our package and its external sources. Be sure to read our How To’s for more information about how to work with functions in this package.

      @@ -278,7 +285,7 @@ -

      guess_ab()

      +

      guess_ab_col()

      Guess antibiotic column

      @@ -478,7 +485,7 @@

      Other

      -

      +

      These functions are mostly for internal use, but some of them may also be suitable for your analysis. Especially the ‘like’ function can be useful: if (x %like% y) {...}.

      diff --git a/docs/reference/join.html b/docs/reference/join.html index 7a17757a..0df6d4d2 100644 --- a/docs/reference/join.html +++ b/docs/reference/join.html @@ -151,6 +151,13 @@ Use the G-test
    79. +
    80. + + + + Other: benchmarks + +
    81. diff --git a/docs/reference/key_antibiotics.html b/docs/reference/key_antibiotics.html index a733453b..ca20ed4e 100644 --- a/docs/reference/key_antibiotics.html +++ b/docs/reference/key_antibiotics.html @@ -151,6 +151,13 @@ Use the G-test
    82. +
    83. + + + + Other: benchmarks + +
    84. @@ -220,17 +227,20 @@ -
      key_antibiotics(tbl, col_mo = NULL, universal_1 = guess_ab(tbl,
      -  "amox"), universal_2 = guess_ab(tbl, "amcl"),
      -  universal_3 = guess_ab(tbl, "cfur"), universal_4 = guess_ab(tbl,
      -  "pita"), universal_5 = guess_ab(tbl, "cipr"),
      -  universal_6 = guess_ab(tbl, "trsu"), GramPos_1 = guess_ab(tbl,
      -  "vanc"), GramPos_2 = guess_ab(tbl, "teic"), GramPos_3 = guess_ab(tbl,
      -  "tetr"), GramPos_4 = guess_ab(tbl, "eryt"), GramPos_5 = guess_ab(tbl,
      -  "oxac"), GramPos_6 = guess_ab(tbl, "rifa"), GramNeg_1 = guess_ab(tbl,
      -  "gent"), GramNeg_2 = guess_ab(tbl, "tobr"), GramNeg_3 = guess_ab(tbl,
      -  "coli"), GramNeg_4 = guess_ab(tbl, "cfot"), GramNeg_5 = guess_ab(tbl,
      -  "cfta"), GramNeg_6 = guess_ab(tbl, "mero"), warnings = TRUE, ...)
      +    
      key_antibiotics(tbl, col_mo = NULL, universal_1 = guess_ab_col(tbl,
      +  "amox"), universal_2 = guess_ab_col(tbl, "amcl"),
      +  universal_3 = guess_ab_col(tbl, "cfur"),
      +  universal_4 = guess_ab_col(tbl, "pita"),
      +  universal_5 = guess_ab_col(tbl, "cipr"),
      +  universal_6 = guess_ab_col(tbl, "trsu"),
      +  GramPos_1 = guess_ab_col(tbl, "vanc"), GramPos_2 = guess_ab_col(tbl,
      +  "teic"), GramPos_3 = guess_ab_col(tbl, "tetr"),
      +  GramPos_4 = guess_ab_col(tbl, "eryt"), GramPos_5 = guess_ab_col(tbl,
      +  "oxac"), GramPos_6 = guess_ab_col(tbl, "rifa"),
      +  GramNeg_1 = guess_ab_col(tbl, "gent"), GramNeg_2 = guess_ab_col(tbl,
      +  "tobr"), GramNeg_3 = guess_ab_col(tbl, "coli"),
      +  GramNeg_4 = guess_ab_col(tbl, "cfot"), GramNeg_5 = guess_ab_col(tbl,
      +  "cfta"), GramNeg_6 = guess_ab_col(tbl, "mero"), warnings = TRUE, ...)
       
       key_antibiotics_equal(x, y, type = c("keyantibiotics", "points"),
         ignore_I = TRUE, points_threshold = 2, info = FALSE)
      @@ -248,15 +258,15 @@ universal_1, universal_2, universal_3, universal_4, universal_5, universal_6 -

      column names of broad-spectrum antibiotics, case-insensitive

      +

      column names of broad-spectrum antibiotics, case-insensitive. At default, the columns containing these antibiotics will be guessed with guess_ab_col.

      GramPos_1, GramPos_2, GramPos_3, GramPos_4, GramPos_5, GramPos_6 -

      column names of antibiotics for Gram positives, case-insensitive

      +

      column names of antibiotics for Gram positives, case-insensitive. At default, the columns containing these antibiotics will be guessed with guess_ab_col.

      GramNeg_1, GramNeg_2, GramNeg_3, GramNeg_4, GramNeg_5, GramNeg_6 -

      column names of antibiotics for Gram negatives, case-insensitive

      +

      column names of antibiotics for Gram negatives, case-insensitive. At default, the columns containing these antibiotics will be guessed with guess_ab_col.

      warnings @@ -290,7 +300,7 @@

      Details

      -

      The function key_antibiotics returns a character vector with 12 antibiotic results for every isolate. These isolates can then be compared using key_antibiotics_equal, to check if two isolates have generally the same antibiogram. Missing and invalid values are replaced with a dot ("."). The first_isolate function only uses this function on the same microbial species from the same patient. Using this, an MRSA will be included after a susceptible S. aureus (MSSA) found within the same episode (see episode parameter of first_isolate). Without key antibiotic comparison it wouldn't.

      +

      The function key_antibiotics returns a character vector with 12 antibiotic results for every isolate. These isolates can then be compared using key_antibiotics_equal, to check if two isolates have generally the same antibiogram. Missing and invalid values are replaced with a dot ("."). The first_isolate function only uses this function on the same microbial species from the same patient. Using this, an MRSA will be included after a susceptible S. aureus (MSSA) found within the same episode (see episode parameter of first_isolate). Without key antibiotic comparison it would not.

      At default, the antibiotics that are used for Gram positive bacteria are (column names):
      "amox", "amcl", "cfur", "pita", "cipr", "trsu" (until here is universal), "vanc", "teic", "tetr", "eryt", "oxac", "rifa".

      At default, the antibiotics that are used for Gram negative bacteria are (column names):
      diff --git a/docs/reference/kurtosis.html b/docs/reference/kurtosis.html index 17a3c09c..a946dc17 100644 --- a/docs/reference/kurtosis.html +++ b/docs/reference/kurtosis.html @@ -151,6 +151,13 @@ Use the G-test

    85. +
    86. + + + + Other: benchmarks + +
    87. diff --git a/docs/reference/like.html b/docs/reference/like.html index 1f77b2de..8d733814 100644 --- a/docs/reference/like.html +++ b/docs/reference/like.html @@ -151,6 +151,13 @@ Use the G-test
    88. +
    89. + + + + Other: benchmarks + +
    90. @@ -288,7 +295,7 @@ On our website https://msberends.gitla library(dplyr) septic_patients %>% left_join_microorganisms() %>% - filter(genus %like% '^ent') %>% + filter(genus %like% '^ent') %>% freq(genus, species) # } diff --git a/docs/reference/mdro.html b/docs/reference/mdro.html index fbc8c601..e532d5ff 100644 --- a/docs/reference/mdro.html +++ b/docs/reference/mdro.html @@ -151,6 +151,13 @@ Use the G-test
    91. +
    92. + + + + Other: benchmarks + +
    93. @@ -221,26 +228,36 @@
      mdro(tbl, country = NULL, col_mo = NULL, info = TRUE,
      -  amcl = guess_ab(), amik = guess_ab(), amox = guess_ab(),
      -  ampi = guess_ab(), azit = guess_ab(), aztr = guess_ab(),
      -  cefa = guess_ab(), cfra = guess_ab(), cfep = guess_ab(),
      -  cfot = guess_ab(), cfox = guess_ab(), cfta = guess_ab(),
      -  cftr = guess_ab(), cfur = guess_ab(), chlo = guess_ab(),
      -  cipr = guess_ab(), clar = guess_ab(), clin = guess_ab(),
      -  clox = guess_ab(), coli = guess_ab(), czol = guess_ab(),
      -  dapt = guess_ab(), doxy = guess_ab(), erta = guess_ab(),
      -  eryt = guess_ab(), fosf = guess_ab(), fusi = guess_ab(),
      -  gent = guess_ab(), imip = guess_ab(), kana = guess_ab(),
      -  levo = guess_ab(), linc = guess_ab(), line = guess_ab(),
      -  mero = guess_ab(), metr = guess_ab(), mino = guess_ab(),
      -  moxi = guess_ab(), nali = guess_ab(), neom = guess_ab(),
      -  neti = guess_ab(), nitr = guess_ab(), novo = guess_ab(),
      -  norf = guess_ab(), oflo = guess_ab(), peni = guess_ab(),
      -  pipe = guess_ab(), pita = guess_ab(), poly = guess_ab(),
      -  qida = guess_ab(), rifa = guess_ab(), roxi = guess_ab(),
      -  siso = guess_ab(), teic = guess_ab(), tetr = guess_ab(),
      -  tica = guess_ab(), tige = guess_ab(), tobr = guess_ab(),
      -  trim = guess_ab(), trsu = guess_ab(), vanc = guess_ab())
      +  amcl = guess_ab_col(), amik = guess_ab_col(),
      +  amox = guess_ab_col(), ampi = guess_ab_col(),
      +  azit = guess_ab_col(), aztr = guess_ab_col(),
      +  cefa = guess_ab_col(), cfra = guess_ab_col(),
      +  cfep = guess_ab_col(), cfot = guess_ab_col(),
      +  cfox = guess_ab_col(), cfta = guess_ab_col(),
      +  cftr = guess_ab_col(), cfur = guess_ab_col(),
      +  chlo = guess_ab_col(), cipr = guess_ab_col(),
      +  clar = guess_ab_col(), clin = guess_ab_col(),
      +  clox = guess_ab_col(), coli = guess_ab_col(),
      +  czol = guess_ab_col(), dapt = guess_ab_col(),
      +  doxy = guess_ab_col(), erta = guess_ab_col(),
      +  eryt = guess_ab_col(), fosf = guess_ab_col(),
      +  fusi = guess_ab_col(), gent = guess_ab_col(),
      +  imip = guess_ab_col(), kana = guess_ab_col(),
      +  levo = guess_ab_col(), linc = guess_ab_col(),
      +  line = guess_ab_col(), mero = guess_ab_col(),
      +  metr = guess_ab_col(), mino = guess_ab_col(),
      +  moxi = guess_ab_col(), nali = guess_ab_col(),
      +  neom = guess_ab_col(), neti = guess_ab_col(),
      +  nitr = guess_ab_col(), novo = guess_ab_col(),
      +  norf = guess_ab_col(), oflo = guess_ab_col(),
      +  peni = guess_ab_col(), pipe = guess_ab_col(),
      +  pita = guess_ab_col(), poly = guess_ab_col(),
      +  qida = guess_ab_col(), rifa = guess_ab_col(),
      +  roxi = guess_ab_col(), siso = guess_ab_col(),
      +  teic = guess_ab_col(), tetr = guess_ab_col(),
      +  tica = guess_ab_col(), tige = guess_ab_col(),
      +  tobr = guess_ab_col(), trim = guess_ab_col(),
      +  trsu = guess_ab_col(), vanc = guess_ab_col())
       
       brmo(..., country = "nl")
       
      @@ -524,7 +541,7 @@
           

      Antibiotics

      -

      To define antibiotics column names, leave as it is to determine it automatically with guess_ab or input a text (case-insensitive) or use NULL to skip a column (e.g. tica = NULL). Non-existing columns will anyway be skipped with a warning.

      +

      To define antibiotics column names, leave as it is to determine it automatically with guess_ab_col or input a text (case-insensitive) or use NULL to skip a column (e.g. tica = NULL). Non-existing columns will anyway be skipped with a warning.

      Abbreviations of the column containing antibiotics in the form: abbreviation: generic name (ATC code)

      amcl: amoxicillin+clavulanic acid (J01CR02), amik: amikacin (J01GB06), diff --git a/docs/reference/microorganisms.certe.html b/docs/reference/microorganisms.certe.html index ef184a02..198fc3e5 100644 --- a/docs/reference/microorganisms.certe.html +++ b/docs/reference/microorganisms.certe.html @@ -151,6 +151,13 @@ Use the G-test

    94. +
    95. + + + + Other: benchmarks + +
    96. diff --git a/docs/reference/microorganisms.html b/docs/reference/microorganisms.html index 160d44ea..377e80a3 100644 --- a/docs/reference/microorganisms.html +++ b/docs/reference/microorganisms.html @@ -151,6 +151,13 @@ Use the G-test
    97. +
    98. + + + + Other: benchmarks + +
    99. diff --git a/docs/reference/microorganisms.old.html b/docs/reference/microorganisms.old.html index f7753406..cdf8bbb8 100644 --- a/docs/reference/microorganisms.old.html +++ b/docs/reference/microorganisms.old.html @@ -151,6 +151,13 @@ Use the G-test
    100. +
    101. + + + + Other: benchmarks + +
    102. diff --git a/docs/reference/microorganisms.umcg.html b/docs/reference/microorganisms.umcg.html index ba61c085..4e938908 100644 --- a/docs/reference/microorganisms.umcg.html +++ b/docs/reference/microorganisms.umcg.html @@ -151,6 +151,13 @@ Use the G-test
    103. +
    104. + + + + Other: benchmarks + +
    105. diff --git a/docs/reference/mo_failures.html b/docs/reference/mo_failures.html index 81648d7c..309f5734 100644 --- a/docs/reference/mo_failures.html +++ b/docs/reference/mo_failures.html @@ -151,6 +151,13 @@ Use the G-test
    106. +
    107. + + + + Other: benchmarks + +
    108. diff --git a/docs/reference/mo_property.html b/docs/reference/mo_property.html index 56d2859d..9992a80c 100644 --- a/docs/reference/mo_property.html +++ b/docs/reference/mo_property.html @@ -151,6 +151,13 @@ Use the G-test
    109. +
    110. + + + + Other: benchmarks + +
    111. diff --git a/docs/reference/mo_renamed.html b/docs/reference/mo_renamed.html index f9a3e497..11ec3055 100644 --- a/docs/reference/mo_renamed.html +++ b/docs/reference/mo_renamed.html @@ -151,6 +151,13 @@ Use the G-test
    112. +
    113. + + + + Other: benchmarks + +
    114. diff --git a/docs/reference/p.symbol.html b/docs/reference/p.symbol.html index 9cc7460c..1ea7acd0 100644 --- a/docs/reference/p.symbol.html +++ b/docs/reference/p.symbol.html @@ -151,6 +151,13 @@ Use the G-test
    115. +
    116. + + + + Other: benchmarks + +
    117. diff --git a/docs/reference/portion.html b/docs/reference/portion.html index 44896e60..54e8cb51 100644 --- a/docs/reference/portion.html +++ b/docs/reference/portion.html @@ -152,6 +152,13 @@ portion_R and portion_IR can be used to calculate resistance, portion_S and port Use the G-test
    118. +
    119. + + + + Other: benchmarks + +
    120. @@ -333,17 +340,17 @@ On our website https://msberends.gitla septic_patients %>% portion_SI(amox) septic_patients %>% - group_by(hospital_id) %>% - summarise(p = portion_S(cipr), + group_by(hospital_id) %>% + summarise(p = portion_S(cipr), n = n_rsi(cipr)) # n_rsi works like n_distinct in dplyr septic_patients %>% - group_by(hospital_id) %>% - summarise(R = portion_R(cipr, as_percent = TRUE), + group_by(hospital_id) %>% + summarise(R = portion_R(cipr, as_percent = TRUE), I = portion_I(cipr, as_percent = TRUE), S = portion_S(cipr, as_percent = TRUE), n = n_rsi(cipr), # works like n_distinct in dplyr - total = n()) # NOT the amount of tested isolates! + total = n()) # NOT the amount of tested isolates! # Calculate co-resistance between amoxicillin/clav acid and gentamicin, # so we can see that combination therapy does a lot more than mono therapy: @@ -358,8 +365,8 @@ On our website https://msberends.gitla septic_patients %>% - group_by(hospital_id) %>% - summarise(cipro_p = portion_S(cipr, as_percent = TRUE), + group_by(hospital_id) %>% + summarise(cipro_p = portion_S(cipr, as_percent = TRUE), cipro_n = count_all(cipr), genta_p = portion_S(gent, as_percent = TRUE), genta_n = count_all(gent), @@ -368,22 +375,22 @@ On our website https://msberends.gitla # Get portions S/I/R immediately of all rsi columns septic_patients %>% - select(amox, cipr) %>% + select(amox, cipr) %>% portion_df(translate = FALSE) # It also supports grouping variables septic_patients %>% - select(hospital_id, amox, cipr) %>% - group_by(hospital_id) %>% + select(hospital_id, amox, cipr) %>% + group_by(hospital_id) %>% portion_df(translate = FALSE) # }# NOT RUN { # calculate current empiric combination therapy of Helicobacter gastritis: my_table %>% - filter(first_isolate == TRUE, + filter(first_isolate == TRUE, genus == "Helicobacter") %>% - summarise(p = portion_S(amox, metr), # amoxicillin with metronidazole + summarise(p = portion_S(amox, metr), # amoxicillin with metronidazole n = 
count_all(amox, metr)) # } diff --git a/docs/reference/read.4D.html b/docs/reference/read.4D.html index e545a16a..cbf9dec1 100644 --- a/docs/reference/read.4D.html +++ b/docs/reference/read.4D.html @@ -151,6 +151,13 @@ Use the G-test
    121. +
    122. + + + + Other: benchmarks + +
    123. diff --git a/docs/reference/resistance_predict.html b/docs/reference/resistance_predict.html index 2d789bbe..0249e17f 100644 --- a/docs/reference/resistance_predict.html +++ b/docs/reference/resistance_predict.html @@ -151,6 +151,13 @@ Use the G-test
    124. +
    125. + + + + Other: benchmarks + +
    126. @@ -311,7 +318,7 @@ On our website https://msberends.gitla # or use dplyr so you can actually read it: library(dplyr) tbl %>% - filter(first_isolate == TRUE, + filter(first_isolate == TRUE, genus == "Haemophilus") %>% resistance_predict(amcl, date) # }# NOT RUN { @@ -322,9 +329,9 @@ On our website https://msberends.gitla # get bacteria properties like genus and species left_join_microorganisms("mo") %>% # calculate first isolates - mutate(first_isolate = first_isolate(.)) %>% + mutate(first_isolate = first_isolate(.)) %>% # filter on first E. coli isolates - filter(genus == "Escherichia", + filter(genus == "Escherichia", species == "coli", first_isolate == TRUE) %>% # predict resistance of cefotaxime for next years @@ -338,7 +345,7 @@ On our website https://msberends.gitla if (!require(ggplot2)) { data <- septic_patients %>% - filter(mo == as.mo("E. coli")) %>% + filter(mo == as.mo("E. coli")) %>% resistance_predict(col_ab = "amox", col_date = "date", info = FALSE, diff --git a/docs/reference/rsi.html b/docs/reference/rsi.html index baab473f..bfc62eaa 100644 --- a/docs/reference/rsi.html +++ b/docs/reference/rsi.html @@ -151,6 +151,13 @@ Use the G-test
    127. +
    128. + + + + Other: benchmarks + +
    129. diff --git a/docs/reference/septic_patients.html b/docs/reference/septic_patients.html index 7e261c29..997bfc00 100644 --- a/docs/reference/septic_patients.html +++ b/docs/reference/septic_patients.html @@ -151,6 +151,13 @@ Use the G-test
    130. +
    131. + + + + Other: benchmarks + +
    132. diff --git a/docs/reference/skewness.html b/docs/reference/skewness.html index 2a7902dd..6e3ef8cd 100644 --- a/docs/reference/skewness.html +++ b/docs/reference/skewness.html @@ -152,6 +152,13 @@ When negative: the left tail is longer; the mass of the distribution is concentr Use the G-test
    133. +
    134. + + + + Other: benchmarks + +
    135. diff --git a/docs/reference/supplementary_data.html b/docs/reference/supplementary_data.html index d17871ad..742f1541 100644 --- a/docs/reference/supplementary_data.html +++ b/docs/reference/supplementary_data.html @@ -151,6 +151,13 @@ Use the G-test
    136. +
    137. + + + + Other: benchmarks + +
    138. diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 430b986c..c91fb660 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -64,7 +64,7 @@ https://msberends.gitlab.io/AMR/reference/ggplot_rsi.html - https://msberends.gitlab.io/AMR/reference/guess_ab.html + https://msberends.gitlab.io/AMR/reference/guess_ab_col.html https://msberends.gitlab.io/AMR/reference/join.html @@ -141,6 +141,9 @@ https://msberends.gitlab.io/AMR/articles/ab_property.html + + https://msberends.gitlab.io/AMR/articles/benchmarks.html + https://msberends.gitlab.io/AMR/articles/freq.html diff --git a/index.md b/index.md index 21e2bdd3..b33a556b 100644 --- a/index.md +++ b/index.md @@ -64,7 +64,9 @@ To find out how to conduct AMR analysis, please [continue reading here to get st This package contains the **complete microbial taxonomic data** (with all nine taxonomic ranks - from kingdom to subspecies) from the publicly available Integrated Taxonomic Information System (ITIS, https://www.itis.gov). -All (sub)species from **the taxonomic kingdoms Bacteria, Fungi and Protozoa are included in this package**, as well as all previously accepted names known to ITIS. Furthermore, the responsible authors and year of publication are available. This allows users to use authoritative taxonomic information for their data analysis on any microorganism, not only human pathogens. It also helps to quickly determine the Gram stain of bacteria, since all bacteria are classified into subkingdom Negibacteria or Posibacteria. ITIS is a partnership of U.S., Canadian, and Mexican agencies and taxonomic specialists. +All (sub)species from **the taxonomic kingdoms Bacteria, Fungi and Protozoa are included in this package**, as well as all previously accepted names known to ITIS. Furthermore, the responsible authors and year of publication are available. This allows users to use authoritative taxonomic information for their data analysis on any microorganism, not only human pathogens. 
It also helps to quickly determine the Gram stain of bacteria, since all bacteria are classified into subkingdom Negibacteria or Posibacteria. + +Read more about ITIS [in our manual](./reference/ITIS.html). The `AMR` package basically does four important things: diff --git a/man/eucast_rules.Rd b/man/eucast_rules.Rd index 7e1e319f..9460fdef 100644 --- a/man/eucast_rules.Rd +++ b/man/eucast_rules.Rd @@ -25,27 +25,38 @@ \usage{ eucast_rules(tbl, col_mo = NULL, info = TRUE, rules = c("breakpoints", "expert", "other", "all"), verbose = FALSE, - amcl = guess_ab(), amik = guess_ab(), amox = guess_ab(), - ampi = guess_ab(), azit = guess_ab(), azlo = guess_ab(), - aztr = guess_ab(), cefa = guess_ab(), cfep = guess_ab(), - cfot = guess_ab(), cfox = guess_ab(), cfra = guess_ab(), - cfta = guess_ab(), cftr = guess_ab(), cfur = guess_ab(), - chlo = guess_ab(), cipr = guess_ab(), clar = guess_ab(), - clin = guess_ab(), clox = guess_ab(), coli = guess_ab(), - czol = guess_ab(), dapt = guess_ab(), doxy = guess_ab(), - erta = guess_ab(), eryt = guess_ab(), fosf = guess_ab(), - fusi = guess_ab(), gent = guess_ab(), imip = guess_ab(), - kana = guess_ab(), levo = guess_ab(), linc = guess_ab(), - line = guess_ab(), mero = guess_ab(), mezl = guess_ab(), - mino = guess_ab(), moxi = guess_ab(), nali = guess_ab(), - neom = guess_ab(), neti = guess_ab(), nitr = guess_ab(), - norf = guess_ab(), novo = guess_ab(), oflo = guess_ab(), - oxac = guess_ab(), peni = guess_ab(), pipe = guess_ab(), - pita = guess_ab(), poly = guess_ab(), pris = guess_ab(), - qida = guess_ab(), rifa = guess_ab(), roxi = guess_ab(), - siso = guess_ab(), teic = guess_ab(), tetr = guess_ab(), - tica = guess_ab(), tige = guess_ab(), tobr = guess_ab(), - trim = guess_ab(), trsu = guess_ab(), vanc = guess_ab()) + amcl = guess_ab_col(), amik = guess_ab_col(), + amox = guess_ab_col(), ampi = guess_ab_col(), + azit = guess_ab_col(), azlo = guess_ab_col(), + aztr = guess_ab_col(), cefa = guess_ab_col(), + cfep = 
guess_ab_col(), cfot = guess_ab_col(), + cfox = guess_ab_col(), cfra = guess_ab_col(), + cfta = guess_ab_col(), cftr = guess_ab_col(), + cfur = guess_ab_col(), chlo = guess_ab_col(), + cipr = guess_ab_col(), clar = guess_ab_col(), + clin = guess_ab_col(), clox = guess_ab_col(), + coli = guess_ab_col(), czol = guess_ab_col(), + dapt = guess_ab_col(), doxy = guess_ab_col(), + erta = guess_ab_col(), eryt = guess_ab_col(), + fosf = guess_ab_col(), fusi = guess_ab_col(), + gent = guess_ab_col(), imip = guess_ab_col(), + kana = guess_ab_col(), levo = guess_ab_col(), + linc = guess_ab_col(), line = guess_ab_col(), + mero = guess_ab_col(), mezl = guess_ab_col(), + mino = guess_ab_col(), moxi = guess_ab_col(), + nali = guess_ab_col(), neom = guess_ab_col(), + neti = guess_ab_col(), nitr = guess_ab_col(), + norf = guess_ab_col(), novo = guess_ab_col(), + oflo = guess_ab_col(), oxac = guess_ab_col(), + peni = guess_ab_col(), pipe = guess_ab_col(), + pita = guess_ab_col(), poly = guess_ab_col(), + pris = guess_ab_col(), qida = guess_ab_col(), + rifa = guess_ab_col(), roxi = guess_ab_col(), + siso = guess_ab_col(), teic = guess_ab_col(), + tetr = guess_ab_col(), tica = guess_ab_col(), + tige = guess_ab_col(), tobr = guess_ab_col(), + trim = guess_ab_col(), trsu = guess_ab_col(), + vanc = guess_ab_col()) EUCAST_rules(...) @@ -74,7 +85,7 @@ Apply susceptibility rules as defined by the European Committee on Antimicrobial } \section{Antibiotics}{ -To define antibiotics column names, leave as it is to determine it automatically with \code{\link{guess_ab}} or input a text (case-insensitive) or use \code{NULL} to skip a column (e.g. \code{tica = NULL}). Non-existing columns will anyway be skipped with a warning. +To define antibiotics column names, leave as it is to determine it automatically with \code{\link{guess_ab_col}} or input a text (case-insensitive) or use \code{NULL} to skip a column (e.g. \code{tica = NULL}). Non-existing columns will anyway be skipped with a warning. 
Abbrevations of the column containing antibiotics in the form: \strong{abbreviation}: generic name (\emph{ATC code}) diff --git a/man/guess_ab.Rd b/man/guess_ab_col.Rd similarity index 55% rename from man/guess_ab.Rd rename to man/guess_ab_col.Rd index 8ce0f929..42c703bf 100644 --- a/man/guess_ab.Rd +++ b/man/guess_ab_col.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/guess_ab.R -\name{guess_ab} -\alias{guess_ab} +% Please edit documentation in R/guess_ab_col.R +\name{guess_ab_col} +\alias{guess_ab_col} \title{Guess antibiotic column} \usage{ -guess_ab(tbl = NULL, col = NULL, verbose = FALSE) +guess_ab_col(tbl = NULL, col = NULL, verbose = FALSE) } \arguments{ \item{tbl}{a \code{data.frame}} @@ -14,7 +14,7 @@ guess_ab(tbl = NULL, col = NULL, verbose = FALSE) \item{verbose}{a logical to indicate whether additional info should be printed} } \description{ -This tries to find a column name in a data set based on information from the \code{\link{antibiotics}} data set. +This tries to find a column name in a data set based on information from the \code{\link{antibiotics}} data set. You can look for an antibiotic (trade) of abbreviation and it will search the data for any column containing a name or ATC code of that antibiotic. } \section{Read more on our website!}{ @@ -22,3 +22,16 @@ This tries to find a column name in a data set based on information from the \co On our website \url{https://msberends.gitlab.io/AMR} you can find \href{https://msberends.gitlab.io/AMR/articles/AMR.html}{a omprehensive tutorial} about how to conduct AMR analysis and find \href{https://msberends.gitlab.io/AMR/reference}{the complete documentation of all functions}, which reads a lot easier than in R. 
} +\examples{ +df <- data.frame(amox = "S", + tetr = "R") + +guess_ab_col(df, "amoxicillin") +# [1] "amox" +guess_ab_col(df, "J01AA07") # ATC code of Tetracycline +# [1] "tetr" + +guess_ab_col(df, "J01AA07", verbose = TRUE) +# using column `tetr` for col "J01AA07" +# [1] "tetr" +} diff --git a/man/key_antibiotics.Rd b/man/key_antibiotics.Rd index 03686a7e..cc4afe6c 100755 --- a/man/key_antibiotics.Rd +++ b/man/key_antibiotics.Rd @@ -5,17 +5,20 @@ \alias{key_antibiotics_equal} \title{Key antibiotics for first \emph{weighted} isolates} \usage{ -key_antibiotics(tbl, col_mo = NULL, universal_1 = guess_ab(tbl, - "amox"), universal_2 = guess_ab(tbl, "amcl"), - universal_3 = guess_ab(tbl, "cfur"), universal_4 = guess_ab(tbl, - "pita"), universal_5 = guess_ab(tbl, "cipr"), - universal_6 = guess_ab(tbl, "trsu"), GramPos_1 = guess_ab(tbl, - "vanc"), GramPos_2 = guess_ab(tbl, "teic"), GramPos_3 = guess_ab(tbl, - "tetr"), GramPos_4 = guess_ab(tbl, "eryt"), GramPos_5 = guess_ab(tbl, - "oxac"), GramPos_6 = guess_ab(tbl, "rifa"), GramNeg_1 = guess_ab(tbl, - "gent"), GramNeg_2 = guess_ab(tbl, "tobr"), GramNeg_3 = guess_ab(tbl, - "coli"), GramNeg_4 = guess_ab(tbl, "cfot"), GramNeg_5 = guess_ab(tbl, - "cfta"), GramNeg_6 = guess_ab(tbl, "mero"), warnings = TRUE, ...) 
+key_antibiotics(tbl, col_mo = NULL, universal_1 = guess_ab_col(tbl, + "amox"), universal_2 = guess_ab_col(tbl, "amcl"), + universal_3 = guess_ab_col(tbl, "cfur"), + universal_4 = guess_ab_col(tbl, "pita"), + universal_5 = guess_ab_col(tbl, "cipr"), + universal_6 = guess_ab_col(tbl, "trsu"), + GramPos_1 = guess_ab_col(tbl, "vanc"), GramPos_2 = guess_ab_col(tbl, + "teic"), GramPos_3 = guess_ab_col(tbl, "tetr"), + GramPos_4 = guess_ab_col(tbl, "eryt"), GramPos_5 = guess_ab_col(tbl, + "oxac"), GramPos_6 = guess_ab_col(tbl, "rifa"), + GramNeg_1 = guess_ab_col(tbl, "gent"), GramNeg_2 = guess_ab_col(tbl, + "tobr"), GramNeg_3 = guess_ab_col(tbl, "coli"), + GramNeg_4 = guess_ab_col(tbl, "cfot"), GramNeg_5 = guess_ab_col(tbl, + "cfta"), GramNeg_6 = guess_ab_col(tbl, "mero"), warnings = TRUE, ...) key_antibiotics_equal(x, y, type = c("keyantibiotics", "points"), ignore_I = TRUE, points_threshold = 2, info = FALSE) @@ -25,11 +28,11 @@ key_antibiotics_equal(x, y, type = c("keyantibiotics", "points"), \item{col_mo}{column name of the unique IDs of the microorganisms (see \code{\link{mo}}), defaults to the first column of class \code{mo}. Values will be coerced using \code{\link{as.mo}}.} -\item{universal_1, universal_2, universal_3, universal_4, universal_5, universal_6}{column names of \strong{broad-spectrum} antibiotics, case-insensitive} +\item{universal_1, universal_2, universal_3, universal_4, universal_5, universal_6}{column names of \strong{broad-spectrum} antibiotics, case-insensitive. At default, the columns containing these antibiotics will be guessed with \code{\link{guess_ab_col}}.} -\item{GramPos_1, GramPos_2, GramPos_3, GramPos_4, GramPos_5, GramPos_6}{column names of antibiotics for \strong{Gram positives}, case-insensitive} +\item{GramPos_1, GramPos_2, GramPos_3, GramPos_4, GramPos_5, GramPos_6}{column names of antibiotics for \strong{Gram positives}, case-insensitive. 
At default, the columns containing these antibiotics will be guessed with \code{\link{guess_ab_col}}.} -\item{GramNeg_1, GramNeg_2, GramNeg_3, GramNeg_4, GramNeg_5, GramNeg_6}{column names of antibiotics for \strong{Gram negatives}, case-insensitive} +\item{GramNeg_1, GramNeg_2, GramNeg_3, GramNeg_4, GramNeg_5, GramNeg_6}{column names of antibiotics for \strong{Gram negatives}, case-insensitive. At default, the columns containing these antibiotics will be guessed with \code{\link{guess_ab_col}}.} \item{warnings}{give warning about missing antibiotic columns, they will anyway be ignored} @@ -49,7 +52,7 @@ key_antibiotics_equal(x, y, type = c("keyantibiotics", "points"), These function can be used to determine first isolates (see \code{\link{first_isolate}}). Using key antibiotics to determine first isolates is more reliable than without key antibiotics. These selected isolates will then be called first \emph{weighted} isolates. } \details{ -The function \code{key_antibiotics} returns a character vector with 12 antibiotic results for every isolate. These isolates can then be compared using \code{key_antibiotics_equal}, to check if two isolates have generally the same antibiogram. Missing and invalid values are replaced with a dot (\code{"."}). The \code{\link{first_isolate}} function only uses this function on the same microbial species from the same patient. Using this, an MRSA will be included after a susceptible \emph{S. aureus} (MSSA) found within the same episode (see \code{episode} parameter of \code{\link{first_isolate}}). Without key antibiotic comparison it wouldn't. +The function \code{key_antibiotics} returns a character vector with 12 antibiotic results for every isolate. These isolates can then be compared using \code{key_antibiotics_equal}, to check if two isolates have generally the same antibiogram. Missing and invalid values are replaced with a dot (\code{"."}). 
The \code{\link{first_isolate}} function only uses this function on the same microbial species from the same patient. Using this, an MRSA will be included after a susceptible \emph{S. aureus} (MSSA) found within the same episode (see \code{episode} parameter of \code{\link{first_isolate}}). Without key antibiotic comparison it would not. At default, the antibiotics that are used for \strong{Gram positive bacteria} are (colum names): \cr \code{"amox"}, \code{"amcl"}, \code{"cfur"}, \code{"pita"}, \code{"cipr"}, \code{"trsu"} (until here is universal), \code{"vanc"}, \code{"teic"}, \code{"tetr"}, \code{"eryt"}, \code{"oxac"}, \code{"rifa"}. diff --git a/man/mdro.Rd b/man/mdro.Rd index 04efb7d4..b3220d30 100644 --- a/man/mdro.Rd +++ b/man/mdro.Rd @@ -8,26 +8,36 @@ \title{Determine multidrug-resistant organisms (MDRO)} \usage{ mdro(tbl, country = NULL, col_mo = NULL, info = TRUE, - amcl = guess_ab(), amik = guess_ab(), amox = guess_ab(), - ampi = guess_ab(), azit = guess_ab(), aztr = guess_ab(), - cefa = guess_ab(), cfra = guess_ab(), cfep = guess_ab(), - cfot = guess_ab(), cfox = guess_ab(), cfta = guess_ab(), - cftr = guess_ab(), cfur = guess_ab(), chlo = guess_ab(), - cipr = guess_ab(), clar = guess_ab(), clin = guess_ab(), - clox = guess_ab(), coli = guess_ab(), czol = guess_ab(), - dapt = guess_ab(), doxy = guess_ab(), erta = guess_ab(), - eryt = guess_ab(), fosf = guess_ab(), fusi = guess_ab(), - gent = guess_ab(), imip = guess_ab(), kana = guess_ab(), - levo = guess_ab(), linc = guess_ab(), line = guess_ab(), - mero = guess_ab(), metr = guess_ab(), mino = guess_ab(), - moxi = guess_ab(), nali = guess_ab(), neom = guess_ab(), - neti = guess_ab(), nitr = guess_ab(), novo = guess_ab(), - norf = guess_ab(), oflo = guess_ab(), peni = guess_ab(), - pipe = guess_ab(), pita = guess_ab(), poly = guess_ab(), - qida = guess_ab(), rifa = guess_ab(), roxi = guess_ab(), - siso = guess_ab(), teic = guess_ab(), tetr = guess_ab(), - tica = guess_ab(), tige = guess_ab(), tobr = 
guess_ab(), - trim = guess_ab(), trsu = guess_ab(), vanc = guess_ab()) + amcl = guess_ab_col(), amik = guess_ab_col(), + amox = guess_ab_col(), ampi = guess_ab_col(), + azit = guess_ab_col(), aztr = guess_ab_col(), + cefa = guess_ab_col(), cfra = guess_ab_col(), + cfep = guess_ab_col(), cfot = guess_ab_col(), + cfox = guess_ab_col(), cfta = guess_ab_col(), + cftr = guess_ab_col(), cfur = guess_ab_col(), + chlo = guess_ab_col(), cipr = guess_ab_col(), + clar = guess_ab_col(), clin = guess_ab_col(), + clox = guess_ab_col(), coli = guess_ab_col(), + czol = guess_ab_col(), dapt = guess_ab_col(), + doxy = guess_ab_col(), erta = guess_ab_col(), + eryt = guess_ab_col(), fosf = guess_ab_col(), + fusi = guess_ab_col(), gent = guess_ab_col(), + imip = guess_ab_col(), kana = guess_ab_col(), + levo = guess_ab_col(), linc = guess_ab_col(), + line = guess_ab_col(), mero = guess_ab_col(), + metr = guess_ab_col(), mino = guess_ab_col(), + moxi = guess_ab_col(), nali = guess_ab_col(), + neom = guess_ab_col(), neti = guess_ab_col(), + nitr = guess_ab_col(), novo = guess_ab_col(), + norf = guess_ab_col(), oflo = guess_ab_col(), + peni = guess_ab_col(), pipe = guess_ab_col(), + pita = guess_ab_col(), poly = guess_ab_col(), + qida = guess_ab_col(), rifa = guess_ab_col(), + roxi = guess_ab_col(), siso = guess_ab_col(), + teic = guess_ab_col(), tetr = guess_ab_col(), + tica = guess_ab_col(), tige = guess_ab_col(), + tobr = guess_ab_col(), trim = guess_ab_col(), + trsu = guess_ab_col(), vanc = guess_ab_col()) brmo(..., country = "nl") @@ -177,7 +187,7 @@ When \code{country} will be left blank, guidelines will be taken from EUCAST Exp } \section{Antibiotics}{ -To define antibiotics column names, leave as it is to determine it automatically with \code{\link{guess_ab}} or input a text (case-insensitive) or use \code{NULL} to skip a column (e.g. \code{tica = NULL}). Non-existing columns will anyway be skipped with a warning. 
+To define antibiotics column names, leave as it is to determine it automatically with \code{\link{guess_ab_col}} or input a text (case-insensitive) or use \code{NULL} to skip a column (e.g. \code{tica = NULL}). Non-existing columns will anyway be skipped with a warning. Abbrevations of the column containing antibiotics in the form: \strong{abbreviation}: generic name (\emph{ATC code}) diff --git a/pkgdown/extra.css b/pkgdown/extra.css index df38f6b5..2c0d74ab 100644 --- a/pkgdown/extra.css +++ b/pkgdown/extra.css @@ -50,6 +50,7 @@ a, a:hover, a:focus { } a[href], a[href]:hover, a[href]:focus, code a[href], code a[href]:hover, code a[href]:focus, +a[href] code, a[href] code:hover, a[href] code:focus, pre a[href], pre a[href]:hover, pre a[href]:focus, a code[href], a code[href]:hover, a code[href]:focus, a pre[href], a pre[href]:hover, a pre[href]:focus { @@ -139,3 +140,9 @@ thead { tbody { border-bottom: 2px solid black; } +table a:not(.btn), .table a:not(.btn) { + text-decoration: inherit; +} +table a:not(.btn):hover, .table a:not(.btn):hover { + text-decoration: underline; +} diff --git a/tests/testthat/test-guess_ab_col.R b/tests/testthat/test-guess_ab_col.R new file mode 100644 index 00000000..e72d6259 --- /dev/null +++ b/tests/testthat/test-guess_ab_col.R @@ -0,0 +1,36 @@ +# ==================================================================== # +# TITLE # +# Antimicrobial Resistance (AMR) Analysis # +# # +# SOURCE # +# https://gitlab.com/msberends/AMR # +# # +# LICENCE # +# (c) 2019 Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl) # +# # +# This R package is free software; you can freely use and distribute # +# it for both personal and commercial purposes under the terms of the # +# GNU General Public License version 2.0 (GNU GPL-2), as published by # +# the Free Software Foundation. 
# +# # +# This R package was created for academic research and was publicly # +# released in the hope that it will be useful, but it comes WITHOUT # +# ANY WARRANTY OR LIABILITY. # +# Visit our website for more info: https://msberends.gitlab.io/AMR. # +# ==================================================================== # + +context("guess_ab_col.R") + +test_that("guess_ab_col works", { + + expect_equal(guess_ab_col(septic_patients, "amox"), + "amox") + expect_equal(guess_ab_col(septic_patients, "amoxicillin"), + "amox") + expect_equal(guess_ab_col(septic_patients, "J01AA07"), + "tetr") + expect_equal(guess_ab_col(septic_patients, "tetracycline"), + "tetr") + expect_equal(guess_ab_col(septic_patients, "TETR"), + "tetr") +}) diff --git a/vignettes/AMR.Rmd b/vignettes/AMR.Rmd index 662d03e3..d6fe1289 100755 --- a/vignettes/AMR.Rmd +++ b/vignettes/AMR.Rmd @@ -21,8 +21,6 @@ knitr::opts_chunk$set( fig.width = 7.5, fig.height = 4.5 ) -# set to original language (English) -Sys.setlocale(locale = "C") ``` **Note:** values on this page will change with every website update since they are based on randomly created values and the page was written in [RMarkdown](https://rmarkdown.rstudio.com/). However, the methodology remains unchanged. This page was generated on `r format(Sys.Date(), "%d %B %Y")`. 
diff --git a/vignettes/EUCAST.Rmd b/vignettes/EUCAST.Rmd index f9d5a35f..4db83445 100644 --- a/vignettes/EUCAST.Rmd +++ b/vignettes/EUCAST.Rmd @@ -21,8 +21,6 @@ knitr::opts_chunk$set( fig.width = 7.5, fig.height = 4.5 ) -# set to original language (English) -Sys.setlocale(locale = "C") ``` ## Introduction diff --git a/vignettes/G_test.Rmd b/vignettes/G_test.Rmd index 64a89f49..019c2f08 100755 --- a/vignettes/G_test.Rmd +++ b/vignettes/G_test.Rmd @@ -18,8 +18,6 @@ knitr::opts_chunk$set( collapse = TRUE, comment = "#" ) -# set to original language (English) -Sys.setlocale(locale = "C") ``` *(will be available soon - in the meanwhile, please read about [this *G*-test in the manual](./../reference/g.test.html))* diff --git a/vignettes/Predict.Rmd b/vignettes/Predict.Rmd index 8ee2e1cd..56cdbac3 100755 --- a/vignettes/Predict.Rmd +++ b/vignettes/Predict.Rmd @@ -18,8 +18,6 @@ knitr::opts_chunk$set( collapse = TRUE, comment = "#" ) -# set to original language (English) -Sys.setlocale(locale = "C") ``` *(will be available soon)* diff --git a/vignettes/benchmarks.Rmd b/vignettes/benchmarks.Rmd new file mode 100755 index 00000000..e2308414 --- /dev/null +++ b/vignettes/benchmarks.Rmd @@ -0,0 +1,205 @@ +--- +title: "Benchmarks" +author: "Matthijs S. Berends" +date: '`r format(Sys.Date(), "%d %B %Y")`' +output: + rmarkdown::html_vignette: + toc: true + toc_depth: 3 +vignette: > + %\VignetteIndexEntry{Benchmarks} + %\VignetteEncoding{UTF-8} + %\VignetteEngine{knitr::rmarkdown} +editor_options: + chunk_output_type: console +--- + +```{r setup, include = FALSE, results = 'markup'} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#", + fig.width = 7.5, + fig.height = 4.5 +) +``` + +One of the most important features of this package is the complete microbial taxonomic database, supplied by ITIS (https://www.itis.gov). 
We created a function `as.mo()` that transforms any user input value to a valid microbial ID by using AI (Artificial Intelligence) and based on the taxonomic tree of ITIS. + +Using the `microbenchmark` package, we can review the calculation performance of this function. + +```r +library(microbenchmark) +``` + +In the next test, we try to 'coerce' different input values for *Staphylococcus aureus*. The actual result is the same every time: it returns its MO code `B_STPHY_AUR` (*B* stands for *Bacteria*, the taxonomic kingdom). + +But the calculation time differs a lot. Here, the AI effect can be reviewed best: + +```r +microbenchmark(A = as.mo("stau"), + B = as.mo("staaur"), + C = as.mo("S. aureus"), + D = as.mo("S. aureus"), + E = as.mo("STAAUR"), + F = as.mo("Staphylococcus aureus"), + G = as.mo("B_STPHY_AUR"), + times = 10, + unit = "ms") +# Unit: milliseconds +# expr min lq mean median uq max neval +# A 34.745551 34.798630 35.2596102 34.8994810 35.258325 38.067062 10 +# B 7.095386 7.125348 7.2219948 7.1613865 7.240377 7.495857 10 +# C 11.677114 11.733826 11.8304789 11.7715050 11.843756 12.317559 10 +# D 11.694435 11.730054 11.9859313 11.8775585 12.206371 12.750016 10 +# E 7.044402 7.117387 7.2271630 7.1923610 7.246104 7.742396 10 +# F 6.642326 6.778446 6.8988042 6.8753165 6.923577 7.513945 10 +# G 0.106788 0.131023 0.1351229 0.1357725 0.144014 0.146458 10 +``` + +In the table above, all measurements are shown in milliseconds (thousandths of a second), tested on a quite regular Linux server from 2007 (Core 2 Duo 2.7 GHz, 2 GB DDR2 RAM). A value of 6.9 milliseconds means it will roughly determine 144 input values per second. In case of 39.2 milliseconds, this is only 26 input values per second. The more an input value resembles a full name (like C, D and F), the faster the result will be found. In case of G, the input is already a valid MO code, so it takes almost no time at all (0.0001 seconds on our server). 
+ +To achieve this speed, the `as.mo` function also takes into account the prevalence of human pathogenic microorganisms. The downside is of course that less prevalent microorganisms will be determined far slower. See this example for the ID of *Burkholderia nodosa* (`B_BRKHL_NOD`): + +```r +microbenchmark(A = as.mo("buno"), + B = as.mo("burnod"), + C = as.mo("B. nodosa"), + D = as.mo("B. nodosa"), + E = as.mo("BURNOD"), + F = as.mo("Burkholderia nodosa"), + G = as.mo("B_BRKHL_NOD"), + times = 10, + unit = "ms") +# Unit: milliseconds +# expr min lq mean median uq max neval +# A 124.175427 124.474837 125.8610536 125.3750560 126.160945 131.485994 10 +# B 154.249713 155.364729 160.9077032 156.8738940 157.136183 197.315105 10 +# C 66.066571 66.162393 66.5538611 66.4488130 66.698077 67.623404 10 +# D 86.747693 86.918665 90.7831016 87.8149725 89.440982 116.767991 10 +# E 154.863827 155.208563 162.6535954 158.4062465 168.593785 187.378088 10 +# F 32.427028 32.638648 32.9929454 32.7860475 32.992813 34.674241 10 +# G 0.213155 0.216578 0.2369226 0.2338985 0.253734 0.285581 10 +``` + +That takes up to 11 times as much time! A value of 158.4 milliseconds means it can only determine ~6 different input values per second. We can conclude that looking up arbitrary codes of less prevalent microorganisms is the worst way to go, in terms of calculation performance. + +To mitigate this pitfall and further improve performance, two important calculations take almost no time at all: **repetitive results** and **already precalculated results**. + +### Repetitive results + +Repetitive results mean that unique values are present more than once. Unique values will only be calculated once by `as.mo()`. We will use `mo_fullname()` for this test - a helper function that returns the full microbial name (genus, species and possibly subspecies) and uses `as.mo()` internally. 
+ +```r +library(dplyr) +# take 500,000 random MO codes from the septic_patients data set +x = septic_patients %>% + sample_n(500000, replace = TRUE) %>% + pull(mo) + +# got the right length? +length(x) +# [1] 500000 + +# and how many unique values do we have? +n_distinct(x) +# [1] 96 + +# only 96, but distributed in 500,000 results. now let's see: +microbenchmark(X = mo_fullname(x), + times = 10, + unit = "ms") +# Unit: milliseconds +# expr min lq mean median uq max neval +# X 114.9342 117.1076 129.6448 120.2047 131.5005 168.6371 10 +``` + +So transforming 500,000 values (!) of 96 unique values only takes 0.12 seconds (120 ms). You only lose time on your unique input values. + +Results of a tenfold - 5,000,000 values: + +```r +# Unit: milliseconds +# expr min lq mean median uq max neval +# X 882.9045 901.3011 1001.677 940.3421 1168.088 1226.846 10 +``` + +Even the full names of 5 *Million* values are calculated within a second. + +### Precalculated results + +What about precalculated results? If the input is an already precalculated result of a helper function like `mo_fullname()`, it almost doesn't take any time at all (see 'C' below): + +```r +microbenchmark(A = mo_fullname("B_STPHY_AUR"), + B = mo_fullname("S. aureus"), + C = mo_fullname("Staphylococcus aureus"), + times = 10, + unit = "ms") +# Unit: milliseconds +# expr min lq mean median uq max neval +# A 11.364086 11.460537 11.5104799 11.4795330 11.524860 11.818263 10 +# B 11.976454 12.012352 12.1704592 12.0853020 12.210004 12.881737 10 +# C 0.095823 0.102528 0.1167754 0.1153785 0.132629 0.140661 10 +``` + +So going from `mo_fullname("Staphylococcus aureus")` to `"Staphylococcus aureus"` takes 0.0001 seconds - it doesn't even start calculating *if the result would be the same as the expected resulting value*. 
That goes for all helper functions: + +```r +microbenchmark(A = mo_species("aureus"), + B = mo_genus("Staphylococcus"), + C = mo_fullname("Staphylococcus aureus"), + D = mo_family("Staphylococcaceae"), + E = mo_order("Bacillales"), + F = mo_class("Bacilli"), + G = mo_phylum("Firmicutes"), + H = mo_subkingdom("Posibacteria"), + I = mo_kingdom("Bacteria"), + times = 10, + unit = "ms") +# Unit: milliseconds +# expr min lq mean median uq max neval +# A 0.105181 0.121314 0.1478538 0.1465265 0.166711 0.211409 10 +# B 0.132558 0.146388 0.1584278 0.1499835 0.164895 0.208477 10 +# C 0.135492 0.160355 0.2341847 0.1884665 0.348857 0.395931 10 +# D 0.109650 0.115727 0.1270481 0.1264130 0.128648 0.168317 10 +# E 0.081574 0.096940 0.0992582 0.0980915 0.101479 0.120477 10 +# F 0.081575 0.088489 0.0988463 0.0989650 0.103365 0.126482 10 +# G 0.091981 0.095333 0.1043568 0.1001530 0.111327 0.129625 10 +# H 0.092610 0.093169 0.1009135 0.0985455 0.101828 0.120406 10 +# I 0.087371 0.091213 0.1069758 0.0941815 0.109302 0.192831 10 +``` + +Of course, when running `mo_phylum("Firmicutes")` the function has zero knowledge about the actual microorganism, namely *S. aureus*. But since the result would be `"Firmicutes"` too, there is no point in calculating the result. And because this package 'knows' all phyla of all known microorganisms (according to ITIS), it can just return the initial value immediately. 
+ +### Results in other languages + +When the system language is non-English and supported by this `AMR` package, some functions take a little while longer: + +```r +mo_fullname("CoNS", language = "en") # or just mo_fullname("CoNS") on an English system +# "Coagulase Negative Staphylococcus (CoNS)" + +mo_fullname("CoNS", language = "fr") # or just mo_fullname("CoNS") on a French system +# "Staphylococcus à coagulase négative (CoNS)" + +microbenchmark(en = mo_fullname("CoNS", language = "en"), + de = mo_fullname("CoNS", language = "de"), + nl = mo_fullname("CoNS", language = "nl"), + es = mo_fullname("CoNS", language = "es"), + it = mo_fullname("CoNS", language = "it"), + fr = mo_fullname("CoNS", language = "fr"), + pt = mo_fullname("CoNS", language = "pt"), + times = 10, + unit = "ms") +# Unit: milliseconds +# expr min lq mean median uq max neval +# en 6.093583 6.51724 6.555105 6.562986 6.630663 6.99698 100 +# de 13.934874 14.35137 16.891587 14.462210 14.764658 43.63956 100 +# nl 13.900092 14.34729 15.943268 14.424565 14.581535 43.76283 100 +# es 13.833813 14.34596 14.574783 14.439757 14.653994 17.49168 100 +# it 13.811883 14.36621 15.179060 14.453515 14.812359 43.64284 100 +# fr 13.798683 14.37019 16.344731 14.468775 14.697610 48.62923 100 +# pt 13.789674 14.36244 15.706321 14.443772 14.679905 44.76701 100 +``` + +Currently supported are German, Dutch, Spanish, Italian, French and Portuguese. 
diff --git a/vignettes/freq.Rmd b/vignettes/freq.Rmd index cbf22f04..250a51f9 100644 --- a/vignettes/freq.Rmd +++ b/vignettes/freq.Rmd @@ -22,8 +22,6 @@ knitr::opts_chunk$set( fig.width = 7.5, fig.height = 4.5 ) -# set to original language (English) -Sys.setlocale(locale = "C") library(dplyr) library(AMR) ``` diff --git a/vignettes/mo_property.Rmd b/vignettes/mo_property.Rmd index 4ccbbdeb..71aee34c 100755 --- a/vignettes/mo_property.Rmd +++ b/vignettes/mo_property.Rmd @@ -18,8 +18,6 @@ knitr::opts_chunk$set( collapse = TRUE, comment = "#" ) -# set to original language (English) -Sys.setlocale(locale = "C") ``` *(will be available soon)*