diff --git a/DESCRIPTION b/DESCRIPTION index e5a0937b..c44e49c6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR -Version: 1.2.0.9013 -Date: 2020-06-22 +Version: 1.2.0.9014 +Date: 2020-06-25 Title: Antimicrobial Resistance Analysis Authors@R: c( person(role = c("aut", "cre"), diff --git a/NEWS.md b/NEWS.md index c3daabc8..ebe195af 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,7 +1,12 @@ -# AMR 1.2.0.9013 -## Last updated: 22-Jun-2020 +# AMR 1.2.0.9014 +## Last updated: 25-Jun-2020 ### New +* Function `ab_from_text()` to retrieve antimicrobial drugs from clinical texts in e.g. health care records, which also corrects for misspelling since it uses `as.ab()` internally: + ```r + ab_from_text("28/03/2020 regular amoxiciliin 500mg po tds") + #> [1] "Amoxicillin" + ``` * [Tidyverse selections](https://tidyselect.r-lib.org/reference/language.html) for antibiotic classes, that help to select the columns of antibiotics that are of a specific antibiotic class, without the need to define the columns or antibiotic abbreviations. They can be used in any function that allows Tidyverse selections, like `dplyr::select()` and `tidyr::pivot_longer()`: ```r library(dplyr) @@ -26,6 +31,7 @@ * Added antibiotics code "FOX1" for cefoxitin screening (abbreviation "cfsc") to the `antibiotics` data set ### Changed +* Fixed a bug for using `susceptibility()` or `resistance()` outside `summarise()` * Fixed a bug where `eucast_rules()` would not work on a tibble when the `tibble` or `dplyr` package was loaded * All `*_join_microorganisms()` functions and `bug_drug_combinations()` now return the original data class (e.g. `tibble`s and `data.table`s) * Fixed a bug where `as.ab()` would return an error on invalid input values @@ -34,6 +40,8 @@ * Fixed a bug in `bug_drug_combinations()` for when only one antibiotic was in the input data * Changed the summary for class `<mo>`, to highlight the %SI vs. 
%R * Improved error handling, giving more useful info when functions return an error +* Algorithm improvements to `as.ab()` +* Added Monuril as trade name for fosfomycin # AMR 1.2.0 diff --git a/R/ab.R b/R/ab.R index baf11e39..31da9d82 100755 --- a/R/ab.R +++ b/R/ab.R @@ -30,6 +30,7 @@ #' @details All entries in the [antibiotics] data set have three different identifiers: a human readable EARS-Net code (column `ab`, used by ECDC and WHONET), an ATC code (column `atc`, used by WHO), and a CID code (column `cid`, Compound ID, used by PubChem). The data set contains more than 5,000 official brand names from many different countries, as found in PubChem. #' #' Use the [ab_property()] functions to get properties based on the returned antibiotic ID, see Examples. +#' #' @section Source: #' World Health Organization (WHO) Collaborating Centre for Drug Statistics Methodology: \url{https://www.whocc.no/atc_ddd_index/} #' @@ -38,7 +39,9 @@ #' European Commission Public Health PHARMACEUTICALS - COMMUNITY REGISTER: \url{http://ec.europa.eu/health/documents/community-register/html/atc.htm} #' @aliases ab #' @return Character (vector) with class [`ab`]. Unknown values will return `NA`. -#' @seealso [antibiotics] for the dataframe that is being used to determine ATCs. +#' @seealso +#' * [antibiotics] for the dataframe that is being used to determine ATCs +#' * [ab_from_text()] for a function to retrieve antimicrobial drugs from clinical text (from health care records) #' @inheritSection AMR Read more on our website! #' @export #' @examples @@ -72,6 +75,9 @@ as.ab <- function(x, ...) { return(x) } + initial <- is.null(list(...)$initial) + already_regex <- isTRUE(list(...)$already_regex) + if (all(toupper(x) %in% antibiotics$ab)) { # valid AB code, but not yet right class return(structure(.Data = toupper(x), @@ -79,26 +85,30 @@ as.ab <- function(x, ...) 
{ } x_bak <- x + x <- toupper(x) # remove diacritics x <- iconv(x, from = "UTF-8", to = "ASCII//TRANSLIT") x <- gsub('"', "", x, fixed = TRUE) - # remove suffices - x_bak_clean <- gsub("_(mic|rsi|dis[ck])$", "", x, ignore.case = TRUE) - # remove disk concentrations, like LVX_NM -> LVX - x_bak_clean <- gsub("_[A-Z]{2}[0-9_.]{0,3}$", "", x_bak_clean, ignore.case = TRUE) - # remove part between brackets if that's followed by another string - x_bak_clean <- gsub("(.*)+ [(].*[)]", "\\1", x_bak_clean) - # keep only max 1 space - x_bak_clean <- trimws(gsub(" +", " ", x_bak_clean, ignore.case = TRUE)) - # non-character, space or number should be a slash - x_bak_clean <- gsub("[^A-Za-z0-9 -]", "/", x_bak_clean) - # spaces around non-characters must be removed: amox + clav -> amox/clav - x_bak_clean <- gsub("(.*[a-zA-Z0-9]) ([^a-zA-Z0-9].*)", "\\1\\2", x_bak_clean) - x_bak_clean <- gsub("(.*[^a-zA-Z0-9]) ([a-zA-Z0-9].*)", "\\1\\2", x_bak_clean) - # remove hyphen after a starting "co" - x_bak_clean <- gsub("^co-", "co", x_bak_clean, ignore.case = TRUE) - # replace text 'and' with a slash - x_bak_clean <- gsub(" and ", "/", x_bak_clean, ignore.case = TRUE) + x_bak_clean <- x + if (already_regex == FALSE) { + # remove suffices + x_bak_clean <- gsub("_(MIC|RSI|DIS[CK])$", "", x_bak_clean) + # remove disk concentrations, like LVX_NM -> LVX + x_bak_clean <- gsub("_[A-Z]{2}[0-9_.]{0,3}$", "", x_bak_clean) + # remove part between brackets if that's followed by another string + x_bak_clean <- gsub("(.*)+ [(].*[)]", "\\1", x_bak_clean) + # keep only max 1 space + x_bak_clean <- trimws(gsub(" +", " ", x_bak_clean)) + # non-character, space or number should be a slash + x_bak_clean <- gsub("[^A-Z0-9 -]", "/", x_bak_clean) + # spaces around non-characters must be removed: amox + clav -> amox/clav + x_bak_clean <- gsub("(.*[A-Z0-9]) ([^A-Z0-9].*)", "\\1\\2", x_bak_clean) + x_bak_clean <- gsub("(.*[^A-Z0-9]) ([A-Z0-9].*)", "\\1\\2", x_bak_clean) + # remove hyphen after a starting "co" + 
x_bak_clean <- gsub("^CO-", "CO", x_bak_clean) + # replace text 'and' with a slash + x_bak_clean <- gsub(" AND ", "/", x_bak_clean) + } x <- unique(x_bak_clean) x_new <- rep(NA_character_, length(x)) @@ -118,14 +128,14 @@ as.ab <- function(x, ...) { } # exact AB code - found <- antibiotics[which(antibiotics$ab == toupper(x[i])), ]$ab + found <- antibiotics[which(antibiotics$ab == x[i]), ]$ab if (length(found) > 0) { x_new[i] <- found[1L] next } # exact ATC code - found <- antibiotics[which(antibiotics$atc == toupper(x[i])), ]$ab + found <- antibiotics[which(antibiotics$atc == x[i]), ]$ab if (length(found) > 0) { x_new[i] <- found[1L] next @@ -139,7 +149,7 @@ as.ab <- function(x, ...) { } # exact name - found <- antibiotics[which(toupper(antibiotics$name) == toupper(x[i])), ]$ab + found <- antibiotics[which(toupper(antibiotics$name) == x[i]), ]$ab if (length(found) > 0) { x_new[i] <- found[1L] next @@ -147,11 +157,7 @@ as.ab <- function(x, ...) { # exact LOINC code loinc_found <- unlist(lapply(antibiotics$loinc, - function(s) if (x[i] %in% s) { - TRUE - } else { - FALSE - })) + function(s) x[i] %in% s)) found <- antibiotics$ab[loinc_found == TRUE] if (length(found) > 0) { x_new[i] <- found[1L] @@ -160,11 +166,7 @@ as.ab <- function(x, ...) { # exact synonym synonym_found <- unlist(lapply(antibiotics$synonyms, - function(s) if (toupper(x[i]) %in% toupper(s)) { - TRUE - } else { - FALSE - })) + function(s) x[i] %in% toupper(s))) found <- antibiotics$ab[synonym_found == TRUE] if (length(found) > 0) { x_new[i] <- found[1L] @@ -173,90 +175,87 @@ as.ab <- function(x, ...) 
{ # exact abbreviation abbr_found <- unlist(lapply(antibiotics$abbreviations, - function(a) if (toupper(x[i]) %in% toupper(a)) { - TRUE - } else { - FALSE - })) + function(a) x[i] %in% toupper(a))) found <- antibiotics$ab[abbr_found == TRUE] if (length(found) > 0) { x_new[i] <- found[1L] next } - # first >=4 characters of name - if (nchar(x[i]) >= 4) { - found <- antibiotics[which(toupper(antibiotics$name) %like% paste0("^", x[i])), ]$ab - if (length(found) > 0) { - x_new[i] <- found[1L] - next - } - } - # allow characters that resemble others, but only continue when having more than 3 characters if (nchar(x[i]) <= 3) { x_unknown <- c(x_unknown, x_bak[x[i] == x_bak_clean][1]) next } - x_spelling <- tolower(x[i]) - x_spelling <- gsub("[iy]+", "[iy]+", x_spelling) - x_spelling <- gsub("(c|k|q|qu|s|z|x|ks)+", "(c|k|q|qu|s|z|x|ks)+", x_spelling) - x_spelling <- gsub("(ph|f|v)+", "(ph|f|v)+", x_spelling) - x_spelling <- gsub("(th|t)+", "(th|t)+", x_spelling) - x_spelling <- gsub("a+", "a+", x_spelling) - x_spelling <- gsub("e+", "e+", x_spelling) - x_spelling <- gsub("o+", "o+", x_spelling) - # allow any ending of -in/-ine and -im/-ime - x_spelling <- gsub("(\\[iy\\]\\+(n|m)|\\[iy\\]\\+(n|m)e\\+)$", "[iy]+(n|m)e*", x_spelling) - # allow any ending of -ol/-ole - x_spelling <- gsub("(o\\+l|o\\+le\\+)$", "o+le*", x_spelling) - # allow any ending of -on/-one - x_spelling <- gsub("(o\\+n|o\\+ne\\+)$", "o+ne*", x_spelling) - # replace multiple same characters to single one with '+', like "ll" -> "l+" - x_spelling <- gsub("(.)\\1+", "\\1+", x_spelling) - # replace spaces and slashes with a possibility on both - x_spelling <- gsub("[ /]", "( .*|.*/)", x_spelling) - + x_spelling <- x[i] + if (already_regex == FALSE) { + x_spelling <- gsub("[IY]+", "[IY]+", x_spelling) + x_spelling <- gsub("(C|K|Q|QU|S|Z|X|KS)+", "(C|K|Q|QU|S|Z|X|KS)+", x_spelling) + x_spelling <- gsub("(PH|F|V)+", "(PH|F|V)+", x_spelling) + x_spelling <- gsub("(TH|T)+", "(TH|T)+", x_spelling) + x_spelling <- 
gsub("A+", "A+", x_spelling) + x_spelling <- gsub("E+", "E+", x_spelling) + x_spelling <- gsub("O+", "O+", x_spelling) + # allow any ending of -in/-ine and -im/-ime + x_spelling <- gsub("(\\[IY\\]\\+(N|M)|\\[IY\\]\\+(N|M)E\\+)$", "[IY]+(N|M)E*", x_spelling) + # allow any ending of -ol/-ole + x_spelling <- gsub("(O\\+L|O\\+LE\\+)$", "O+LE*", x_spelling) + # allow any ending of -on/-one + x_spelling <- gsub("(O\\+N|O\\+NE\\+)$", "O+NE*", x_spelling) + # replace multiple same characters to single one with '+', like "ll" -> "l+" + x_spelling <- gsub("(.)\\1+", "\\1+", x_spelling) + # replace spaces and slashes with a possibility on both + x_spelling <- gsub("[ /]", "( .*|.*/)", x_spelling) + # correct for digital reading text (OCR) + x_spelling <- gsub("[NRD]", "[NRD]", x_spelling) + } + # try if name starts with it found <- antibiotics[which(antibiotics$name %like% paste0("^", x_spelling)), ]$ab if (length(found) > 0) { x_new[i] <- found[1L] next } + # try if name ends with it + found <- antibiotics[which(antibiotics$name %like% paste0(x_spelling, "$")), ]$ab + if (nchar(x[i]) >= 4 & length(found) > 0) { + x_new[i] <- found[1L] + next + } # and try if any synonym starts with it synonym_found <- unlist(lapply(antibiotics$synonyms, - function(s) if (any(s %like% paste0("^", x_spelling))) { - TRUE - } else { - FALSE - })) + function(s) any(s %like% paste0("^", x_spelling)))) found <- antibiotics$ab[synonym_found == TRUE] if (length(found) > 0) { x_new[i] <- found[1L] next } - - # try by removing all spaces - if (x[i] %like% " ") { - found <- suppressWarnings(as.ab(gsub(" +", "", x[i]))) - if (length(found) > 0 & !is.na(found)) { - x_new[i] <- found[1L] - next + + # INITIAL - More uncertain results ---- + + if (initial == TRUE) { + # only run on first try + + # try by removing all spaces + if (x[i] %like% " ") { + found <- suppressWarnings(as.ab(gsub(" +", "", x[i]), initial = FALSE)) + if (length(found) > 0 & !is.na(found)) { + x_new[i] <- found[1L] + next + } } - } - - 
# try by removing all spaces and numbers - if (x[i] %like% " " | x[i] %like% "[0-9]") { - found <- suppressWarnings(as.ab(gsub("[ 0-9]", "", x[i]))) - if (length(found) > 0 & !is.na(found)) { - x_new[i] <- found[1L] - next + + # try by removing all spaces and numbers + if (x[i] %like% " " | x[i] %like% "[0-9]") { + found <- suppressWarnings(as.ab(gsub("[ 0-9]", "", x[i]), initial = FALSE)) + if (length(found) > 0 & !is.na(found)) { + x_new[i] <- found[1L] + next + } } - } - - if (!isFALSE(list(...)$initial_search)) { + # transform back from other languages and try again - x_translated <- paste(lapply(strsplit(x[i], "[^a-zA-Z0-9 ]"), + x_translated <- paste(lapply(strsplit(x[i], "[^A-Z0-9 ]"), function(y) { for (i in seq_len(length(y))) { y[i] <- ifelse(tolower(y[i]) %in% tolower(translations_file$replacement), @@ -267,41 +266,102 @@ as.ab <- function(x, ...) { y })[[1]], collapse = "/") - x_translated_guess <- suppressWarnings(as.ab(x_translated, initial_search = FALSE)) + x_translated_guess <- suppressWarnings(as.ab(x_translated, initial = FALSE)) if (!is.na(x_translated_guess)) { x_new[i] <- x_translated_guess next } - if (!isFALSE(list(...)$initial_search2)) { - # now also try to coerce brandname combinations like "Amoxy/clavulanic acid" - x_translated <- paste(lapply(strsplit(x_translated, "[^a-zA-Z0-9 ]"), - function(y) { - for (i in seq_len(length(y))) { - y_name <- suppressWarnings(ab_name(y[i], language = NULL, initial_search = FALSE, initial_search2 = FALSE)) - y[i] <- ifelse(!is.na(y_name), - y_name, - y[i]) - } - y - })[[1]], - collapse = "/") - x_translated_guess <- suppressWarnings(as.ab(x_translated, initial_search = FALSE)) - if (!is.na(x_translated_guess)) { - x_new[i] <- x_translated_guess + # now also try to coerce brandname combinations like "Amoxy/clavulanic acid" + x_translated <- paste(lapply(strsplit(x_translated, "[^A-Z0-9 ]"), + function(y) { + for (i in seq_len(length(y))) { + y_name <- suppressWarnings(ab_name(y[i], language = NULL, 
initial = FALSE)) + y[i] <- ifelse(!is.na(y_name), + y_name, + y[i]) + } + y + })[[1]], + collapse = "/") + x_translated_guess <- suppressWarnings(as.ab(x_translated, initial = FALSE)) + if (!is.na(x_translated_guess)) { + x_new[i] <- x_translated_guess + next + } + + # try by removing all trailing capitals + if (x[i] %like_case% "[a-z]+[A-Z]+$") { + found <- suppressWarnings(as.ab(gsub("[A-Z]+$", "", x[i]), initial = FALSE)) + if (!is.na(found)) { + x_new[i] <- found[1L] next } } - } - - # try by removing all trailing capitals - if (x[i] %like_case% "[a-z]+[A-Z]+$") { - found <- suppressWarnings(as.ab(gsub("[A-Z]+$", "", x[i]))) - if (length(found) > 0 & !is.na(found)) { + + # keep only letters + found <- suppressWarnings(as.ab(gsub("[^A-Z]", "", x[i]), initial = FALSE)) + if (!is.na(found)) { x_new[i] <- found[1L] next } - } + + # try from a bigger text, like from a health care record, see ?ab_from_text + found <- suppressWarnings(ab_from_text(x[i], initial = FALSE, translate_ab = FALSE)[1L]) + if (!is.na(found)) { + x_new[i] <- found[1L] + next + } + + # first 5 except for cephalosporins, then first 7 (those cephalosporins all start quite the same!) 
+ found <- suppressWarnings(as.ab(substr(x[i], 1, 5), initial = FALSE)) + if (!is.na(found) && !ab_group(found, initial = FALSE) %like% "cephalosporins") { + x_new[i] <- found[1L] + next + } + found <- suppressWarnings(as.ab(substr(x[i], 1, 7), initial = FALSE)) + if (!is.na(found)) { + x_new[i] <- found[1L] + next + } + + # make all consonants facultative + search_str <- gsub("([BCDFGHJKLMNPQRSTVWXZ])", "\\1*", x[i]) + found <- suppressWarnings(as.ab(search_str, initial = FALSE, already_regex = TRUE)) + # keep at least 4 normal characters + if (nchar(gsub(".\\*", "", search_str)) < 4) { + found <- NA + } + if (!is.na(found)) { + x_new[i] <- found[1L] + next + } + + # make all vowels facultative + search_str <- gsub("([AEIOUY])", "\\1*", x[i]) + found <- suppressWarnings(as.ab(search_str, initial = FALSE, already_regex = TRUE)) + # keep at least 5 normal characters + if (nchar(gsub(".\\*", "", search_str)) < 5) { + found <- NA + } + if (!is.na(found)) { + x_new[i] <- found[1L] + next + } + + # allow misspelling of vowels + x_spelling <- gsub("A+", "[AEIOU]+", x_spelling, fixed = TRUE) + x_spelling <- gsub("E+", "[AEIOU]+", x_spelling, fixed = TRUE) + x_spelling <- gsub("I+", "[AEIOU]+", x_spelling, fixed = TRUE) + x_spelling <- gsub("O+", "[AEIOU]+", x_spelling, fixed = TRUE) + x_spelling <- gsub("U+", "[AEIOU]+", x_spelling, fixed = TRUE) + found <- suppressWarnings(as.ab(x_spelling, initial = FALSE, already_regex = TRUE)) + if (!is.na(found)) { + x_new[i] <- found[1L] + next + } + + } # end of initial = TRUE # not found x_unknown <- c(x_unknown, x_bak[x[i] == x_bak_clean][1]) @@ -316,7 +376,7 @@ as.ab <- function(x, ...) { ".", call. 
= FALSE) } - + if (length(x_unknown) > 0) { warning("These values could not be coerced to a valid antimicrobial ID: ", paste('"', sort(unique(x_unknown)), '"', sep = "", collapse = ", "), diff --git a/R/ab_from_text.R b/R/ab_from_text.R new file mode 100644 index 00000000..2edef087 --- /dev/null +++ b/R/ab_from_text.R @@ -0,0 +1,74 @@ +# ==================================================================== # +# TITLE # +# Antimicrobial Resistance (AMR) Analysis # +# # +# SOURCE # +# https://gitlab.com/msberends/AMR # +# # +# LICENCE # +# (c) 2018-2020 Berends MS, Luz CF et al. # +# # +# This R package is free software; you can freely use and distribute # +# it for both personal and commercial purposes under the terms of the # +# GNU General Public License version 2.0 (GNU GPL-2), as published by # +# the Free Software Foundation. # +# # +# We created this package for both routine data analysis and academic # +# research and it was publicly released in the hope that it will be # +# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. # +# Visit our website for more info: https://msberends.gitlab.io/AMR. # +# ==================================================================== # + +#' Retrieve antimicrobial drugs from text +#' +#' Use this function on e.g. clinical texts from health care records. It returns a vector of antimicrobial drugs found in the texts. +#' @param text text to analyse +#' @param collapse character to pass on to `paste(..., collapse = ...)` to only return one character per element of `text`, see Examples +#' @param translate_ab a column name of the [antibiotics] data set to translate the antibiotic abbreviations to, using [ab_property()]. Defaults to "name", which is equal to using `TRUE`. Use a value `FALSE`, `NULL` or `NA` to prevent translation of the `` code. +#' @param ... parameters passed on to [as.ab()] +#' @details To use this for creating a new variable in a data set (e.g. 
with `mutate()`), it could be convenient to paste the outcome together with the `collapse` parameter so every value in your new variable will be a character of length 1:\cr
+#' `df %>% mutate(abx = ab_from_text(clinical_text, collapse = "|"))`
+#'
+#' This function is also internally used by [as.ab()], although it then only returns the first hit.
+#' @examples
+#' # mind the bad spelling of amoxicillin in this line,
+#' # straight from a true health care record:
+#' ab_from_text("28/03/2020 regular amoxicilliin 500mg po tds")
+#'
+#' ab_from_text("administered amoxi/clav and cipro")
+#' ab_from_text("administered amoxi/clav and cipro", collapse = ", ")
+#'
+#' # if you want to know which antibiotic groups were administered, check it:
+#' abx <- ab_from_text("administered amoxi/clav and cipro")
+#' ab_group(abx)
+ab_from_text <- function(text, collapse = NULL, translate_ab = "name", ...) {
+
+  text <- tolower(text)
+
+  abbr <- unlist(antibiotics$abbreviations)
+  abbr <- abbr[nchar(abbr) >= 4]
+  names <- substr(antibiotics$name, 1, 5)
+  synonyms <- unlist(antibiotics$synonyms)
+  synonyms <- synonyms[nchar(synonyms) >= 4]
+  to_regex <- function(x) {
+    paste0("^(",
+           paste0(unique(gsub("[^a-z0-9]", ".*", sort(tolower(x)))), collapse = "|"),
+           ").*")
+  }
+
+  text_split <- unlist(strsplit(text, "[ ;.,:/\\|-]"))
+  result <- as.ab(unique(c(text_split[grep(to_regex(abbr), text_split)],
+                           text_split[grep(to_regex(names), text_split)],
+                           # regular expression must not be too long, so match each half of the synonyms separately (logical masks: `:` binds tighter than `*`, so `1:0.5 * n` is just `n`):
+                           text_split[grep(to_regex(synonyms[seq_along(synonyms) <= length(synonyms) / 2]), text_split)],
+                           text_split[grep(to_regex(synonyms[seq_along(synonyms) > length(synonyms) / 2]), text_split)])),
+                  ...)
+ translate_ab <- get_translate_ab(translate_ab) + if (!isFALSE(translate_ab)) { + result <- ab_property(result, property = translate_ab) + } + if (!is.null(collapse)) { + result <- paste0(result, collapse = collapse) + } + result +} diff --git a/R/like.R b/R/like.R index 964fea10..68d38c4f 100755 --- a/R/like.R +++ b/R/like.R @@ -64,7 +64,7 @@ #' } like <- function(x, pattern, ignore.case = TRUE) { # set to fixed if no regex found - fixed <- all(!grepl("[$.^*?+}{|)(]", pattern)) + fixed <- all(!grepl("[\\[$.^*?+-}{|)(]", pattern)) if (ignore.case == TRUE) { # set here, otherwise if fixed = TRUE, this warning will be thrown: argument 'ignore.case = TRUE' will be ignored x <- tolower(x) diff --git a/R/proportion.R b/R/proportion.R index 3bae3a7a..5392b2d1 100755 --- a/R/proportion.R +++ b/R/proportion.R @@ -30,7 +30,7 @@ #' @param as_percent a logical to indicate whether the output must be returned as a hundred fold with % sign (a character). A value of `0.123456` will then be returned as `"12.3%"`. #' @param only_all_tested (for combination therapies, i.e. using more than one variable for `...`): a logical to indicate that isolates must be tested for all antibiotics, see section *Combination therapy* below #' @param data a [`data.frame`] containing columns with class [`rsi`] (see [as.rsi()]) -#' @param translate_ab a column name of the [antibiotics] data set to translate the antibiotic abbreviations to, using [ab_property()] +#' @param translate_ab a column name of the [antibiotics] data set to translate the antibiotic abbreviations to, using [ab_property()]. Use a value `TRUE` to translate to the `name` column, or `FALSE` to not translate at all. #' @inheritParams ab_property #' @param combine_SI a logical to indicate whether all values of S and I must be merged into one, so the output only consists of S+I vs. R (susceptible vs. resistant). This used to be the parameter `combine_IR`, but this now follows the redefinition by EUCAST about the interpretion of I (increased exposure) in 2019, see section 'Interpretation of S, I and R' below. 
Default is `TRUE`. #' @param combine_IR a logical to indicate whether all values of I and R must be merged into one, so the output only consists of S vs. I+R (susceptible vs. non-susceptible). This is outdated, see parameter `combine_SI`. diff --git a/R/rsi_calc.R b/R/rsi_calc.R index 1a182f62..ee1ada71 100755 --- a/R/rsi_calc.R +++ b/R/rsi_calc.R @@ -56,8 +56,10 @@ rsi_calc <- function(..., # for complete data.frames, like example_isolates %>% select(AMC, GEN) %>% proportion_S() # and the old rsi function, which has "df" as name of the first parameter x <- dots_df + } else if (length(dots) == 1 | all(!dots %in% colnames(dots_df))) { + x <- dots_df } else { - x <- dots_df[, dots[dots %in% colnames(dots_df)]] + x <- dots_df[, dots[dots %in% colnames(dots_df)], drop = FALSE] } } else if (ndots == 1) { # only 1 variable passed (can also be data.frame), like: proportion_S(example_isolates$AMC) and example_isolates$AMC %>% proportion_S() @@ -111,7 +113,7 @@ rsi_calc <- function(..., base::all(y %in% other_values) & base::any(is.na(y)) }) numerator <- sum(as.logical(by(x, seq_len(nrow(x)), function(row) any(unlist(row) %in% ab_result, na.rm = TRUE)))) - denominator <- nrow(x[!other_values_filter, ]) + denominator <- nrow(x[!other_values_filter, , drop = FALSE]) } } else { # x is not a data.frame @@ -168,9 +170,7 @@ rsi_calc_df <- function(type, # "proportion", "count" or "both" } stop_if(isTRUE(combine_SI) & isTRUE(combine_IR), "either `combine_SI` or `combine_IR` can be TRUE, not both", call = -2) - if (as.character(translate_ab) %in% c("TRUE", "official")) { - translate_ab <- "name" - } + translate_ab <- get_translate_ab(translate_ab) # select only groups and antibiotics if (has_groups(data)) { @@ -292,3 +292,19 @@ rsi_calc_df <- function(type, # "proportion", "count" or "both" rownames(out) <- NULL out } + +get_translate_ab <- function(translate_ab) { + translate_ab <- as.character(translate_ab)[1L] + if (translate_ab %in% c("TRUE", "official")) { + return("name") + } 
else if (translate_ab %in% c(NA_character_, "FALSE")) { + return(FALSE) + } else { + translate_ab <- tolower(translate_ab) + stop_ifnot(translate_ab %in% colnames(AMR::antibiotics), + "invalid value for 'translate_ab', this must be a column name of the antibiotics data set\n", + "or TRUE (equals 'name') or FALSE to not translate at all.", + call = FALSE) + translate_ab + } +} diff --git a/data-raw/antibiotics.txt b/data-raw/antibiotics.txt index 78647a6a..92a57f41 100644 --- a/data-raw/antibiotics.txt +++ b/data-raw/antibiotics.txt @@ -200,7 +200,7 @@ "FLM" "J01MB07" 3374 "Flumequine" "Quinolones" "Quinolone antibacterials" "Other quinolones" "" "c(\"apurone\", \"fantacin\", \"flumequine\", \"flumequino\", \"flumequinum\", \"flumigal\", \"flumiquil\", \"flumisol\", \"flumix\", \"imequyl\")" 1.2 "g" "character(0)" "FLR1" "J01FA14" 71260 "Flurithromycin" "Macrolides/lincosamides" "Macrolides, lincosamides and streptogramins" "Macrolides" "" "c(\"flurithromicina\", \"flurithromycime\", \"flurithromycin\", \"flurithromycine\", \"flurithromycinum\", \"fluritromicina\", \"fluritromycinum\", \"flurizic\")" 0.75 "g" "character(0)" "FFL" 214356 "Fosfluconazole" "Antifungals/antimycotics" "" "c(\"fosfluconazole\", \"phosfluconazole\", \"procif\", \"prodif\")" "character(0)" -"FOS" "J01XX01" 446987 "Fosfomycin" "Other antibacterials" "Other antibacterials" "Other antibacterials" "c(\"ff\", \"fm\", \"fo\", \"fos\", \"fosf\")" "c(\"fosfocina\", \"fosfomicina\", \"fosfomycin\", \"fosfomycin sodium\", \"fosfomycine\", \"fosfomycinum\", \"fosfonomycin\", \"phosphonemycin\", \"phosphonomycin\", \"veramina\")" 3 "g" 8 "g" "character(0)" +"FOS" "J01XX01" 446987 "Fosfomycin" "Other antibacterials" "Other antibacterials" "Other antibacterials" "c(\"ff\", \"fm\", \"fo\", \"fos\", \"fosf\")" "c(\"fosfocina\", \"fosfomicina\", \"fosfomycin\", \"fosfomycin sodium\", \"fosfomycine\", \"fosfomycinum\", \"fosfonomycin\", \"monuril\", \"monurol\", \"phosphonemycin\", \"phosphonomycin\", 
\"veramina\")" 3 "g" 8 "g" "character(0)" "FMD" 572 "Fosmidomycin" "Other antibacterials" "" "c(\"fosmidomycin\", \"fosmidomycina\", \"fosmidomycine\", \"fosmidomycinum\")" "character(0)" "FRM" 8378 "Framycetin" "Aminoglycosides" "c(\"\", \"fram\")" "c(\"actilin\", \"actiline\", \"antibiotique\", \"bycomycin\", \"endomixin\", \"enterfram\", \"fradiomycin\", \"fradiomycin b\", \"fradiomycinum\", \"framicetina\", \"framycetin\", \"framycetin sulfate\", \"framycetine\", \"framycetinum\", \"framygen\", \"fraquinol\", \"jernadex\", \"myacine\", \"myacyne\", \"mycerin\", \"mycifradin\", \"neobrettin\", \"neolate\", \"neomas\", \"neomcin\", \"neomicina\", \"neomin\", \"neomycin\", \"neomycin b\", \"neomycin b sulfate\", \"neomycin solution\", \"neomycin sulfate\", \"neomycin sulphate\", \"neomycinb\", \"neomycine\", \"neomycinum\", \"nivemycin\", \"pimavecort\", \"soframycin\", \"soframycine\", \"tuttomycin\", \"vonamycin\", \"vonamycin powder v\")" "character(0)" diff --git a/data-raw/reproduction_of_antibiotics.R b/data-raw/reproduction_of_antibiotics.R index 3f065606..fac3b94f 100644 --- a/data-raw/reproduction_of_antibiotics.R +++ b/data-raw/reproduction_of_antibiotics.R @@ -488,6 +488,10 @@ antibiotics[which(antibiotics$ab == "SLT4"), "abbreviations"][[1]] <- list(c(ant antibiotics[which(antibiotics$ab == "SXT"), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == "SXT"), "abbreviations"][[1]], "trsx")) antibiotics[which(antibiotics$ab == "VAN"), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == "VAN"), "abbreviations"][[1]], "vanc")) antibiotics[which(antibiotics$ab == "VOR"), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == "VOR"), "abbreviations"][[1]], "vori")) + +antibiotics[which(antibiotics$ab == "FOS"), "synonyms"][[1]] <- list(sort(c(antibiotics[which(antibiotics$ab == "FOS"), "synonyms"][[1]], "Monuril"))) +antibiotics[which(antibiotics$ab == "FOS"), "synonyms"][[1]] <- 
list(sort(c(antibiotics[which(antibiotics$ab == "FOS"), "synonyms"][[1]], "Monurol"))) + antibiotics <- antibiotics %>% mutate(ab = as.character(ab)) %>% rbind(antibiotics %>% diff --git a/data/antibiotics.rda b/data/antibiotics.rda index bed5645a..ca57f4df 100755 Binary files a/data/antibiotics.rda and b/data/antibiotics.rda differ diff --git a/docs/404.html b/docs/404.html index 0a28af01..d99ed4d5 100644 --- a/docs/404.html +++ b/docs/404.html @@ -81,7 +81,7 @@ AMR (for R) - 1.2.0.9013 + 1.2.0.9014 diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index 5887367b..a3c366c0 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -81,7 +81,7 @@ AMR (for R) - 1.2.0.9013 + 1.2.0.9014 diff --git a/docs/articles/index.html b/docs/articles/index.html index d5bd305a..d8d1ea9f 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -81,7 +81,7 @@ AMR (for R) - 1.2.0.9013 + 1.2.0.9014 diff --git a/docs/authors.html b/docs/authors.html index d89dfd3c..add975f4 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -81,7 +81,7 @@ AMR (for R) - 1.2.0.9013 + 1.2.0.9014 diff --git a/docs/index.html b/docs/index.html index 7e872990..62dd4c9e 100644 --- a/docs/index.html +++ b/docs/index.html @@ -43,7 +43,7 @@ AMR (for R) - 1.2.0.9013 + 1.2.0.9014 diff --git a/docs/news/index.html b/docs/news/index.html index 88cd6c66..fe6d6366 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -81,7 +81,7 @@ AMR (for R) - 1.2.0.9013 + 1.2.0.9014 @@ -229,21 +229,26 @@ Source: NEWS.md -
-

-AMR 1.2.0.9013 Unreleased +
+

+AMR 1.2.0.9014 Unreleased

-
+

-Last updated: 22-Jun-2020 +Last updated: 25-Jun-2020

New

  • +

    Function ab_from_text() to retrieve antimicrobial drugs from clinical texts in e.g. health care records, which also corrects for misspelling since it uses as.ab() internally:

    +
    ab_from_text("28/03/2020 regular amoxiciliin 500mg po tds")
    +#> [1] "Amoxicillin"
    +
  • +
  • Tidyverse selections for antibiotic classes, that help to select the columns of antibiotics that are of a specific antibiotic class, without the need to define the columns or antibiotic abbreviations. They can be used in any function that allows Tidyverse selections, like dplyr::select() and tidyr::pivot_longer():

    -
    library(dplyr)
    +
    library(dplyr)
     
     # Columns 'IPM' and 'MEM' are in the example_isolates data set
     example_isolates %>%
    @@ -269,6 +274,8 @@
     

    Changed

      +
    • Fixed a bug for using susceptibility() or resistance() outside summarise() +
    • Fixed a bug where eucast_rules() would not work on a tibble when the tibble or dplyr package was loaded
    • All *_join_microorganisms() functions and bug_drug_combinations() now return the original data class (e.g. tibbles and data.tables)
    • Fixed a bug where as.ab() would return an error on invalid input values
    • @@ -278,6 +285,9 @@
    • Fixed a bug in bug_drug_combinations() for when only one antibiotic was in the input data
    • Changed the summary for class <mo>, to highlight the %SI vs. %R
    • Improved error handling, giving more useful info when functions return an error
    • +
    • Algorithm improvements to as.ab() +
    • +
    • Added Monuril as trade name for fosfomycin
    @@ -399,7 +409,7 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/
  • Fixed important floating point error for some MIC comparisons in EUCAST 2020 guideline

  • Interpretation from MIC values (and disk zones) to R/SI can now be used with mutate_at() of the dplyr package:

    -
    yourdata %>%
    +
    yourdata %>%
       mutate_at(vars(antibiotic1:antibiotic25), as.rsi, mo = "E. coli")
     
     yourdata %>%
    @@ -426,7 +436,7 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/
     
    • Support for LOINC codes in the antibiotics data set. Use ab_loinc() to retrieve LOINC codes, or use a LOINC code for input in any ab_* function:

      -
      ab_loinc("ampicillin")
      +
      ab_loinc("ampicillin")
       #> [1] "21066-6" "3355-5"  "33562-0" "33919-2" "43883-8" "43884-6" "87604-5"
       ab_name("21066-6")
       #> [1] "Ampicillin"
      @@ -435,7 +445,7 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/
       
    • Support for SNOMED CT codes in the microorganisms data set. Use mo_snomed() to retrieve SNOMED codes, or use a SNOMED code for input in any mo_* function:

      -
      mo_snomed("S. aureus")
      +
      mo_snomed("S. aureus")
       #> [1] 115329001   3092008 113961008
       mo_name(115329001)
       #> [1] "Staphylococcus aureus"
      @@ -498,9 +508,9 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/
       
      • If you were dependent on the old Enterobacteriaceae family e.g. by using in your code:

        -
        if (mo_family(somebugs) == "Enterobacteriaceae") ...
        +
        if (mo_family(somebugs) == "Enterobacteriaceae") ...

        then please adjust this to:

        -
        if (mo_order(somebugs) == "Enterobacterales") ...
        +
        if (mo_order(somebugs) == "Enterobacterales") ...
    • @@ -512,7 +522,7 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/
      • Functions susceptibility() and resistance() as aliases of proportion_SI() and proportion_R(), respectively. These functions were added to make it more clear that “I” should be considered susceptible and not resistant.

        -
        library(dplyr)
        +
        library(dplyr)
         example_isolates %>%
           group_by(bug = mo_name(mo)) %>%
           summarise(amoxicillin = resistance(AMX),
        @@ -539,7 +549,7 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/
         
      • More intelligent way of coping with some consonants like “l” and “r”

      • Added a score (a certainty percentage) to mo_uncertainties(), that is calculated using the Levenshtein distance:

        -
        as.mo(c("Stafylococcus aureus",
        +
        as.mo(c("Stafylococcus aureus",
                 "staphylokok aureuz"))
         #> Warning: 
         #> Results of two values were guessed with uncertainty. Use mo_uncertainties() to review them.
        @@ -596,12 +606,12 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/
         
        • Determination of first isolates now excludes all ‘unknown’ microorganisms at default, i.e. microbial code "UNKNOWN". They can be included with the new parameter include_unknown:

          -
          first_isolate(..., include_unknown = TRUE)
          +
          first_isolate(..., include_unknown = TRUE)

          For WHONET users, this means that all records/isolates with organism code "con" (contamination) will be excluded at default, since as.mo("con") = "UNKNOWN". The function always shows a note with the number of ‘unknown’ microorganisms that were included or excluded.

        • For code consistency, classes ab and mo will now be preserved in any subsetting or assignment. For the sake of data integrity, this means that invalid assignments will now result in NA:

          -
          # how it works in base R:
          +
          # how it works in base R:
           x <- factor("A")
           x[1] <- "B"
           #> Warning message:
          @@ -624,7 +634,7 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/
           
          • Function bug_drug_combinations() to quickly get a data.frame with the results of all bug-drug combinations in a data set. The column containing microorganism codes is guessed automatically and its input is transformed with mo_shortname() at default:

            -
            x <- bug_drug_combinations(example_isolates)
            +
            x <- bug_drug_combinations(example_isolates)
             #> NOTE: Using column `mo` as input for `col_mo`.
             x[1:4, ]
             #>             mo  ab S I R total
            @@ -645,11 +655,11 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/
             #> 4 Gram-negative AMX 227  0 405   632
             #> NOTE: Use 'format()' on this result to get a publicable/printable format.

            You can format this to a printable format, ready for reporting or exporting to e.g. Excel with the base R format() function:

            -
            format(x, combine_IR = FALSE)
            +
            format(x, combine_IR = FALSE)
          • Additional way to calculate co-resistance, i.e. when using multiple antimicrobials as input for portion_* functions or count_* functions. This can be used to determine the empiric susceptibility of a combination therapy. A new parameter only_all_tested (which defaults to FALSE) replaces the old also_single_tested and can be used to select one of the two methods to count isolates and calculate portions. The difference can be seen in this example table (which is also on the portion and count help pages), where the %SI is being determined:

            -
            # --------------------------------------------------------------------
            +
            # --------------------------------------------------------------------
             #                     only_all_tested = FALSE  only_all_tested = TRUE
             #                     -----------------------  -----------------------
             #  Drug A    Drug B   include as  include as   include as  include as
            @@ -669,7 +679,7 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/
             
          • tibble printing support for classes rsi, mic, disk, ab and mo. When using tibbles containing antimicrobial columns, values S will print in green, values I will print in yellow and values R will print in red. Microbial IDs (class mo) will emphasise the genus and species, not the kingdom.

            -
            # (run this on your own console, as this page does not support colour printing)
            +
            # (run this on your own console, as this page does not support colour printing)
             library(dplyr)
             example_isolates %>%
               select(mo:AMC) %>%
            @@ -750,7 +760,7 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/
             
            • Function rsi_df() to transform a data.frame to a data set containing only the microbial interpretation (S, I, R), the antibiotic, the percentage of S/I/R and the number of available isolates. This is a convenient combination of the existing functions count_df() and portion_df() to immediately show resistance percentages and number of available isolates:

              -
              septic_patients %>%
              +
              septic_patients %>%
                 select(AMX, CIP) %>%
                 rsi_df()
               #      antibiotic  interpretation      value  isolates
              @@ -775,7 +785,7 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/
               
            • UPEC (Uropathogenic E. coli)

            All these lead to the microbial ID of E. coli:

            -
            as.mo("UPEC")
            +
            as.mo("UPEC")
             # B_ESCHR_COL
             mo_name("UPEC")
             # "Escherichia coli"
            @@ -882,7 +892,7 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/
             
          • when all values are unique it now shows a message instead of a warning

          • support for boxplots:

            -
            septic_patients %>%
            +
            septic_patients %>%
               freq(age) %>%
               boxplot()
             # grouped boxplots:
            @@ -973,7 +983,7 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/
             
          • New filters for antimicrobial classes. Use these functions to filter isolates on results in one or more antibiotics from a specific class:

            -
            filter_aminoglycosides()
            +
            filter_aminoglycosides()
             filter_carbapenems()
             filter_cephalosporins()
             filter_1st_cephalosporins()
            @@ -985,14 +995,14 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/
             filter_macrolides()
             filter_tetracyclines()

            The antibiotics data set will be searched, after which the input data will be checked for column names with a value in any abbreviations, codes or official names found in the antibiotics data set. For example:

            -
            septic_patients %>% filter_glycopeptides(result = "R")
            +
            septic_patients %>% filter_glycopeptides(result = "R")
             # Filtering on glycopeptide antibacterials: any of `vanc` or `teic` is R
             septic_patients %>% filter_glycopeptides(result = "R", scope = "all")
             # Filtering on glycopeptide antibacterials: all of `vanc` and `teic` is R
          • All ab_* functions are deprecated and replaced by atc_* functions:

            -
            ab_property -> atc_property()
            +
            ab_property -> atc_property()
             ab_name -> atc_name()
             ab_official -> atc_official()
             ab_trivial_nl -> atc_trivial_nl()
            @@ -1011,17 +1021,17 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/
             
          • New function age_groups() to split ages into custom or predefined groups (like children or elderly). This allows for easier demographic antimicrobial resistance analysis per age group.

          • New function ggplot_rsi_predict() as well as the base R plot() function can now be used for resistance prediction calculated with resistance_predict():

            -
            x <- resistance_predict(septic_patients, col_ab = "amox")
            +
            x <- resistance_predict(septic_patients, col_ab = "amox")
             plot(x)
             ggplot_rsi_predict(x)
          • Functions filter_first_isolate() and filter_first_weighted_isolate() to shorten and speed up filtering on data sets with antimicrobial results, e.g.:

            -
            septic_patients %>% filter_first_isolate(...)
            +
            septic_patients %>% filter_first_isolate(...)
             # or
             filter_first_isolate(septic_patients, ...)

            is equal to:

            -
            septic_patients %>%
            +
            septic_patients %>%
               mutate(only_firsts = first_isolate(septic_patients, ...)) %>%
               filter(only_firsts == TRUE) %>%
               select(-only_firsts)
            @@ -1052,7 +1062,7 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/
            • Now handles incorrect spelling, like i instead of y and f instead of ph:

              -
              # mo_fullname() uses as.mo() internally
              +
              # mo_fullname() uses as.mo() internally
               
               mo_fullname("Sthafilokockus aaureuz")
               #> [1] "Staphylococcus aureus"
              @@ -1062,7 +1072,7 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/
               
            • Uncertainty of the algorithm is now divided into four levels, 0 to 3, where the default allow_uncertain = TRUE is equal to uncertainty level 2. Run ?as.mo for more info about these levels.

              -
              # equal:
              +
              # equal:
               as.mo(..., allow_uncertain = TRUE)
               as.mo(..., allow_uncertain = 2)
               
              @@ -1075,7 +1085,7 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/
               
            • All microbial IDs that were found are now saved to a local file ~/.Rhistory_mo. Use the new function clean_mo_history() to delete this file, which resets the algorithms.

            • Incoercible results will now be considered ‘unknown’, MO code UNKNOWN. On foreign systems, properties of these will be translated to all languages already previously supported: German, Dutch, French, Italian, Spanish and Portuguese:

              -
              mo_genus("qwerty", language = "es")
              +
              mo_genus("qwerty", language = "es")
               # Warning: 
               # one unique value (^= 100.0%) could not be coerced and is considered 'unknown': "qwerty". Use mo_failures() to review it.
               #> [1] "(género desconocido)"
              @@ -1123,7 +1133,7 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/
              • Support for tidyverse quasiquotation! Now you can create frequency tables of function outcomes:

                -
                # Determine genus of microorganisms (mo) in `septic_patients` data set:
                +
                # Determine genus of microorganisms (mo) in `septic_patients` data set:
                 # OLD WAY
                 septic_patients %>%
                   mutate(genus = mo_genus(mo)) %>%
                @@ -1206,7 +1216,7 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/
                 
              • Fewer than 3 characters as input for as.mo will return NA

              • Function as.mo (and all mo_* wrappers) now supports genus abbreviations with “species” attached

                -
                as.mo("E. species")        # B_ESCHR
                +
                as.mo("E. species")        # B_ESCHR
                 mo_fullname("E. spp.")     # "Escherichia species"
                 as.mo("S. spp")            # B_STPHY
                 mo_fullname("S. species")  # "Staphylococcus species"
                @@ -1221,13 +1231,13 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/
                • Support for grouping variables, test with:

                  -
                  septic_patients %>%
                  +
                  septic_patients %>%
                     group_by(hospital_id) %>%
                     freq(gender)
                • Support for (un)selecting columns:

                  -
                  septic_patients %>%
                  +
                  septic_patients %>%
                     freq(hospital_id) %>%
                     select(-count, -cum_count) # only get item, percent, cum_percent
                • @@ -1305,7 +1315,7 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/

                They also come with support for German, Dutch, French, Italian, Spanish and Portuguese:

                -
                mo_gramstain("E. coli")
                +
                mo_gramstain("E. coli")
                 # [1] "Gram negative"
                 mo_gramstain("E. coli", language = "de") # German
                 # [1] "Gramnegativ"
                @@ -1314,7 +1324,7 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/
                 mo_fullname("S. group A", language = "pt") # Portuguese
                 # [1] "Streptococcus grupo A"

                Furthermore, former taxonomic names will give a note about the current taxonomic name:

                -
                mo_gramstain("Esc blattae")
                +
                mo_gramstain("Esc blattae")
                 # Note: 'Escherichia blattae' (Burgess et al., 1973) was renamed 'Shimwellia blattae' (Priest and Barker, 2010)
                 # [1] "Gram negative"
              • @@ -1327,14 +1337,14 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/
              • Function is.rsi.eligible to check for columns that have valid antimicrobial results, but do not have the rsi class yet. Transform the columns of your raw data with: data %>% mutate_if(is.rsi.eligible, as.rsi)

              • Functions as.mo and is.mo as replacements for as.bactid and is.bactid (since the microorganisms data set not only contains bacteria). These last two functions are deprecated and will be removed in a future release. The as.mo function determines microbial IDs using intelligent rules:

                -
                as.mo("E. coli")
                +
                as.mo("E. coli")
                 # [1] B_ESCHR_COL
                 as.mo("MRSA")
                 # [1] B_STPHY_AUR
                 as.mo("S group A")
                 # [1] B_STRPTC_GRA

                And with great speed too - on a quite regular Linux server from 2007 it takes us less than 0.02 seconds to transform 25,000 items:

                -
                thousands_of_E_colis <- rep("E. coli", 25000)
                +
                thousands_of_E_colis <- rep("E. coli", 25000)
                 microbenchmark::microbenchmark(as.mo(thousands_of_E_colis), unit = "s")
                 # Unit: seconds
                 #         min       median         max  neval
                @@ -1366,7 +1376,7 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/
                 
              • Added three antimicrobial agents to the antibiotics data set: Terbinafine (D01BA02), Rifaximin (A07AA11) and Isoconazole (D01AC05)

              • Added 163 trade names to the antibiotics data set, it now contains 298 different trade names in total, e.g.:

                -
                ab_official("Bactroban")
                +
                ab_official("Bactroban")
                 # [1] "Mupirocin"
                 ab_name(c("Bactroban", "Amoxil", "Zithromax", "Floxapen"))
                 # [1] "Mupirocin" "Amoxicillin" "Azithromycin" "Flucloxacillin"
                @@ -1381,7 +1391,7 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/
                 
              • Added parameters minimum and as_percent to portion_df

              • Support for quasiquotation in the functions series count_* and portions_*, and n_rsi. This allows to check for more than 2 vectors or columns.

                -
                septic_patients %>% select(amox, cipr) %>% count_IR()
                +
                septic_patients %>% select(amox, cipr) %>% count_IR()
                 # which is the same as:
                 septic_patients %>% count_IR(amox, cipr)
                 
                @@ -1399,10 +1409,10 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/
                 
              • Added longest and shortest character length in the frequency table (freq) header of class character

              • Support for types (classes) list and matrix for freq

                -
                my_matrix = with(septic_patients, matrix(c(age, gender), ncol = 2))
                +
                my_matrix = with(septic_patients, matrix(c(age, gender), ncol = 2))
                 freq(my_matrix)

                For lists, subsetting is possible:

                -
                my_list = list(age = septic_patients$age, gender = septic_patients$gender)
                +
                my_list = list(age = septic_patients$age, gender = septic_patients$gender)
                 my_list %>% freq(age)
                 my_list %>% freq(gender)
              • diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index 31d91fe1..bce17854 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -10,7 +10,7 @@ articles: WHONET: WHONET.html benchmarks: benchmarks.html resistance_predict: resistance_predict.html -last_built: 2020-06-22T11:18Z +last_built: 2020-06-25T15:34Z urls: reference: https://msberends.gitlab.io/AMR/reference article: https://msberends.gitlab.io/AMR/articles diff --git a/docs/reference/ab_from_text.html b/docs/reference/ab_from_text.html new file mode 100644 index 00000000..6cac6cf6 --- /dev/null +++ b/docs/reference/ab_from_text.html @@ -0,0 +1,305 @@ + + + + + + + + +Retrieve antimicrobial drugs from text — ab_from_text • AMR (for R) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
                +
                + + + + +
                + +
                +
                + + +
                +

                Use this function on e.g. clinical texts from health care records. It returns a vector of antimicrobial drugs found in the texts.

                +
                + +
                ab_from_text(text, collapse = NULL, translate_ab = "name", ...)
                + +

                Arguments

                + + + + + + + + + + + + + + + + + + +
                text

                text to analyse

                collapse

                character to pass on to paste(..., collapse = ...) to only return one character per element of text, see Examples

                translate_ab

                a column name of the antibiotics data set to translate the antibiotic abbreviations to, using ab_property(). Defaults to "name", which is equal to using TRUE. Use a value FALSE, NULL or NA to prevent translation of the <ab> code.

                ...

                parameters passed on to as.ab()

                + +

                Details

                + +

                To use this for creating a new variable in a data set (e.g. with mutate()), it could be convenient to paste the outcome together with the collapse parameter so every value in your new variable will be a character of length 1:
                +df %>% mutate(abx = ab_from_text(clinical_text, collapse = "|"))

                +

                This function is also internally used by as.ab(), although it then only returns the first hit.

                + +

                Examples

                +
                # mind the bad spelling of amoxicillin in this line, 
                +# straight from a true health care record:
                +ab_from_text("28/03/2020 regular amoxicilliin 500mg po tds")
                +
                +ab_from_text("administered amoxi/clav and cipro")
                +ab_from_text("administered amoxi/clav and cipro", collapse = ", ")
                +
                +# if you want to know which antibiotic groups were administered, check it:
                +abx <- ab_from_text("administered amoxi/clav and cipro")
                +ab_group(abx)
                +
                + +
                + + + +
                + + + + + + + + diff --git a/docs/reference/as.ab.html b/docs/reference/as.ab.html index 566ceae5..31309376 100644 --- a/docs/reference/as.ab.html +++ b/docs/reference/as.ab.html @@ -82,7 +82,7 @@ AMR (for R) - 1.2.0 + 1.2.0.9014
                @@ -288,7 +288,12 @@ This package contains all ~550 antibiotic, antimycotic and antiviral dru

                On our website https://msberends.gitlab.io/AMR you can find a comprehensive tutorial about how to conduct AMR analysis, the complete documentation of all functions (which reads a lot easier than here in R) and an example analysis using WHONET data.

                See also

                -

                antibiotics for the dataframe that is being used to determine ATCs.

                +
                +
                  +
                • antibiotics for the dataframe that is being used to determine ATCs

                • +
                • ab_from_text() for a function to retrieve antimicrobial drugs from clinical text (from health care records)

                • +
                +

                Examples

                # these examples all return "ERY", the ID of erythromycin:
                diff --git a/docs/reference/as.rsi.html b/docs/reference/as.rsi.html
                index 18b4879e..87c8ae26 100644
                --- a/docs/reference/as.rsi.html
                +++ b/docs/reference/as.rsi.html
                @@ -82,7 +82,7 @@
                       
                       
                         AMR (for R)
                -        1.2.0.9013
                +        1.2.0.9014
                       
                     
                diff --git a/docs/reference/count.html b/docs/reference/count.html index c2ddf447..b077baca 100644 --- a/docs/reference/count.html +++ b/docs/reference/count.html @@ -83,7 +83,7 @@ count_resistant() should be used to count resistant isolates, count_susceptible( AMR (for R) - 1.2.0 + 1.2.0.9014
              • @@ -280,7 +280,7 @@ count_resistant() should be used to count resistant isolates, count_susceptible( translate_ab -

                a column name of the antibiotics data set to translate the antibiotic abbreviations to, using ab_property()

                +

                a column name of the antibiotics data set to translate the antibiotic abbreviations to, using ab_property(). Use a value FALSE, NULL or NA to prevent translation of the <ab> code.

                language @@ -400,7 +400,7 @@ A microorganism is categorised as Susceptible, Increased exposure when S = count_S(CIP), n1 = count_all(CIP), # the actual total; sum of all three n2 = n_rsi(CIP), # same - analogous to n_distinct - total = n()) # NOT the number of tested isolates! + total = n()) # NOT the number of tested isolates! # Count co-resistance between amoxicillin/clav acid and gentamicin, # so we can see that combination therapy does a lot more than mono therapy. diff --git a/docs/reference/ggplot_rsi.html b/docs/reference/ggplot_rsi.html index e060340d..20599a41 100644 --- a/docs/reference/ggplot_rsi.html +++ b/docs/reference/ggplot_rsi.html @@ -82,7 +82,7 @@ AMR (for R) - 1.2.0 + 1.2.0.9014
                @@ -326,7 +326,7 @@ translate_ab -

                a column name of the antibiotics data set to translate the antibiotic abbreviations to, using ab_property()

                +

                a column name of the antibiotics data set to translate the antibiotic abbreviations to, using ab_property(). Use a value FALSE, NULL or NA to prevent translation of the <ab> code.

                combine_SI diff --git a/docs/reference/index.html b/docs/reference/index.html index fb04aa9c..6f6480ea 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -81,7 +81,7 @@ AMR (for R) - 1.2.0.9013 + 1.2.0.9014
              • diff --git a/docs/reference/mo_property.html b/docs/reference/mo_property.html index 53d40444..7e9d3b80 100644 --- a/docs/reference/mo_property.html +++ b/docs/reference/mo_property.html @@ -82,7 +82,7 @@ AMR (for R) - 1.2.0.9011 + 1.2.0.9014
                diff --git a/docs/reference/proportion.html b/docs/reference/proportion.html index 9bd826a5..79f5d734 100644 --- a/docs/reference/proportion.html +++ b/docs/reference/proportion.html @@ -83,7 +83,7 @@ resistance() should be used to calculate resistance, susceptibility() should be AMR (for R) - 1.2.0.9011 + 1.2.0.9014
                @@ -296,7 +296,7 @@ resistance() should be used to calculate resistance, susceptibility() should be translate_ab -

                a column name of the antibiotics data set to translate the antibiotic abbreviations to, using ab_property()

                +

                a column name of the antibiotics data set to translate the antibiotic abbreviations to, using ab_property(). Use a value FALSE, NULL or NA to prevent translation of the <ab> code.

                language diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 320f21f0..5dd3ef71 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -15,6 +15,9 @@ https://msberends.gitlab.io/AMR/reference/WHONET.html + + https://msberends.gitlab.io/AMR/reference/ab_from_text.html + https://msberends.gitlab.io/AMR/reference/ab_property.html diff --git a/man/ab_from_text.Rd b/man/ab_from_text.Rd new file mode 100644 index 00000000..be1bdcb8 --- /dev/null +++ b/man/ab_from_text.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ab_from_text.R +\name{ab_from_text} +\alias{ab_from_text} +\title{Retrieve antimicrobial drugs from text} +\usage{ +ab_from_text(text, collapse = NULL, translate_ab = "name", ...) +} +\arguments{ +\item{text}{text to analyse} + +\item{collapse}{character to pass on to \code{paste(..., collapse = ...)} to only return one character per element of \code{text}, see Examples} + +\item{translate_ab}{a column name of the \link{antibiotics} data set to translate the antibiotic abbreviations to, using \code{\link[=ab_property]{ab_property()}}. Defaults to "name", which is equal to using \code{TRUE}. Use a value \code{FALSE}, \code{NULL} or \code{NA} to prevent translation of the \verb{} code.} + +\item{...}{parameters passed on to \code{\link[=as.ab]{as.ab()}}} +} +\description{ +Use this function on e.g. clinical texts from health care records. It returns a vector of antimicrobial drugs found in the texts. +} +\details{ +To use this for creating a new variable in a data set (e.g. with \code{mutate()}), it could be convenient to paste the outcome together with the \code{collapse} parameter so every value in your new variable will be a character of length 1:\cr +\code{df \%>\% mutate(abx = ab_from_text(clinical_text, collapse = "|"))} + +This function is also internally used by \code{\link[=as.ab]{as.ab()}}, although it then only returns the first hit. 
+} +\examples{ +# mind the bad spelling of amoxicillin in this line, +# straight from a true health care record: +ab_from_text("28/03/2020 regular amoxicilliin 500mg po tds") + +ab_from_text("administered amoxi/clav and cipro") +ab_from_text("administered amoxi/clav and cipro", collapse = ", ") + +# if you want to know which antibiotic groups were administered, check it: +abx <- ab_from_text("administered amoxi/clav and cipro") +ab_group(abx) +} diff --git a/man/as.ab.Rd b/man/as.ab.Rd index fc3d097a..e2a9d9d3 100644 --- a/man/as.ab.Rd +++ b/man/as.ab.Rd @@ -83,5 +83,8 @@ ab_name("J01FA01") # "Erythromycin" ab_name("eryt") # "Erythromycin" } \seealso{ -\link{antibiotics} for the dataframe that is being used to determine ATCs. +\itemize{ +\item \link{antibiotics} for the dataframe that is being used to determine ATCs +\item \code{\link[=ab_from_text]{ab_from_text()}} for a function to retrieve antimicrobial drugs from clinical text (from health care records) +} } diff --git a/man/count.Rd b/man/count.Rd index 74e7a636..e772a1c7 100644 --- a/man/count.Rd +++ b/man/count.Rd @@ -47,7 +47,7 @@ count_df( \item{data}{a \code{\link{data.frame}} containing columns with class \code{\link{rsi}} (see \code{\link[=as.rsi]{as.rsi()}})} -\item{translate_ab}{a column name of the \link{antibiotics} data set to translate the antibiotic abbreviations to, using \code{\link[=ab_property]{ab_property()}}} +\item{translate_ab}{a column name of the \link{antibiotics} data set to translate the antibiotic abbreviations to, using \code{\link[=ab_property]{ab_property()}}. Use a value} \item{language}{language of the returned text, defaults to system language (see \code{\link[=get_locale]{get_locale()}}) and can also be set with \code{getOption("AMR_locale")}. 
Use \code{language = NULL} or \code{language = ""} to prevent translation.} diff --git a/man/ggplot_rsi.Rd b/man/ggplot_rsi.Rd index 392861c2..2116a72f 100644 --- a/man/ggplot_rsi.Rd +++ b/man/ggplot_rsi.Rd @@ -83,7 +83,7 @@ labels_rsi_count( \item{limits}{numeric vector of length two providing limits of the scale, use \code{NA} to refer to the existing minimum or maximum} -\item{translate_ab}{a column name of the \link{antibiotics} data set to translate the antibiotic abbreviations to, using \code{\link[=ab_property]{ab_property()}}} +\item{translate_ab}{a column name of the \link{antibiotics} data set to translate the antibiotic abbreviations to, using \code{\link[=ab_property]{ab_property()}}. Use a value} \item{combine_SI}{a logical to indicate whether all values of S and I must be merged into one, so the output only consists of S+I vs. R (susceptible vs. resistant). This used to be the parameter \code{combine_IR}, but this now follows the redefinition by EUCAST about the interpretion of I (increased exposure) in 2019, see section 'Interpretation of S, I and R' below. Default is \code{TRUE}.} diff --git a/man/proportion.Rd b/man/proportion.Rd index bd7be656..22ea1a55 100644 --- a/man/proportion.Rd +++ b/man/proportion.Rd @@ -62,7 +62,7 @@ rsi_df( \item{data}{a \code{\link{data.frame}} containing columns with class \code{\link{rsi}} (see \code{\link[=as.rsi]{as.rsi()}})} -\item{translate_ab}{a column name of the \link{antibiotics} data set to translate the antibiotic abbreviations to, using \code{\link[=ab_property]{ab_property()}}} +\item{translate_ab}{a column name of the \link{antibiotics} data set to translate the antibiotic abbreviations to, using \code{\link[=ab_property]{ab_property()}}. Use a value} \item{language}{language of the returned text, defaults to system language (see \code{\link[=get_locale]{get_locale()}}) and can also be set with \code{getOption("AMR_locale")}. 
Use \code{language = NULL} or \code{language = ""} to prevent translation.} diff --git a/tests/testthat/test-ab_from_text.R b/tests/testthat/test-ab_from_text.R new file mode 100644 index 00000000..ce9be46e --- /dev/null +++ b/tests/testthat/test-ab_from_text.R @@ -0,0 +1,32 @@ +# ==================================================================== # +# TITLE # +# Antimicrobial Resistance (AMR) Analysis # +# # +# SOURCE # +# https://gitlab.com/msberends/AMR # +# # +# LICENCE # +# (c) 2018-2020 Berends MS, Luz CF et al. # +# # +# This R package is free software; you can freely use and distribute # +# it for both personal and commercial purposes under the terms of the # +# GNU General Public License version 2.0 (GNU GPL-2), as published by # +# the Free Software Foundation. # +# # +# We created this package for both routine data analysis and academic # +# research and it was publicly released in the hope that it will be # +# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. # +# Visit our website for more info: https://msberends.gitlab.io/AMR. # +# ==================================================================== # + +context("ab_from_text.R") + +test_that("ab_from_text works", { + + expect_identical(ab_from_text("28/03/2020 regular amoxicilliin 500mg po tds"), + "Amoxicillin") + expect_identical(ab_from_text("28/03/2020 regular amoxicilliin 500mg po tds", translate_ab = FALSE), + as.ab("AMX")) + expect_identical(ab_from_text("administered amoxi/clav and cipro", collapse = ", "), + "Amoxicillin, Ciprofloxacin") +})