diff --git a/DESCRIPTION b/DESCRIPTION index bb38c078..f67ad6ce 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: AMR -Version: 1.2.0.9016 +Version: 1.2.0.9017 Date: 2020-06-26 Title: Antimicrobial Resistance Analysis Authors@R: c( diff --git a/NEWS.md b/NEWS.md index 3094fc49..ece152fd 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,11 +1,18 @@ -# AMR 1.2.0.9016 +# AMR 1.2.0.9017 ## Last updated: 26-Jun-2020 ### New * Function `ab_from_text()` to retrieve antimicrobial drugs from clinical texts in e.g. health care records, which also corrects for misspelling since it uses `as.ab()` internally: ```r - ab_from_text("28/03/2020 regular amoxiciliin 500mg po tds") - #> [1] "Amoxicillin" + ab_from_text(c("28/03/2020 regular amoxiciliin 500mg po tds", + "15/04/2020 started on ciprofloxi-thingy and tobra today")) + #> [[1]] + #> Class + #> [1] AMX + #> + #> [[2]] + #> Class + #> [1] CIP TOB ``` * [Tidyverse selections](https://tidyselect.r-lib.org/reference/language.html) for antibiotic classes, that help to select the columns of antibiotics that are of a specific antibiotic class, without the need to define the columns or antibiotic abbreviations. 
They can be used in any function that allows Tidyverse selections, like `dplyr::select()` and `tidyr::pivot_longer()`: ```r @@ -29,6 +36,7 @@ * Added function `filter_penicillins()` to filter isolates on a specific result in any column with a name in the antimicrobial 'penicillins' class (more specific: ATC subgroup *Beta-lactam antibacterials, penicillins*) * Added official antimicrobial names to all `filter_ab_class()` functions, such as `filter_aminoglycosides()` * Added antibiotics code "FOX1" for cefoxitin screening (abbreviation "cfsc") to the `antibiotics` data set +* Added Monuril as trade name for fosfomycin ### Changed * Using unexisting columns in all `count_*()`, `proportion_*()`, `susceptibility()` and `resistance()` functions wil now return an error instead of dropping them silently @@ -40,8 +48,8 @@ * Fixed a bug in `bug_drug_combinations()` for when only one antibiotic was in the input data * Changed the summary for class ``, to highlight the %SI vs. %R * Improved error handling, giving more useful info when functions return an error -* Algorithm improvements to `as.ab()`, many more misspellings are now translatable -* Added Monuril as trade name for fosfomycin +* Algorithm improvements to `as.ab()`, many more misspellings are now translatable. The `as.ab()` function will now throw a note if more than 1 antimicrobial drug could be retrieved from a single input value. +* Added progress bar to `as.ab()` # AMR 1.2.0 diff --git a/R/ab.R b/R/ab.R index c1efbcc0..e36c2a0e 100755 --- a/R/ab.R +++ b/R/ab.R @@ -24,6 +24,7 @@ #' Use this function to determine the antibiotic code of one or more antibiotics. The data set [antibiotics] will be searched for abbreviations, official names and synonyms (brand names). 
#' @inheritSection lifecycle Maturing lifecycle #' @param x character vector to determine to antibiotic ID +#' @param flag_multiple_results logical to indicate whether a note should be printed to the console that probably more than one antibiotic code or name can be retrieved from a single input value. #' @param ... arguments passed on to internal functions #' @rdname as.ab #' @inheritSection WHOCC WHOCC @@ -67,7 +68,7 @@ #' # they use as.ab() internally: #' ab_name("J01FA01") # "Erythromycin" #' ab_name("eryt") # "Erythromycin" -as.ab <- function(x, ...) { +as.ab <- function(x, flag_multiple_results = TRUE, ...) { check_dataset_integrity() @@ -75,7 +76,7 @@ as.ab <- function(x, ...) { return(x) } - initial <- is.null(list(...)$initial) + initial_search <- is.null(list(...)$initial_search) already_regex <- isTRUE(list(...)$already_regex) if (all(toupper(x) %in% antibiotics$ab)) { @@ -114,7 +115,24 @@ as.ab <- function(x, ...) { x_new <- rep(NA_character_, length(x)) x_unknown <- character(0) + note_if_more_than_one_found <- function(found, index, from_text) { + if (initial_search == TRUE & isTRUE(length(from_text) > 1)) { + message(font_blue(paste0("NOTE: more than one result was found for item ", index, ": ", + paste0(ab_name(from_text, tolower = TRUE, initial_search = FALSE), collapse = ", ")))) + } + found[1L] + } + + if (initial_search == TRUE) { + progress <- progress_estimated(n = length(x), n_min = 25) # start if n >= 25 + on.exit(close(progress)) + } + for (i in seq_len(length(x))) { + if (initial_search == TRUE) { + progress$tick() + } + if (is.na(x[i]) | is.null(x[i])) { next } @@ -127,31 +145,37 @@ as.ab <- function(x, ...) 
{ next } + if (isTRUE(flag_multiple_results) & x[i] %like% "[ ]") { + from_text <- suppressWarnings(ab_from_text(x[i], initial_search = FALSE, translate_ab = FALSE)) + } else { + from_text <- character(0) + } + # exact AB code found <- antibiotics[which(antibiotics$ab == x[i]), ]$ab if (length(found) > 0) { - x_new[i] <- found[1L] + x_new[i] <- note_if_more_than_one_found(found, i, from_text) next } # exact ATC code found <- antibiotics[which(antibiotics$atc == x[i]), ]$ab if (length(found) > 0) { - x_new[i] <- found[1L] + x_new[i] <- note_if_more_than_one_found(found, i, from_text) next } # exact CID code found <- antibiotics[which(antibiotics$cid == x[i]), ]$ab if (length(found) > 0) { - x_new[i] <- found[1L] + x_new[i] <- note_if_more_than_one_found(found, i, from_text) next } # exact name found <- antibiotics[which(toupper(antibiotics$name) == x[i]), ]$ab if (length(found) > 0) { - x_new[i] <- found[1L] + x_new[i] <- note_if_more_than_one_found(found, i, from_text) next } @@ -160,7 +184,7 @@ as.ab <- function(x, ...) { function(s) x[i] %in% s)) found <- antibiotics$ab[loinc_found == TRUE] if (length(found) > 0) { - x_new[i] <- found[1L] + x_new[i] <- note_if_more_than_one_found(found, i, from_text) next } @@ -169,7 +193,7 @@ as.ab <- function(x, ...) { function(s) x[i] %in% toupper(s))) found <- antibiotics$ab[synonym_found == TRUE] if (length(found) > 0) { - x_new[i] <- found[1L] + x_new[i] <- note_if_more_than_one_found(found, i, from_text) next } @@ -178,7 +202,7 @@ as.ab <- function(x, ...) { function(a) x[i] %in% toupper(a))) found <- antibiotics$ab[abbr_found == TRUE] if (length(found) > 0) { - x_new[i] <- found[1L] + x_new[i] <- note_if_more_than_one_found(found, i, from_text) next } @@ -213,13 +237,13 @@ as.ab <- function(x, ...) 
{ # try if name starts with it found <- antibiotics[which(antibiotics$name %like% paste0("^", x_spelling)), ]$ab if (length(found) > 0) { - x_new[i] <- found[1L] + x_new[i] <- note_if_more_than_one_found(found, i, from_text) next } # try if name ends with it found <- antibiotics[which(antibiotics$name %like% paste0(x_spelling, "$")), ]$ab if (nchar(x[i]) >= 4 & length(found) > 0) { - x_new[i] <- found[1L] + x_new[i] <- note_if_more_than_one_found(found, i, from_text) next } # and try if any synonym starts with it @@ -227,29 +251,29 @@ as.ab <- function(x, ...) { function(s) any(s %like% paste0("^", x_spelling)))) found <- antibiotics$ab[synonym_found == TRUE] if (length(found) > 0) { - x_new[i] <- found[1L] + x_new[i] <- note_if_more_than_one_found(found, i, from_text) next } - # INITIAL - More uncertain results ---- + # INITIAL SEARCH - More uncertain results ---- - if (initial == TRUE) { + if (initial_search == TRUE) { # only run on first try # try by removing all spaces if (x[i] %like% " ") { - found <- suppressWarnings(as.ab(gsub(" +", "", x[i]), initial = FALSE)) + found <- suppressWarnings(as.ab(gsub(" +", "", x[i]), initial_search = FALSE)) if (length(found) > 0 & !is.na(found)) { - x_new[i] <- found[1L] + x_new[i] <- note_if_more_than_one_found(found, i, from_text) next } } # try by removing all spaces and numbers if (x[i] %like% " " | x[i] %like% "[0-9]") { - found <- suppressWarnings(as.ab(gsub("[ 0-9]", "", x[i]), initial = FALSE)) + found <- suppressWarnings(as.ab(gsub("[ 0-9]", "", x[i]), initial_search = FALSE)) if (length(found) > 0 & !is.na(found)) { - x_new[i] <- found[1L] + x_new[i] <- note_if_more_than_one_found(found, i, from_text) next } } @@ -266,7 +290,7 @@ as.ab <- function(x, ...) 
{ y })[[1]], collapse = "/") - x_translated_guess <- suppressWarnings(as.ab(x_translated, initial = FALSE)) + x_translated_guess <- suppressWarnings(as.ab(x_translated, initial_search = FALSE)) if (!is.na(x_translated_guess)) { x_new[i] <- x_translated_guess next @@ -276,7 +300,7 @@ as.ab <- function(x, ...) { x_translated <- paste(lapply(strsplit(x_translated, "[^A-Z0-9 ]"), function(y) { for (i in seq_len(length(y))) { - y_name <- suppressWarnings(ab_name(y[i], language = NULL, initial = FALSE)) + y_name <- suppressWarnings(ab_name(y[i], language = NULL, initial_search = FALSE)) y[i] <- ifelse(!is.na(y_name), y_name, y[i]) @@ -284,7 +308,7 @@ as.ab <- function(x, ...) { y })[[1]], collapse = "/") - x_translated_guess <- suppressWarnings(as.ab(x_translated, initial = FALSE)) + x_translated_guess <- suppressWarnings(as.ab(x_translated, initial_search = FALSE)) if (!is.na(x_translated_guess)) { x_new[i] <- x_translated_guess next @@ -292,60 +316,65 @@ as.ab <- function(x, ...) { # try by removing all trailing capitals if (x[i] %like_case% "[a-z]+[A-Z]+$") { - found <- suppressWarnings(as.ab(gsub("[A-Z]+$", "", x[i]), initial = FALSE)) + found <- suppressWarnings(as.ab(gsub("[A-Z]+$", "", x[i]), initial_search = FALSE)) if (!is.na(found)) { - x_new[i] <- found[1L] + x_new[i] <- note_if_more_than_one_found(found, i, from_text) next } } # keep only letters - found <- suppressWarnings(as.ab(gsub("[^A-Z]", "", x[i]), initial = FALSE)) + found <- suppressWarnings(as.ab(gsub("[^A-Z]", "", x[i]), initial_search = FALSE)) if (!is.na(found)) { - x_new[i] <- found[1L] + x_new[i] <- note_if_more_than_one_found(found, i, from_text) next } # try from a bigger text, like from a health care record, see ?ab_from_text - found <- suppressWarnings(ab_from_text(x[i], initial = FALSE, translate_ab = FALSE)[1L]) + # already calculated above if flag_multiple_results = TRUE + if (isTRUE(flag_multiple_results)) { + found <- from_text[1L] + } else { + found <- 
suppressWarnings(ab_from_text(x[i], initial_search = FALSE, translate_ab = FALSE)[1L]) + } if (!is.na(found)) { - x_new[i] <- found[1L] + x_new[i] <- note_if_more_than_one_found(found, i, from_text) next } # first 5 except for cephalosporins, then first 7 (those cephalosporins all start quite the same!) - found <- suppressWarnings(as.ab(substr(x[i], 1, 5), initial = FALSE)) - if (!is.na(found) && !ab_group(found, initial = FALSE) %like% "cephalosporins") { - x_new[i] <- found[1L] + found <- suppressWarnings(as.ab(substr(x[i], 1, 5), initial_search = FALSE)) + if (!is.na(found) && !ab_group(found, initial_search = FALSE) %like% "cephalosporins") { + x_new[i] <- note_if_more_than_one_found(found, i, from_text) next } - found <- suppressWarnings(as.ab(substr(x[i], 1, 7), initial = FALSE)) + found <- suppressWarnings(as.ab(substr(x[i], 1, 7), initial_search = FALSE)) if (!is.na(found)) { - x_new[i] <- found[1L] + x_new[i] <- note_if_more_than_one_found(found, i, from_text) next } # make all consonants facultative search_str <- gsub("([BCDFGHJKLMNPQRSTVWXZ])", "\\1*", x[i]) - found <- suppressWarnings(as.ab(search_str, initial = FALSE, already_regex = TRUE)) + found <- suppressWarnings(as.ab(search_str, initial_search = FALSE, already_regex = TRUE)) # keep at least 4 normal characters if (nchar(gsub(".\\*", "", search_str)) < 4) { found <- NA } if (!is.na(found)) { - x_new[i] <- found[1L] + x_new[i] <- note_if_more_than_one_found(found, i, from_text) next } # make all vowels facultative search_str <- gsub("([AEIOUY])", "\\1*", x[i]) - found <- suppressWarnings(as.ab(search_str, initial = FALSE, already_regex = TRUE)) + found <- suppressWarnings(as.ab(search_str, initial_search = FALSE, already_regex = TRUE)) # keep at least 5 normal characters if (nchar(gsub(".\\*", "", search_str)) < 5) { found <- NA } if (!is.na(found)) { - x_new[i] <- found[1L] + x_new[i] <- note_if_more_than_one_found(found, i, from_text) next } @@ -355,17 +384,21 @@ as.ab <- function(x, ...) 
{ x_spelling <- gsub("I+", "[AEIOU]+", x_spelling, fixed = TRUE) x_spelling <- gsub("O+", "[AEIOU]+", x_spelling, fixed = TRUE) x_spelling <- gsub("U+", "[AEIOU]+", x_spelling, fixed = TRUE) - found <- suppressWarnings(as.ab(x_spelling, initial = FALSE, already_regex = TRUE)) + found <- suppressWarnings(as.ab(x_spelling, initial_search = FALSE, already_regex = TRUE)) if (!is.na(found)) { - x_new[i] <- found[1L] + x_new[i] <- note_if_more_than_one_found(found, i, from_text) next } - } # end of initial = TRUE + } # end of initial_search = TRUE # not found x_unknown <- c(x_unknown, x_bak[x[i] == x_bak_clean][1]) } + + if (initial_search == TRUE) { + close(progress) + } # take failed ATC codes apart from rest x_unknown_ATCs <- x_unknown[x_unknown %like% "[A-Z][0-9][0-9][A-Z][A-Z][0-9][0-9]"] diff --git a/R/ab_from_text.R b/R/ab_from_text.R index 5d6d7275..3c8cf1d8 100644 --- a/R/ab_from_text.R +++ b/R/ab_from_text.R @@ -19,18 +19,24 @@ # Visit our website for more info: https://msberends.gitlab.io/AMR. # # ==================================================================== # -#' Retrieve antimicrobial drugs from text +#' Retrieve antimicrobial drugs from clinical text #' -#' Use this function on e.g. clinical texts from health care records. It returns a vector of antimicrobial drugs found in the texts. +#' Use this function on e.g. clinical texts from health care records. It returns a [list] with all antimicrobial drugs found in the texts. #' @param text text to analyse #' @param collapse character to pass on to `paste(..., collapse = ...)` to only return one character per element of `text`, see Examples -#' @param translate_ab a column name of the [antibiotics] data set to translate the antibiotic abbreviations to, using [ab_property()]. Defaults to "name", which is equal to using `TRUE`. Use a value `FALSE`, `NULL` or `NA` to prevent translation of the `` code. 
+#' @param translate_ab a column name of the [antibiotics] data set to translate the antibiotic abbreviations to, using [ab_property()]. Defaults to `FALSE`. Using `TRUE` is equal to using "name". #' @param ... parameters passed on to [as.ab()] -#' @details To use this for creating a new variable in a data set (e.g. with `mutate()`), it could be convenient to paste the outcome together with the `collapse` parameter so every value in your new variable will be a character of length 1:\cr +#' @details Without using `collapse`, this function will return a [list]. This can be convenient to use e.g. inside a `mutate()`):\cr +#' `df %>% mutate(abx = ab_from_text(clinical_text))` +#' +#' The returned AB codes can be transformed to official names, groups, etc. with all [ab_property()] functions like [ab_name()] and [ab_group()], or by using the `translate_ab` parameter. +#' +#' With using `collapse`, this function will return a [character]:\cr #' `df %>% mutate(abx = ab_from_text(clinical_text, collapse = "|"))` #' -#' This function is also internally used by [as.ab()], although it then only returns the first hit. +#' This function is also internally used by [as.ab()], although it then only returns the first hit and will throw a note if more results could have been returned. #' @export +#' @return A [list], or a [character] if `collapse` is not `NULL` #' @examples #' # mind the bad spelling of amoxicillin in this line, #' # straight from a true health care record: @@ -41,10 +47,23 @@ #' #' # if you want to know which antibiotic groups were administered, check it: #' abx <- ab_from_text("administered amoxi/clav and cipro") -#' ab_group(abx) -ab_from_text <- function(text, collapse = NULL, translate_ab = "name", ...) 
{ +#' ab_group(abx[[1]]) +#' +#' if (require(dplyr)) { +#' tibble(clinical_text = c("given cipro and mero", +#' "started on doxy today")) %>% +#' mutate(abx = ab_from_text(clinical_text), +#' abx2 = ab_from_text(clinical_text, +#' collapse = "|"), +#' abx3 = ab_from_text(clinical_text, +#' collapse = "|", +#' translate_ab = "name")) +#' +#' } +ab_from_text <- function(text, collapse = NULL, translate_ab = FALSE, ...) { - text <- tolower(text) + text <- tolower(as.character(text)) + translate_ab <- get_translate_ab(translate_ab) abbr <- unlist(antibiotics$abbreviations) abbr <- abbr[nchar(abbr) >= 4] @@ -57,24 +76,29 @@ ab_from_text <- function(text, collapse = NULL, translate_ab = "name", ...) { ").*") } - text_split <- unlist(strsplit(text, "[ ;.,:/\\|-]")) - result <- suppressWarnings( - as.ab(unique(c(text_split[grep(to_regex(abbr), text_split)], - text_split[grep(to_regex(names), text_split)], - # regular expression must not be too long, so split synonyms in two: - text_split[grep(to_regex(synonyms[c(1:0.5 * length(synonyms))]), text_split)], - text_split[grep(to_regex(synonyms[c(0.5 * length(synonyms):length(synonyms))]), text_split)])), - ...)) - result <- result[!is.na(result)] - if (length(result) == 0) { - result <- as.ab(NA) - } - translate_ab <- get_translate_ab(translate_ab) - if (!isFALSE(translate_ab)) { - result <- ab_property(result, property = translate_ab) - } + text_split_all <- strsplit(text, "[ ;.,:/\\|-]") + result <- lapply(text_split_all, function(text_split) { + suppressWarnings( + out <- as.ab(unique(c(text_split[grep(to_regex(abbr), text_split)], + text_split[grep(to_regex(names), text_split)], + # regular expression must not be too long, so split synonyms in two: + text_split[grep(to_regex(synonyms[c(1:0.5 * length(synonyms))]), text_split)], + text_split[grep(to_regex(synonyms[c(0.5 * length(synonyms):length(synonyms))]), text_split)])), + ...)) + out <- out[!is.na(out)] + if (length(out) == 0) { + as.ab(NA) + } else { + if 
(!isFALSE(translate_ab)) { + out <- ab_property(out, property = translate_ab, initial = FALSE) + } + out + } + }) + if (!is.null(collapse)) { - result <- paste0(result, collapse = collapse) + result <- sapply(result, function(x) paste0(x, collapse = collapse)) } + result } diff --git a/R/first_isolate.R b/R/first_isolate.R index 9c2d3fa6..15351f26 100755 --- a/R/first_isolate.R +++ b/R/first_isolate.R @@ -291,7 +291,8 @@ first_isolate <- function(x, # did find some isolates - add new index numbers of rows x$newvar_row_index_sorted <- seq_len(nrow(x)) - scope.size <- row.end - row.start + 1 + scope.size <- nrow(x[which(x$newvar_row_index_sorted %in% c(row.start + 1:row.end) & + !is.na(x$newvar_mo)), , drop = FALSE]) identify_new_year <- function(x, episode_days) { # I asked on StackOverflow: @@ -390,7 +391,7 @@ first_isolate <- function(x, # handle empty microorganisms if (any(x$newvar_mo == "UNKNOWN", na.rm = TRUE) & info == TRUE) { message(font_blue(paste0("NOTE: ", ifelse(include_unknown == TRUE, "Included ", "Excluded "), - format(sum(x$newvar_mo == "UNKNOWN"), + format(sum(x$newvar_mo == "UNKNOWN", na.rm = TRUE), decimal.mark = decimal.mark, big.mark = big.mark), " isolates with a microbial ID 'UNKNOWN' (column `", font_bold(col_mo), "`)"))) } @@ -398,7 +399,7 @@ first_isolate <- function(x, # exclude all NAs if (any(is.na(x$newvar_mo)) & info == TRUE) { - message(font_blue(paste0("NOTE: Excluded ", format(sum(is.na(x$newvar_mo)), + message(font_blue(paste0("NOTE: Excluded ", format(sum(is.na(x$newvar_mo), na.rm = TRUE), decimal.mark = decimal.mark, big.mark = big.mark), " isolates with a microbial ID 'NA' (column `", font_bold(col_mo), "`)"))) } @@ -410,18 +411,18 @@ first_isolate <- function(x, if (info == TRUE) { n_found <- base::sum(x$newvar_first_isolate, na.rm = TRUE) - p_found_total <- percentage(n_found / nrow(x)) + p_found_total <- percentage(n_found / nrow(x[which(!is.na(x$newvar_mo)), , drop = FALSE])) p_found_scope <- percentage(n_found / 
scope.size) # mark up number of found n_found <- base::format(n_found, big.mark = big.mark, decimal.mark = decimal.mark) if (p_found_total != p_found_scope) { msg_txt <- paste0("=> Found ", font_bold(paste0(n_found, " first ", weighted.notice, "isolates")), - " (", p_found_scope, " within scope and ", p_found_total, " of total)") + " (", p_found_scope, " within scope and ", p_found_total, " of total where a microbial ID was available)") } else { msg_txt <- paste0("=> Found ", font_bold(paste0(n_found, " first ", weighted.notice, "isolates")), - " (", p_found_total, " of total)") + " (", p_found_total, " of total where a microbial ID was available)") } message(font_black(msg_txt)) } diff --git a/docs/404.html b/docs/404.html index eca69945..ebc09655 100644 --- a/docs/404.html +++ b/docs/404.html @@ -81,7 +81,7 @@ AMR (for R) - 1.2.0.9016 + 1.2.0.9017 diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index ad80d18f..b8368c71 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -81,7 +81,7 @@ AMR (for R) - 1.2.0.9016 + 1.2.0.9017 diff --git a/docs/articles/index.html b/docs/articles/index.html index 346ec7c4..053998cc 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -81,7 +81,7 @@ AMR (for R) - 1.2.0.9016 + 1.2.0.9017 diff --git a/docs/authors.html b/docs/authors.html index b59291f2..c3400891 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -81,7 +81,7 @@ AMR (for R) - 1.2.0.9016 + 1.2.0.9017 diff --git a/docs/index.html b/docs/index.html index 87b69ddb..bf82eace 100644 --- a/docs/index.html +++ b/docs/index.html @@ -43,7 +43,7 @@ AMR (for R) - 1.2.0.9016 + 1.2.0.9017 diff --git a/docs/news/index.html b/docs/news/index.html index 452737c1..aea5f8ad 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -81,7 +81,7 @@ AMR (for R) - 1.2.0.9016 + 1.2.0.9017 @@ -229,9 +229,9 @@ Source: NEWS.md -
-

-AMR 1.2.0.9016 Unreleased +
+

+AMR 1.2.0.9017 Unreleased

@@ -243,8 +243,15 @@
  • Function ab_from_text() to retrieve antimicrobial drugs from clinical texts in e.g. health care records, which also corrects for misspelling since it uses as.ab() internally:

    -
    ab_from_text("28/03/2020 regular amoxiciliin 500mg po tds")
    -#> [1] "Amoxicillin"
    +
    ab_from_text(c("28/03/2020 regular amoxiciliin 500mg po tds",
    +               "15/04/2020 started on ciprofloxi-thingy and tobra today"))
    +#> [[1]]
    +#> Class <ab>
    +#> [1] AMX
    +#> 
    +#> [[2]]
    +#> Class <ab>
    +#> [1] CIP TOB
  • Tidyverse selections for antibiotic classes, that help to select the columns of antibiotics that are of a specific antibiotic class, without the need to define the columns or antibiotic abbreviations. They can be used in any function that allows Tidyverse selections, like dplyr::select() and tidyr::pivot_longer():

    @@ -268,6 +275,7 @@
  • Added function filter_penicillins() to filter isolates on a specific result in any column with a name in the antimicrobial ‘penicillins’ class (more specific: ATC subgroup Beta-lactam antibacterials, penicillins)

  • Added official antimicrobial names to all filter_ab_class() functions, such as filter_aminoglycosides()

  • Added antibiotics code “FOX1” for cefoxitin screening (abbreviation “cfsc”) to the antibiotics data set

  • +
  • Added Monuril as trade name for fosfomycin

@@ -284,8 +292,9 @@
  • Fixed a bug in bug_drug_combinations() for when only one antibiotic was in the input data
  • Changed the summary for class <mo>, to highlight the %SI vs. %R
  • Improved error handling, giving more useful info when functions return an error
  • -
  • Algorithm improvements to as.ab(), many more misspellings are now translatable
  • -
  • Added Monuril as trade name for fosfomycin
  • +
  • Algorithm improvements to as.ab(), many more misspellings are now translatable. The as.ab() function will now throw a note if more than 1 antimicrobial drug could be retrieved from a single input value.
  • +
  • Added progress bar to as.ab() +
  • diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index 05ec846d..8109437b 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -10,7 +10,7 @@ articles: WHONET: WHONET.html benchmarks: benchmarks.html resistance_predict: resistance_predict.html -last_built: 2020-06-26T08:20Z +last_built: 2020-06-26T10:31Z urls: reference: https://msberends.gitlab.io/AMR/reference article: https://msberends.gitlab.io/AMR/articles diff --git a/docs/reference/ab_from_text.html b/docs/reference/ab_from_text.html index 32d59125..3a61777d 100644 --- a/docs/reference/ab_from_text.html +++ b/docs/reference/ab_from_text.html @@ -6,7 +6,7 @@ -Retrieve antimicrobial drugs from text — ab_from_text • AMR (for R) +Retrieve antimicrobial drugs from clinical text — ab_from_text • AMR (for R) @@ -48,8 +48,8 @@ - - + + @@ -82,7 +82,7 @@ AMR (for R) - 1.2.0.9016 + 1.2.0.9017

    @@ -226,16 +226,16 @@
    -

    Use this function on e.g. clinical texts from health care records. It returns a vector of antimicrobial drugs found in the texts.

    +

    Use this function on e.g. clinical texts from health care records. It returns a list with all antimicrobial drugs found in the texts.

    -
    ab_from_text(text, collapse = NULL, translate_ab = "name", ...)
    +
    ab_from_text(text, collapse = NULL, translate_ab = FALSE, ...)

    Arguments

    @@ -250,7 +250,7 @@ - + @@ -258,11 +258,17 @@
    translate_ab

    a column name of the antibiotics data set to translate the antibiotic abbreviations to, using ab_property(). Defaults to "name", which is equal to using TRUE. Use a value FALSE, NULL or NA to prevent translation of the <ab> code.

    a column name of the antibiotics data set to translate the antibiotic abbreviations to, using ab_property(). Defaults to FALSE. Using TRUE is equal to using "name".

    ...
    +

    Value

    + +

    A list, or a character if collapse is not NULL

    Details

    -

    To use this for creating a new variable in a data set (e.g. with mutate()), it could be convenient to paste the outcome together with the collapse parameter so every value in your new variable will be a character of length 1:
    +

    Without using collapse, this function will return a list. This can be convenient to use e.g. inside a mutate():
    +df %>% mutate(abx = ab_from_text(clinical_text))

    +

    The returned AB codes can be transformed to official names, groups, etc. with all ab_property() functions like ab_name() and ab_group(), or by using the translate_ab parameter.

    +

    When using collapse, this function will return a character:
    df %>% mutate(abx = ab_from_text(clinical_text, collapse = "|"))

    -

    This function is also internally used by as.ab(), although it then only returns the first hit.

    +

    This function is also internally used by as.ab(), although it then only returns the first hit and will throw a note if more results could have been returned.

    Examples

    # mind the bad spelling of amoxicillin in this line, 
    @@ -274,7 +280,19 @@
     
     # if you want to know which antibiotic groups were administered, check it:
     abx <- ab_from_text("administered amoxi/clav and cipro")
    -ab_group(abx)
    +ab_group(abx[[1]]) + +if (require(dplyr)) { + tibble(clinical_text = c("given cipro and mero", + "started on doxy today")) %>% + mutate(abx = ab_from_text(clinical_text), + abx2 = ab_from_text(clinical_text, + collapse = "|"), + abx3 = ab_from_text(clinical_text, + collapse = "|", + translate_ab = "name")) + +}
    @@ -235,7 +235,7 @@

    Use this function to determine the antibiotic code of one or more antibiotics. The data set antibiotics will be searched for abbreviations, official names and synonyms (brand names).

    -
    as.ab(x, ...)
    +    
    as.ab(x, flag_multiple_results = TRUE, ...)
     
     is.ab(x)
    @@ -246,6 +246,10 @@ x

    character vector to determine to antibiotic ID

    + + flag_multiple_results +

    logical to indicate whether a note should be printed to the console when more than one antibiotic code or name can probably be retrieved from a single input value.

    + ...

    arguments passed on to internal functions

    diff --git a/docs/reference/index.html b/docs/reference/index.html index e1d2603d..6fed9854 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -81,7 +81,7 @@ AMR (for R) - 1.2.0.9016 + 1.2.0.9017 @@ -289,7 +289,7 @@

    ab_from_text()

    -

    Retrieve antimicrobial drugs from text

    +

    Retrieve antimicrobial drugs from clinical text

    diff --git a/man/ab_from_text.Rd b/man/ab_from_text.Rd index be1bdcb8..78ae6120 100644 --- a/man/ab_from_text.Rd +++ b/man/ab_from_text.Rd @@ -2,27 +2,35 @@ % Please edit documentation in R/ab_from_text.R \name{ab_from_text} \alias{ab_from_text} -\title{Retrieve antimicrobial drugs from text} +\title{Retrieve antimicrobial drugs from clinical text} \usage{ -ab_from_text(text, collapse = NULL, translate_ab = "name", ...) +ab_from_text(text, collapse = NULL, translate_ab = FALSE, ...) } \arguments{ \item{text}{text to analyse} \item{collapse}{character to pass on to \code{paste(..., collapse = ...)} to only return one character per element of \code{text}, see Examples} -\item{translate_ab}{a column name of the \link{antibiotics} data set to translate the antibiotic abbreviations to, using \code{\link[=ab_property]{ab_property()}}. Defaults to "name", which is equal to using \code{TRUE}. Use a value \code{FALSE}, \code{NULL} or \code{NA} to prevent translation of the \verb{} code.} +\item{translate_ab}{a column name of the \link{antibiotics} data set to translate the antibiotic abbreviations to, using \code{\link[=ab_property]{ab_property()}}. Defaults to \code{FALSE}. Using \code{TRUE} is equal to using "name".} \item{...}{parameters passed on to \code{\link[=as.ab]{as.ab()}}} } +\value{ +A \link{list}, or a \link{character} if \code{collapse} is not \code{NULL} +} \description{ -Use this function on e.g. clinical texts from health care records. It returns a vector of antimicrobial drugs found in the texts. +Use this function on e.g. clinical texts from health care records. It returns a \link{list} with all antimicrobial drugs found in the texts. } \details{ -To use this for creating a new variable in a data set (e.g. 
with \code{mutate()}), it could be convenient to paste the outcome together with the \code{collapse} parameter so every value in your new variable will be a character of length 1:\cr +Without using \code{collapse}, this function will return a \link{list}. This can be convenient to use e.g. inside a \code{mutate()}):\cr +\code{df \%>\% mutate(abx = ab_from_text(clinical_text))} + +The returned AB codes can be transformed to official names, groups, etc. with all \code{\link[=ab_property]{ab_property()}} functions like \code{\link[=ab_name]{ab_name()}} and \code{\link[=ab_group]{ab_group()}}, or by using the \code{translate_ab} parameter. + +With using \code{collapse}, this function will return a \link{character}:\cr \code{df \%>\% mutate(abx = ab_from_text(clinical_text, collapse = "|"))} -This function is also internally used by \code{\link[=as.ab]{as.ab()}}, although it then only returns the first hit. +This function is also internally used by \code{\link[=as.ab]{as.ab()}}, although it then only returns the first hit and will throw a note if more results could have been returned. } \examples{ # mind the bad spelling of amoxicillin in this line, @@ -34,5 +42,17 @@ ab_from_text("administered amoxi/clav and cipro", collapse = ", ") # if you want to know which antibiotic groups were administered, check it: abx <- ab_from_text("administered amoxi/clav and cipro") -ab_group(abx) +ab_group(abx[[1]]) + +if (require(dplyr)) { + tibble(clinical_text = c("given cipro and mero", + "started on doxy today")) \%>\% + mutate(abx = ab_from_text(clinical_text), + abx2 = ab_from_text(clinical_text, + collapse = "|"), + abx3 = ab_from_text(clinical_text, + collapse = "|", + translate_ab = "name")) + +} } diff --git a/man/as.ab.Rd b/man/as.ab.Rd index e2a9d9d3..6f464bb9 100644 --- a/man/as.ab.Rd +++ b/man/as.ab.Rd @@ -6,13 +6,15 @@ \alias{is.ab} \title{Transform to antibiotic ID} \usage{ -as.ab(x, ...) +as.ab(x, flag_multiple_results = TRUE, ...) 
is.ab(x) } \arguments{ \item{x}{character vector to determine to antibiotic ID} +\item{flag_multiple_results}{logical to indicate whether a note should be printed to the console that probably more than one antibiotic code or name can be retrieved from a single input value.} + \item{...}{arguments passed on to internal functions} } \value{ diff --git a/tests/testthat/test-ab.R b/tests/testthat/test-ab.R index 14b9bb09..05000130 100755 --- a/tests/testthat/test-ab.R +++ b/tests/testthat/test-ab.R @@ -54,6 +54,8 @@ test_that("as.ab works", { expect_equal(as.character(as.ab("Amoxy + clavulaanzuur")), "AMC") + + expect_message(as.ab("cipro mero")) # assigning and subsetting x <- antibiotics$ab diff --git a/tests/testthat/test-ab_from_text.R b/tests/testthat/test-ab_from_text.R index ce9be46e..a5a52548 100644 --- a/tests/testthat/test-ab_from_text.R +++ b/tests/testthat/test-ab_from_text.R @@ -23,10 +23,10 @@ context("ab_from_text.R") test_that("ab_from_text works", { - expect_identical(ab_from_text("28/03/2020 regular amoxicilliin 500mg po tds"), + expect_identical(ab_from_text("28/03/2020 regular amoxicilliin 500mg po tds")[[1]], + as.ab("Amoxicillin")) + expect_identical(ab_from_text("28/03/2020 regular amoxicilliin 500mg po tds", translate_ab = TRUE)[[1]], "Amoxicillin") - expect_identical(ab_from_text("28/03/2020 regular amoxicilliin 500mg po tds", translate_ab = FALSE), - as.ab("AMX")) - expect_identical(ab_from_text("administered amoxi/clav and cipro", collapse = ", "), - "Amoxicillin, Ciprofloxacin") + expect_identical(ab_from_text("administered amoxi/clav and cipro", collapse = ", ")[[1]], + "AMX, CIP") })