(v1.2.0.9017) ab_from_text() improvement

2025-07-10 05:41:59 +02:00 · 2020-06-26 12:31:27 +02:00
parent b31003c0b6
commit 4f6f056077
19 changed files with 239 additions and 118 deletions
--- a/R/ab_from_text.R
+++ b/R/ab_from_text.R
@ -19,18 +19,24 @@
 # Visit our website for more info: https://msberends.gitlab.io/AMR.    #
 # ==================================================================== #

-#' Retrieve antimicrobial drugs from text
+#' Retrieve antimicrobial drugs from clinical text
 #' 
-#' Use this function on e.g. clinical texts from health care records. It returns a vector of antimicrobial drugs found in the texts.
+#' Use this function on e.g. clinical texts from health care records. It returns a [list] with all antimicrobial drugs found in the texts.
 #' @param text text to analyse
 #' @param collapse character to pass on to `paste(..., collapse = ...)` to only return one character per element of `text`, see Examples
-#' @param translate_ab a column name of the [antibiotics] data set to translate the antibiotic abbreviations to, using [ab_property()]. Defaults to "name", which is equal to using `TRUE`. Use a value `FALSE`, `NULL` or `NA` to prevent translation of the `<ab>` code.
+#' @param translate_ab a column name of the [antibiotics] data set to translate the antibiotic abbreviations to, using [ab_property()]. Defaults to `FALSE`. Using `TRUE` is equal to using "name".
 #' @param ... parameters passed on to [as.ab()]
-#' @details To use this for creating a new variable in a data set (e.g. with `mutate()`), it could be convenient to paste the outcome together with the `collapse` parameter so every value in your new variable will be a character of length 1:\cr
+#' @details Without using `collapse`, this function will return a [list]. This can be convenient to use e.g. inside a `mutate()`:\cr
+#' `df %>% mutate(abx = ab_from_text(clinical_text))` 
+#' 
+#' The returned AB codes can be transformed to official names, groups, etc. with all [ab_property()] functions like [ab_name()] and [ab_group()], or by using the `translate_ab` parameter.
+#' 
+#' When using `collapse`, this function will return a [character]:\cr
 #' `df %>% mutate(abx = ab_from_text(clinical_text, collapse = "|"))` 
 #' 
-#' This function is also internally used by [as.ab()], although it then only returns the first hit.
+#' This function is also internally used by [as.ab()], although it then only returns the first hit and will throw a note if more results could have been returned.
 #' @export
+#' @return A [list], or a [character] if `collapse` is not `NULL`
 #' @examples 
 #' # mind the bad spelling of amoxicillin in this line, 
 #' # straight from a true health care record:
@ -41,10 +47,23 @@
 #' 
 #' # if you want to know which antibiotic groups were administered, check it:
 #' abx <- ab_from_text("administered amoxi/clav and cipro")
-#' ab_group(abx)
-ab_from_text <- function(text, collapse = NULL, translate_ab = "name", ...) {
+#' ab_group(abx[[1]])
+#' 
+#' if (require(dplyr)) {
+#'   tibble(clinical_text = c("given cipro and mero",
+#'                            "started on doxy today")) %>% 
+#'     mutate(abx = ab_from_text(clinical_text),
+#'            abx2 = ab_from_text(clinical_text,
+#'                                collapse = "|"),
+#'            abx3 = ab_from_text(clinical_text,
+#'                                collapse = "|",
+#'                                translate_ab = "name"))
+#' 
+#' }
+ab_from_text <- function(text, collapse = NULL, translate_ab = FALSE, ...) {
  
-  text <- tolower(text)
+  text <- tolower(as.character(text))
+  translate_ab <- get_translate_ab(translate_ab)
  
  abbr <- unlist(antibiotics$abbreviations)
  abbr <- abbr[nchar(abbr) >= 4]
@ -57,24 +76,29 @@ ab_from_text <- function(text, collapse = NULL, translate_ab = "name", ...) {
           ").*")
  }
  
-  text_split <- unlist(strsplit(text, "[ ;.,:/\\|-]"))
-  result <- suppressWarnings(
-    as.ab(unique(c(text_split[grep(to_regex(abbr), text_split)],
-                   text_split[grep(to_regex(names), text_split)],
-                   # regular expression must not be too long, so split synonyms in two:
-                   text_split[grep(to_regex(synonyms[c(1:0.5 * length(synonyms))]), text_split)],
-                   text_split[grep(to_regex(synonyms[c(0.5 * length(synonyms):length(synonyms))]), text_split)])),
-          ...))
-  result <- result[!is.na(result)]
-  if (length(result) == 0) {
-    result <- as.ab(NA)
-  }
-  translate_ab <- get_translate_ab(translate_ab)
-  if (!isFALSE(translate_ab)) {
-    result <- ab_property(result, property = translate_ab)
-  }
+  # split every text on whitespace and common punctuation: one token vector per element of `text`
+  text_split_all <- strsplit(text, "[ ;.,:/\\|-]")
+  # regular expression must not be too long, so synonyms are matched in two halves;
+  # a logical mask is used because `1:0.5 * length(x)` parses as `(1:0.5) * length(x)`,
+  # which selected a single synonym instead of half of them
+  first_half <- seq_along(synonyms) <= length(synonyms) / 2
+  result <- lapply(text_split_all, function(text_split) {
+    # coerce all matching tokens to AB codes, silencing warnings raised during the lookup
+    out <- suppressWarnings(
+      as.ab(unique(c(text_split[grep(to_regex(abbr), text_split)],
+                     text_split[grep(to_regex(names), text_split)],
+                     text_split[grep(to_regex(synonyms[first_half]), text_split)],
+                     text_split[grep(to_regex(synonyms[!first_half]), text_split)])),
+            ...))
+    out <- out[!is.na(out)]
+    if (length(out) == 0) {
+      # nothing recognised in this text: return a single NA so every element yields a value
+      return(as.ab(NA))
+    }
+    if (!isFALSE(translate_ab)) {
+      out <- ab_property(out, property = translate_ab, initial = FALSE)
+    }
+    out
+  })
+
  if (!is.null(collapse)) {
-    result <- paste0(result, collapse = collapse)
+    # vapply() guarantees a character vector, also for zero-length input
+    result <- vapply(result, function(x) paste0(x, collapse = collapse), character(1))
  }
+
  result
 }