2020-06-25 17:34:50 +02:00
# ==================================================================== #
# TITLE #
# Antimicrobial Resistance (AMR) Analysis #
# #
# SOURCE #
# https://gitlab.com/msberends/AMR #
# #
# LICENCE #
# (c) 2018-2020 Berends MS, Luz CF et al. #
# #
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
# the Free Software Foundation. #
# #
# We created this package for both routine data analysis and academic #
# research and it was publicly released in the hope that it will be #
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
# Visit our website for more info: https://msberends.gitlab.io/AMR. #
# ==================================================================== #
2020-06-26 12:31:27 +02:00
#' Retrieve antimicrobial drugs from clinical text
2020-06-25 17:34:50 +02:00
#'
2020-06-26 12:31:27 +02:00
#' Use this function on e.g. clinical texts from health care records. It returns a [list] with all antimicrobial drugs found in the texts.
2020-06-25 17:34:50 +02:00
#' @param text text to analyse
#' @param collapse character to pass on to `paste(..., collapse = ...)` to only return one character per element of `text`, see Examples
2020-06-26 12:31:27 +02:00
#' @param translate_ab a column name of the [antibiotics] data set to translate the antibiotic abbreviations to, using [ab_property()]. Defaults to `FALSE`. Using `TRUE` is equal to using "name".
2020-06-25 17:34:50 +02:00
#' @param ... parameters passed on to [as.ab()]
2020-06-26 12:31:27 +02:00
#' @details Without using `collapse`, this function will return a [list]. This can be convenient to use e.g. inside a `mutate()`):\cr
#' `df %>% mutate(abx = ab_from_text(clinical_text))`
#'
#' The returned AB codes can be transformed to official names, groups, etc. with all [ab_property()] functions like [ab_name()] and [ab_group()], or by using the `translate_ab` parameter.
#'
#' With using `collapse`, this function will return a [character]:\cr
2020-06-25 17:34:50 +02:00
#' `df %>% mutate(abx = ab_from_text(clinical_text, collapse = "|"))`
#'
2020-06-26 12:31:27 +02:00
#' This function is also internally used by [as.ab()], although it then only returns the first hit and will throw a note if more results could have been returned.
2020-06-25 19:20:50 +02:00
#' @export
2020-06-26 12:31:27 +02:00
#' @return A [list], or a [character] if `collapse` is not `NULL`
2020-06-25 17:34:50 +02:00
#' @examples
#' # mind the bad spelling of amoxicillin in this line,
#' # straight from a true health care record:
#' ab_from_text("28/03/2020 regular amoxicilliin 500mg po tds")
#'
#' ab_from_text("administered amoxi/clav and cipro")
#' ab_from_text("administered amoxi/clav and cipro", collapse = ", ")
#'
#' # if you want to know which antibiotic groups were administered, check it:
#' abx <- ab_from_text("administered amoxi/clav and cipro")
2020-06-26 12:31:27 +02:00
#' ab_group(abx[[1]])
#'
#' if (require(dplyr)) {
#' tibble(clinical_text = c("given cipro and mero",
#' "started on doxy today")) %>%
#' mutate(abx = ab_from_text(clinical_text),
#' abx2 = ab_from_text(clinical_text,
#' collapse = "|"),
#' abx3 = ab_from_text(clinical_text,
#' collapse = "|",
#' translate_ab = "name"))
#'
#' }
ab_from_text <- function ( text , collapse = NULL , translate_ab = FALSE , ... ) {
2020-06-25 17:34:50 +02:00
2020-06-26 12:31:27 +02:00
text <- tolower ( as.character ( text ) )
translate_ab <- get_translate_ab ( translate_ab )
2020-06-25 17:34:50 +02:00
abbr <- unlist ( antibiotics $ abbreviations )
abbr <- abbr [nchar ( abbr ) >= 4 ]
names <- substr ( antibiotics $ name , 1 , 5 )
synonyms <- unlist ( antibiotics $ synonyms )
synonyms <- synonyms [nchar ( synonyms ) >= 4 ]
to_regex <- function ( x ) {
paste0 ( " ^(" ,
paste0 ( unique ( gsub ( " [^a-z0-9]" , " .*" , sort ( tolower ( x ) ) ) ) , collapse = " |" ) ,
" ).*" )
}
2020-06-26 12:31:27 +02:00
text_split_all <- strsplit ( text , " [ ;.,:/\\|-]" )
result <- lapply ( text_split_all , function ( text_split ) {
suppressWarnings (
out <- as.ab ( unique ( c ( text_split [grep ( to_regex ( abbr ) , text_split ) ] ,
text_split [grep ( to_regex ( names ) , text_split ) ] ,
# regular expression must not be too long, so split synonyms in two:
text_split [grep ( to_regex ( synonyms [c ( 1 : 0.5 * length ( synonyms ) ) ] ) , text_split ) ] ,
text_split [grep ( to_regex ( synonyms [c ( 0.5 * length ( synonyms ) : length ( synonyms ) ) ] ) , text_split ) ] ) ) ,
... ) )
out <- out [ ! is.na ( out ) ]
if ( length ( out ) == 0 ) {
as.ab ( NA )
} else {
if ( ! isFALSE ( translate_ab ) ) {
out <- ab_property ( out , property = translate_ab , initial = FALSE )
}
out
}
} )
2020-06-25 17:34:50 +02:00
if ( ! is.null ( collapse ) ) {
2020-06-26 12:31:27 +02:00
result <- sapply ( result , function ( x ) paste0 ( x , collapse = collapse ) )
2020-06-25 17:34:50 +02:00
}
2020-06-26 12:31:27 +02:00
2020-06-25 17:34:50 +02:00
result
}