mirror of
https://github.com/msberends/AMR.git
synced 2025-07-09 06:02:01 +02:00
(v1.2.0.9014) ab_from_text()
This commit is contained in:
286
R/ab.R
286
R/ab.R
@ -30,6 +30,7 @@
|
||||
#' @details All entries in the [antibiotics] data set have three different identifiers: a human readable EARS-Net code (column `ab`, used by ECDC and WHONET), an ATC code (column `atc`, used by WHO), and a CID code (column `cid`, Compound ID, used by PubChem). The data set contains more than 5,000 official brand names from many different countries, as found in PubChem.
|
||||
#'
|
||||
#' Use the [ab_property()] functions to get properties based on the returned antibiotic ID, see Examples.
|
||||
#'
|
||||
#' @section Source:
|
||||
#' World Health Organization (WHO) Collaborating Centre for Drug Statistics Methodology: \url{https://www.whocc.no/atc_ddd_index/}
|
||||
#'
|
||||
@ -38,7 +39,9 @@
|
||||
#' European Commission Public Health PHARMACEUTICALS - COMMUNITY REGISTER: \url{http://ec.europa.eu/health/documents/community-register/html/atc.htm}
|
||||
#' @aliases ab
|
||||
#' @return Character (vector) with class [`ab`]. Unknown values will return `NA`.
|
||||
#' @seealso [antibiotics] for the dataframe that is being used to determine ATCs.
|
||||
#' @seealso
|
||||
#' * [antibiotics] for the dataframe that is being used to determine ATCs
|
||||
#' * [ab_from_text()] for a function to retrieve antimicrobial drugs from clinical text (from health care records)
|
||||
#' @inheritSection AMR Read more on our website!
|
||||
#' @export
|
||||
#' @examples
|
||||
@ -72,6 +75,9 @@ as.ab <- function(x, ...) {
|
||||
return(x)
|
||||
}
|
||||
|
||||
initial <- is.null(list(...)$initial)
|
||||
already_regex <- isTRUE(list(...)$already_regex)
|
||||
|
||||
if (all(toupper(x) %in% antibiotics$ab)) {
|
||||
# valid AB code, but not yet right class
|
||||
return(structure(.Data = toupper(x),
|
||||
@ -79,26 +85,30 @@ as.ab <- function(x, ...) {
|
||||
}
|
||||
|
||||
x_bak <- x
|
||||
x <- toupper(x)
|
||||
# remove diacritics
|
||||
x <- iconv(x, from = "UTF-8", to = "ASCII//TRANSLIT")
|
||||
x <- gsub('"', "", x, fixed = TRUE)
|
||||
# remove suffices
|
||||
x_bak_clean <- gsub("_(mic|rsi|dis[ck])$", "", x, ignore.case = TRUE)
|
||||
# remove disk concentrations, like LVX_NM -> LVX
|
||||
x_bak_clean <- gsub("_[A-Z]{2}[0-9_.]{0,3}$", "", x_bak_clean, ignore.case = TRUE)
|
||||
# remove part between brackets if that's followed by another string
|
||||
x_bak_clean <- gsub("(.*)+ [(].*[)]", "\\1", x_bak_clean)
|
||||
# keep only max 1 space
|
||||
x_bak_clean <- trimws(gsub(" +", " ", x_bak_clean, ignore.case = TRUE))
|
||||
# non-character, space or number should be a slash
|
||||
x_bak_clean <- gsub("[^A-Za-z0-9 -]", "/", x_bak_clean)
|
||||
# spaces around non-characters must be removed: amox + clav -> amox/clav
|
||||
x_bak_clean <- gsub("(.*[a-zA-Z0-9]) ([^a-zA-Z0-9].*)", "\\1\\2", x_bak_clean)
|
||||
x_bak_clean <- gsub("(.*[^a-zA-Z0-9]) ([a-zA-Z0-9].*)", "\\1\\2", x_bak_clean)
|
||||
# remove hyphen after a starting "co"
|
||||
x_bak_clean <- gsub("^co-", "co", x_bak_clean, ignore.case = TRUE)
|
||||
# replace text 'and' with a slash
|
||||
x_bak_clean <- gsub(" and ", "/", x_bak_clean, ignore.case = TRUE)
|
||||
x_bak_clean <- x
|
||||
if (already_regex == FALSE) {
|
||||
# remove suffices
|
||||
x_bak_clean <- gsub("_(MIC|RSI|DIS[CK])$", "", x_bak_clean)
|
||||
# remove disk concentrations, like LVX_NM -> LVX
|
||||
x_bak_clean <- gsub("_[A-Z]{2}[0-9_.]{0,3}$", "", x_bak_clean)
|
||||
# remove part between brackets if that's followed by another string
|
||||
x_bak_clean <- gsub("(.*)+ [(].*[)]", "\\1", x_bak_clean)
|
||||
# keep only max 1 space
|
||||
x_bak_clean <- trimws(gsub(" +", " ", x_bak_clean))
|
||||
# non-character, space or number should be a slash
|
||||
x_bak_clean <- gsub("[^A-Z0-9 -]", "/", x_bak_clean)
|
||||
# spaces around non-characters must be removed: amox + clav -> amox/clav
|
||||
x_bak_clean <- gsub("(.*[A-Z0-9]) ([^A-Z0-9].*)", "\\1\\2", x_bak_clean)
|
||||
x_bak_clean <- gsub("(.*[^A-Z0-9]) ([A-Z0-9].*)", "\\1\\2", x_bak_clean)
|
||||
# remove hyphen after a starting "co"
|
||||
x_bak_clean <- gsub("^CO-", "CO", x_bak_clean)
|
||||
# replace text 'and' with a slash
|
||||
x_bak_clean <- gsub(" AND ", "/", x_bak_clean)
|
||||
}
|
||||
|
||||
x <- unique(x_bak_clean)
|
||||
x_new <- rep(NA_character_, length(x))
|
||||
@ -118,14 +128,14 @@ as.ab <- function(x, ...) {
|
||||
}
|
||||
|
||||
# exact AB code
|
||||
found <- antibiotics[which(antibiotics$ab == toupper(x[i])), ]$ab
|
||||
found <- antibiotics[which(antibiotics$ab == x[i]), ]$ab
|
||||
if (length(found) > 0) {
|
||||
x_new[i] <- found[1L]
|
||||
next
|
||||
}
|
||||
|
||||
# exact ATC code
|
||||
found <- antibiotics[which(antibiotics$atc == toupper(x[i])), ]$ab
|
||||
found <- antibiotics[which(antibiotics$atc == x[i]), ]$ab
|
||||
if (length(found) > 0) {
|
||||
x_new[i] <- found[1L]
|
||||
next
|
||||
@ -139,7 +149,7 @@ as.ab <- function(x, ...) {
|
||||
}
|
||||
|
||||
# exact name
|
||||
found <- antibiotics[which(toupper(antibiotics$name) == toupper(x[i])), ]$ab
|
||||
found <- antibiotics[which(toupper(antibiotics$name) == x[i]), ]$ab
|
||||
if (length(found) > 0) {
|
||||
x_new[i] <- found[1L]
|
||||
next
|
||||
@ -147,11 +157,7 @@ as.ab <- function(x, ...) {
|
||||
|
||||
# exact LOINC code
|
||||
loinc_found <- unlist(lapply(antibiotics$loinc,
|
||||
function(s) if (x[i] %in% s) {
|
||||
TRUE
|
||||
} else {
|
||||
FALSE
|
||||
}))
|
||||
function(s) x[i] %in% s))
|
||||
found <- antibiotics$ab[loinc_found == TRUE]
|
||||
if (length(found) > 0) {
|
||||
x_new[i] <- found[1L]
|
||||
@ -160,11 +166,7 @@ as.ab <- function(x, ...) {
|
||||
|
||||
# exact synonym
|
||||
synonym_found <- unlist(lapply(antibiotics$synonyms,
|
||||
function(s) if (toupper(x[i]) %in% toupper(s)) {
|
||||
TRUE
|
||||
} else {
|
||||
FALSE
|
||||
}))
|
||||
function(s) x[i] %in% toupper(s)))
|
||||
found <- antibiotics$ab[synonym_found == TRUE]
|
||||
if (length(found) > 0) {
|
||||
x_new[i] <- found[1L]
|
||||
@ -173,90 +175,87 @@ as.ab <- function(x, ...) {
|
||||
|
||||
# exact abbreviation
|
||||
abbr_found <- unlist(lapply(antibiotics$abbreviations,
|
||||
function(a) if (toupper(x[i]) %in% toupper(a)) {
|
||||
TRUE
|
||||
} else {
|
||||
FALSE
|
||||
}))
|
||||
function(a) x[i] %in% toupper(a)))
|
||||
found <- antibiotics$ab[abbr_found == TRUE]
|
||||
if (length(found) > 0) {
|
||||
x_new[i] <- found[1L]
|
||||
next
|
||||
}
|
||||
|
||||
# first >=4 characters of name
|
||||
if (nchar(x[i]) >= 4) {
|
||||
found <- antibiotics[which(toupper(antibiotics$name) %like% paste0("^", x[i])), ]$ab
|
||||
if (length(found) > 0) {
|
||||
x_new[i] <- found[1L]
|
||||
next
|
||||
}
|
||||
}
|
||||
|
||||
# allow characters that resemble others, but only continue when having more than 3 characters
|
||||
if (nchar(x[i]) <= 3) {
|
||||
x_unknown <- c(x_unknown, x_bak[x[i] == x_bak_clean][1])
|
||||
next
|
||||
}
|
||||
x_spelling <- tolower(x[i])
|
||||
x_spelling <- gsub("[iy]+", "[iy]+", x_spelling)
|
||||
x_spelling <- gsub("(c|k|q|qu|s|z|x|ks)+", "(c|k|q|qu|s|z|x|ks)+", x_spelling)
|
||||
x_spelling <- gsub("(ph|f|v)+", "(ph|f|v)+", x_spelling)
|
||||
x_spelling <- gsub("(th|t)+", "(th|t)+", x_spelling)
|
||||
x_spelling <- gsub("a+", "a+", x_spelling)
|
||||
x_spelling <- gsub("e+", "e+", x_spelling)
|
||||
x_spelling <- gsub("o+", "o+", x_spelling)
|
||||
# allow any ending of -in/-ine and -im/-ime
|
||||
x_spelling <- gsub("(\\[iy\\]\\+(n|m)|\\[iy\\]\\+(n|m)e\\+)$", "[iy]+(n|m)e*", x_spelling)
|
||||
# allow any ending of -ol/-ole
|
||||
x_spelling <- gsub("(o\\+l|o\\+le\\+)$", "o+le*", x_spelling)
|
||||
# allow any ending of -on/-one
|
||||
x_spelling <- gsub("(o\\+n|o\\+ne\\+)$", "o+ne*", x_spelling)
|
||||
# replace multiple same characters to single one with '+', like "ll" -> "l+"
|
||||
x_spelling <- gsub("(.)\\1+", "\\1+", x_spelling)
|
||||
# replace spaces and slashes with a possibility on both
|
||||
x_spelling <- gsub("[ /]", "( .*|.*/)", x_spelling)
|
||||
|
||||
x_spelling <- x[i]
|
||||
if (already_regex == FALSE) {
|
||||
x_spelling <- gsub("[IY]+", "[IY]+", x_spelling)
|
||||
x_spelling <- gsub("(C|K|Q|QU|S|Z|X|KS)+", "(C|K|Q|QU|S|Z|X|KS)+", x_spelling)
|
||||
x_spelling <- gsub("(PH|F|V)+", "(PH|F|V)+", x_spelling)
|
||||
x_spelling <- gsub("(TH|T)+", "(TH|T)+", x_spelling)
|
||||
x_spelling <- gsub("A+", "A+", x_spelling)
|
||||
x_spelling <- gsub("E+", "E+", x_spelling)
|
||||
x_spelling <- gsub("O+", "O+", x_spelling)
|
||||
# allow any ending of -in/-ine and -im/-ime
|
||||
x_spelling <- gsub("(\\[IY\\]\\+(N|M)|\\[IY\\]\\+(N|M)E\\+)$", "[IY]+(N|M)E*", x_spelling)
|
||||
# allow any ending of -ol/-ole
|
||||
x_spelling <- gsub("(O\\+L|O\\+LE\\+)$", "O+LE*", x_spelling)
|
||||
# allow any ending of -on/-one
|
||||
x_spelling <- gsub("(O\\+N|O\\+NE\\+)$", "O+NE*", x_spelling)
|
||||
# replace multiple same characters to single one with '+', like "ll" -> "l+"
|
||||
x_spelling <- gsub("(.)\\1+", "\\1+", x_spelling)
|
||||
# replace spaces and slashes with a possibility on both
|
||||
x_spelling <- gsub("[ /]", "( .*|.*/)", x_spelling)
|
||||
# correct for digital reading text (OCR)
|
||||
x_spelling <- gsub("[NRD]", "[NRD]", x_spelling)
|
||||
}
|
||||
|
||||
# try if name starts with it
|
||||
found <- antibiotics[which(antibiotics$name %like% paste0("^", x_spelling)), ]$ab
|
||||
if (length(found) > 0) {
|
||||
x_new[i] <- found[1L]
|
||||
next
|
||||
}
|
||||
# try if name ends with it
|
||||
found <- antibiotics[which(antibiotics$name %like% paste0(x_spelling, "$")), ]$ab
|
||||
if (nchar(x[i]) >= 4 & length(found) > 0) {
|
||||
x_new[i] <- found[1L]
|
||||
next
|
||||
}
|
||||
# and try if any synonym starts with it
|
||||
synonym_found <- unlist(lapply(antibiotics$synonyms,
|
||||
function(s) if (any(s %like% paste0("^", x_spelling))) {
|
||||
TRUE
|
||||
} else {
|
||||
FALSE
|
||||
}))
|
||||
function(s) any(s %like% paste0("^", x_spelling))))
|
||||
found <- antibiotics$ab[synonym_found == TRUE]
|
||||
if (length(found) > 0) {
|
||||
x_new[i] <- found[1L]
|
||||
next
|
||||
}
|
||||
|
||||
# try by removing all spaces
|
||||
if (x[i] %like% " ") {
|
||||
found <- suppressWarnings(as.ab(gsub(" +", "", x[i])))
|
||||
if (length(found) > 0 & !is.na(found)) {
|
||||
x_new[i] <- found[1L]
|
||||
next
|
||||
|
||||
# INITIAL - More uncertain results ----
|
||||
|
||||
if (initial == TRUE) {
|
||||
# only run on first try
|
||||
|
||||
# try by removing all spaces
|
||||
if (x[i] %like% " ") {
|
||||
found <- suppressWarnings(as.ab(gsub(" +", "", x[i]), initial = FALSE))
|
||||
if (length(found) > 0 & !is.na(found)) {
|
||||
x_new[i] <- found[1L]
|
||||
next
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# try by removing all spaces and numbers
|
||||
if (x[i] %like% " " | x[i] %like% "[0-9]") {
|
||||
found <- suppressWarnings(as.ab(gsub("[ 0-9]", "", x[i])))
|
||||
if (length(found) > 0 & !is.na(found)) {
|
||||
x_new[i] <- found[1L]
|
||||
next
|
||||
|
||||
# try by removing all spaces and numbers
|
||||
if (x[i] %like% " " | x[i] %like% "[0-9]") {
|
||||
found <- suppressWarnings(as.ab(gsub("[ 0-9]", "", x[i]), initial = FALSE))
|
||||
if (length(found) > 0 & !is.na(found)) {
|
||||
x_new[i] <- found[1L]
|
||||
next
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!isFALSE(list(...)$initial_search)) {
|
||||
|
||||
# transform back from other languages and try again
|
||||
x_translated <- paste(lapply(strsplit(x[i], "[^a-zA-Z0-9 ]"),
|
||||
x_translated <- paste(lapply(strsplit(x[i], "[^A-Z0-9 ]"),
|
||||
function(y) {
|
||||
for (i in seq_len(length(y))) {
|
||||
y[i] <- ifelse(tolower(y[i]) %in% tolower(translations_file$replacement),
|
||||
@ -267,41 +266,102 @@ as.ab <- function(x, ...) {
|
||||
y
|
||||
})[[1]],
|
||||
collapse = "/")
|
||||
x_translated_guess <- suppressWarnings(as.ab(x_translated, initial_search = FALSE))
|
||||
x_translated_guess <- suppressWarnings(as.ab(x_translated, initial = FALSE))
|
||||
if (!is.na(x_translated_guess)) {
|
||||
x_new[i] <- x_translated_guess
|
||||
next
|
||||
}
|
||||
|
||||
if (!isFALSE(list(...)$initial_search2)) {
|
||||
# now also try to coerce brandname combinations like "Amoxy/clavulanic acid"
|
||||
x_translated <- paste(lapply(strsplit(x_translated, "[^a-zA-Z0-9 ]"),
|
||||
function(y) {
|
||||
for (i in seq_len(length(y))) {
|
||||
y_name <- suppressWarnings(ab_name(y[i], language = NULL, initial_search = FALSE, initial_search2 = FALSE))
|
||||
y[i] <- ifelse(!is.na(y_name),
|
||||
y_name,
|
||||
y[i])
|
||||
}
|
||||
y
|
||||
})[[1]],
|
||||
collapse = "/")
|
||||
x_translated_guess <- suppressWarnings(as.ab(x_translated, initial_search = FALSE))
|
||||
if (!is.na(x_translated_guess)) {
|
||||
x_new[i] <- x_translated_guess
|
||||
# now also try to coerce brandname combinations like "Amoxy/clavulanic acid"
|
||||
x_translated <- paste(lapply(strsplit(x_translated, "[^A-Z0-9 ]"),
|
||||
function(y) {
|
||||
for (i in seq_len(length(y))) {
|
||||
y_name <- suppressWarnings(ab_name(y[i], language = NULL, initial = FALSE))
|
||||
y[i] <- ifelse(!is.na(y_name),
|
||||
y_name,
|
||||
y[i])
|
||||
}
|
||||
y
|
||||
})[[1]],
|
||||
collapse = "/")
|
||||
x_translated_guess <- suppressWarnings(as.ab(x_translated, initial = FALSE))
|
||||
if (!is.na(x_translated_guess)) {
|
||||
x_new[i] <- x_translated_guess
|
||||
next
|
||||
}
|
||||
|
||||
# try by removing all trailing capitals
|
||||
if (x[i] %like_case% "[a-z]+[A-Z]+$") {
|
||||
found <- suppressWarnings(as.ab(gsub("[A-Z]+$", "", x[i]), initial = FALSE))
|
||||
if (!is.na(found)) {
|
||||
x_new[i] <- found[1L]
|
||||
next
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# try by removing all trailing capitals
|
||||
if (x[i] %like_case% "[a-z]+[A-Z]+$") {
|
||||
found <- suppressWarnings(as.ab(gsub("[A-Z]+$", "", x[i])))
|
||||
if (length(found) > 0 & !is.na(found)) {
|
||||
|
||||
# keep only letters
|
||||
found <- suppressWarnings(as.ab(gsub("[^A-Z]", "", x[i]), initial = FALSE))
|
||||
if (!is.na(found)) {
|
||||
x_new[i] <- found[1L]
|
||||
next
|
||||
}
|
||||
}
|
||||
|
||||
# try from a bigger text, like from a health care record, see ?ab_from_text
|
||||
found <- suppressWarnings(ab_from_text(x[i], initial = FALSE, translate_ab = FALSE)[1L])
|
||||
if (!is.na(found)) {
|
||||
x_new[i] <- found[1L]
|
||||
next
|
||||
}
|
||||
|
||||
# first 5 except for cephalosporins, then first 7 (those cephalosporins all start quite the same!)
|
||||
found <- suppressWarnings(as.ab(substr(x[i], 1, 5), initial = FALSE))
|
||||
if (!is.na(found) && !ab_group(found, initial = FALSE) %like% "cephalosporins") {
|
||||
x_new[i] <- found[1L]
|
||||
next
|
||||
}
|
||||
found <- suppressWarnings(as.ab(substr(x[i], 1, 7), initial = FALSE))
|
||||
if (!is.na(found)) {
|
||||
x_new[i] <- found[1L]
|
||||
next
|
||||
}
|
||||
|
||||
# make all consonants facultative
|
||||
search_str <- gsub("([BCDFGHJKLMNPQRSTVWXZ])", "\\1*", x[i])
|
||||
found <- suppressWarnings(as.ab(search_str, initial = FALSE, already_regex = TRUE))
|
||||
# keep at least 4 normal characters
|
||||
if (nchar(gsub(".\\*", "", search_str)) < 4) {
|
||||
found <- NA
|
||||
}
|
||||
if (!is.na(found)) {
|
||||
x_new[i] <- found[1L]
|
||||
next
|
||||
}
|
||||
|
||||
# make all vowels facultative
|
||||
search_str <- gsub("([AEIOUY])", "\\1*", x[i])
|
||||
found <- suppressWarnings(as.ab(search_str, initial = FALSE, already_regex = TRUE))
|
||||
# keep at least 5 normal characters
|
||||
if (nchar(gsub(".\\*", "", search_str)) < 5) {
|
||||
found <- NA
|
||||
}
|
||||
if (!is.na(found)) {
|
||||
x_new[i] <- found[1L]
|
||||
next
|
||||
}
|
||||
|
||||
# allow misspelling of vowels
|
||||
x_spelling <- gsub("A+", "[AEIOU]+", x_spelling, fixed = TRUE)
|
||||
x_spelling <- gsub("E+", "[AEIOU]+", x_spelling, fixed = TRUE)
|
||||
x_spelling <- gsub("I+", "[AEIOU]+", x_spelling, fixed = TRUE)
|
||||
x_spelling <- gsub("O+", "[AEIOU]+", x_spelling, fixed = TRUE)
|
||||
x_spelling <- gsub("U+", "[AEIOU]+", x_spelling, fixed = TRUE)
|
||||
found <- suppressWarnings(as.ab(x_spelling, initial = FALSE, already_regex = TRUE))
|
||||
if (!is.na(found)) {
|
||||
x_new[i] <- found[1L]
|
||||
next
|
||||
}
|
||||
|
||||
} # end of initial = TRUE
|
||||
|
||||
# not found
|
||||
x_unknown <- c(x_unknown, x_bak[x[i] == x_bak_clean][1])
|
||||
@ -316,7 +376,7 @@ as.ab <- function(x, ...) {
|
||||
".",
|
||||
call. = FALSE)
|
||||
}
|
||||
|
||||
|
||||
if (length(x_unknown) > 0) {
|
||||
warning("These values could not be coerced to a valid antimicrobial ID: ",
|
||||
paste('"', sort(unique(x_unknown)), '"', sep = "", collapse = ", "),
|
||||
|
74
R/ab_from_text.R
Normal file
74
R/ab_from_text.R
Normal file
@ -0,0 +1,74 @@
|
||||
# ==================================================================== #
|
||||
# TITLE #
|
||||
# Antimicrobial Resistance (AMR) Analysis #
|
||||
# #
|
||||
# SOURCE #
|
||||
# https://gitlab.com/msberends/AMR #
|
||||
# #
|
||||
# LICENCE #
|
||||
# (c) 2018-2020 Berends MS, Luz CF et al. #
|
||||
# #
|
||||
# This R package is free software; you can freely use and distribute #
|
||||
# it for both personal and commercial purposes under the terms of the #
|
||||
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
|
||||
# the Free Software Foundation. #
|
||||
# #
|
||||
# We created this package for both routine data analysis and academic #
|
||||
# research and it was publicly released in the hope that it will be #
|
||||
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
|
||||
# Visit our website for more info: https://msberends.gitlab.io/AMR. #
|
||||
# ==================================================================== #
|
||||
|
||||
#' Retrieve antimicrobial drugs from text
|
||||
#'
|
||||
#' Use this function on e.g. clinical texts from health care records. It returns a vector of antimicrobial drugs found in the texts.
|
||||
#' @param text text to analyse
|
||||
#' @param collapse character to pass on to `paste(..., collapse = ...)` to only return one character per element of `text`, see Examples
|
||||
#' @param translate_ab a column name of the [antibiotics] data set to translate the antibiotic abbreviations to, using [ab_property()]. Defaults to "name", which is equal to using `TRUE`. Use a value `FALSE`, `NULL` or `NA` to prevent translation of the `<ab>` code.
|
||||
#' @param ... parameters passed on to [as.ab()]
|
||||
#' @details To use this for creating a new variable in a data set (e.g. with `mutate()`), it could be convenient to paste the outcome together with the `collapse` parameter so every value in your new variable will be a character of length 1:\cr
|
||||
#' `df %>% mutate(abx = ab_from_text(clinical_text, collapse = "|"))`
|
||||
#'
|
||||
#' This function is also internally used by [as.ab()], although it then only returns the first hit.
|
||||
#' @examples
|
||||
#' # mind the bad spelling of amoxicillin in this line,
|
||||
#' # straight from a true health care record:
|
||||
#' ab_from_text("28/03/2020 regular amoxicilliin 500mg po tds")
|
||||
#'
|
||||
#' ab_from_text("administered amoxi/clav and cipro")
|
||||
#' ab_from_text("administered amoxi/clav and cipro", collapse = ", ")
|
||||
#'
|
||||
#' # if you want to know which antibiotic groups were administered, check it:
|
||||
#' abx <- ab_from_text("administered amoxi/clav and cipro")
|
||||
#' ab_group(abx)
|
||||
ab_from_text <- function(text, collapse = NULL, translate_ab = "name", ...) {
|
||||
|
||||
text <- tolower(text)
|
||||
|
||||
abbr <- unlist(antibiotics$abbreviations)
|
||||
abbr <- abbr[nchar(abbr) >= 4]
|
||||
names <- substr(antibiotics$name, 1, 5)
|
||||
synonyms <- unlist(antibiotics$synonyms)
|
||||
synonyms <- synonyms[nchar(synonyms) >= 4]
|
||||
to_regex <- function(x) {
|
||||
paste0("^(",
|
||||
paste0(unique(gsub("[^a-z0-9]", ".*", sort(tolower(x)))), collapse = "|"),
|
||||
").*")
|
||||
}
|
||||
|
||||
text_split <- unlist(strsplit(text, "[ ;.,:/\\|-]"))
|
||||
result <- as.ab(unique(c(text_split[grep(to_regex(abbr), text_split)],
|
||||
text_split[grep(to_regex(names), text_split)],
|
||||
# regular expression must not be too long, so split synonyms in two:
|
||||
text_split[grep(to_regex(synonyms[c(1:0.5 * length(synonyms))]), text_split)],
|
||||
text_split[grep(to_regex(synonyms[c(0.5 * length(synonyms):length(synonyms))]), text_split)])),
|
||||
...)
|
||||
translate_ab <- get_translate_ab(translate_ab)
|
||||
if (!isFALSE(translate_ab)) {
|
||||
result <- ab_property(result, property = translate_ab)
|
||||
}
|
||||
if (!is.null(collapse)) {
|
||||
result <- paste0(result, collapse = collapse)
|
||||
}
|
||||
result
|
||||
}
|
2
R/like.R
2
R/like.R
@ -64,7 +64,7 @@
|
||||
#' }
|
||||
like <- function(x, pattern, ignore.case = TRUE) {
|
||||
# set to fixed if no regex found
|
||||
fixed <- all(!grepl("[$.^*?+}{|)(]", pattern))
|
||||
fixed <- all(!grepl("[\\[$.^*?+-}{|)(]", pattern))
|
||||
if (ignore.case == TRUE) {
|
||||
# set here, otherwise if fixed = TRUE, this warning will be thrown: argument 'ignore.case = TRUE' will be ignored
|
||||
x <- tolower(x)
|
||||
|
@ -30,7 +30,7 @@
|
||||
#' @param as_percent a logical to indicate whether the output must be returned as a hundred fold with % sign (a character). A value of `0.123456` will then be returned as `"12.3%"`.
|
||||
#' @param only_all_tested (for combination therapies, i.e. using more than one variable for `...`): a logical to indicate that isolates must be tested for all antibiotics, see section *Combination therapy* below
|
||||
#' @param data a [`data.frame`] containing columns with class [`rsi`] (see [as.rsi()])
|
||||
#' @param translate_ab a column name of the [antibiotics] data set to translate the antibiotic abbreviations to, using [ab_property()]
|
||||
#' @param translate_ab a column name of the [antibiotics] data set to translate the antibiotic abbreviations to, using [ab_property()]. Use a value
|
||||
#' @inheritParams ab_property
|
||||
#' @param combine_SI a logical to indicate whether all values of S and I must be merged into one, so the output only consists of S+I vs. R (susceptible vs. resistant). This used to be the parameter `combine_IR`, but this now follows the redefinition by EUCAST about the interpretion of I (increased exposure) in 2019, see section 'Interpretation of S, I and R' below. Default is `TRUE`.
|
||||
#' @param combine_IR a logical to indicate whether all values of I and R must be merged into one, so the output only consists of S vs. I+R (susceptible vs. non-susceptible). This is outdated, see parameter `combine_SI`.
|
||||
|
26
R/rsi_calc.R
26
R/rsi_calc.R
@ -56,8 +56,10 @@ rsi_calc <- function(...,
|
||||
# for complete data.frames, like example_isolates %>% select(AMC, GEN) %>% proportion_S()
|
||||
# and the old rsi function, which has "df" as name of the first parameter
|
||||
x <- dots_df
|
||||
} else if (length(dots) == 1 | all(!dots %in% colnames(dots_df))) {
|
||||
x <- dots_df
|
||||
} else {
|
||||
x <- dots_df[, dots[dots %in% colnames(dots_df)]]
|
||||
x <- dots_df[, dots[dots %in% colnames(dots_df)], drop = FALSE]
|
||||
}
|
||||
} else if (ndots == 1) {
|
||||
# only 1 variable passed (can also be data.frame), like: proportion_S(example_isolates$AMC) and example_isolates$AMC %>% proportion_S()
|
||||
@ -111,7 +113,7 @@ rsi_calc <- function(...,
|
||||
base::all(y %in% other_values) & base::any(is.na(y))
|
||||
})
|
||||
numerator <- sum(as.logical(by(x, seq_len(nrow(x)), function(row) any(unlist(row) %in% ab_result, na.rm = TRUE))))
|
||||
denominator <- nrow(x[!other_values_filter, ])
|
||||
denominator <- nrow(x[!other_values_filter, , drop = FALSE])
|
||||
}
|
||||
} else {
|
||||
# x is not a data.frame
|
||||
@ -168,9 +170,7 @@ rsi_calc_df <- function(type, # "proportion", "count" or "both"
|
||||
}
|
||||
stop_if(isTRUE(combine_SI) & isTRUE(combine_IR), "either `combine_SI` or `combine_IR` can be TRUE, not both", call = -2)
|
||||
|
||||
if (as.character(translate_ab) %in% c("TRUE", "official")) {
|
||||
translate_ab <- "name"
|
||||
}
|
||||
translate_ab <- get_translate_ab(translate_ab)
|
||||
|
||||
# select only groups and antibiotics
|
||||
if (has_groups(data)) {
|
||||
@ -292,3 +292,19 @@ rsi_calc_df <- function(type, # "proportion", "count" or "both"
|
||||
rownames(out) <- NULL
|
||||
out
|
||||
}
|
||||
|
||||
get_translate_ab <- function(translate_ab) {
|
||||
translate_ab <- as.character(translate_ab)[1L]
|
||||
if (translate_ab %in% c("TRUE", "official")) {
|
||||
return("name")
|
||||
} else if (translate_ab %in% c(NA_character_, "FALSE")) {
|
||||
return(FALSE)
|
||||
} else {
|
||||
translate_ab <- tolower(translate_ab)
|
||||
stop_ifnot(translate_ab %in% colnames(AMR::antibiotics),
|
||||
"invalid value for 'translate_ab', this must be a column name of the antibiotics data set\n",
|
||||
"or TRUE (equals 'name') or FALSE to not translate at all.",
|
||||
call = FALSE)
|
||||
translate_ab
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user