1
0
mirror of https://github.com/msberends/AMR.git synced 2025-07-08 10:31:53 +02:00

(v1.3.0.9022) mo_matching_score(), poorman update, as.rsi() fix

This commit is contained in:
2020-09-18 16:05:53 +02:00
parent 89401ede9f
commit 4e40e42011
138 changed files with 2923 additions and 1472 deletions

80
R/ab.R
View File

@ -47,9 +47,9 @@
#'
#' European Commission Public Health PHARMACEUTICALS - COMMUNITY REGISTER: \url{http://ec.europa.eu/health/documents/community-register/html/atc.htm}
#' @aliases ab
#' @return Character (vector) with class [`ab`]. Unknown values will return `NA`.
#' @return A [character] [vector] with additional class [`ab`]
#' @seealso
#' * [antibiotics] for the dataframe that is being used to determine ATCs
#' * [antibiotics] for the [data.frame] that is being used to determine ATCs
#' * [ab_from_text()] for a function to retrieve antimicrobial drugs from clinical text (from health care records)
#' @inheritSection AMR Reference data publicly available
#' @inheritSection AMR Read more on our website!
@ -101,23 +101,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) {
x <- gsub('"', "", x, fixed = TRUE)
x_bak_clean <- x
if (already_regex == FALSE) {
# remove suffices
x_bak_clean <- gsub("_(MIC|RSI|DIS[CK])$", "", x_bak_clean)
# remove disk concentrations, like LVX_NM -> LVX
x_bak_clean <- gsub("_[A-Z]{2}[0-9_.]{0,3}$", "", x_bak_clean)
# remove part between brackets if that's followed by another string
x_bak_clean <- gsub("(.*)+ [(].*[)]", "\\1", x_bak_clean)
# keep only max 1 space
x_bak_clean <- trimws(gsub(" +", " ", x_bak_clean))
# non-character, space or number should be a slash
x_bak_clean <- gsub("[^A-Z0-9 -]", "/", x_bak_clean)
# spaces around non-characters must be removed: amox + clav -> amox/clav
x_bak_clean <- gsub("(.*[A-Z0-9]) ([^A-Z0-9].*)", "\\1\\2", x_bak_clean)
x_bak_clean <- gsub("(.*[^A-Z0-9]) ([A-Z0-9].*)", "\\1\\2", x_bak_clean)
# remove hyphen after a starting "co"
x_bak_clean <- gsub("^CO-", "CO", x_bak_clean)
# replace text 'and' with a slash
x_bak_clean <- gsub(" AND ", "/", x_bak_clean)
x_bak_clean <- generalise_antibiotic_name(x_bak_clean)
}
x <- unique(x_bak_clean)
@ -133,7 +117,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) {
}
if (initial_search == TRUE) {
progress <- progress_estimated(n = length(x), n_min = ifelse(isTRUE(info), 25, length(x) + 1)) # start if n >= 25
progress <- progress_ticker(n = length(x), n_min = ifelse(isTRUE(info), 25, length(x) + 1)) # start if n >= 25
on.exit(close(progress))
}
@ -161,7 +145,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) {
}
# exact name
found <- antibiotics[which(toupper(antibiotics$name) == x[i]), ]$ab
found <- antibiotics[which(AB_lookup$generalised_name == x[i]), ]$ab
if (length(found) > 0) {
x_new[i] <- found[1L]
next
@ -189,8 +173,8 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) {
}
# exact LOINC code
loinc_found <- unlist(lapply(antibiotics$loinc,
function(s) x[i] %in% s))
loinc_found <- unlist(lapply(AB_lookup$generalised_loinc,
function(s) generalise_antibiotic_name(x[i]) %in% s))
found <- antibiotics$ab[loinc_found == TRUE]
if (length(found) > 0) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
@ -198,8 +182,8 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) {
}
# exact synonym
synonym_found <- unlist(lapply(antibiotics$synonyms,
function(s) x[i] %in% toupper(s)))
synonym_found <- unlist(lapply(AB_lookup$generalised_synonyms,
function(s) generalise_antibiotic_name(x[i]) %in% s))
found <- antibiotics$ab[synonym_found == TRUE]
if (length(found) > 0) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
@ -207,8 +191,8 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) {
}
# exact abbreviation
abbr_found <- unlist(lapply(antibiotics$abbreviations,
function(a) x[i] %in% toupper(a)))
abbr_found <- unlist(lapply(AB_lookup$generalised_abbreviations,
function(s) generalise_antibiotic_name(x[i]) %in% s))
found <- antibiotics$ab[abbr_found == TRUE]
if (length(found) > 0) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
@ -246,21 +230,21 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) {
}
# try if name starts with it
found <- antibiotics[which(antibiotics$name %like% paste0("^", x_spelling)), ]$ab
found <- antibiotics[which(AB_lookup$generalised_name %like% paste0("^", x_spelling)), ]$ab
if (length(found) > 0) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
next
}
# try if name ends with it
found <- antibiotics[which(antibiotics$name %like% paste0(x_spelling, "$")), ]$ab
found <- antibiotics[which(AB_lookup$generalised_name %like% paste0(x_spelling, "$")), ]$ab
if (nchar(x[i]) >= 4 & length(found) > 0) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
next
}
# and try if any synonym starts with it
synonym_found <- unlist(lapply(antibiotics$synonyms,
function(s) any(s %like% paste0("^", x_spelling))))
synonym_found <- unlist(lapply(AB_lookup$generalised_synonyms,
function(s) any(generalise_antibiotic_name(s) %like% paste0("^", x_spelling))))
found <- antibiotics$ab[synonym_found == TRUE]
if (length(found) > 0) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
@ -291,7 +275,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) {
}
# transform back from other languages and try again
x_translated <- paste(lapply(strsplit(x[i], "[^A-Z0-9 ]"),
x_translated <- paste(lapply(strsplit(x[i], "[^A-Z0-9]"),
function(y) {
for (i in seq_len(length(y))) {
y[i] <- ifelse(tolower(y[i]) %in% tolower(translations_file$replacement),
@ -299,7 +283,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) {
!isFALSE(translations_file$fixed)), "pattern"],
y[i])
}
y
generalise_antibiotic_name(y)
})[[1]],
collapse = "/")
x_translated_guess <- suppressWarnings(as.ab(x_translated, initial_search = FALSE))
@ -317,7 +301,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) {
y_name,
y[i])
}
y
generalise_antibiotic_name(y)
})[[1]],
collapse = "/")
x_translated_guess <- suppressWarnings(as.ab(x_translated, initial_search = FALSE))
@ -449,9 +433,9 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) {
call. = FALSE)
}
x_result <- data.frame(x = x_bak_clean, stringsAsFactors = FALSE) %>%
left_join(data.frame(x = x, x_new = x_new, stringsAsFactors = FALSE), by = "x") %>%
pull(x_new)
x_result <- data.frame(x = x_bak_clean, stringsAsFactors = FALSE) %pm>%
pm_left_join(data.frame(x = x, x_new = x_new, stringsAsFactors = FALSE), by = "x") %pm>%
pm_pull(x_new)
if (length(x_result) == 0) {
x_result <- NA_character_
@ -538,3 +522,25 @@ c.ab <- function(x, ...) {
attributes(y) <- attributes(x)
class_integrity_check(y, "antimicrobial code", antibiotics$ab)
}
generalise_antibiotic_name <- function(x) {
x <- toupper(x)
# remove suffices
x <- gsub("_(MIC|RSI|DIS[CK])$", "", x)
# remove disk concentrations, like LVX_NM -> LVX
x <- gsub("_[A-Z]{2}[0-9_.]{0,3}$", "", x)
# remove part between brackets if that's followed by another string
x <- gsub("(.*)+ [(].*[)]", "\\1", x)
# keep only max 1 space
x <- trimws(gsub(" +", " ", x))
# non-character, space or number should be a slash
x <- gsub("[^A-Z0-9 -]", "/", x)
# spaces around non-characters must be removed: amox + clav -> amox/clav
x <- gsub("(.*[A-Z0-9]) ([^A-Z0-9].*)", "\\1\\2", x)
x <- gsub("(.*[^A-Z0-9]) ([A-Z0-9].*)", "\\1\\2", x)
# remove hyphen after a starting "co"
x <- gsub("^CO-", "CO", x)
# replace operators with a space
x <- gsub("(/| AND | WITH | W/|[+]|[-])+", " ", x)
x
}