1
0
mirror of https://github.com/msberends/AMR.git synced 2025-07-09 08:52:15 +02:00

(v1.2.0.9034) code cleaning

This commit is contained in:
2020-07-13 09:17:24 +02:00
parent c0cf7ab02b
commit 6ab468362d
36 changed files with 266 additions and 265 deletions

40
R/ab.R
View File

@ -82,16 +82,16 @@ as.ab <- function(x, flag_multiple_results = TRUE, ...) {
if (is.ab(x)) {
return(x)
}
initial_search <- is.null(list(...)$initial_search)
already_regex <- isTRUE(list(...)$already_regex)
if (all(toupper(x) %in% antibiotics$ab)) {
# valid AB code, but not yet right class
return(structure(.Data = toupper(x),
class = c("ab", "character")))
}
x_bak <- x
x <- toupper(x)
# remove diacritics
@ -117,7 +117,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, ...) {
# replace text 'and' with a slash
x_bak_clean <- gsub(" AND ", "/", x_bak_clean)
}
x <- unique(x_bak_clean)
x_new <- rep(NA_character_, length(x))
x_unknown <- character(0)
@ -164,21 +164,21 @@ as.ab <- function(x, flag_multiple_results = TRUE, ...) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
next
}
# exact ATC code
found <- antibiotics[which(antibiotics$atc == x[i]), ]$ab
if (length(found) > 0) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
next
}
# exact CID code
found <- antibiotics[which(antibiotics$cid == x[i]), ]$ab
if (length(found) > 0) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
next
}
# exact name
found <- antibiotics[which(toupper(antibiotics$name) == x[i]), ]$ab
if (length(found) > 0) {
@ -188,13 +188,13 @@ as.ab <- function(x, flag_multiple_results = TRUE, ...) {
# exact LOINC code
loinc_found <- unlist(lapply(antibiotics$loinc,
function(s) x[i] %in% s))
function(s) x[i] %in% s))
found <- antibiotics$ab[loinc_found == TRUE]
if (length(found) > 0) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
next
}
# exact synonym
synonym_found <- unlist(lapply(antibiotics$synonyms,
function(s) x[i] %in% toupper(s)))
@ -203,7 +203,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, ...) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
next
}
# exact abbreviation
abbr_found <- unlist(lapply(antibiotics$abbreviations,
function(a) x[i] %in% toupper(a)))
@ -212,7 +212,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, ...) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
next
}
# allow characters that resemble others, but only continue when having more than 3 characters
if (nchar(x[i]) <= 3) {
x_unknown <- c(x_unknown, x_bak[x[i] == x_bak_clean][1])
@ -242,7 +242,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, ...) {
x_spelling <- gsub("(O|0)", "(O|0)+", x_spelling)
x_spelling <- gsub("++", "+", x_spelling, fixed = TRUE)
}
# try if name starts with it
found <- antibiotics[which(antibiotics$name %like% paste0("^", x_spelling)), ]$ab
if (length(found) > 0) {
@ -255,7 +255,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, ...) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
next
}
# and try if any synonym starts with it
synonym_found <- unlist(lapply(antibiotics$synonyms,
function(s) any(s %like% paste0("^", x_spelling))))
@ -264,7 +264,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, ...) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
next
}
# INITIAL SEARCH - More uncertain results ----
if (initial_search == TRUE) {
@ -351,7 +351,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, ...) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
next
}
# first 5 except for cephalosporins, then first 7 (those cephalosporins all start quite the same!)
found <- suppressWarnings(as.ab(substr(x[i], 1, 5), initial_search = FALSE))
if (!is.na(found) && !ab_group(found, initial_search = FALSE) %like% "cephalosporins") {
@ -375,7 +375,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, ...) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
next
}
# make all vowels facultative
search_str <- gsub("([AEIOUY])", "\\1*", x[i])
found <- suppressWarnings(as.ab(search_str, initial_search = FALSE, already_regex = TRUE))
@ -429,7 +429,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, ...) {
if (initial_search == TRUE) {
close(progress)
}
# take failed ATC codes apart from rest
x_unknown_ATCs <- x_unknown[x_unknown %like% "[A-Z][0-9][0-9][A-Z][A-Z][0-9][0-9]"]
x_unknown <- x_unknown[!x_unknown %in% x_unknown_ATCs]
@ -446,15 +446,15 @@ as.ab <- function(x, flag_multiple_results = TRUE, ...) {
".",
call. = FALSE)
}
x_result <- data.frame(x = x_bak_clean, stringsAsFactors = FALSE) %>%
left_join(data.frame(x = x, x_new = x_new, stringsAsFactors = FALSE), by = "x") %>%
pull(x_new)
if (length(x_result) == 0) {
x_result <- NA_character_
}
structure(.Data = x_result,
class = c("ab", "character"))
}