improved algorithm of `as.ab()`

This commit is contained in:
dr. M.S. (Matthijs) Berends 2024-05-12 16:24:44 +02:00
parent 1bce7ed3d3
commit b68f47d985
3 changed files with 33 additions and 29 deletions

View File

@ -1,6 +1,6 @@
Package: AMR
Version: 2.1.1.9028
Date: 2024-04-24
Version: 2.1.1.9029
Date: 2024-05-12
Title: Antimicrobial Resistance Data Analysis
Description: Functions to simplify and standardise antimicrobial resistance (AMR)
data analysis and to work with microbial and antimicrobial properties by

View File

@ -1,4 +1,4 @@
# AMR 2.1.1.9028
# AMR 2.1.1.9029
*(this beta version will eventually become v3.0. We're happy to reach a new major milestone soon, which will be all about the new One Health support!)*
@ -31,6 +31,7 @@ This package now supports not only tools for AMR data analysis in clinical setti
* Updated all antibiotic DDDs from WHOCC
* Fix for using a manual value for `mo_transform` in `antibiogram()`
* Fix for mapping 'high level' antibiotics in `as.ab()` (amphotericin B-high, gentamicin-high, kanamycin-high, streptomycin-high, tobramycin-high)
* Improved overall algorithm of `as.ab()` for better performance and accuracy
## Other
* Added Jordan Stull, Matthew Saab, and Javier Sanchez as contributors, to thank them for their valuable input

55
R/ab.R
View File

@ -107,7 +107,10 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
))
}
initial_search <- is.null(list(...)$initial_search)
loop_time <- list(...)$loop_time
if (is.null(loop_time)) {
loop_time <- 1
}
already_regex <- isTRUE(list(...)$already_regex)
fast_mode <- isTRUE(list(...)$fast_mode)
@ -131,8 +134,8 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
x_unknown_ATCs <- character(0)
note_if_more_than_one_found <- function(found, index, from_text) {
if (isTRUE(initial_search) && isTRUE(length(from_text) > 1)) {
abnames <- ab_name(from_text, tolower = TRUE, initial_search = FALSE)
if (loop_time == 1 && isTRUE(length(from_text) > 1)) {
abnames <- ab_name(from_text, tolower = TRUE, loop_time = loop_time + 1)
if (ab_name(found[1L], language = NULL) %like% "(clavulanic acid|(avi|tazo|mono|vabor)bactam)") {
abnames <- abnames[!abnames %in% c("clavulanic acid", "avibactam", "tazobactam", "vaborbactam", "monobactam")]
}
@ -174,13 +177,13 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
x_new[is.na(x)] <- NA
already_known[is.na(x)] <- FALSE
if (isTRUE(initial_search) && sum(already_known) < length(x)) {
if (loop_time == 1 && sum(already_known) < length(x)) {
progress <- progress_ticker(n = sum(!already_known), n_min = 25, print = info) # start if n >= 25
on.exit(close(progress))
}
for (i in which(!already_known)) {
if (isTRUE(initial_search)) {
if (loop_time == 1) {
progress$tick()
}
@ -202,7 +205,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
}
if (fast_mode == FALSE && flag_multiple_results == TRUE && x[i] %like% "[ ]") {
from_text <- tryCatch(suppressWarnings(ab_from_text(x[i], initial_search = FALSE, translate_ab = FALSE)[[1]]),
from_text <- tryCatch(suppressWarnings(ab_from_text(x[i], loop_time = loop_time + 1, translate_ab = FALSE)[[1]]),
error = function(e) character(0)
)
} else {
@ -315,12 +318,12 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
# INITIAL SEARCH - More uncertain results ----
if (isTRUE(initial_search) && fast_mode == FALSE) {
# only run on first try
if (loop_time <= 2 && fast_mode == FALSE) {
# only run on first and second try
# try by removing all spaces
if (x[i] %like% " ") {
found <- suppressWarnings(as.ab(gsub(" +", "", x[i], perl = TRUE), initial_search = FALSE))
found <- suppressWarnings(as.ab(gsub(" +", "", x[i], perl = TRUE), loop_time = loop_time + 1))
if (length(found) > 0 && !is.na(found)) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
next
@ -329,7 +332,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
# try by removing all spaces and numbers
if (x[i] %like% " " || x[i] %like% "[0-9]") {
found <- suppressWarnings(as.ab(gsub("[ 0-9]", "", x[i], perl = TRUE), initial_search = FALSE))
found <- suppressWarnings(as.ab(gsub("[ 0-9]", "", x[i], perl = TRUE), loop_time = loop_time + 1))
if (length(found) > 0 && !is.na(found)) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
next
@ -355,7 +358,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
)[[1]],
collapse = "/"
)
x_translated_guess <- suppressWarnings(as.ab(x_translated, initial_search = FALSE))
x_translated_guess <- suppressWarnings(as.ab(x_translated, loop_time = loop_time + 1))
if (!is.na(x_translated_guess)) {
x_new[i] <- x_translated_guess
next
@ -367,7 +370,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
strsplit(x_translated, "[^A-Z0-9 ]"),
function(y) {
for (i in seq_len(length(y))) {
y_name <- suppressWarnings(ab_name(y[i], language = NULL, initial_search = FALSE))
y_name <- suppressWarnings(ab_name(y[i], language = NULL, loop_time = loop_time + 1))
y[i] <- ifelse(!is.na(y_name),
y_name,
y[i]
@ -378,7 +381,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
)[[1]],
collapse = "/"
)
x_translated_guess <- suppressWarnings(as.ab(x_translated, initial_search = FALSE))
x_translated_guess <- suppressWarnings(as.ab(x_translated, loop_time = loop_time + 1))
if (!is.na(x_translated_guess)) {
x_new[i] <- x_translated_guess
next
@ -386,7 +389,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
# try by removing all trailing capitals
if (x[i] %like_case% "[a-z]+[A-Z]+$") {
found <- suppressWarnings(as.ab(gsub("[A-Z]+$", "", x[i], perl = TRUE), initial_search = FALSE))
found <- suppressWarnings(as.ab(gsub("[A-Z]+$", "", x[i], perl = TRUE), loop_time = loop_time + 1))
if (!is.na(found)) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
next
@ -394,7 +397,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
}
# keep only letters
found <- suppressWarnings(as.ab(gsub("[^A-Z]", "", x[i], perl = TRUE), initial_search = FALSE))
found <- suppressWarnings(as.ab(gsub("[^A-Z]", "", x[i], perl = TRUE), loop_time = loop_time + 1))
if (!is.na(found)) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
next
@ -405,7 +408,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
if (flag_multiple_results == TRUE) {
found <- from_text[1L]
} else {
found <- tryCatch(suppressWarnings(ab_from_text(x[i], initial_search = FALSE, translate_ab = FALSE)[[1]][1L]),
found <- tryCatch(suppressWarnings(ab_from_text(x[i], loop_time = loop_time + 1, translate_ab = FALSE)[[1]][1L]),
error = function(e) NA_character_
)
}
@ -415,12 +418,12 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
}
# first 5 except for cephalosporins, then first 7 (those cephalosporins all start quite the same!)
found <- suppressWarnings(as.ab(substr(x[i], 1, 5), initial_search = FALSE))
if (!is.na(found) && ab_group(found, initial_search = FALSE) %unlike% "cephalosporins") {
found <- suppressWarnings(as.ab(substr(x[i], 1, 5), loop_time = loop_time + 1))
if (!is.na(found) && ab_group(found, loop_time = loop_time + 1) %unlike% "cephalosporins") {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
next
}
found <- suppressWarnings(as.ab(substr(x[i], 1, 7), initial_search = FALSE))
found <- suppressWarnings(as.ab(substr(x[i], 1, 7), loop_time = loop_time + 1))
if (!is.na(found)) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
next
@ -428,7 +431,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
# make all consonants facultative
search_str <- gsub("([BCDFGHJKLMNPQRSTVWXZ])", "\\1*", x[i], perl = TRUE)
found <- suppressWarnings(as.ab(search_str, initial_search = FALSE, already_regex = TRUE))
found <- suppressWarnings(as.ab(search_str, loop_time = loop_time + 1, already_regex = TRUE))
# keep at least 4 normal characters
if (nchar(gsub(".\\*", "", search_str, perl = TRUE)) < 4) {
found <- NA
@ -440,7 +443,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
# make all vowels facultative
search_str <- gsub("([AEIOUY])", "\\1*", x[i], perl = TRUE)
found <- suppressWarnings(as.ab(search_str, initial_search = FALSE, already_regex = TRUE))
found <- suppressWarnings(as.ab(search_str, loop_time = loop_time + 1, already_regex = TRUE))
# keep at least 5 normal characters
if (nchar(gsub(".\\*", "", search_str, perl = TRUE)) < 5) {
found <- NA
@ -456,7 +459,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
x_spelling <- gsub("I+", "[AEIOU]+", x_spelling, fixed = TRUE)
x_spelling <- gsub("O+", "[AEIOU]+", x_spelling, fixed = TRUE)
x_spelling <- gsub("U+", "[AEIOU]+", x_spelling, fixed = TRUE)
found <- suppressWarnings(as.ab(x_spelling, initial_search = FALSE, already_regex = TRUE))
found <- suppressWarnings(as.ab(x_spelling, loop_time = loop_time + 1, already_regex = TRUE))
if (!is.na(found)) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
next
@ -473,7 +476,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
# ending part:
substr(x[i], j + 2, nchar(x[i]))
)
found <- suppressWarnings(as.ab(x_switched, initial_search = FALSE))
found <- suppressWarnings(as.ab(x_switched, loop_time = loop_time + 1))
if (!is.na(found)) {
break
}
@ -482,18 +485,18 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
x_new[i] <- found[1L]
next
}
} # end of initial_search = TRUE
} # end of loop_time <= 2
# not found
x_unknown <- c(x_unknown, x_bak[x[i] == x_bak_clean][1])
}
if (isTRUE(initial_search) && sum(already_known) < length(x)) {
if (loop_time == 1 && sum(already_known) < length(x)) {
close(progress)
}
# save to package env to save time for next time
if (isTRUE(initial_search)) {
if (loop_time == 1) {
AMR_env$ab_previously_coerced <- AMR_env$ab_previously_coerced[which(!AMR_env$ab_previously_coerced$x %in% x), , drop = FALSE]
AMR_env$ab_previously_coerced <- unique(rbind_AMR(
AMR_env$ab_previously_coerced,