mirror of
https://github.com/msberends/AMR.git
synced 2024-12-27 08:06:13 +01:00
improved algorithm of as.ab()
This commit is contained in:
parent
1bce7ed3d3
commit
b68f47d985
@ -1,6 +1,6 @@
|
|||||||
Package: AMR
|
Package: AMR
|
||||||
Version: 2.1.1.9028
|
Version: 2.1.1.9029
|
||||||
Date: 2024-04-24
|
Date: 2024-05-12
|
||||||
Title: Antimicrobial Resistance Data Analysis
|
Title: Antimicrobial Resistance Data Analysis
|
||||||
Description: Functions to simplify and standardise antimicrobial resistance (AMR)
|
Description: Functions to simplify and standardise antimicrobial resistance (AMR)
|
||||||
data analysis and to work with microbial and antimicrobial properties by
|
data analysis and to work with microbial and antimicrobial properties by
|
||||||
|
3
NEWS.md
3
NEWS.md
@ -1,4 +1,4 @@
|
|||||||
# AMR 2.1.1.9028
|
# AMR 2.1.1.9029
|
||||||
|
|
||||||
*(this beta version will eventually become v3.0. We're happy to reach a new major milestone soon, which will be all about the new One Health support!)*
|
*(this beta version will eventually become v3.0. We're happy to reach a new major milestone soon, which will be all about the new One Health support!)*
|
||||||
|
|
||||||
@ -31,6 +31,7 @@ This package now supports not only tools for AMR data analysis in clinical setti
|
|||||||
* Updated all antibiotic DDDs from WHOCC
|
* Updated all antibiotic DDDs from WHOCC
|
||||||
* Fix for using a manual value for `mo_transform` in `antibiogram()`
|
* Fix for using a manual value for `mo_transform` in `antibiogram()`
|
||||||
* Fix for mapping 'high level' antibiotics in `as.ab()` (amphotericin B-high, gentamicin-high, kanamycin-high, streptomycin-high, tobramycin-high)
|
* Fix for mapping 'high level' antibiotics in `as.ab()` (amphotericin B-high, gentamicin-high, kanamycin-high, streptomycin-high, tobramycin-high)
|
||||||
|
* Improved overall algorithm of `as.ab()` for better performance and accuracy
|
||||||
|
|
||||||
## Other
|
## Other
|
||||||
* Added Jordan Stull, Matthew Saab, and Javier Sanchez as contributors, to thank them for their valuable input
|
* Added Jordan Stull, Matthew Saab, and Javier Sanchez as contributors, to thank them for their valuable input
|
||||||
|
55
R/ab.R
55
R/ab.R
@ -107,7 +107,10 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
|
|||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
initial_search <- is.null(list(...)$initial_search)
|
loop_time <- list(...)$loop_time
|
||||||
|
if (is.null(loop_time)) {
|
||||||
|
loop_time <- 1
|
||||||
|
}
|
||||||
already_regex <- isTRUE(list(...)$already_regex)
|
already_regex <- isTRUE(list(...)$already_regex)
|
||||||
fast_mode <- isTRUE(list(...)$fast_mode)
|
fast_mode <- isTRUE(list(...)$fast_mode)
|
||||||
|
|
||||||
@ -131,8 +134,8 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
|
|||||||
x_unknown_ATCs <- character(0)
|
x_unknown_ATCs <- character(0)
|
||||||
|
|
||||||
note_if_more_than_one_found <- function(found, index, from_text) {
|
note_if_more_than_one_found <- function(found, index, from_text) {
|
||||||
if (isTRUE(initial_search) && isTRUE(length(from_text) > 1)) {
|
if (loop_time == 1 && isTRUE(length(from_text) > 1)) {
|
||||||
abnames <- ab_name(from_text, tolower = TRUE, initial_search = FALSE)
|
abnames <- ab_name(from_text, tolower = TRUE, loop_time = loop_time + 1)
|
||||||
if (ab_name(found[1L], language = NULL) %like% "(clavulanic acid|(avi|tazo|mono|vabor)bactam)") {
|
if (ab_name(found[1L], language = NULL) %like% "(clavulanic acid|(avi|tazo|mono|vabor)bactam)") {
|
||||||
abnames <- abnames[!abnames %in% c("clavulanic acid", "avibactam", "tazobactam", "vaborbactam", "monobactam")]
|
abnames <- abnames[!abnames %in% c("clavulanic acid", "avibactam", "tazobactam", "vaborbactam", "monobactam")]
|
||||||
}
|
}
|
||||||
@ -174,13 +177,13 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
|
|||||||
x_new[is.na(x)] <- NA
|
x_new[is.na(x)] <- NA
|
||||||
already_known[is.na(x)] <- FALSE
|
already_known[is.na(x)] <- FALSE
|
||||||
|
|
||||||
if (isTRUE(initial_search) && sum(already_known) < length(x)) {
|
if (loop_time == 1 && sum(already_known) < length(x)) {
|
||||||
progress <- progress_ticker(n = sum(!already_known), n_min = 25, print = info) # start if n >= 25
|
progress <- progress_ticker(n = sum(!already_known), n_min = 25, print = info) # start if n >= 25
|
||||||
on.exit(close(progress))
|
on.exit(close(progress))
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i in which(!already_known)) {
|
for (i in which(!already_known)) {
|
||||||
if (isTRUE(initial_search)) {
|
if (loop_time == 1) {
|
||||||
progress$tick()
|
progress$tick()
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -202,7 +205,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (fast_mode == FALSE && flag_multiple_results == TRUE && x[i] %like% "[ ]") {
|
if (fast_mode == FALSE && flag_multiple_results == TRUE && x[i] %like% "[ ]") {
|
||||||
from_text <- tryCatch(suppressWarnings(ab_from_text(x[i], initial_search = FALSE, translate_ab = FALSE)[[1]]),
|
from_text <- tryCatch(suppressWarnings(ab_from_text(x[i], loop_time = loop_time + 1, translate_ab = FALSE)[[1]]),
|
||||||
error = function(e) character(0)
|
error = function(e) character(0)
|
||||||
)
|
)
|
||||||
} else {
|
} else {
|
||||||
@ -315,12 +318,12 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
|
|||||||
|
|
||||||
# INITIAL SEARCH - More uncertain results ----
|
# INITIAL SEARCH - More uncertain results ----
|
||||||
|
|
||||||
if (isTRUE(initial_search) && fast_mode == FALSE) {
|
if (loop_time <= 2 && fast_mode == FALSE) {
|
||||||
# only run on first try
|
# only run on first and second try
|
||||||
|
|
||||||
# try by removing all spaces
|
# try by removing all spaces
|
||||||
if (x[i] %like% " ") {
|
if (x[i] %like% " ") {
|
||||||
found <- suppressWarnings(as.ab(gsub(" +", "", x[i], perl = TRUE), initial_search = FALSE))
|
found <- suppressWarnings(as.ab(gsub(" +", "", x[i], perl = TRUE), loop_time = loop_time + 1))
|
||||||
if (length(found) > 0 && !is.na(found)) {
|
if (length(found) > 0 && !is.na(found)) {
|
||||||
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
|
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
|
||||||
next
|
next
|
||||||
@ -329,7 +332,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
|
|||||||
|
|
||||||
# try by removing all spaces and numbers
|
# try by removing all spaces and numbers
|
||||||
if (x[i] %like% " " || x[i] %like% "[0-9]") {
|
if (x[i] %like% " " || x[i] %like% "[0-9]") {
|
||||||
found <- suppressWarnings(as.ab(gsub("[ 0-9]", "", x[i], perl = TRUE), initial_search = FALSE))
|
found <- suppressWarnings(as.ab(gsub("[ 0-9]", "", x[i], perl = TRUE), loop_time = loop_time + 1))
|
||||||
if (length(found) > 0 && !is.na(found)) {
|
if (length(found) > 0 && !is.na(found)) {
|
||||||
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
|
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
|
||||||
next
|
next
|
||||||
@ -355,7 +358,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
|
|||||||
)[[1]],
|
)[[1]],
|
||||||
collapse = "/"
|
collapse = "/"
|
||||||
)
|
)
|
||||||
x_translated_guess <- suppressWarnings(as.ab(x_translated, initial_search = FALSE))
|
x_translated_guess <- suppressWarnings(as.ab(x_translated, loop_time = loop_time + 1))
|
||||||
if (!is.na(x_translated_guess)) {
|
if (!is.na(x_translated_guess)) {
|
||||||
x_new[i] <- x_translated_guess
|
x_new[i] <- x_translated_guess
|
||||||
next
|
next
|
||||||
@ -367,7 +370,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
|
|||||||
strsplit(x_translated, "[^A-Z0-9 ]"),
|
strsplit(x_translated, "[^A-Z0-9 ]"),
|
||||||
function(y) {
|
function(y) {
|
||||||
for (i in seq_len(length(y))) {
|
for (i in seq_len(length(y))) {
|
||||||
y_name <- suppressWarnings(ab_name(y[i], language = NULL, initial_search = FALSE))
|
y_name <- suppressWarnings(ab_name(y[i], language = NULL, loop_time = loop_time + 1))
|
||||||
y[i] <- ifelse(!is.na(y_name),
|
y[i] <- ifelse(!is.na(y_name),
|
||||||
y_name,
|
y_name,
|
||||||
y[i]
|
y[i]
|
||||||
@ -378,7 +381,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
|
|||||||
)[[1]],
|
)[[1]],
|
||||||
collapse = "/"
|
collapse = "/"
|
||||||
)
|
)
|
||||||
x_translated_guess <- suppressWarnings(as.ab(x_translated, initial_search = FALSE))
|
x_translated_guess <- suppressWarnings(as.ab(x_translated, loop_time = loop_time + 1))
|
||||||
if (!is.na(x_translated_guess)) {
|
if (!is.na(x_translated_guess)) {
|
||||||
x_new[i] <- x_translated_guess
|
x_new[i] <- x_translated_guess
|
||||||
next
|
next
|
||||||
@ -386,7 +389,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
|
|||||||
|
|
||||||
# try by removing all trailing capitals
|
# try by removing all trailing capitals
|
||||||
if (x[i] %like_case% "[a-z]+[A-Z]+$") {
|
if (x[i] %like_case% "[a-z]+[A-Z]+$") {
|
||||||
found <- suppressWarnings(as.ab(gsub("[A-Z]+$", "", x[i], perl = TRUE), initial_search = FALSE))
|
found <- suppressWarnings(as.ab(gsub("[A-Z]+$", "", x[i], perl = TRUE), loop_time = loop_time + 1))
|
||||||
if (!is.na(found)) {
|
if (!is.na(found)) {
|
||||||
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
|
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
|
||||||
next
|
next
|
||||||
@ -394,7 +397,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
# keep only letters
|
# keep only letters
|
||||||
found <- suppressWarnings(as.ab(gsub("[^A-Z]", "", x[i], perl = TRUE), initial_search = FALSE))
|
found <- suppressWarnings(as.ab(gsub("[^A-Z]", "", x[i], perl = TRUE), loop_time = loop_time + 1))
|
||||||
if (!is.na(found)) {
|
if (!is.na(found)) {
|
||||||
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
|
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
|
||||||
next
|
next
|
||||||
@ -405,7 +408,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
|
|||||||
if (flag_multiple_results == TRUE) {
|
if (flag_multiple_results == TRUE) {
|
||||||
found <- from_text[1L]
|
found <- from_text[1L]
|
||||||
} else {
|
} else {
|
||||||
found <- tryCatch(suppressWarnings(ab_from_text(x[i], initial_search = FALSE, translate_ab = FALSE)[[1]][1L]),
|
found <- tryCatch(suppressWarnings(ab_from_text(x[i], loop_time = loop_time + 1, translate_ab = FALSE)[[1]][1L]),
|
||||||
error = function(e) NA_character_
|
error = function(e) NA_character_
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
@ -415,12 +418,12 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
# first 5 except for cephalosporins, then first 7 (those cephalosporins all start quite the same!)
|
# first 5 except for cephalosporins, then first 7 (those cephalosporins all start quite the same!)
|
||||||
found <- suppressWarnings(as.ab(substr(x[i], 1, 5), initial_search = FALSE))
|
found <- suppressWarnings(as.ab(substr(x[i], 1, 5), loop_time = loop_time + 1))
|
||||||
if (!is.na(found) && ab_group(found, initial_search = FALSE) %unlike% "cephalosporins") {
|
if (!is.na(found) && ab_group(found, loop_time = loop_time + 1) %unlike% "cephalosporins") {
|
||||||
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
|
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
|
||||||
next
|
next
|
||||||
}
|
}
|
||||||
found <- suppressWarnings(as.ab(substr(x[i], 1, 7), initial_search = FALSE))
|
found <- suppressWarnings(as.ab(substr(x[i], 1, 7), loop_time = loop_time + 1))
|
||||||
if (!is.na(found)) {
|
if (!is.na(found)) {
|
||||||
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
|
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
|
||||||
next
|
next
|
||||||
@ -428,7 +431,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
|
|||||||
|
|
||||||
# make all consonants facultative
|
# make all consonants facultative
|
||||||
search_str <- gsub("([BCDFGHJKLMNPQRSTVWXZ])", "\\1*", x[i], perl = TRUE)
|
search_str <- gsub("([BCDFGHJKLMNPQRSTVWXZ])", "\\1*", x[i], perl = TRUE)
|
||||||
found <- suppressWarnings(as.ab(search_str, initial_search = FALSE, already_regex = TRUE))
|
found <- suppressWarnings(as.ab(search_str, loop_time = loop_time + 1, already_regex = TRUE))
|
||||||
# keep at least 4 normal characters
|
# keep at least 4 normal characters
|
||||||
if (nchar(gsub(".\\*", "", search_str, perl = TRUE)) < 4) {
|
if (nchar(gsub(".\\*", "", search_str, perl = TRUE)) < 4) {
|
||||||
found <- NA
|
found <- NA
|
||||||
@ -440,7 +443,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
|
|||||||
|
|
||||||
# make all vowels facultative
|
# make all vowels facultative
|
||||||
search_str <- gsub("([AEIOUY])", "\\1*", x[i], perl = TRUE)
|
search_str <- gsub("([AEIOUY])", "\\1*", x[i], perl = TRUE)
|
||||||
found <- suppressWarnings(as.ab(search_str, initial_search = FALSE, already_regex = TRUE))
|
found <- suppressWarnings(as.ab(search_str, loop_time = loop_time + 1, already_regex = TRUE))
|
||||||
# keep at least 5 normal characters
|
# keep at least 5 normal characters
|
||||||
if (nchar(gsub(".\\*", "", search_str, perl = TRUE)) < 5) {
|
if (nchar(gsub(".\\*", "", search_str, perl = TRUE)) < 5) {
|
||||||
found <- NA
|
found <- NA
|
||||||
@ -456,7 +459,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
|
|||||||
x_spelling <- gsub("I+", "[AEIOU]+", x_spelling, fixed = TRUE)
|
x_spelling <- gsub("I+", "[AEIOU]+", x_spelling, fixed = TRUE)
|
||||||
x_spelling <- gsub("O+", "[AEIOU]+", x_spelling, fixed = TRUE)
|
x_spelling <- gsub("O+", "[AEIOU]+", x_spelling, fixed = TRUE)
|
||||||
x_spelling <- gsub("U+", "[AEIOU]+", x_spelling, fixed = TRUE)
|
x_spelling <- gsub("U+", "[AEIOU]+", x_spelling, fixed = TRUE)
|
||||||
found <- suppressWarnings(as.ab(x_spelling, initial_search = FALSE, already_regex = TRUE))
|
found <- suppressWarnings(as.ab(x_spelling, loop_time = loop_time + 1, already_regex = TRUE))
|
||||||
if (!is.na(found)) {
|
if (!is.na(found)) {
|
||||||
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
|
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
|
||||||
next
|
next
|
||||||
@ -473,7 +476,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
|
|||||||
# ending part:
|
# ending part:
|
||||||
substr(x[i], j + 2, nchar(x[i]))
|
substr(x[i], j + 2, nchar(x[i]))
|
||||||
)
|
)
|
||||||
found <- suppressWarnings(as.ab(x_switched, initial_search = FALSE))
|
found <- suppressWarnings(as.ab(x_switched, loop_time = loop_time + 1))
|
||||||
if (!is.na(found)) {
|
if (!is.na(found)) {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
@ -482,18 +485,18 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
|
|||||||
x_new[i] <- found[1L]
|
x_new[i] <- found[1L]
|
||||||
next
|
next
|
||||||
}
|
}
|
||||||
} # end of initial_search = TRUE
|
} # end of loop_time <= 2
|
||||||
|
|
||||||
# not found
|
# not found
|
||||||
x_unknown <- c(x_unknown, x_bak[x[i] == x_bak_clean][1])
|
x_unknown <- c(x_unknown, x_bak[x[i] == x_bak_clean][1])
|
||||||
}
|
}
|
||||||
|
|
||||||
if (isTRUE(initial_search) && sum(already_known) < length(x)) {
|
if (loop_time == 1 && sum(already_known) < length(x)) {
|
||||||
close(progress)
|
close(progress)
|
||||||
}
|
}
|
||||||
|
|
||||||
# save to package env to save time for next time
|
# save to package env to save time for next time
|
||||||
if (isTRUE(initial_search)) {
|
if (loop_time == 1) {
|
||||||
AMR_env$ab_previously_coerced <- AMR_env$ab_previously_coerced[which(!AMR_env$ab_previously_coerced$x %in% x), , drop = FALSE]
|
AMR_env$ab_previously_coerced <- AMR_env$ab_previously_coerced[which(!AMR_env$ab_previously_coerced$x %in% x), , drop = FALSE]
|
||||||
AMR_env$ab_previously_coerced <- unique(rbind_AMR(
|
AMR_env$ab_previously_coerced <- unique(rbind_AMR(
|
||||||
AMR_env$ab_previously_coerced,
|
AMR_env$ab_previously_coerced,
|
||||||
|
Loading…
Reference in New Issue
Block a user