1
0
mirror of https://github.com/msberends/AMR.git synced 2025-04-19 08:33:49 +02:00

(v2.1.1.9159) new approach as.ab()

This commit is contained in:
dr. M.S. (Matthijs) Berends 2025-02-26 19:23:54 +01:00
parent 122bca0f95
commit 0c3ea4b538
No known key found for this signature in database
18 changed files with 130 additions and 284 deletions

View File

@ -61,7 +61,7 @@ jobs:
R_REMOTES_NO_ERRORS_FROM_WARNINGS: true R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v4
- uses: r-lib/actions/setup-r@v2 - uses: r-lib/actions/setup-r@v2
with: with:

View File

@ -42,7 +42,7 @@ jobs:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
CODECOV_TOKEN: ${{secrets.CODECOV_TOKEN}} CODECOV_TOKEN: ${{secrets.CODECOV_TOKEN}}
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v4
- uses: r-lib/actions/setup-pandoc@v2 - uses: r-lib/actions/setup-pandoc@v2

View File

@ -41,7 +41,7 @@ jobs:
env: env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v4
- uses: r-lib/actions/setup-pandoc@v2 - uses: r-lib/actions/setup-pandoc@v2
@ -59,13 +59,15 @@ jobs:
run: | run: |
# old: lintr::lint_package(linters = lintr::with_defaults(line_length_linter = NULL, trailing_whitespace_linter = NULL, object_name_linter = NULL, cyclocomp_linter = NULL, object_length_linter = lintr::object_length_linter(length = 50L)), exclusions = list("R/aa_helper_pm_functions.R")) # old: lintr::lint_package(linters = lintr::with_defaults(line_length_linter = NULL, trailing_whitespace_linter = NULL, object_name_linter = NULL, cyclocomp_linter = NULL, object_length_linter = lintr::object_length_linter(length = 50L)), exclusions = list("R/aa_helper_pm_functions.R"))
# now get ALL linters, not just default ones # now get ALL linters, not just default ones
linters <- ls(envir = asNamespace("lintr"), pattern = "_linter$") linters <- getNamespaceExports(asNamespace("lintr"))
linters <- sort(linters[grepl("_linter$", linters)])
# lose deprecated # lose deprecated
linters <- linters[!grepl("^(closed_curly|open_curly|paren_brace|semicolon_terminator)_linter$", linters)] linters <- linters[!grepl("^(closed_curly|open_curly|paren_brace|semicolon_terminator|consecutive_stopifnot|no_tab|single_quotes|unnecessary_nested_if|unneeded_concatenation)_linter$", linters)]
# and the ones we find unnnecessary # and the ones we find unnnecessary
linters <- linters[!grepl("^(extraction_operator|implicit_integer|line_length|object_name|nonportable_path|is)_linter$", linters)] linters <- linters[!grepl("^(extraction_operator|implicit_integer|line_length|object_name|nonportable_path|is)_linter$", linters)]
# put the functions in a list # put the functions in a list
linters <- lapply(linters, function(l) eval(parse(text = paste0("lintr::", l, "()")), envir = asNamespace("lintr"))) linters_list <- lapply(linters, function(l) eval(parse(text = paste0("lintr::", l, "()")), envir = asNamespace("lintr")))
names(linters_list) <- linters
# run them all! # run them all!
lintr::lint_package(linters = linters, exclusions = list("R/aa_helper_pm_functions.R")) lintr::lint_package(linters = linters_list, exclusions = list("R/aa_helper_pm_functions.R"))
shell: Rscript {0} shell: Rscript {0}

View File

@ -40,7 +40,7 @@ jobs:
steps: steps:
- name: Checkout code - name: Checkout code
uses: actions/checkout@v3 uses: actions/checkout@v4
- name: Set up Python - name: Set up Python
uses: actions/setup-python@v4 uses: actions/setup-python@v4

View File

@ -43,7 +43,7 @@ jobs:
continue-on-error: true continue-on-error: true
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v4
with: with:
# this is to keep timestamps, the default fetch-depth: 1 gets the timestamps of the moment of cloning # this is to keep timestamps, the default fetch-depth: 1 gets the timestamps of the moment of cloning
# we need this for the download page on our website - dates must be of the files, not of the latest git push # we need this for the download page on our website - dates must be of the files, not of the latest git push

View File

@ -1,5 +1,5 @@
Package: AMR Package: AMR
Version: 2.1.1.9158 Version: 2.1.1.9159
Date: 2025-02-26 Date: 2025-02-26
Title: Antimicrobial Resistance Data Analysis Title: Antimicrobial Resistance Data Analysis
Description: Functions to simplify and standardise antimicrobial resistance (AMR) Description: Functions to simplify and standardise antimicrobial resistance (AMR)

View File

@ -1,4 +1,4 @@
# AMR 2.1.1.9158 # AMR 2.1.1.9159
*(this beta version will eventually become v3.0. We're happy to reach a new major milestone soon, which will be all about the new One Health support! Install this beta using [the instructions here](https://msberends.github.io/AMR/#latest-development-version).)* *(this beta version will eventually become v3.0. We're happy to reach a new major milestone soon, which will be all about the new One Health support! Install this beta using [the instructions here](https://msberends.github.io/AMR/#latest-development-version).)*

View File

@ -1,6 +1,6 @@
Metadata-Version: 2.2 Metadata-Version: 2.2
Name: AMR Name: AMR
Version: 2.1.1.9158 Version: 2.1.1.9159
Summary: A Python wrapper for the AMR R package Summary: A Python wrapper for the AMR R package
Home-page: https://github.com/msberends/AMR Home-page: https://github.com/msberends/AMR
Author: Matthijs Berends Author: Matthijs Berends

Binary file not shown.

Binary file not shown.

View File

@ -2,7 +2,7 @@ from setuptools import setup, find_packages
setup( setup(
name='AMR', name='AMR',
version='2.1.1.9158', version='2.1.1.9159',
packages=find_packages(), packages=find_packages(),
install_requires=[ install_requires=[
'rpy2', 'rpy2',

348
R/ab.R
View File

@ -32,6 +32,7 @@
#' Use this function to determine the antibiotic drug code of one or more antibiotics. The data set [antibiotics] will be searched for abbreviations, official names and synonyms (brand names). #' Use this function to determine the antibiotic drug code of one or more antibiotics. The data set [antibiotics] will be searched for abbreviations, official names and synonyms (brand names).
#' @param x a [character] vector to determine to antibiotic ID #' @param x a [character] vector to determine to antibiotic ID
#' @param flag_multiple_results a [logical] to indicate whether a note should be printed to the console that probably more than one antibiotic drug code or name can be retrieved from a single input value. #' @param flag_multiple_results a [logical] to indicate whether a note should be printed to the console that probably more than one antibiotic drug code or name can be retrieved from a single input value.
#' @param language language to coerce input values from any of the `r length(LANGUAGES_SUPPORTED)` supported languages - default to the system language if supported (see [get_AMR_locale()])
#' @param info a [logical] to indicate whether a progress bar should be printed - the default is `TRUE` only in interactive mode #' @param info a [logical] to indicate whether a progress bar should be printed - the default is `TRUE` only in interactive mode
#' @param ... arguments passed on to internal functions #' @param ... arguments passed on to internal functions
#' @rdname as.ab #' @rdname as.ab
@ -67,7 +68,6 @@
#' as.ab("J 01 FA 01") #' as.ab("J 01 FA 01")
#' as.ab("Erythromycin") #' as.ab("Erythromycin")
#' as.ab("eryt") #' as.ab("eryt")
#' as.ab(" eryt 123")
#' as.ab("ERYT") #' as.ab("ERYT")
#' as.ab("ERY") #' as.ab("ERY")
#' as.ab("eritromicine") # spelled wrong, yet works #' as.ab("eritromicine") # spelled wrong, yet works
@ -92,29 +92,30 @@
#' set_ab_names(where(is.sir), property = "atc") #' set_ab_names(where(is.sir), property = "atc")
#' } #' }
#' } #' }
as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) { as.ab <- function(x, flag_multiple_results = TRUE, language = get_AMR_locale(), info = interactive(), ...) {
meet_criteria(x, allow_class = c("character", "numeric", "integer", "factor"), allow_NA = TRUE) meet_criteria(x, allow_class = c("character", "numeric", "integer", "factor"), allow_NA = TRUE)
meet_criteria(flag_multiple_results, allow_class = "logical", has_length = 1) meet_criteria(flag_multiple_results, allow_class = "logical", has_length = 1)
language <- validate_language(language)
meet_criteria(info, allow_class = "logical", has_length = 1) meet_criteria(info, allow_class = "logical", has_length = 1)
if (is.ab(x) || all(x %in% c(AMR_env$AB_lookup$ab, NA))) { if (is.ab(x) || all(x %in% c(AMR_env$AB_lookup$ab, NA))) {
# all valid AB codes, but not yet right class or might have additional attributes as AMR selector # all valid AB codes, but not yet right class or might have additional attributes as AMR selector
attributes(x) <- NULL attributes(x) <- NULL
return(set_clean_class(x, return(set_clean_class(x,
new_class = c("ab", "character") new_class = c("ab", "character")
)) ))
} }
loop_time <- list(...)$loop_time loop_time <- list(...)$loop_time
if (is.null(loop_time)) { if (is.null(loop_time)) {
loop_time <- 1 loop_time <- 1
} }
already_regex <- isTRUE(list(...)$already_regex) already_regex <- isTRUE(list(...)$already_regex)
fast_mode <- isTRUE(list(...)$fast_mode) fast_mode <- isTRUE(list(...)$fast_mode)
x_bak <- x x_bak <- x
x <- toupper(x) x <- toupper(x)
# remove diacritics # remove diacritics
x <- iconv(x, from = "UTF-8", to = "ASCII//TRANSLIT") x <- iconv(x, from = "UTF-8", to = "ASCII//TRANSLIT")
x <- gsub('"', "", x, fixed = TRUE) x <- gsub('"', "", x, fixed = TRUE)
@ -125,7 +126,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
if (already_regex == FALSE) { if (already_regex == FALSE) {
x_bak_clean <- generalise_antibiotic_name(x_bak_clean) x_bak_clean <- generalise_antibiotic_name(x_bak_clean)
} }
x <- unique(x_bak_clean) # this means that every x is in fact generalise_antibiotic_name(x) x <- unique(x_bak_clean) # this means that every x is in fact generalise_antibiotic_name(x)
x_new <- rep(NA_character_, length(x)) x_new <- rep(NA_character_, length(x))
x_uncertain <- character(0) x_uncertain <- character(0)
@ -152,17 +153,17 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
} }
found[1L] found[1L]
} }
# Fill in names, AB codes, CID codes and ATC codes directly (`x` is already clean and uppercase) # Fill in names, AB codes, CID codes and ATC codes directly (`x` is already clean and uppercase)
known_names <- x %in% AMR_env$AB_lookup$generalised_name known_names <- x %in% AMR_env$AB_lookup$generalised_name
x_new[known_names] <- AMR_env$AB_lookup$ab[match(x[known_names], AMR_env$AB_lookup$generalised_name)] x_new[known_names] <- AMR_env$AB_lookup$ab[match(x[known_names], AMR_env$AB_lookup$generalised_name)]
known_codes_ab <- x %in% AMR_env$AB_lookup$ab known_codes_ab <- x %in% AMR_env$AB_lookup$ab
known_codes_atc <- vapply(FUN.VALUE = logical(1), x, function(x_) x_ %in% unlist(AMR_env$AB_lookup$atc), USE.NAMES = FALSE) known_codes_atc <- vapply(FUN.VALUE = logical(1), gsub(" ", "", x), function(x_) x_ %in% unlist(AMR_env$AB_lookup$atc), USE.NAMES = FALSE)
known_codes_cid <- x %in% AMR_env$AB_lookup$cid known_codes_cid <- x %in% AMR_env$AB_lookup$cid
x_new[known_codes_ab] <- AMR_env$AB_lookup$ab[match(x[known_codes_ab], AMR_env$AB_lookup$ab)] x_new[known_codes_ab] <- AMR_env$AB_lookup$ab[match(x[known_codes_ab], AMR_env$AB_lookup$ab)]
x_new[known_codes_atc] <- AMR_env$AB_lookup$ab[vapply( x_new[known_codes_atc] <- AMR_env$AB_lookup$ab[vapply(
FUN.VALUE = integer(1), FUN.VALUE = integer(1),
x[known_codes_atc], gsub(" ", "", x[known_codes_atc]),
function(x_) { function(x_) {
which(vapply( which(vapply(
FUN.VALUE = logical(1), FUN.VALUE = logical(1),
@ -182,29 +183,29 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
" for ", vector_and(prev), ". Run `ab_reset_session()` to reset this. This note will be shown once per session for this input." " for ", vector_and(prev), ". Run `ab_reset_session()` to reset this. This note will be shown once per session for this input."
) )
} }
already_known <- known_names | known_codes_ab | known_codes_atc | known_codes_cid | previously_coerced already_known <- known_names | known_codes_ab | known_codes_atc | known_codes_cid | previously_coerced
# fix for NAs # fix for NAs
x_new[is.na(x)] <- NA x_new[is.na(x)] <- NA
already_known[is.na(x)] <- FALSE already_known[is.na(x)] <- FALSE
if (loop_time == 1 && sum(already_known) < length(x)) { if (loop_time == 1 && sum(already_known) < length(x)) {
progress <- progress_ticker(n = sum(!already_known), n_min = 25, print = info) # start if n >= 25 progress <- progress_ticker(n = sum(!already_known), n_min = 25, print = info) # start if n >= 25
on.exit(close(progress)) on.exit(close(progress))
} }
for (i in which(!already_known)) { for (i in which(!already_known)) {
if (loop_time == 1) { if (loop_time == 1) {
progress$tick() progress$tick()
} }
if (is.na(x[i]) || is.null(x[i])) { if (is.na(x[i]) || is.null(x[i])) {
next next
} }
if (identical(x[i], "") || if (identical(x[i], "") ||
# prevent "bacteria" from coercing to TMP, since Bacterial is a brand name of it: # prevent "bacteria" from coercing to TMP, since Bacterial is a brand name of it:
identical(tolower(x[i]), "bacteria")) { identical(tolower(x[i]), "bacteria")) {
x_unknown <- c(x_unknown, x_bak[x[i] == x_bak_clean][1]) x_unknown <- c(x_unknown, x_bak[x[i] == x_bak_clean][1])
next next
} }
@ -215,21 +216,21 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
x_new[i] <- NA_character_ x_new[i] <- NA_character_
next next
} }
if (fast_mode == FALSE && flag_multiple_results == TRUE && x[i] %like% "[ ]") { if (fast_mode == FALSE && flag_multiple_results == TRUE && x[i] %like% "[ ]") {
from_text <- tryCatch(suppressWarnings(ab_from_text(x[i], loop_time = loop_time + 1, translate_ab = FALSE)[[1]]), from_text <- tryCatch(suppressWarnings(ab_from_text(x[i], loop_time = loop_time + 1, translate_ab = FALSE)[[1]]),
error = function(e) character(0) error = function(e) character(0)
) )
} else { } else {
from_text <- character(0) from_text <- character(0)
} }
# old code for phenoxymethylpenicillin (Peni V) # old code for phenoxymethylpenicillin (Peni V)
if (x[i] == "PNV") { if (x[i] == "PNV") {
x_new[i] <- "PHN" x_new[i] <- "PHN"
next next
} }
# exact LOINC code # exact LOINC code
loinc_found <- unlist(lapply( loinc_found <- unlist(lapply(
AMR_env$AB_lookup$generalised_loinc, AMR_env$AB_lookup$generalised_loinc,
@ -240,7 +241,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text) x_new[i] <- note_if_more_than_one_found(found, i, from_text)
next next
} }
# exact synonym # exact synonym
synonym_found <- unlist(lapply( synonym_found <- unlist(lapply(
AMR_env$AB_lookup$generalised_synonyms, AMR_env$AB_lookup$generalised_synonyms,
@ -251,7 +252,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text) x_new[i] <- note_if_more_than_one_found(found, i, from_text)
next next
} }
# exact abbreviation # exact abbreviation
abbr_found <- unlist(lapply( abbr_found <- unlist(lapply(
AMR_env$AB_lookup$generalised_abbreviations, AMR_env$AB_lookup$generalised_abbreviations,
@ -263,7 +264,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text) x_new[i] <- note_if_more_than_one_found(found, i, from_text)
next next
} }
# length of input is quite long, and Levenshtein distance is only max 2 # length of input is quite long, and Levenshtein distance is only max 2
if (nchar(x[i]) >= 10) { if (nchar(x[i]) >= 10) {
levenshtein <- as.double(utils::adist(x[i], AMR_env$AB_lookup$generalised_name)) levenshtein <- as.double(utils::adist(x[i], AMR_env$AB_lookup$generalised_name))
@ -273,7 +274,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
next next
} }
} }
# allow characters that resemble others, but only continue when having more than 3 characters # allow characters that resemble others, but only continue when having more than 3 characters
if (nchar(x[i]) <= 3) { if (nchar(x[i]) <= 3) {
x_unknown <- c(x_unknown, x_bak[x[i] == x_bak_clean][1]) x_unknown <- c(x_unknown, x_bak[x[i] == x_bak_clean][1])
@ -303,20 +304,22 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
x_spelling <- gsub("(O|0)", "(O|0)+", x_spelling, perl = TRUE) x_spelling <- gsub("(O|0)", "(O|0)+", x_spelling, perl = TRUE)
x_spelling <- gsub("++", "+", x_spelling, fixed = TRUE) x_spelling <- gsub("++", "+", x_spelling, fixed = TRUE)
} }
# try if name starts with it # try if name starts with it
found <- AMR_env$AB_lookup[which(AMR_env$AB_lookup$generalised_name %like% paste0("^", x_spelling)), "ab", drop = TRUE] found <- AMR_env$AB_lookup[which(AMR_env$AB_lookup$generalised_name %like% paste0("^", x_spelling)), "ab", drop = TRUE]
if (length(found) > 0) { if (length(found) > 0) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text) x_new[i] <- note_if_more_than_one_found(found, i, from_text)
next next
} }
# try if name ends with it # try if name ends with it
found <- AMR_env$AB_lookup[which(AMR_env$AB_lookup$generalised_name %like% paste0(x_spelling, "$")), "ab", drop = TRUE] found <- AMR_env$AB_lookup[which(AMR_env$AB_lookup$generalised_name %like% paste0(x_spelling, "$")), "ab", drop = TRUE]
if (nchar(x[i]) >= 4 && length(found) > 0) { if (nchar(x[i]) >= 4 && length(found) > 0) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text) x_new[i] <- note_if_more_than_one_found(found, i, from_text)
next next
} }
# and try if any synonym starts with it # and try if any synonym starts with it
synonym_found <- unlist(lapply( synonym_found <- unlist(lapply(
AMR_env$AB_lookup$generalised_synonyms, AMR_env$AB_lookup$generalised_synonyms,
@ -327,244 +330,71 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text) x_new[i] <- note_if_more_than_one_found(found, i, from_text)
next next
} }
# INITIAL SEARCH - More uncertain results ---- # INITIAL SEARCH - More uncertain results ----
if (loop_time <= 2 && fast_mode == FALSE) { if (loop_time == 1 && fast_mode == FALSE) {
# only run on first and second try # only run on first and second try
# base on the Levensthein distance function if length >= 6 ab_df <- AMR_env$AB_lookup
if (nchar(x[i]) >= 6) { ab_df$length_name <- nchar(ab_df$generalised_name)
l_dist <- as.double(utils::adist(x[i], AMR_env$AB_lookup$generalised_name, # now retrieve Levensthein distance for name, synonyms, and translated names
ignore.case = FALSE, ab_df$lev_name <- as.double(utils::adist(x[i], ab_df$generalised_name,
fixed = TRUE, ignore.case = FALSE,
costs = c(insertions = 1, deletions = 2, substitutions = 2), fixed = TRUE,
counts = FALSE)) costs = c(insertions = 1, deletions = 1, substitutions = 2),
x_new[i] <- AMR_env$AB_lookup$ab[order(l_dist)][1] counts = FALSE))
x_uncertain <- c(x_uncertain, x_bak[x[i] == x_bak_clean][1]) ab_df$lev_syn <- vapply(FUN.VALUE = double(1),
next ab_df$generalised_synonyms,
} function(y) ifelse(length(y[nchar(y) >= 5]) == 0,
999,
# try by removing all spaces min(as.double(utils::adist(x[i], y[nchar(y) >= 5], ignore.case = FALSE,
if (x[i] %like% " ") { fixed = TRUE,
found <- suppressWarnings(as.ab(gsub(" +", "", x[i], perl = TRUE), loop_time = loop_time + 2)) costs = c(insertions = 1, deletions = 1, substitutions = 2),
if (length(found) > 0 && !is.na(found)) { counts = FALSE)), na.rm = TRUE)),
x_new[i] <- note_if_more_than_one_found(found, i, from_text) USE.NAMES = FALSE)
x_uncertain <- c(x_uncertain, x_bak[x[i] == x_bak_clean][1]) if (!is.null(language) && language != "en") {
next ab_df$trans <- generalise_antibiotic_name(translate_AMR(ab_df$name, language = language))
} ab_df$lev_trans <- as.double(utils::adist(x[i], ab_df$trans,
} ignore.case = FALSE,
fixed = TRUE,
# try by removing all spaces and numbers costs = c(insertions = 1, deletions = 1, substitutions = 2),
if (x[i] %like% " " || x[i] %like% "[0-9]") { counts = FALSE))
found <- suppressWarnings(as.ab(gsub("[ 0-9]", "", x[i], perl = TRUE), loop_time = loop_time + 2))
if (length(found) > 0 && !is.na(found)) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
x_uncertain <- c(x_uncertain, x_bak[x[i] == x_bak_clean][1])
next
}
}
# reverse a combination, e.g. clavulanic acid/amoxicillin
if (x[i] %like% " ") {
split <- strsplit(x[i], " ")[[1]]
permute <- function(x) {
if (length(x) == 1) return(list(x))
result <- vector("list", factorial(length(x)))
index <- 1
for (i in seq_along(x)) {
sub_perms <- permute(x[-i]) # Recursively get permutations of remaining elements
for (sub in sub_perms) {
result[[index]] <- c(x[i], sub)
index <- index + 1
}
}
return(result)
}
permutations <- permute(split)
found_perms <- character(length(permutations))
for (s in seq_len(length(permutations))) {
concat <- paste0(permutations[[s]], collapse = " ")
if (concat %in% AMR_env$AB_lookup$generalised_name) {
found_perms[s] <- AMR_env$AB_lookup[which(AMR_env$AB_lookup$generalised_name == concat), "ab", drop = TRUE]
} else {
found_perms[s] <- suppressWarnings(as.ab(concat, loop_time = loop_time + 2))
}
}
found_perms <- found_perms[!is.na(found_perms)]
if (length(found_perms) > 0) {
found <- found_perms[order(nchar(found_perms), decreasing = TRUE)][1]
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
x_uncertain <- c(x_uncertain, x_bak[x[i] == x_bak_clean][1])
next
}
}
# transform back from other languages and try again
x_translated <- paste(
lapply(
strsplit(x[i], "[^A-Z0-9]"),
function(y) {
for (i in seq_len(length(y))) {
for (lang in LANGUAGES_SUPPORTED[LANGUAGES_SUPPORTED != "en"]) {
y[i] <- ifelse(tolower(y[i]) %in% tolower(TRANSLATIONS[, lang, drop = TRUE]),
TRANSLATIONS[which(tolower(TRANSLATIONS[, lang, drop = TRUE]) == tolower(y[i]) &
!isFALSE(TRANSLATIONS$fixed)), "pattern"],
y[i]
)
}
}
generalise_antibiotic_name(y)
}
)[[1]],
collapse = "/"
)
x_translated_guess <- suppressWarnings(as.ab(x_translated, loop_time = loop_time + 2))
if (!is.na(x_translated_guess)) {
x_new[i] <- x_translated_guess
x_uncertain <- c(x_uncertain, x_bak[x[i] == x_bak_clean][1])
next
}
# now also try to coerce brandname combinations like "Amoxy/clavulanic acid"
x_translated <- paste(
lapply(
strsplit(x_translated, "[^A-Z0-9 ]"),
function(y) {
for (i in seq_len(length(y))) {
y_name <- suppressWarnings(ab_name(y[i], language = NULL, loop_time = loop_time + 2))
y[i] <- ifelse(!is.na(y_name),
y_name,
y[i]
)
}
generalise_antibiotic_name(y)
}
)[[1]],
collapse = "/"
)
x_translated_guess <- suppressWarnings(as.ab(x_translated, loop_time = loop_time + 2))
if (!is.na(x_translated_guess)) {
x_new[i] <- x_translated_guess
x_uncertain <- c(x_uncertain, x_bak[x[i] == x_bak_clean][1])
next
}
# try by removing all trailing capitals
if (x[i] %like_case% "[a-z]+[A-Z]+$") {
found <- suppressWarnings(as.ab(gsub("[A-Z]+$", "", x[i], perl = TRUE), loop_time = loop_time + 2))
if (!is.na(found)) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
x_uncertain <- c(x_uncertain, x_bak[x[i] == x_bak_clean][1])
next
}
}
# keep only letters
found <- suppressWarnings(as.ab(gsub("[^A-Z]", "", x[i], perl = TRUE), loop_time = loop_time + 2))
if (!is.na(found)) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
x_uncertain <- c(x_uncertain, x_bak[x[i] == x_bak_clean][1])
next
}
# try from a bigger text, like from a health care record, see ?ab_from_text
# already calculated above if flag_multiple_results = TRUE
if (flag_multiple_results == TRUE) {
found <- from_text[1L]
} else { } else {
found <- tryCatch(suppressWarnings(ab_from_text(x[i], loop_time = loop_time + 2, translate_ab = FALSE)[[1]][1L]), ab_df$lev_trans <- ab_df$lev_name
error = function(e) NA_character_
)
} }
if (!is.na(found)) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text) if (any(ab_df$lev_name < 5, na.rm = TRUE)) {
x_new[i] <- ab_df$ab[order(ab_df$lev_name)][1]
x_uncertain <- c(x_uncertain, x_bak[x[i] == x_bak_clean][1]) x_uncertain <- c(x_uncertain, x_bak[x[i] == x_bak_clean][1])
next next
} } else if (any(ab_df$lev_trans < 5, na.rm = TRUE)) {
x_new[i] <- ab_df$ab[order(ab_df$lev_trans)][1]
# first 5 except for cephalosporins, then first 7 (those cephalosporins all start quite the same!)
found <- suppressWarnings(as.ab(substr(x[i], 1, 5), loop_time = loop_time + 2))
if (!is.na(found) && ab_group(found, loop_time = loop_time + 1) %unlike% "cephalosporins") {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
next
}
found <- suppressWarnings(as.ab(substr(x[i], 1, 7), loop_time = loop_time + 2))
if (!is.na(found)) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
x_uncertain <- c(x_uncertain, x_bak[x[i] == x_bak_clean][1]) x_uncertain <- c(x_uncertain, x_bak[x[i] == x_bak_clean][1])
next next
} } else if (any(ab_df$lev_syn < 5, na.rm = TRUE)) {
x_new[i] <- ab_df$ab[order(ab_df$lev_syn)][1]
# make all consonants facultative
search_str <- gsub("([BCDFGHJKLMNPQRSTVWXZ])", "\\1*", x[i], perl = TRUE)
found <- suppressWarnings(as.ab(search_str, loop_time = loop_time + 2, already_regex = TRUE))
# keep at least 4 normal characters
if (nchar(gsub(".\\*", "", search_str, perl = TRUE)) < 4) {
found <- NA
}
if (!is.na(found)) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
x_uncertain <- c(x_uncertain, x_bak[x[i] == x_bak_clean][1]) x_uncertain <- c(x_uncertain, x_bak[x[i] == x_bak_clean][1])
next next
} } else {
# then just take name if Levensthein is max 100% of length of name
# make all vowels facultative ab_df$lev_len_ratio <- ab_df$lev_name / ab_df$length_name
search_str <- gsub("([AEIOUY])", "\\1*", x[i], perl = TRUE) if (any(ab_df$lev_len_ratio < 1)) {
found <- suppressWarnings(as.ab(search_str, loop_time = loop_time + 2, already_regex = TRUE)) ab_df <- ab_df[ab_df$lev_len_ratio < 1, , drop = FALSE]
# keep at least 5 normal characters x_new[i] <- ab_df$ab[order(ab_df$lev_name)][1]
if (nchar(gsub(".\\*", "", search_str, perl = TRUE)) < 5) { x_uncertain <- c(x_uncertain, x_bak[x[i] == x_bak_clean][1])
found <- NA next
}
if (!is.na(found)) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
x_uncertain <- c(x_uncertain, x_bak[x[i] == x_bak_clean][1])
next
}
# allow misspelling of vowels
x_spelling <- gsub("A+", "[AEIOU]+", x_spelling, fixed = TRUE)
x_spelling <- gsub("E+", "[AEIOU]+", x_spelling, fixed = TRUE)
x_spelling <- gsub("I+", "[AEIOU]+", x_spelling, fixed = TRUE)
x_spelling <- gsub("O+", "[AEIOU]+", x_spelling, fixed = TRUE)
x_spelling <- gsub("U+", "[AEIOU]+", x_spelling, fixed = TRUE)
found <- suppressWarnings(as.ab(x_spelling, loop_time = loop_time + 2, already_regex = TRUE))
if (!is.na(found)) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
x_uncertain <- c(x_uncertain, x_bak[x[i] == x_bak_clean][1])
next
}
# try with switched character, like "mreopenem"
for (j in seq_len(nchar(x[i]))) {
x_switched <- paste0(
# beginning part:
substr(x[i], 1, j - 1),
# here is the switching of 2 characters:
substr(x[i], j + 1, j + 1),
substr(x[i], j, j),
# ending part:
substr(x[i], j + 2, nchar(x[i]))
)
found <- suppressWarnings(as.ab(x_switched, loop_time = loop_time + 1))
if (!is.na(found)) {
break
} }
} }
if (!is.na(found)) { }
x_new[i] <- found[1L]
x_uncertain <- c(x_uncertain, x_bak[x[i] == x_bak_clean][1]) # nothing found
next
}
} # end of loop_time <= 2
# not found
x_unknown <- c(x_unknown, x_bak[x[i] == x_bak_clean][1]) x_unknown <- c(x_unknown, x_bak[x[i] == x_bak_clean][1])
} }
if (loop_time == 1 && sum(already_known) < length(x)) { if (loop_time == 1 && sum(already_known) < length(x)) {
close(progress) close(progress)
} }
# save to package env to save time for next time # save to package env to save time for next time
if (loop_time == 1) { if (loop_time == 1) {
AMR_env$ab_previously_coerced <- AMR_env$ab_previously_coerced[which(!AMR_env$ab_previously_coerced$x %in% x), , drop = FALSE] AMR_env$ab_previously_coerced <- AMR_env$ab_previously_coerced[which(!AMR_env$ab_previously_coerced$x %in% x), , drop = FALSE]
@ -578,7 +408,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
) )
)) ))
} }
# take failed ATC codes apart from rest # take failed ATC codes apart from rest
if (length(x_unknown_ATCs) > 0 && fast_mode == FALSE) { if (length(x_unknown_ATCs) > 0 && fast_mode == FALSE) {
warning_( warning_(
@ -619,14 +449,14 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
". If required, use `add_custom_antimicrobials()` to add custom entries.") ". If required, use `add_custom_antimicrobials()` to add custom entries.")
} }
} }
x_result <- x_new[match(x_bak_clean, x)] x_result <- x_new[match(x_bak_clean, x)]
if (length(x_result) == 0) { if (length(x_result) == 0) {
x_result <- NA_character_ x_result <- NA_character_
} }
set_clean_class(x_result, set_clean_class(x_result,
new_class = c("ab", "character") new_class = c("ab", "character")
) )
} }
@ -767,16 +597,20 @@ generalise_antibiotic_name <- function(x) {
x <- gsub("[^A-Z0-9 -)(]", "/", x, perl = TRUE) x <- gsub("[^A-Z0-9 -)(]", "/", x, perl = TRUE)
# correct for 'high level' antibiotics # correct for 'high level' antibiotics
x <- trimws(gsub("([^A-Z0-9/ -]+)?(HIGH(.?LE?VE?L)?|[^A-Z0-9/]H[^A-Z0-9]?L)([^A-Z0-9 -]+)?", "-HIGH", x, perl = TRUE)) x <- trimws(gsub("([^A-Z0-9/ -]+)?(HIGH(.?LE?VE?L)?|[^A-Z0-9/]H[^A-Z0-9]?L)([^A-Z0-9 -]+)?", "-HIGH", x, perl = TRUE))
x <- trimws(gsub("^(-HIGH)(.*)", "\\2\\1", x)) x <- trimws(gsub("^(-HIGH)(.*)", "\\2\\1", x, perl = TRUE))
# remove part between brackets if that's followed by another string # remove part between brackets if that's followed by another string
x <- gsub("(.*)+ [(].*[)]", "\\1", x) x <- gsub("(.*)+ [(].*[)]", "\\1", x)
# spaces around non-characters must be removed: amox + clav -> amox/clav # spaces around non-characters must be removed: amox + clav -> amox clav
x <- gsub("(.*[A-Z0-9]) ([^A-Z0-9].*)", "\\1\\2", x, perl = TRUE) x <- gsub("(.*[A-Z0-9]) ([^A-Z0-9].*)", "\\1\\2", x, perl = TRUE)
x <- gsub("(.*[^A-Z0-9]) ([A-Z0-9].*)", "\\1\\2", x, perl = TRUE) x <- gsub("(.*[^A-Z0-9]) ([A-Z0-9].*)", "\\1\\2", x, perl = TRUE)
# remove hyphen after a starting "co" # remove hyphen after a starting "co"
x <- gsub("^CO-", "CO", x, perl = TRUE) x <- gsub("^CO-", "CO", x, perl = TRUE)
# replace operators with a space # replace operators with a space
x <- gsub("(/| AND | WITH | W/|[+]|[-])+", " ", x, perl = TRUE) x <- gsub("(/| AND | WITH | W/|[+]|[-])+", " ", x, perl = TRUE)
# replace more than 1 space
x <- trimws(gsub(" +", " ", x, perl = TRUE))
# move HIGH to end
x <- trimws(gsub("(.*) HIGH(.*)", "\\1\\2 HIGH", x, perl = TRUE))
x x
} }
@ -789,9 +623,9 @@ get_translate_ab <- function(translate_ab) {
} else { } else {
translate_ab <- tolower(translate_ab) translate_ab <- tolower(translate_ab)
stop_ifnot(translate_ab %in% colnames(AMR::antibiotics), stop_ifnot(translate_ab %in% colnames(AMR::antibiotics),
"invalid value for 'translate_ab', this must be a column name of the antibiotics data set\n", "invalid value for 'translate_ab', this must be a column name of the antibiotics data set\n",
"or TRUE (equals 'name') or FALSE to not translate at all.", "or TRUE (equals 'name') or FALSE to not translate at all.",
call = FALSE call = FALSE
) )
translate_ab translate_ab
} }

View File

@ -1,6 +1,6 @@
This knowledge base contains all context you must know about the AMR package for R. You are a GPT trained to be an assistant for the AMR package in R. You are an incredible R specialist, especially trained in this package and in the tidyverse. This knowledge base contains all context you must know about the AMR package for R. You are a GPT trained to be an assistant for the AMR package in R. You are an incredible R specialist, especially trained in this package and in the tidyverse.
First and foremost, you are trained on version 2.1.1.9158. Remember this whenever someone asks which AMR package version youre at. First and foremost, you are trained on version 2.1.1.9159. Remember this whenever someone asks which AMR package version youre at.
Below are the contents of the file, the file, and all the files (documentation) in the package. Every file content is split using 100 hypens. Below are the contents of the file, the file, and all the files (documentation) in the package. Every file content is split using 100 hypens.
---------------------------------------------------------------------------------------------------- ----------------------------------------------------------------------------------------------------
@ -2577,7 +2577,8 @@ THE PART HEREAFTER CONTAINS CONTENTS FROM FILE 'man/as.ab.Rd':
\alias{ab_reset_session} \alias{ab_reset_session}
\title{Transform Input to an Antibiotic ID} \title{Transform Input to an Antibiotic ID}
\usage{ \usage{
as.ab(x, flag_multiple_results = TRUE, info = interactive(), ...) as.ab(x, flag_multiple_results = TRUE, language = get_AMR_locale(),
info = interactive(), ...)
is.ab(x) is.ab(x)
@ -2588,6 +2589,8 @@ ab_reset_session()
\item{flag_multiple_results}{a \link{logical} to indicate whether a note should be printed to the console that probably more than one antibiotic drug code or name can be retrieved from a single input value.} \item{flag_multiple_results}{a \link{logical} to indicate whether a note should be printed to the console that probably more than one antibiotic drug code or name can be retrieved from a single input value.}
\item{language}{language to coerce input values from any of the 20 supported languages - default to the system language if supported (see \code{\link[=get_AMR_locale]{get_AMR_locale()}})}
\item{info}{a \link{logical} to indicate whether a progress bar should be printed - the default is \code{TRUE} only in interactive mode} \item{info}{a \link{logical} to indicate whether a progress bar should be printed - the default is \code{TRUE} only in interactive mode}
\item{...}{arguments passed on to internal functions} \item{...}{arguments passed on to internal functions}
@ -2644,7 +2647,6 @@ as.ab("J01FA01")
as.ab("J 01 FA 01") as.ab("J 01 FA 01")
as.ab("Erythromycin") as.ab("Erythromycin")
as.ab("eryt") as.ab("eryt")
as.ab(" eryt 123")
as.ab("ERYT") as.ab("ERYT")
as.ab("ERY") as.ab("ERY")
as.ab("eritromicine") # spelled wrong, yet works as.ab("eritromicine") # spelled wrong, yet works

View File

@ -7,7 +7,8 @@
\alias{ab_reset_session} \alias{ab_reset_session}
\title{Transform Input to an Antibiotic ID} \title{Transform Input to an Antibiotic ID}
\usage{ \usage{
as.ab(x, flag_multiple_results = TRUE, info = interactive(), ...) as.ab(x, flag_multiple_results = TRUE, language = get_AMR_locale(),
info = interactive(), ...)
is.ab(x) is.ab(x)
@ -18,6 +19,8 @@ ab_reset_session()
\item{flag_multiple_results}{a \link{logical} to indicate whether a note should be printed to the console that probably more than one antibiotic drug code or name can be retrieved from a single input value.} \item{flag_multiple_results}{a \link{logical} to indicate whether a note should be printed to the console that probably more than one antibiotic drug code or name can be retrieved from a single input value.}
\item{language}{language to coerce input values from any of the 20 supported languages - default to the system language if supported (see \code{\link[=get_AMR_locale]{get_AMR_locale()}})}
\item{info}{a \link{logical} to indicate whether a progress bar should be printed - the default is \code{TRUE} only in interactive mode} \item{info}{a \link{logical} to indicate whether a progress bar should be printed - the default is \code{TRUE} only in interactive mode}
\item{...}{arguments passed on to internal functions} \item{...}{arguments passed on to internal functions}
@ -74,7 +77,6 @@ as.ab("J01FA01")
as.ab("J 01 FA 01") as.ab("J 01 FA 01")
as.ab("Erythromycin") as.ab("Erythromycin")
as.ab("eryt") as.ab("eryt")
as.ab(" eryt 123")
as.ab("ERYT") as.ab("ERYT")
as.ab("ERY") as.ab("ERY")
as.ab("eritromicine") # spelled wrong, yet works as.ab("eritromicine") # spelled wrong, yet works

View File

@ -27,20 +27,21 @@
# how to conduct AMR data analysis: https://msberends.github.io/AMR/ # # how to conduct AMR data analysis: https://msberends.github.io/AMR/ #
# ==================================================================== # # ==================================================================== #
ab_reset_session()
expect_equal( expect_equal(
as.character(as.ab(c( as.character(as.ab(c(
"J01FA01", "J01FA01",
"J 01 FA 01", "J 01 FA 01",
"Erythromycin", "Erythromycin",
"eryt", "eryt",
" eryt 123",
"ERYT", "ERYT",
"ERY", "ERY",
"erytromicine", "erytromicine",
"Erythrocin", "Erythrocin",
"Romycin" "Romycin"
))), ))),
rep("ERY", 10) rep("ERY", 9)
) )
expect_identical(class(as.ab("amox")), c("ab", "character")) expect_identical(class(as.ab("amox")), c("ab", "character"))
@ -49,7 +50,7 @@ expect_true(is.ab(as.ab("amox")))
expect_stdout(print(as.ab("amox"))) expect_stdout(print(as.ab("amox")))
expect_stdout(print(data.frame(a = as.ab("amox")))) expect_stdout(print(data.frame(a = as.ab("amox"))))
# expect_warning(as.ab("J00AA00")) # ATC not yet available in data set expect_warning(as.ab("J00AA00")) # ATC not yet available in data set
# expect_warning(as.ab("UNKNOWN")) # expect_warning(as.ab("UNKNOWN"))
expect_stdout(print(as.ab("amox"))) expect_stdout(print(as.ab("amox")))

View File

@ -27,20 +27,22 @@
# how to conduct AMR data analysis: https://msberends.github.io/AMR/ # # how to conduct AMR data analysis: https://msberends.github.io/AMR/ #
# ==================================================================== # # ==================================================================== #
ab_reset_session()
expect_identical( expect_identical(
ab_from_text("28/03/2020 regular amoxicilliin 500mg po tds")[[1]], ab_from_text("28/03/2020 amoxicilliin 500mg po tds")[[1]],
as.ab("Amoxicillin") as.ab("Amoxicillin")
) )
expect_identical( expect_identical(
ab_from_text("28/03/2020 regular amoxicilliin 500mg po tds", thorough_search = TRUE)[[1]], ab_from_text("28/03/2020 amoxicilliin 500mg po tds", thorough_search = TRUE)[[1]],
as.ab("Amoxicillin") as.ab("Amoxicillin")
) )
expect_identical( expect_identical(
ab_from_text("28/03/2020 regular amoxicilliin 500mg po tds", thorough_search = FALSE)[[1]], ab_from_text("28/03/2020 amoxicilliin 500mg po tds", thorough_search = FALSE)[[1]],
as.ab("Amoxicillin") as.ab("Amoxicillin")
) )
expect_identical( expect_identical(
ab_from_text("28/03/2020 regular amoxicilliin 500mg po tds", translate_ab = TRUE)[[1]], ab_from_text("28/03/2020 amoxicilliin 500mg po tds", translate_ab = TRUE)[[1]],
"Amoxicillin" "Amoxicillin"
) )
expect_identical( expect_identical(
@ -49,10 +51,10 @@ expect_identical(
) )
expect_identical( expect_identical(
ab_from_text("28/03/2020 regular amoxicilliin 500mg po tds", type = "dose")[[1]], ab_from_text("28/03/2020 amoxicilliin 500mg po tds", type = "dose")[[1]],
500 500
) )
expect_identical( expect_identical(
ab_from_text("28/03/2020 regular amoxicilliin 500mg po tds", type = "admin")[[1]], ab_from_text("28/03/2020 amoxicilliin 500mg po tds", type = "admin")[[1]],
"oral" "oral"
) )

View File

@ -27,6 +27,8 @@
# how to conduct AMR data analysis: https://msberends.github.io/AMR/ # # how to conduct AMR data analysis: https://msberends.github.io/AMR/ #
# ==================================================================== # # ==================================================================== #
ab_reset_session()
expect_identical(ab_name("AMX", language = NULL), "Amoxicillin") expect_identical(ab_name("AMX", language = NULL), "Amoxicillin")
expect_identical(ab_atc("AMX"), "J01CA04") expect_identical(ab_atc("AMX"), "J01CA04")
expect_identical(ab_cid("AMX"), as.integer(33613)) expect_identical(ab_cid("AMX"), as.integer(33613))
@ -94,3 +96,4 @@ if (AMR:::pkg_is_available("dplyr", min_version = "1.0.0", also_load = TRUE)) {
set_ab_names(NIT:VAN) %>% set_ab_names(NIT:VAN) %>%
colnames()))) colnames())))
} }