1
0
mirror of https://github.com/msberends/AMR.git synced 2025-04-17 18:30:33 +02:00

(v2.1.1.9159) new approach as.ab()

This commit is contained in:
dr. M.S. (Matthijs) Berends 2025-02-26 19:23:54 +01:00
parent 122bca0f95
commit 0c3ea4b538
No known key found for this signature in database
18 changed files with 130 additions and 284 deletions

View File

@ -61,7 +61,7 @@ jobs:
R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- uses: r-lib/actions/setup-r@v2
with:

View File

@ -42,7 +42,7 @@ jobs:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
CODECOV_TOKEN: ${{secrets.CODECOV_TOKEN}}
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- uses: r-lib/actions/setup-pandoc@v2

View File

@ -41,7 +41,7 @@ jobs:
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- uses: r-lib/actions/setup-pandoc@v2
@ -59,13 +59,15 @@ jobs:
run: |
# old: lintr::lint_package(linters = lintr::with_defaults(line_length_linter = NULL, trailing_whitespace_linter = NULL, object_name_linter = NULL, cyclocomp_linter = NULL, object_length_linter = lintr::object_length_linter(length = 50L)), exclusions = list("R/aa_helper_pm_functions.R"))
# now get ALL linters, not just default ones
linters <- ls(envir = asNamespace("lintr"), pattern = "_linter$")
linters <- getNamespaceExports(asNamespace("lintr"))
linters <- sort(linters[grepl("_linter$", linters)])
# lose deprecated
linters <- linters[!grepl("^(closed_curly|open_curly|paren_brace|semicolon_terminator)_linter$", linters)]
linters <- linters[!grepl("^(closed_curly|open_curly|paren_brace|semicolon_terminator|consecutive_stopifnot|no_tab|single_quotes|unnecessary_nested_if|unneeded_concatenation)_linter$", linters)]
# and the ones we find unnnecessary
linters <- linters[!grepl("^(extraction_operator|implicit_integer|line_length|object_name|nonportable_path|is)_linter$", linters)]
# put the functions in a list
linters <- lapply(linters, function(l) eval(parse(text = paste0("lintr::", l, "()")), envir = asNamespace("lintr")))
linters_list <- lapply(linters, function(l) eval(parse(text = paste0("lintr::", l, "()")), envir = asNamespace("lintr")))
names(linters_list) <- linters
# run them all!
lintr::lint_package(linters = linters, exclusions = list("R/aa_helper_pm_functions.R"))
lintr::lint_package(linters = linters_list, exclusions = list("R/aa_helper_pm_functions.R"))
shell: Rscript {0}

View File

@ -40,7 +40,7 @@ jobs:
steps:
- name: Checkout code
uses: actions/checkout@v3
uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v4

View File

@ -43,7 +43,7 @@ jobs:
continue-on-error: true
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
# this is to keep timestamps, the default fetch-depth: 1 gets the timestamps of the moment of cloning
# we need this for the download page on our website - dates must be of the files, not of the latest git push

View File

@ -1,5 +1,5 @@
Package: AMR
Version: 2.1.1.9158
Version: 2.1.1.9159
Date: 2025-02-26
Title: Antimicrobial Resistance Data Analysis
Description: Functions to simplify and standardise antimicrobial resistance (AMR)

View File

@ -1,4 +1,4 @@
# AMR 2.1.1.9158
# AMR 2.1.1.9159
*(this beta version will eventually become v3.0. We're happy to reach a new major milestone soon, which will be all about the new One Health support! Install this beta using [the instructions here](https://msberends.github.io/AMR/#latest-development-version).)*

View File

@ -1,6 +1,6 @@
Metadata-Version: 2.2
Name: AMR
Version: 2.1.1.9158
Version: 2.1.1.9159
Summary: A Python wrapper for the AMR R package
Home-page: https://github.com/msberends/AMR
Author: Matthijs Berends

Binary file not shown.

Binary file not shown.

View File

@ -2,7 +2,7 @@ from setuptools import setup, find_packages
setup(
name='AMR',
version='2.1.1.9158',
version='2.1.1.9159',
packages=find_packages(),
install_requires=[
'rpy2',

348
R/ab.R
View File

@ -32,6 +32,7 @@
#' Use this function to determine the antibiotic drug code of one or more antibiotics. The data set [antibiotics] will be searched for abbreviations, official names and synonyms (brand names).
#' @param x a [character] vector to determine to antibiotic ID
#' @param flag_multiple_results a [logical] to indicate whether a note should be printed to the console that probably more than one antibiotic drug code or name can be retrieved from a single input value.
#' @param language language to coerce input values from any of the `r length(LANGUAGES_SUPPORTED)` supported languages - default to the system language if supported (see [get_AMR_locale()])
#' @param info a [logical] to indicate whether a progress bar should be printed - the default is `TRUE` only in interactive mode
#' @param ... arguments passed on to internal functions
#' @rdname as.ab
@ -67,7 +68,6 @@
#' as.ab("J 01 FA 01")
#' as.ab("Erythromycin")
#' as.ab("eryt")
#' as.ab(" eryt 123")
#' as.ab("ERYT")
#' as.ab("ERY")
#' as.ab("eritromicine") # spelled wrong, yet works
@ -92,29 +92,30 @@
#' set_ab_names(where(is.sir), property = "atc")
#' }
#' }
as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
as.ab <- function(x, flag_multiple_results = TRUE, language = get_AMR_locale(), info = interactive(), ...) {
meet_criteria(x, allow_class = c("character", "numeric", "integer", "factor"), allow_NA = TRUE)
meet_criteria(flag_multiple_results, allow_class = "logical", has_length = 1)
language <- validate_language(language)
meet_criteria(info, allow_class = "logical", has_length = 1)
if (is.ab(x) || all(x %in% c(AMR_env$AB_lookup$ab, NA))) {
# all valid AB codes, but not yet right class or might have additional attributes as AMR selector
attributes(x) <- NULL
return(set_clean_class(x,
new_class = c("ab", "character")
new_class = c("ab", "character")
))
}
loop_time <- list(...)$loop_time
if (is.null(loop_time)) {
loop_time <- 1
}
already_regex <- isTRUE(list(...)$already_regex)
fast_mode <- isTRUE(list(...)$fast_mode)
x_bak <- x
x <- toupper(x)
# remove diacritics
x <- iconv(x, from = "UTF-8", to = "ASCII//TRANSLIT")
x <- gsub('"', "", x, fixed = TRUE)
@ -125,7 +126,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
if (already_regex == FALSE) {
x_bak_clean <- generalise_antibiotic_name(x_bak_clean)
}
x <- unique(x_bak_clean) # this means that every x is in fact generalise_antibiotic_name(x)
x_new <- rep(NA_character_, length(x))
x_uncertain <- character(0)
@ -152,17 +153,17 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
}
found[1L]
}
# Fill in names, AB codes, CID codes and ATC codes directly (`x` is already clean and uppercase)
known_names <- x %in% AMR_env$AB_lookup$generalised_name
x_new[known_names] <- AMR_env$AB_lookup$ab[match(x[known_names], AMR_env$AB_lookup$generalised_name)]
known_codes_ab <- x %in% AMR_env$AB_lookup$ab
known_codes_atc <- vapply(FUN.VALUE = logical(1), x, function(x_) x_ %in% unlist(AMR_env$AB_lookup$atc), USE.NAMES = FALSE)
known_codes_atc <- vapply(FUN.VALUE = logical(1), gsub(" ", "", x), function(x_) x_ %in% unlist(AMR_env$AB_lookup$atc), USE.NAMES = FALSE)
known_codes_cid <- x %in% AMR_env$AB_lookup$cid
x_new[known_codes_ab] <- AMR_env$AB_lookup$ab[match(x[known_codes_ab], AMR_env$AB_lookup$ab)]
x_new[known_codes_atc] <- AMR_env$AB_lookup$ab[vapply(
FUN.VALUE = integer(1),
x[known_codes_atc],
gsub(" ", "", x[known_codes_atc]),
function(x_) {
which(vapply(
FUN.VALUE = logical(1),
@ -182,29 +183,29 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
" for ", vector_and(prev), ". Run `ab_reset_session()` to reset this. This note will be shown once per session for this input."
)
}
already_known <- known_names | known_codes_ab | known_codes_atc | known_codes_cid | previously_coerced
# fix for NAs
x_new[is.na(x)] <- NA
already_known[is.na(x)] <- FALSE
if (loop_time == 1 && sum(already_known) < length(x)) {
progress <- progress_ticker(n = sum(!already_known), n_min = 25, print = info) # start if n >= 25
on.exit(close(progress))
}
for (i in which(!already_known)) {
if (loop_time == 1) {
progress$tick()
}
if (is.na(x[i]) || is.null(x[i])) {
next
}
if (identical(x[i], "") ||
# prevent "bacteria" from coercing to TMP, since Bacterial is a brand name of it:
identical(tolower(x[i]), "bacteria")) {
# prevent "bacteria" from coercing to TMP, since Bacterial is a brand name of it:
identical(tolower(x[i]), "bacteria")) {
x_unknown <- c(x_unknown, x_bak[x[i] == x_bak_clean][1])
next
}
@ -215,21 +216,21 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
x_new[i] <- NA_character_
next
}
if (fast_mode == FALSE && flag_multiple_results == TRUE && x[i] %like% "[ ]") {
from_text <- tryCatch(suppressWarnings(ab_from_text(x[i], loop_time = loop_time + 1, translate_ab = FALSE)[[1]]),
error = function(e) character(0)
error = function(e) character(0)
)
} else {
from_text <- character(0)
}
# old code for phenoxymethylpenicillin (Peni V)
if (x[i] == "PNV") {
x_new[i] <- "PHN"
next
}
# exact LOINC code
loinc_found <- unlist(lapply(
AMR_env$AB_lookup$generalised_loinc,
@ -240,7 +241,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
next
}
# exact synonym
synonym_found <- unlist(lapply(
AMR_env$AB_lookup$generalised_synonyms,
@ -251,7 +252,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
next
}
# exact abbreviation
abbr_found <- unlist(lapply(
AMR_env$AB_lookup$generalised_abbreviations,
@ -263,7 +264,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
next
}
# length of input is quite long, and Levenshtein distance is only max 2
if (nchar(x[i]) >= 10) {
levenshtein <- as.double(utils::adist(x[i], AMR_env$AB_lookup$generalised_name))
@ -273,7 +274,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
next
}
}
# allow characters that resemble others, but only continue when having more than 3 characters
if (nchar(x[i]) <= 3) {
x_unknown <- c(x_unknown, x_bak[x[i] == x_bak_clean][1])
@ -303,20 +304,22 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
x_spelling <- gsub("(O|0)", "(O|0)+", x_spelling, perl = TRUE)
x_spelling <- gsub("++", "+", x_spelling, fixed = TRUE)
}
# try if name starts with it
found <- AMR_env$AB_lookup[which(AMR_env$AB_lookup$generalised_name %like% paste0("^", x_spelling)), "ab", drop = TRUE]
if (length(found) > 0) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
next
}
# try if name ends with it
found <- AMR_env$AB_lookup[which(AMR_env$AB_lookup$generalised_name %like% paste0(x_spelling, "$")), "ab", drop = TRUE]
if (nchar(x[i]) >= 4 && length(found) > 0) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
next
}
# and try if any synonym starts with it
synonym_found <- unlist(lapply(
AMR_env$AB_lookup$generalised_synonyms,
@ -327,244 +330,71 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
next
}
# INITIAL SEARCH - More uncertain results ----
if (loop_time <= 2 && fast_mode == FALSE) {
if (loop_time == 1 && fast_mode == FALSE) {
# only run on first and second try
# base on the Levensthein distance function if length >= 6
if (nchar(x[i]) >= 6) {
l_dist <- as.double(utils::adist(x[i], AMR_env$AB_lookup$generalised_name,
ignore.case = FALSE,
fixed = TRUE,
costs = c(insertions = 1, deletions = 2, substitutions = 2),
counts = FALSE))
x_new[i] <- AMR_env$AB_lookup$ab[order(l_dist)][1]
x_uncertain <- c(x_uncertain, x_bak[x[i] == x_bak_clean][1])
next
}
# try by removing all spaces
if (x[i] %like% " ") {
found <- suppressWarnings(as.ab(gsub(" +", "", x[i], perl = TRUE), loop_time = loop_time + 2))
if (length(found) > 0 && !is.na(found)) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
x_uncertain <- c(x_uncertain, x_bak[x[i] == x_bak_clean][1])
next
}
}
# try by removing all spaces and numbers
if (x[i] %like% " " || x[i] %like% "[0-9]") {
found <- suppressWarnings(as.ab(gsub("[ 0-9]", "", x[i], perl = TRUE), loop_time = loop_time + 2))
if (length(found) > 0 && !is.na(found)) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
x_uncertain <- c(x_uncertain, x_bak[x[i] == x_bak_clean][1])
next
}
}
# reverse a combination, e.g. clavulanic acid/amoxicillin
if (x[i] %like% " ") {
split <- strsplit(x[i], " ")[[1]]
permute <- function(x) {
if (length(x) == 1) return(list(x))
result <- vector("list", factorial(length(x)))
index <- 1
for (i in seq_along(x)) {
sub_perms <- permute(x[-i]) # Recursively get permutations of remaining elements
for (sub in sub_perms) {
result[[index]] <- c(x[i], sub)
index <- index + 1
}
}
return(result)
}
permutations <- permute(split)
found_perms <- character(length(permutations))
for (s in seq_len(length(permutations))) {
concat <- paste0(permutations[[s]], collapse = " ")
if (concat %in% AMR_env$AB_lookup$generalised_name) {
found_perms[s] <- AMR_env$AB_lookup[which(AMR_env$AB_lookup$generalised_name == concat), "ab", drop = TRUE]
} else {
found_perms[s] <- suppressWarnings(as.ab(concat, loop_time = loop_time + 2))
}
}
found_perms <- found_perms[!is.na(found_perms)]
if (length(found_perms) > 0) {
found <- found_perms[order(nchar(found_perms), decreasing = TRUE)][1]
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
x_uncertain <- c(x_uncertain, x_bak[x[i] == x_bak_clean][1])
next
}
}
# transform back from other languages and try again
x_translated <- paste(
lapply(
strsplit(x[i], "[^A-Z0-9]"),
function(y) {
for (i in seq_len(length(y))) {
for (lang in LANGUAGES_SUPPORTED[LANGUAGES_SUPPORTED != "en"]) {
y[i] <- ifelse(tolower(y[i]) %in% tolower(TRANSLATIONS[, lang, drop = TRUE]),
TRANSLATIONS[which(tolower(TRANSLATIONS[, lang, drop = TRUE]) == tolower(y[i]) &
!isFALSE(TRANSLATIONS$fixed)), "pattern"],
y[i]
)
}
}
generalise_antibiotic_name(y)
}
)[[1]],
collapse = "/"
)
x_translated_guess <- suppressWarnings(as.ab(x_translated, loop_time = loop_time + 2))
if (!is.na(x_translated_guess)) {
x_new[i] <- x_translated_guess
x_uncertain <- c(x_uncertain, x_bak[x[i] == x_bak_clean][1])
next
}
# now also try to coerce brandname combinations like "Amoxy/clavulanic acid"
x_translated <- paste(
lapply(
strsplit(x_translated, "[^A-Z0-9 ]"),
function(y) {
for (i in seq_len(length(y))) {
y_name <- suppressWarnings(ab_name(y[i], language = NULL, loop_time = loop_time + 2))
y[i] <- ifelse(!is.na(y_name),
y_name,
y[i]
)
}
generalise_antibiotic_name(y)
}
)[[1]],
collapse = "/"
)
x_translated_guess <- suppressWarnings(as.ab(x_translated, loop_time = loop_time + 2))
if (!is.na(x_translated_guess)) {
x_new[i] <- x_translated_guess
x_uncertain <- c(x_uncertain, x_bak[x[i] == x_bak_clean][1])
next
}
# try by removing all trailing capitals
if (x[i] %like_case% "[a-z]+[A-Z]+$") {
found <- suppressWarnings(as.ab(gsub("[A-Z]+$", "", x[i], perl = TRUE), loop_time = loop_time + 2))
if (!is.na(found)) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
x_uncertain <- c(x_uncertain, x_bak[x[i] == x_bak_clean][1])
next
}
}
# keep only letters
found <- suppressWarnings(as.ab(gsub("[^A-Z]", "", x[i], perl = TRUE), loop_time = loop_time + 2))
if (!is.na(found)) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
x_uncertain <- c(x_uncertain, x_bak[x[i] == x_bak_clean][1])
next
}
# try from a bigger text, like from a health care record, see ?ab_from_text
# already calculated above if flag_multiple_results = TRUE
if (flag_multiple_results == TRUE) {
found <- from_text[1L]
ab_df <- AMR_env$AB_lookup
ab_df$length_name <- nchar(ab_df$generalised_name)
# now retrieve Levensthein distance for name, synonyms, and translated names
ab_df$lev_name <- as.double(utils::adist(x[i], ab_df$generalised_name,
ignore.case = FALSE,
fixed = TRUE,
costs = c(insertions = 1, deletions = 1, substitutions = 2),
counts = FALSE))
ab_df$lev_syn <- vapply(FUN.VALUE = double(1),
ab_df$generalised_synonyms,
function(y) ifelse(length(y[nchar(y) >= 5]) == 0,
999,
min(as.double(utils::adist(x[i], y[nchar(y) >= 5], ignore.case = FALSE,
fixed = TRUE,
costs = c(insertions = 1, deletions = 1, substitutions = 2),
counts = FALSE)), na.rm = TRUE)),
USE.NAMES = FALSE)
if (!is.null(language) && language != "en") {
ab_df$trans <- generalise_antibiotic_name(translate_AMR(ab_df$name, language = language))
ab_df$lev_trans <- as.double(utils::adist(x[i], ab_df$trans,
ignore.case = FALSE,
fixed = TRUE,
costs = c(insertions = 1, deletions = 1, substitutions = 2),
counts = FALSE))
} else {
found <- tryCatch(suppressWarnings(ab_from_text(x[i], loop_time = loop_time + 2, translate_ab = FALSE)[[1]][1L]),
error = function(e) NA_character_
)
ab_df$lev_trans <- ab_df$lev_name
}
if (!is.na(found)) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
if (any(ab_df$lev_name < 5, na.rm = TRUE)) {
x_new[i] <- ab_df$ab[order(ab_df$lev_name)][1]
x_uncertain <- c(x_uncertain, x_bak[x[i] == x_bak_clean][1])
next
}
# first 5 except for cephalosporins, then first 7 (those cephalosporins all start quite the same!)
found <- suppressWarnings(as.ab(substr(x[i], 1, 5), loop_time = loop_time + 2))
if (!is.na(found) && ab_group(found, loop_time = loop_time + 1) %unlike% "cephalosporins") {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
next
}
found <- suppressWarnings(as.ab(substr(x[i], 1, 7), loop_time = loop_time + 2))
if (!is.na(found)) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
} else if (any(ab_df$lev_trans < 5, na.rm = TRUE)) {
x_new[i] <- ab_df$ab[order(ab_df$lev_trans)][1]
x_uncertain <- c(x_uncertain, x_bak[x[i] == x_bak_clean][1])
next
}
# make all consonants facultative
search_str <- gsub("([BCDFGHJKLMNPQRSTVWXZ])", "\\1*", x[i], perl = TRUE)
found <- suppressWarnings(as.ab(search_str, loop_time = loop_time + 2, already_regex = TRUE))
# keep at least 4 normal characters
if (nchar(gsub(".\\*", "", search_str, perl = TRUE)) < 4) {
found <- NA
}
if (!is.na(found)) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
} else if (any(ab_df$lev_syn < 5, na.rm = TRUE)) {
x_new[i] <- ab_df$ab[order(ab_df$lev_syn)][1]
x_uncertain <- c(x_uncertain, x_bak[x[i] == x_bak_clean][1])
next
}
# make all vowels facultative
search_str <- gsub("([AEIOUY])", "\\1*", x[i], perl = TRUE)
found <- suppressWarnings(as.ab(search_str, loop_time = loop_time + 2, already_regex = TRUE))
# keep at least 5 normal characters
if (nchar(gsub(".\\*", "", search_str, perl = TRUE)) < 5) {
found <- NA
}
if (!is.na(found)) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
x_uncertain <- c(x_uncertain, x_bak[x[i] == x_bak_clean][1])
next
}
# allow misspelling of vowels
x_spelling <- gsub("A+", "[AEIOU]+", x_spelling, fixed = TRUE)
x_spelling <- gsub("E+", "[AEIOU]+", x_spelling, fixed = TRUE)
x_spelling <- gsub("I+", "[AEIOU]+", x_spelling, fixed = TRUE)
x_spelling <- gsub("O+", "[AEIOU]+", x_spelling, fixed = TRUE)
x_spelling <- gsub("U+", "[AEIOU]+", x_spelling, fixed = TRUE)
found <- suppressWarnings(as.ab(x_spelling, loop_time = loop_time + 2, already_regex = TRUE))
if (!is.na(found)) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
x_uncertain <- c(x_uncertain, x_bak[x[i] == x_bak_clean][1])
next
}
# try with switched character, like "mreopenem"
for (j in seq_len(nchar(x[i]))) {
x_switched <- paste0(
# beginning part:
substr(x[i], 1, j - 1),
# here is the switching of 2 characters:
substr(x[i], j + 1, j + 1),
substr(x[i], j, j),
# ending part:
substr(x[i], j + 2, nchar(x[i]))
)
found <- suppressWarnings(as.ab(x_switched, loop_time = loop_time + 1))
if (!is.na(found)) {
break
} else {
# then just take name if Levensthein is max 100% of length of name
ab_df$lev_len_ratio <- ab_df$lev_name / ab_df$length_name
if (any(ab_df$lev_len_ratio < 1)) {
ab_df <- ab_df[ab_df$lev_len_ratio < 1, , drop = FALSE]
x_new[i] <- ab_df$ab[order(ab_df$lev_name)][1]
x_uncertain <- c(x_uncertain, x_bak[x[i] == x_bak_clean][1])
next
}
}
if (!is.na(found)) {
x_new[i] <- found[1L]
x_uncertain <- c(x_uncertain, x_bak[x[i] == x_bak_clean][1])
next
}
} # end of loop_time <= 2
# not found
}
# nothing found
x_unknown <- c(x_unknown, x_bak[x[i] == x_bak_clean][1])
}
if (loop_time == 1 && sum(already_known) < length(x)) {
close(progress)
}
# save to package env to save time for next time
if (loop_time == 1) {
AMR_env$ab_previously_coerced <- AMR_env$ab_previously_coerced[which(!AMR_env$ab_previously_coerced$x %in% x), , drop = FALSE]
@ -578,7 +408,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
)
))
}
# take failed ATC codes apart from rest
if (length(x_unknown_ATCs) > 0 && fast_mode == FALSE) {
warning_(
@ -619,14 +449,14 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
". If required, use `add_custom_antimicrobials()` to add custom entries.")
}
}
x_result <- x_new[match(x_bak_clean, x)]
if (length(x_result) == 0) {
x_result <- NA_character_
}
set_clean_class(x_result,
new_class = c("ab", "character")
new_class = c("ab", "character")
)
}
@ -767,16 +597,20 @@ generalise_antibiotic_name <- function(x) {
x <- gsub("[^A-Z0-9 -)(]", "/", x, perl = TRUE)
# correct for 'high level' antibiotics
x <- trimws(gsub("([^A-Z0-9/ -]+)?(HIGH(.?LE?VE?L)?|[^A-Z0-9/]H[^A-Z0-9]?L)([^A-Z0-9 -]+)?", "-HIGH", x, perl = TRUE))
x <- trimws(gsub("^(-HIGH)(.*)", "\\2\\1", x))
x <- trimws(gsub("^(-HIGH)(.*)", "\\2\\1", x, perl = TRUE))
# remove part between brackets if that's followed by another string
x <- gsub("(.*)+ [(].*[)]", "\\1", x)
# spaces around non-characters must be removed: amox + clav -> amox/clav
# spaces around non-characters must be removed: amox + clav -> amox clav
x <- gsub("(.*[A-Z0-9]) ([^A-Z0-9].*)", "\\1\\2", x, perl = TRUE)
x <- gsub("(.*[^A-Z0-9]) ([A-Z0-9].*)", "\\1\\2", x, perl = TRUE)
# remove hyphen after a starting "co"
x <- gsub("^CO-", "CO", x, perl = TRUE)
# replace operators with a space
x <- gsub("(/| AND | WITH | W/|[+]|[-])+", " ", x, perl = TRUE)
# replace more than 1 space
x <- trimws(gsub(" +", " ", x, perl = TRUE))
# move HIGH to end
x <- trimws(gsub("(.*) HIGH(.*)", "\\1\\2 HIGH", x, perl = TRUE))
x
}
@ -789,9 +623,9 @@ get_translate_ab <- function(translate_ab) {
} else {
translate_ab <- tolower(translate_ab)
stop_ifnot(translate_ab %in% colnames(AMR::antibiotics),
"invalid value for 'translate_ab', this must be a column name of the antibiotics data set\n",
"or TRUE (equals 'name') or FALSE to not translate at all.",
call = FALSE
"invalid value for 'translate_ab', this must be a column name of the antibiotics data set\n",
"or TRUE (equals 'name') or FALSE to not translate at all.",
call = FALSE
)
translate_ab
}

View File

@ -1,6 +1,6 @@
This knowledge base contains all context you must know about the AMR package for R. You are a GPT trained to be an assistant for the AMR package in R. You are an incredible R specialist, especially trained in this package and in the tidyverse.
First and foremost, you are trained on version 2.1.1.9158. Remember this whenever someone asks which AMR package version youre at.
First and foremost, you are trained on version 2.1.1.9159. Remember this whenever someone asks which AMR package version youre at.
Below are the contents of the file, the file, and all the files (documentation) in the package. Every file content is split using 100 hypens.
----------------------------------------------------------------------------------------------------
@ -2577,7 +2577,8 @@ THE PART HEREAFTER CONTAINS CONTENTS FROM FILE 'man/as.ab.Rd':
\alias{ab_reset_session}
\title{Transform Input to an Antibiotic ID}
\usage{
as.ab(x, flag_multiple_results = TRUE, info = interactive(), ...)
as.ab(x, flag_multiple_results = TRUE, language = get_AMR_locale(),
info = interactive(), ...)
is.ab(x)
@ -2588,6 +2589,8 @@ ab_reset_session()
\item{flag_multiple_results}{a \link{logical} to indicate whether a note should be printed to the console that probably more than one antibiotic drug code or name can be retrieved from a single input value.}
\item{language}{language to coerce input values from any of the 20 supported languages - default to the system language if supported (see \code{\link[=get_AMR_locale]{get_AMR_locale()}})}
\item{info}{a \link{logical} to indicate whether a progress bar should be printed - the default is \code{TRUE} only in interactive mode}
\item{...}{arguments passed on to internal functions}
@ -2644,7 +2647,6 @@ as.ab("J01FA01")
as.ab("J 01 FA 01")
as.ab("Erythromycin")
as.ab("eryt")
as.ab(" eryt 123")
as.ab("ERYT")
as.ab("ERY")
as.ab("eritromicine") # spelled wrong, yet works

View File

@ -7,7 +7,8 @@
\alias{ab_reset_session}
\title{Transform Input to an Antibiotic ID}
\usage{
as.ab(x, flag_multiple_results = TRUE, info = interactive(), ...)
as.ab(x, flag_multiple_results = TRUE, language = get_AMR_locale(),
info = interactive(), ...)
is.ab(x)
@ -18,6 +19,8 @@ ab_reset_session()
\item{flag_multiple_results}{a \link{logical} to indicate whether a note should be printed to the console that probably more than one antibiotic drug code or name can be retrieved from a single input value.}
\item{language}{language to coerce input values from any of the 20 supported languages - default to the system language if supported (see \code{\link[=get_AMR_locale]{get_AMR_locale()}})}
\item{info}{a \link{logical} to indicate whether a progress bar should be printed - the default is \code{TRUE} only in interactive mode}
\item{...}{arguments passed on to internal functions}
@ -74,7 +77,6 @@ as.ab("J01FA01")
as.ab("J 01 FA 01")
as.ab("Erythromycin")
as.ab("eryt")
as.ab(" eryt 123")
as.ab("ERYT")
as.ab("ERY")
as.ab("eritromicine") # spelled wrong, yet works

View File

@ -27,20 +27,21 @@
# how to conduct AMR data analysis: https://msberends.github.io/AMR/ #
# ==================================================================== #
ab_reset_session()
expect_equal(
as.character(as.ab(c(
"J01FA01",
"J 01 FA 01",
"Erythromycin",
"eryt",
" eryt 123",
"ERYT",
"ERY",
"erytromicine",
"Erythrocin",
"Romycin"
))),
rep("ERY", 10)
rep("ERY", 9)
)
expect_identical(class(as.ab("amox")), c("ab", "character"))
@ -49,7 +50,7 @@ expect_true(is.ab(as.ab("amox")))
expect_stdout(print(as.ab("amox")))
expect_stdout(print(data.frame(a = as.ab("amox"))))
# expect_warning(as.ab("J00AA00")) # ATC not yet available in data set
expect_warning(as.ab("J00AA00")) # ATC not yet available in data set
# expect_warning(as.ab("UNKNOWN"))
expect_stdout(print(as.ab("amox")))

View File

@ -27,20 +27,22 @@
# how to conduct AMR data analysis: https://msberends.github.io/AMR/ #
# ==================================================================== #
ab_reset_session()
expect_identical(
ab_from_text("28/03/2020 regular amoxicilliin 500mg po tds")[[1]],
ab_from_text("28/03/2020 amoxicilliin 500mg po tds")[[1]],
as.ab("Amoxicillin")
)
expect_identical(
ab_from_text("28/03/2020 regular amoxicilliin 500mg po tds", thorough_search = TRUE)[[1]],
ab_from_text("28/03/2020 amoxicilliin 500mg po tds", thorough_search = TRUE)[[1]],
as.ab("Amoxicillin")
)
expect_identical(
ab_from_text("28/03/2020 regular amoxicilliin 500mg po tds", thorough_search = FALSE)[[1]],
ab_from_text("28/03/2020 amoxicilliin 500mg po tds", thorough_search = FALSE)[[1]],
as.ab("Amoxicillin")
)
expect_identical(
ab_from_text("28/03/2020 regular amoxicilliin 500mg po tds", translate_ab = TRUE)[[1]],
ab_from_text("28/03/2020 amoxicilliin 500mg po tds", translate_ab = TRUE)[[1]],
"Amoxicillin"
)
expect_identical(
@ -49,10 +51,10 @@ expect_identical(
)
expect_identical(
ab_from_text("28/03/2020 regular amoxicilliin 500mg po tds", type = "dose")[[1]],
ab_from_text("28/03/2020 amoxicilliin 500mg po tds", type = "dose")[[1]],
500
)
expect_identical(
ab_from_text("28/03/2020 regular amoxicilliin 500mg po tds", type = "admin")[[1]],
ab_from_text("28/03/2020 amoxicilliin 500mg po tds", type = "admin")[[1]],
"oral"
)

View File

@ -27,6 +27,8 @@
# how to conduct AMR data analysis: https://msberends.github.io/AMR/ #
# ==================================================================== #
ab_reset_session()
expect_identical(ab_name("AMX", language = NULL), "Amoxicillin")
expect_identical(ab_atc("AMX"), "J01CA04")
expect_identical(ab_cid("AMX"), as.integer(33613))
@ -94,3 +96,4 @@ if (AMR:::pkg_is_available("dplyr", min_version = "1.0.0", also_load = TRUE)) {
set_ab_names(NIT:VAN) %>%
colnames())))
}