mirror of
https://github.com/msberends/AMR.git
synced 2025-04-17 18:30:33 +02:00
(v2.1.1.9159) new approach as.ab()
This commit is contained in:
parent
122bca0f95
commit
0c3ea4b538
2
.github/workflows/check-old-tinytest.yaml
vendored
2
.github/workflows/check-old-tinytest.yaml
vendored
@ -61,7 +61,7 @@ jobs:
|
||||
R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- uses: r-lib/actions/setup-r@v2
|
||||
with:
|
||||
|
2
.github/workflows/codecovr.yaml
vendored
2
.github/workflows/codecovr.yaml
vendored
@ -42,7 +42,7 @@ jobs:
|
||||
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
|
||||
CODECOV_TOKEN: ${{secrets.CODECOV_TOKEN}}
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- uses: r-lib/actions/setup-pandoc@v2
|
||||
|
||||
|
12
.github/workflows/lintr.yaml
vendored
12
.github/workflows/lintr.yaml
vendored
@ -41,7 +41,7 @@ jobs:
|
||||
env:
|
||||
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- uses: r-lib/actions/setup-pandoc@v2
|
||||
|
||||
@ -59,13 +59,15 @@ jobs:
|
||||
run: |
|
||||
# old: lintr::lint_package(linters = lintr::with_defaults(line_length_linter = NULL, trailing_whitespace_linter = NULL, object_name_linter = NULL, cyclocomp_linter = NULL, object_length_linter = lintr::object_length_linter(length = 50L)), exclusions = list("R/aa_helper_pm_functions.R"))
|
||||
# now get ALL linters, not just default ones
|
||||
linters <- ls(envir = asNamespace("lintr"), pattern = "_linter$")
|
||||
linters <- getNamespaceExports(asNamespace("lintr"))
|
||||
linters <- sort(linters[grepl("_linter$", linters)])
|
||||
# lose deprecated
|
||||
linters <- linters[!grepl("^(closed_curly|open_curly|paren_brace|semicolon_terminator)_linter$", linters)]
|
||||
linters <- linters[!grepl("^(closed_curly|open_curly|paren_brace|semicolon_terminator|consecutive_stopifnot|no_tab|single_quotes|unnecessary_nested_if|unneeded_concatenation)_linter$", linters)]
|
||||
# and the ones we find unnnecessary
|
||||
linters <- linters[!grepl("^(extraction_operator|implicit_integer|line_length|object_name|nonportable_path|is)_linter$", linters)]
|
||||
# put the functions in a list
|
||||
linters <- lapply(linters, function(l) eval(parse(text = paste0("lintr::", l, "()")), envir = asNamespace("lintr")))
|
||||
linters_list <- lapply(linters, function(l) eval(parse(text = paste0("lintr::", l, "()")), envir = asNamespace("lintr")))
|
||||
names(linters_list) <- linters
|
||||
# run them all!
|
||||
lintr::lint_package(linters = linters, exclusions = list("R/aa_helper_pm_functions.R"))
|
||||
lintr::lint_package(linters = linters_list, exclusions = list("R/aa_helper_pm_functions.R"))
|
||||
shell: Rscript {0}
|
||||
|
2
.github/workflows/publish-to-pypi.yml
vendored
2
.github/workflows/publish-to-pypi.yml
vendored
@ -40,7 +40,7 @@ jobs:
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
|
2
.github/workflows/website.yaml
vendored
2
.github/workflows/website.yaml
vendored
@ -43,7 +43,7 @@ jobs:
|
||||
continue-on-error: true
|
||||
steps:
|
||||
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
# this is to keep timestamps, the default fetch-depth: 1 gets the timestamps of the moment of cloning
|
||||
# we need this for the download page on our website - dates must be of the files, not of the latest git push
|
||||
|
@ -1,5 +1,5 @@
|
||||
Package: AMR
|
||||
Version: 2.1.1.9158
|
||||
Version: 2.1.1.9159
|
||||
Date: 2025-02-26
|
||||
Title: Antimicrobial Resistance Data Analysis
|
||||
Description: Functions to simplify and standardise antimicrobial resistance (AMR)
|
||||
|
2
NEWS.md
2
NEWS.md
@ -1,4 +1,4 @@
|
||||
# AMR 2.1.1.9158
|
||||
# AMR 2.1.1.9159
|
||||
|
||||
*(this beta version will eventually become v3.0. We're happy to reach a new major milestone soon, which will be all about the new One Health support! Install this beta using [the instructions here](https://msberends.github.io/AMR/#latest-development-version).)*
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
Metadata-Version: 2.2
|
||||
Name: AMR
|
||||
Version: 2.1.1.9158
|
||||
Version: 2.1.1.9159
|
||||
Summary: A Python wrapper for the AMR R package
|
||||
Home-page: https://github.com/msberends/AMR
|
||||
Author: Matthijs Berends
|
||||
|
BIN
PythonPackage/AMR/dist/amr-2.1.1.9158.tar.gz
vendored
BIN
PythonPackage/AMR/dist/amr-2.1.1.9158.tar.gz
vendored
Binary file not shown.
Binary file not shown.
BIN
PythonPackage/AMR/dist/amr-2.1.1.9159.tar.gz
vendored
Normal file
BIN
PythonPackage/AMR/dist/amr-2.1.1.9159.tar.gz
vendored
Normal file
Binary file not shown.
@ -2,7 +2,7 @@ from setuptools import setup, find_packages
|
||||
|
||||
setup(
|
||||
name='AMR',
|
||||
version='2.1.1.9158',
|
||||
version='2.1.1.9159',
|
||||
packages=find_packages(),
|
||||
install_requires=[
|
||||
'rpy2',
|
||||
|
348
R/ab.R
348
R/ab.R
@ -32,6 +32,7 @@
|
||||
#' Use this function to determine the antibiotic drug code of one or more antibiotics. The data set [antibiotics] will be searched for abbreviations, official names and synonyms (brand names).
|
||||
#' @param x a [character] vector to determine to antibiotic ID
|
||||
#' @param flag_multiple_results a [logical] to indicate whether a note should be printed to the console that probably more than one antibiotic drug code or name can be retrieved from a single input value.
|
||||
#' @param language language to coerce input values from any of the `r length(LANGUAGES_SUPPORTED)` supported languages - default to the system language if supported (see [get_AMR_locale()])
|
||||
#' @param info a [logical] to indicate whether a progress bar should be printed - the default is `TRUE` only in interactive mode
|
||||
#' @param ... arguments passed on to internal functions
|
||||
#' @rdname as.ab
|
||||
@ -67,7 +68,6 @@
|
||||
#' as.ab("J 01 FA 01")
|
||||
#' as.ab("Erythromycin")
|
||||
#' as.ab("eryt")
|
||||
#' as.ab(" eryt 123")
|
||||
#' as.ab("ERYT")
|
||||
#' as.ab("ERY")
|
||||
#' as.ab("eritromicine") # spelled wrong, yet works
|
||||
@ -92,29 +92,30 @@
|
||||
#' set_ab_names(where(is.sir), property = "atc")
|
||||
#' }
|
||||
#' }
|
||||
as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
|
||||
as.ab <- function(x, flag_multiple_results = TRUE, language = get_AMR_locale(), info = interactive(), ...) {
|
||||
meet_criteria(x, allow_class = c("character", "numeric", "integer", "factor"), allow_NA = TRUE)
|
||||
meet_criteria(flag_multiple_results, allow_class = "logical", has_length = 1)
|
||||
language <- validate_language(language)
|
||||
meet_criteria(info, allow_class = "logical", has_length = 1)
|
||||
|
||||
|
||||
if (is.ab(x) || all(x %in% c(AMR_env$AB_lookup$ab, NA))) {
|
||||
# all valid AB codes, but not yet right class or might have additional attributes as AMR selector
|
||||
attributes(x) <- NULL
|
||||
return(set_clean_class(x,
|
||||
new_class = c("ab", "character")
|
||||
new_class = c("ab", "character")
|
||||
))
|
||||
}
|
||||
|
||||
|
||||
loop_time <- list(...)$loop_time
|
||||
if (is.null(loop_time)) {
|
||||
loop_time <- 1
|
||||
}
|
||||
already_regex <- isTRUE(list(...)$already_regex)
|
||||
fast_mode <- isTRUE(list(...)$fast_mode)
|
||||
|
||||
|
||||
x_bak <- x
|
||||
x <- toupper(x)
|
||||
|
||||
|
||||
# remove diacritics
|
||||
x <- iconv(x, from = "UTF-8", to = "ASCII//TRANSLIT")
|
||||
x <- gsub('"', "", x, fixed = TRUE)
|
||||
@ -125,7 +126,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
|
||||
if (already_regex == FALSE) {
|
||||
x_bak_clean <- generalise_antibiotic_name(x_bak_clean)
|
||||
}
|
||||
|
||||
|
||||
x <- unique(x_bak_clean) # this means that every x is in fact generalise_antibiotic_name(x)
|
||||
x_new <- rep(NA_character_, length(x))
|
||||
x_uncertain <- character(0)
|
||||
@ -152,17 +153,17 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
|
||||
}
|
||||
found[1L]
|
||||
}
|
||||
|
||||
|
||||
# Fill in names, AB codes, CID codes and ATC codes directly (`x` is already clean and uppercase)
|
||||
known_names <- x %in% AMR_env$AB_lookup$generalised_name
|
||||
x_new[known_names] <- AMR_env$AB_lookup$ab[match(x[known_names], AMR_env$AB_lookup$generalised_name)]
|
||||
known_codes_ab <- x %in% AMR_env$AB_lookup$ab
|
||||
known_codes_atc <- vapply(FUN.VALUE = logical(1), x, function(x_) x_ %in% unlist(AMR_env$AB_lookup$atc), USE.NAMES = FALSE)
|
||||
known_codes_atc <- vapply(FUN.VALUE = logical(1), gsub(" ", "", x), function(x_) x_ %in% unlist(AMR_env$AB_lookup$atc), USE.NAMES = FALSE)
|
||||
known_codes_cid <- x %in% AMR_env$AB_lookup$cid
|
||||
x_new[known_codes_ab] <- AMR_env$AB_lookup$ab[match(x[known_codes_ab], AMR_env$AB_lookup$ab)]
|
||||
x_new[known_codes_atc] <- AMR_env$AB_lookup$ab[vapply(
|
||||
FUN.VALUE = integer(1),
|
||||
x[known_codes_atc],
|
||||
gsub(" ", "", x[known_codes_atc]),
|
||||
function(x_) {
|
||||
which(vapply(
|
||||
FUN.VALUE = logical(1),
|
||||
@ -182,29 +183,29 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
|
||||
" for ", vector_and(prev), ". Run `ab_reset_session()` to reset this. This note will be shown once per session for this input."
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
already_known <- known_names | known_codes_ab | known_codes_atc | known_codes_cid | previously_coerced
|
||||
|
||||
|
||||
# fix for NAs
|
||||
x_new[is.na(x)] <- NA
|
||||
already_known[is.na(x)] <- FALSE
|
||||
|
||||
|
||||
if (loop_time == 1 && sum(already_known) < length(x)) {
|
||||
progress <- progress_ticker(n = sum(!already_known), n_min = 25, print = info) # start if n >= 25
|
||||
on.exit(close(progress))
|
||||
}
|
||||
|
||||
|
||||
for (i in which(!already_known)) {
|
||||
if (loop_time == 1) {
|
||||
progress$tick()
|
||||
}
|
||||
|
||||
|
||||
if (is.na(x[i]) || is.null(x[i])) {
|
||||
next
|
||||
}
|
||||
if (identical(x[i], "") ||
|
||||
# prevent "bacteria" from coercing to TMP, since Bacterial is a brand name of it:
|
||||
identical(tolower(x[i]), "bacteria")) {
|
||||
# prevent "bacteria" from coercing to TMP, since Bacterial is a brand name of it:
|
||||
identical(tolower(x[i]), "bacteria")) {
|
||||
x_unknown <- c(x_unknown, x_bak[x[i] == x_bak_clean][1])
|
||||
next
|
||||
}
|
||||
@ -215,21 +216,21 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
|
||||
x_new[i] <- NA_character_
|
||||
next
|
||||
}
|
||||
|
||||
|
||||
if (fast_mode == FALSE && flag_multiple_results == TRUE && x[i] %like% "[ ]") {
|
||||
from_text <- tryCatch(suppressWarnings(ab_from_text(x[i], loop_time = loop_time + 1, translate_ab = FALSE)[[1]]),
|
||||
error = function(e) character(0)
|
||||
error = function(e) character(0)
|
||||
)
|
||||
} else {
|
||||
from_text <- character(0)
|
||||
}
|
||||
|
||||
|
||||
# old code for phenoxymethylpenicillin (Peni V)
|
||||
if (x[i] == "PNV") {
|
||||
x_new[i] <- "PHN"
|
||||
next
|
||||
}
|
||||
|
||||
|
||||
# exact LOINC code
|
||||
loinc_found <- unlist(lapply(
|
||||
AMR_env$AB_lookup$generalised_loinc,
|
||||
@ -240,7 +241,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
|
||||
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
|
||||
next
|
||||
}
|
||||
|
||||
|
||||
# exact synonym
|
||||
synonym_found <- unlist(lapply(
|
||||
AMR_env$AB_lookup$generalised_synonyms,
|
||||
@ -251,7 +252,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
|
||||
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
|
||||
next
|
||||
}
|
||||
|
||||
|
||||
# exact abbreviation
|
||||
abbr_found <- unlist(lapply(
|
||||
AMR_env$AB_lookup$generalised_abbreviations,
|
||||
@ -263,7 +264,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
|
||||
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
|
||||
next
|
||||
}
|
||||
|
||||
|
||||
# length of input is quite long, and Levenshtein distance is only max 2
|
||||
if (nchar(x[i]) >= 10) {
|
||||
levenshtein <- as.double(utils::adist(x[i], AMR_env$AB_lookup$generalised_name))
|
||||
@ -273,7 +274,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
|
||||
next
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
# allow characters that resemble others, but only continue when having more than 3 characters
|
||||
if (nchar(x[i]) <= 3) {
|
||||
x_unknown <- c(x_unknown, x_bak[x[i] == x_bak_clean][1])
|
||||
@ -303,20 +304,22 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
|
||||
x_spelling <- gsub("(O|0)", "(O|0)+", x_spelling, perl = TRUE)
|
||||
x_spelling <- gsub("++", "+", x_spelling, fixed = TRUE)
|
||||
}
|
||||
|
||||
|
||||
# try if name starts with it
|
||||
found <- AMR_env$AB_lookup[which(AMR_env$AB_lookup$generalised_name %like% paste0("^", x_spelling)), "ab", drop = TRUE]
|
||||
if (length(found) > 0) {
|
||||
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
|
||||
next
|
||||
}
|
||||
|
||||
|
||||
# try if name ends with it
|
||||
found <- AMR_env$AB_lookup[which(AMR_env$AB_lookup$generalised_name %like% paste0(x_spelling, "$")), "ab", drop = TRUE]
|
||||
if (nchar(x[i]) >= 4 && length(found) > 0) {
|
||||
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
|
||||
next
|
||||
}
|
||||
|
||||
|
||||
# and try if any synonym starts with it
|
||||
synonym_found <- unlist(lapply(
|
||||
AMR_env$AB_lookup$generalised_synonyms,
|
||||
@ -327,244 +330,71 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
|
||||
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
|
||||
next
|
||||
}
|
||||
|
||||
|
||||
# INITIAL SEARCH - More uncertain results ----
|
||||
if (loop_time <= 2 && fast_mode == FALSE) {
|
||||
if (loop_time == 1 && fast_mode == FALSE) {
|
||||
# only run on first and second try
|
||||
|
||||
# base on the Levensthein distance function if length >= 6
|
||||
if (nchar(x[i]) >= 6) {
|
||||
l_dist <- as.double(utils::adist(x[i], AMR_env$AB_lookup$generalised_name,
|
||||
ignore.case = FALSE,
|
||||
fixed = TRUE,
|
||||
costs = c(insertions = 1, deletions = 2, substitutions = 2),
|
||||
counts = FALSE))
|
||||
x_new[i] <- AMR_env$AB_lookup$ab[order(l_dist)][1]
|
||||
x_uncertain <- c(x_uncertain, x_bak[x[i] == x_bak_clean][1])
|
||||
next
|
||||
}
|
||||
|
||||
# try by removing all spaces
|
||||
if (x[i] %like% " ") {
|
||||
found <- suppressWarnings(as.ab(gsub(" +", "", x[i], perl = TRUE), loop_time = loop_time + 2))
|
||||
if (length(found) > 0 && !is.na(found)) {
|
||||
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
|
||||
x_uncertain <- c(x_uncertain, x_bak[x[i] == x_bak_clean][1])
|
||||
next
|
||||
}
|
||||
}
|
||||
|
||||
# try by removing all spaces and numbers
|
||||
if (x[i] %like% " " || x[i] %like% "[0-9]") {
|
||||
found <- suppressWarnings(as.ab(gsub("[ 0-9]", "", x[i], perl = TRUE), loop_time = loop_time + 2))
|
||||
if (length(found) > 0 && !is.na(found)) {
|
||||
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
|
||||
x_uncertain <- c(x_uncertain, x_bak[x[i] == x_bak_clean][1])
|
||||
next
|
||||
}
|
||||
}
|
||||
|
||||
# reverse a combination, e.g. clavulanic acid/amoxicillin
|
||||
if (x[i] %like% " ") {
|
||||
split <- strsplit(x[i], " ")[[1]]
|
||||
permute <- function(x) {
|
||||
if (length(x) == 1) return(list(x))
|
||||
result <- vector("list", factorial(length(x)))
|
||||
index <- 1
|
||||
for (i in seq_along(x)) {
|
||||
sub_perms <- permute(x[-i]) # Recursively get permutations of remaining elements
|
||||
for (sub in sub_perms) {
|
||||
result[[index]] <- c(x[i], sub)
|
||||
index <- index + 1
|
||||
}
|
||||
}
|
||||
return(result)
|
||||
}
|
||||
permutations <- permute(split)
|
||||
found_perms <- character(length(permutations))
|
||||
for (s in seq_len(length(permutations))) {
|
||||
concat <- paste0(permutations[[s]], collapse = " ")
|
||||
if (concat %in% AMR_env$AB_lookup$generalised_name) {
|
||||
found_perms[s] <- AMR_env$AB_lookup[which(AMR_env$AB_lookup$generalised_name == concat), "ab", drop = TRUE]
|
||||
} else {
|
||||
found_perms[s] <- suppressWarnings(as.ab(concat, loop_time = loop_time + 2))
|
||||
}
|
||||
}
|
||||
found_perms <- found_perms[!is.na(found_perms)]
|
||||
if (length(found_perms) > 0) {
|
||||
found <- found_perms[order(nchar(found_perms), decreasing = TRUE)][1]
|
||||
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
|
||||
x_uncertain <- c(x_uncertain, x_bak[x[i] == x_bak_clean][1])
|
||||
next
|
||||
}
|
||||
}
|
||||
|
||||
# transform back from other languages and try again
|
||||
x_translated <- paste(
|
||||
lapply(
|
||||
strsplit(x[i], "[^A-Z0-9]"),
|
||||
function(y) {
|
||||
for (i in seq_len(length(y))) {
|
||||
for (lang in LANGUAGES_SUPPORTED[LANGUAGES_SUPPORTED != "en"]) {
|
||||
y[i] <- ifelse(tolower(y[i]) %in% tolower(TRANSLATIONS[, lang, drop = TRUE]),
|
||||
TRANSLATIONS[which(tolower(TRANSLATIONS[, lang, drop = TRUE]) == tolower(y[i]) &
|
||||
!isFALSE(TRANSLATIONS$fixed)), "pattern"],
|
||||
y[i]
|
||||
)
|
||||
}
|
||||
}
|
||||
generalise_antibiotic_name(y)
|
||||
}
|
||||
)[[1]],
|
||||
collapse = "/"
|
||||
)
|
||||
x_translated_guess <- suppressWarnings(as.ab(x_translated, loop_time = loop_time + 2))
|
||||
if (!is.na(x_translated_guess)) {
|
||||
x_new[i] <- x_translated_guess
|
||||
x_uncertain <- c(x_uncertain, x_bak[x[i] == x_bak_clean][1])
|
||||
next
|
||||
}
|
||||
|
||||
# now also try to coerce brandname combinations like "Amoxy/clavulanic acid"
|
||||
x_translated <- paste(
|
||||
lapply(
|
||||
strsplit(x_translated, "[^A-Z0-9 ]"),
|
||||
function(y) {
|
||||
for (i in seq_len(length(y))) {
|
||||
y_name <- suppressWarnings(ab_name(y[i], language = NULL, loop_time = loop_time + 2))
|
||||
y[i] <- ifelse(!is.na(y_name),
|
||||
y_name,
|
||||
y[i]
|
||||
)
|
||||
}
|
||||
generalise_antibiotic_name(y)
|
||||
}
|
||||
)[[1]],
|
||||
collapse = "/"
|
||||
)
|
||||
x_translated_guess <- suppressWarnings(as.ab(x_translated, loop_time = loop_time + 2))
|
||||
if (!is.na(x_translated_guess)) {
|
||||
x_new[i] <- x_translated_guess
|
||||
x_uncertain <- c(x_uncertain, x_bak[x[i] == x_bak_clean][1])
|
||||
next
|
||||
}
|
||||
|
||||
# try by removing all trailing capitals
|
||||
if (x[i] %like_case% "[a-z]+[A-Z]+$") {
|
||||
found <- suppressWarnings(as.ab(gsub("[A-Z]+$", "", x[i], perl = TRUE), loop_time = loop_time + 2))
|
||||
if (!is.na(found)) {
|
||||
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
|
||||
x_uncertain <- c(x_uncertain, x_bak[x[i] == x_bak_clean][1])
|
||||
next
|
||||
}
|
||||
}
|
||||
|
||||
# keep only letters
|
||||
found <- suppressWarnings(as.ab(gsub("[^A-Z]", "", x[i], perl = TRUE), loop_time = loop_time + 2))
|
||||
if (!is.na(found)) {
|
||||
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
|
||||
x_uncertain <- c(x_uncertain, x_bak[x[i] == x_bak_clean][1])
|
||||
next
|
||||
}
|
||||
|
||||
# try from a bigger text, like from a health care record, see ?ab_from_text
|
||||
# already calculated above if flag_multiple_results = TRUE
|
||||
if (flag_multiple_results == TRUE) {
|
||||
found <- from_text[1L]
|
||||
ab_df <- AMR_env$AB_lookup
|
||||
ab_df$length_name <- nchar(ab_df$generalised_name)
|
||||
# now retrieve Levensthein distance for name, synonyms, and translated names
|
||||
ab_df$lev_name <- as.double(utils::adist(x[i], ab_df$generalised_name,
|
||||
ignore.case = FALSE,
|
||||
fixed = TRUE,
|
||||
costs = c(insertions = 1, deletions = 1, substitutions = 2),
|
||||
counts = FALSE))
|
||||
ab_df$lev_syn <- vapply(FUN.VALUE = double(1),
|
||||
ab_df$generalised_synonyms,
|
||||
function(y) ifelse(length(y[nchar(y) >= 5]) == 0,
|
||||
999,
|
||||
min(as.double(utils::adist(x[i], y[nchar(y) >= 5], ignore.case = FALSE,
|
||||
fixed = TRUE,
|
||||
costs = c(insertions = 1, deletions = 1, substitutions = 2),
|
||||
counts = FALSE)), na.rm = TRUE)),
|
||||
USE.NAMES = FALSE)
|
||||
if (!is.null(language) && language != "en") {
|
||||
ab_df$trans <- generalise_antibiotic_name(translate_AMR(ab_df$name, language = language))
|
||||
ab_df$lev_trans <- as.double(utils::adist(x[i], ab_df$trans,
|
||||
ignore.case = FALSE,
|
||||
fixed = TRUE,
|
||||
costs = c(insertions = 1, deletions = 1, substitutions = 2),
|
||||
counts = FALSE))
|
||||
} else {
|
||||
found <- tryCatch(suppressWarnings(ab_from_text(x[i], loop_time = loop_time + 2, translate_ab = FALSE)[[1]][1L]),
|
||||
error = function(e) NA_character_
|
||||
)
|
||||
ab_df$lev_trans <- ab_df$lev_name
|
||||
}
|
||||
if (!is.na(found)) {
|
||||
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
|
||||
|
||||
if (any(ab_df$lev_name < 5, na.rm = TRUE)) {
|
||||
x_new[i] <- ab_df$ab[order(ab_df$lev_name)][1]
|
||||
x_uncertain <- c(x_uncertain, x_bak[x[i] == x_bak_clean][1])
|
||||
next
|
||||
}
|
||||
|
||||
# first 5 except for cephalosporins, then first 7 (those cephalosporins all start quite the same!)
|
||||
found <- suppressWarnings(as.ab(substr(x[i], 1, 5), loop_time = loop_time + 2))
|
||||
if (!is.na(found) && ab_group(found, loop_time = loop_time + 1) %unlike% "cephalosporins") {
|
||||
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
|
||||
next
|
||||
}
|
||||
found <- suppressWarnings(as.ab(substr(x[i], 1, 7), loop_time = loop_time + 2))
|
||||
if (!is.na(found)) {
|
||||
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
|
||||
} else if (any(ab_df$lev_trans < 5, na.rm = TRUE)) {
|
||||
x_new[i] <- ab_df$ab[order(ab_df$lev_trans)][1]
|
||||
x_uncertain <- c(x_uncertain, x_bak[x[i] == x_bak_clean][1])
|
||||
next
|
||||
}
|
||||
|
||||
# make all consonants facultative
|
||||
search_str <- gsub("([BCDFGHJKLMNPQRSTVWXZ])", "\\1*", x[i], perl = TRUE)
|
||||
found <- suppressWarnings(as.ab(search_str, loop_time = loop_time + 2, already_regex = TRUE))
|
||||
# keep at least 4 normal characters
|
||||
if (nchar(gsub(".\\*", "", search_str, perl = TRUE)) < 4) {
|
||||
found <- NA
|
||||
}
|
||||
if (!is.na(found)) {
|
||||
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
|
||||
} else if (any(ab_df$lev_syn < 5, na.rm = TRUE)) {
|
||||
x_new[i] <- ab_df$ab[order(ab_df$lev_syn)][1]
|
||||
x_uncertain <- c(x_uncertain, x_bak[x[i] == x_bak_clean][1])
|
||||
next
|
||||
}
|
||||
|
||||
# make all vowels facultative
|
||||
search_str <- gsub("([AEIOUY])", "\\1*", x[i], perl = TRUE)
|
||||
found <- suppressWarnings(as.ab(search_str, loop_time = loop_time + 2, already_regex = TRUE))
|
||||
# keep at least 5 normal characters
|
||||
if (nchar(gsub(".\\*", "", search_str, perl = TRUE)) < 5) {
|
||||
found <- NA
|
||||
}
|
||||
if (!is.na(found)) {
|
||||
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
|
||||
x_uncertain <- c(x_uncertain, x_bak[x[i] == x_bak_clean][1])
|
||||
next
|
||||
}
|
||||
|
||||
# allow misspelling of vowels
|
||||
x_spelling <- gsub("A+", "[AEIOU]+", x_spelling, fixed = TRUE)
|
||||
x_spelling <- gsub("E+", "[AEIOU]+", x_spelling, fixed = TRUE)
|
||||
x_spelling <- gsub("I+", "[AEIOU]+", x_spelling, fixed = TRUE)
|
||||
x_spelling <- gsub("O+", "[AEIOU]+", x_spelling, fixed = TRUE)
|
||||
x_spelling <- gsub("U+", "[AEIOU]+", x_spelling, fixed = TRUE)
|
||||
found <- suppressWarnings(as.ab(x_spelling, loop_time = loop_time + 2, already_regex = TRUE))
|
||||
if (!is.na(found)) {
|
||||
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
|
||||
x_uncertain <- c(x_uncertain, x_bak[x[i] == x_bak_clean][1])
|
||||
next
|
||||
}
|
||||
|
||||
# try with switched character, like "mreopenem"
|
||||
for (j in seq_len(nchar(x[i]))) {
|
||||
x_switched <- paste0(
|
||||
# beginning part:
|
||||
substr(x[i], 1, j - 1),
|
||||
# here is the switching of 2 characters:
|
||||
substr(x[i], j + 1, j + 1),
|
||||
substr(x[i], j, j),
|
||||
# ending part:
|
||||
substr(x[i], j + 2, nchar(x[i]))
|
||||
)
|
||||
found <- suppressWarnings(as.ab(x_switched, loop_time = loop_time + 1))
|
||||
if (!is.na(found)) {
|
||||
break
|
||||
} else {
|
||||
# then just take name if Levensthein is max 100% of length of name
|
||||
ab_df$lev_len_ratio <- ab_df$lev_name / ab_df$length_name
|
||||
if (any(ab_df$lev_len_ratio < 1)) {
|
||||
ab_df <- ab_df[ab_df$lev_len_ratio < 1, , drop = FALSE]
|
||||
x_new[i] <- ab_df$ab[order(ab_df$lev_name)][1]
|
||||
x_uncertain <- c(x_uncertain, x_bak[x[i] == x_bak_clean][1])
|
||||
next
|
||||
}
|
||||
}
|
||||
if (!is.na(found)) {
|
||||
x_new[i] <- found[1L]
|
||||
x_uncertain <- c(x_uncertain, x_bak[x[i] == x_bak_clean][1])
|
||||
next
|
||||
}
|
||||
} # end of loop_time <= 2
|
||||
|
||||
# not found
|
||||
}
|
||||
|
||||
# nothing found
|
||||
x_unknown <- c(x_unknown, x_bak[x[i] == x_bak_clean][1])
|
||||
}
|
||||
|
||||
|
||||
if (loop_time == 1 && sum(already_known) < length(x)) {
|
||||
close(progress)
|
||||
}
|
||||
|
||||
|
||||
# save to package env to save time for next time
|
||||
if (loop_time == 1) {
|
||||
AMR_env$ab_previously_coerced <- AMR_env$ab_previously_coerced[which(!AMR_env$ab_previously_coerced$x %in% x), , drop = FALSE]
|
||||
@ -578,7 +408,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
|
||||
)
|
||||
))
|
||||
}
|
||||
|
||||
|
||||
# take failed ATC codes apart from rest
|
||||
if (length(x_unknown_ATCs) > 0 && fast_mode == FALSE) {
|
||||
warning_(
|
||||
@ -619,14 +449,14 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
|
||||
". If required, use `add_custom_antimicrobials()` to add custom entries.")
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
x_result <- x_new[match(x_bak_clean, x)]
|
||||
if (length(x_result) == 0) {
|
||||
x_result <- NA_character_
|
||||
}
|
||||
|
||||
|
||||
set_clean_class(x_result,
|
||||
new_class = c("ab", "character")
|
||||
new_class = c("ab", "character")
|
||||
)
|
||||
}
|
||||
|
||||
@ -767,16 +597,20 @@ generalise_antibiotic_name <- function(x) {
|
||||
x <- gsub("[^A-Z0-9 -)(]", "/", x, perl = TRUE)
|
||||
# correct for 'high level' antibiotics
|
||||
x <- trimws(gsub("([^A-Z0-9/ -]+)?(HIGH(.?LE?VE?L)?|[^A-Z0-9/]H[^A-Z0-9]?L)([^A-Z0-9 -]+)?", "-HIGH", x, perl = TRUE))
|
||||
x <- trimws(gsub("^(-HIGH)(.*)", "\\2\\1", x))
|
||||
x <- trimws(gsub("^(-HIGH)(.*)", "\\2\\1", x, perl = TRUE))
|
||||
# remove part between brackets if that's followed by another string
|
||||
x <- gsub("(.*)+ [(].*[)]", "\\1", x)
|
||||
# spaces around non-characters must be removed: amox + clav -> amox/clav
|
||||
# spaces around non-characters must be removed: amox + clav -> amox clav
|
||||
x <- gsub("(.*[A-Z0-9]) ([^A-Z0-9].*)", "\\1\\2", x, perl = TRUE)
|
||||
x <- gsub("(.*[^A-Z0-9]) ([A-Z0-9].*)", "\\1\\2", x, perl = TRUE)
|
||||
# remove hyphen after a starting "co"
|
||||
x <- gsub("^CO-", "CO", x, perl = TRUE)
|
||||
# replace operators with a space
|
||||
x <- gsub("(/| AND | WITH | W/|[+]|[-])+", " ", x, perl = TRUE)
|
||||
# replace more than 1 space
|
||||
x <- trimws(gsub(" +", " ", x, perl = TRUE))
|
||||
# move HIGH to end
|
||||
x <- trimws(gsub("(.*) HIGH(.*)", "\\1\\2 HIGH", x, perl = TRUE))
|
||||
x
|
||||
}
|
||||
|
||||
@ -789,9 +623,9 @@ get_translate_ab <- function(translate_ab) {
|
||||
} else {
|
||||
translate_ab <- tolower(translate_ab)
|
||||
stop_ifnot(translate_ab %in% colnames(AMR::antibiotics),
|
||||
"invalid value for 'translate_ab', this must be a column name of the antibiotics data set\n",
|
||||
"or TRUE (equals 'name') or FALSE to not translate at all.",
|
||||
call = FALSE
|
||||
"invalid value for 'translate_ab', this must be a column name of the antibiotics data set\n",
|
||||
"or TRUE (equals 'name') or FALSE to not translate at all.",
|
||||
call = FALSE
|
||||
)
|
||||
translate_ab
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
This knowledge base contains all context you must know about the AMR package for R. You are a GPT trained to be an assistant for the AMR package in R. You are an incredible R specialist, especially trained in this package and in the tidyverse.
|
||||
|
||||
First and foremost, you are trained on version 2.1.1.9158. Remember this whenever someone asks which AMR package version you’re at.
|
||||
First and foremost, you are trained on version 2.1.1.9159. Remember this whenever someone asks which AMR package version you’re at.
|
||||
|
||||
Below are the contents of the file, the file, and all the files (documentation) in the package. Every file content is split using 100 hypens.
|
||||
----------------------------------------------------------------------------------------------------
|
||||
@ -2577,7 +2577,8 @@ THE PART HEREAFTER CONTAINS CONTENTS FROM FILE 'man/as.ab.Rd':
|
||||
\alias{ab_reset_session}
|
||||
\title{Transform Input to an Antibiotic ID}
|
||||
\usage{
|
||||
as.ab(x, flag_multiple_results = TRUE, info = interactive(), ...)
|
||||
as.ab(x, flag_multiple_results = TRUE, language = get_AMR_locale(),
|
||||
info = interactive(), ...)
|
||||
|
||||
is.ab(x)
|
||||
|
||||
@ -2588,6 +2589,8 @@ ab_reset_session()
|
||||
|
||||
\item{flag_multiple_results}{a \link{logical} to indicate whether a note should be printed to the console that probably more than one antibiotic drug code or name can be retrieved from a single input value.}
|
||||
|
||||
\item{language}{language to coerce input values from any of the 20 supported languages - default to the system language if supported (see \code{\link[=get_AMR_locale]{get_AMR_locale()}})}
|
||||
|
||||
\item{info}{a \link{logical} to indicate whether a progress bar should be printed - the default is \code{TRUE} only in interactive mode}
|
||||
|
||||
\item{...}{arguments passed on to internal functions}
|
||||
@ -2644,7 +2647,6 @@ as.ab("J01FA01")
|
||||
as.ab("J 01 FA 01")
|
||||
as.ab("Erythromycin")
|
||||
as.ab("eryt")
|
||||
as.ab(" eryt 123")
|
||||
as.ab("ERYT")
|
||||
as.ab("ERY")
|
||||
as.ab("eritromicine") # spelled wrong, yet works
|
@ -7,7 +7,8 @@
|
||||
\alias{ab_reset_session}
|
||||
\title{Transform Input to an Antibiotic ID}
|
||||
\usage{
|
||||
as.ab(x, flag_multiple_results = TRUE, info = interactive(), ...)
|
||||
as.ab(x, flag_multiple_results = TRUE, language = get_AMR_locale(),
|
||||
info = interactive(), ...)
|
||||
|
||||
is.ab(x)
|
||||
|
||||
@ -18,6 +19,8 @@ ab_reset_session()
|
||||
|
||||
\item{flag_multiple_results}{a \link{logical} to indicate whether a note should be printed to the console that probably more than one antibiotic drug code or name can be retrieved from a single input value.}
|
||||
|
||||
\item{language}{language to coerce input values from any of the 20 supported languages - default to the system language if supported (see \code{\link[=get_AMR_locale]{get_AMR_locale()}})}
|
||||
|
||||
\item{info}{a \link{logical} to indicate whether a progress bar should be printed - the default is \code{TRUE} only in interactive mode}
|
||||
|
||||
\item{...}{arguments passed on to internal functions}
|
||||
@ -74,7 +77,6 @@ as.ab("J01FA01")
|
||||
as.ab("J 01 FA 01")
|
||||
as.ab("Erythromycin")
|
||||
as.ab("eryt")
|
||||
as.ab(" eryt 123")
|
||||
as.ab("ERYT")
|
||||
as.ab("ERY")
|
||||
as.ab("eritromicine") # spelled wrong, yet works
|
||||
|
@ -27,20 +27,21 @@
|
||||
# how to conduct AMR data analysis: https://msberends.github.io/AMR/ #
|
||||
# ==================================================================== #
|
||||
|
||||
ab_reset_session()
|
||||
|
||||
expect_equal(
|
||||
as.character(as.ab(c(
|
||||
"J01FA01",
|
||||
"J 01 FA 01",
|
||||
"Erythromycin",
|
||||
"eryt",
|
||||
" eryt 123",
|
||||
"ERYT",
|
||||
"ERY",
|
||||
"erytromicine",
|
||||
"Erythrocin",
|
||||
"Romycin"
|
||||
))),
|
||||
rep("ERY", 10)
|
||||
rep("ERY", 9)
|
||||
)
|
||||
|
||||
expect_identical(class(as.ab("amox")), c("ab", "character"))
|
||||
@ -49,7 +50,7 @@ expect_true(is.ab(as.ab("amox")))
|
||||
expect_stdout(print(as.ab("amox")))
|
||||
expect_stdout(print(data.frame(a = as.ab("amox"))))
|
||||
|
||||
# expect_warning(as.ab("J00AA00")) # ATC not yet available in data set
|
||||
expect_warning(as.ab("J00AA00")) # ATC not yet available in data set
|
||||
# expect_warning(as.ab("UNKNOWN"))
|
||||
|
||||
expect_stdout(print(as.ab("amox")))
|
||||
|
@ -27,20 +27,22 @@
|
||||
# how to conduct AMR data analysis: https://msberends.github.io/AMR/ #
|
||||
# ==================================================================== #
|
||||
|
||||
ab_reset_session()
|
||||
|
||||
expect_identical(
|
||||
ab_from_text("28/03/2020 regular amoxicilliin 500mg po tds")[[1]],
|
||||
ab_from_text("28/03/2020 amoxicilliin 500mg po tds")[[1]],
|
||||
as.ab("Amoxicillin")
|
||||
)
|
||||
expect_identical(
|
||||
ab_from_text("28/03/2020 regular amoxicilliin 500mg po tds", thorough_search = TRUE)[[1]],
|
||||
ab_from_text("28/03/2020 amoxicilliin 500mg po tds", thorough_search = TRUE)[[1]],
|
||||
as.ab("Amoxicillin")
|
||||
)
|
||||
expect_identical(
|
||||
ab_from_text("28/03/2020 regular amoxicilliin 500mg po tds", thorough_search = FALSE)[[1]],
|
||||
ab_from_text("28/03/2020 amoxicilliin 500mg po tds", thorough_search = FALSE)[[1]],
|
||||
as.ab("Amoxicillin")
|
||||
)
|
||||
expect_identical(
|
||||
ab_from_text("28/03/2020 regular amoxicilliin 500mg po tds", translate_ab = TRUE)[[1]],
|
||||
ab_from_text("28/03/2020 amoxicilliin 500mg po tds", translate_ab = TRUE)[[1]],
|
||||
"Amoxicillin"
|
||||
)
|
||||
expect_identical(
|
||||
@ -49,10 +51,10 @@ expect_identical(
|
||||
)
|
||||
|
||||
expect_identical(
|
||||
ab_from_text("28/03/2020 regular amoxicilliin 500mg po tds", type = "dose")[[1]],
|
||||
ab_from_text("28/03/2020 amoxicilliin 500mg po tds", type = "dose")[[1]],
|
||||
500
|
||||
)
|
||||
expect_identical(
|
||||
ab_from_text("28/03/2020 regular amoxicilliin 500mg po tds", type = "admin")[[1]],
|
||||
ab_from_text("28/03/2020 amoxicilliin 500mg po tds", type = "admin")[[1]],
|
||||
"oral"
|
||||
)
|
||||
|
@ -27,6 +27,8 @@
|
||||
# how to conduct AMR data analysis: https://msberends.github.io/AMR/ #
|
||||
# ==================================================================== #
|
||||
|
||||
ab_reset_session()
|
||||
|
||||
expect_identical(ab_name("AMX", language = NULL), "Amoxicillin")
|
||||
expect_identical(ab_atc("AMX"), "J01CA04")
|
||||
expect_identical(ab_cid("AMX"), as.integer(33613))
|
||||
@ -94,3 +96,4 @@ if (AMR:::pkg_is_available("dplyr", min_version = "1.0.0", also_load = TRUE)) {
|
||||
set_ab_names(NIT:VAN) %>%
|
||||
colnames())))
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user