1
0
mirror of https://github.com/msberends/AMR.git synced 2025-07-08 10:31:53 +02:00

(v1.3.0.9026) eucast expert rules 3.2

This commit is contained in:
2020-09-24 00:30:11 +02:00
parent a1411ddafc
commit c19095a3d5
107 changed files with 48638 additions and 3953 deletions

View File

@ -87,6 +87,8 @@ check_dataset_integrity <- function() {
data_in_pkg <- data(package = "AMR", envir = asNamespace("AMR"))$results[, "Item", drop = TRUE]
data_in_globalenv <- ls(envir = globalenv())
overwritten <- data_in_pkg[data_in_pkg %in% data_in_globalenv]
# exception for example_isolates
overwritten <- overwritten[overwritten != "example_isolates"]
stop_if(length(overwritten) > 0,
"the following data set is overwritten by your global environment and prevents the AMR package from working correctly:\n",
paste0("'", overwritten, "'", collapse = ", "),
@ -110,7 +112,7 @@ check_dataset_integrity <- function() {
invisible(TRUE)
}
search_type_in_df <- function(x, type) {
search_type_in_df <- function(x, type, info = TRUE) {
# try to find columns based on type
found <- NULL
@ -187,7 +189,7 @@ search_type_in_df <- function(x, type) {
}
}
if (!is.null(found)) {
if (!is.null(found) & info == TRUE) {
msg <- paste0("NOTE: Using column `", font_bold(found), "` as input for `col_", type, "`.")
if (type %in% c("keyantibiotics", "specimen")) {
msg <- paste(msg, "Use", font_bold(paste0("col_", type), "= FALSE"), "to prevent this.")
@ -197,6 +199,11 @@ search_type_in_df <- function(x, type) {
found
}
is_possibly_regex <- function(x) {
sapply(strsplit(x, ""),
function(y) any(y %in% c("$", "(", ")", "*", "+", "-", ".", "?", "[", "]", "^", "{", "|", "}", "\\"), na.rm = TRUE))
}
stop_ifnot_installed <- function(package) {
# no "utils::installed.packages()" since it requires non-staged install since R 3.6.0
# https://developer.r-project.org/Blog/public/2019/02/14/staged-install/index.html
@ -259,7 +266,7 @@ stop_if <- function(expr, ..., call = TRUE) {
}
stop_ifnot <- function(expr, ..., call = TRUE) {
if (!isTRUE(expr)) {
if (isFALSE(expr)) {
if (isTRUE(call)) {
call <- -1
}
@ -317,6 +324,18 @@ dataset_UTF8_to_ASCII <- function(df) {
df
}
create_ab_documentation <- function(ab) {
ab_names <- ab_name(ab, language = NULL, tolower = TRUE)
ab <- ab[order(ab_names)]
ab_names <- ab_names[order(ab_names)]
atcs <- ab_atc(ab)
atcs[!is.na(atcs)] <- paste0("[", atcs[!is.na(atcs)], "](", ab_url(ab[!is.na(atcs)]), ")")
atcs[is.na(atcs)] <- "no ATC code"
out <- paste0(ab_names, " (`", ab, "`, ", atcs, ")", collapse = ", ")
substr(out, 1, 1) <- toupper(substr(out, 1, 1))
out
}
has_colour <- function() {
# this is a base R version of crayon::has_color
enabled <- getOption("crayon.enabled")

10
R/ab.R
View File

@ -110,8 +110,14 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) {
note_if_more_than_one_found <- function(found, index, from_text) {
if (initial_search == TRUE & isTRUE(length(from_text) > 1)) {
message(font_blue(paste0("NOTE: more than one result was found for item ", index, ": ",
paste0(ab_name(from_text, tolower = TRUE, initial_search = FALSE), collapse = ", "))))
abnames <- ab_name(from_text, tolower = TRUE, initial_search = FALSE)
if (ab_name(found[1L], language = NULL) %like% "clavulanic acid") {
abnames <- abnames[!abnames == "clavulanic acid"]
}
if (length(abnames) > 1) {
message(font_blue(paste0("NOTE: more than one result was found for item ", index, ": ",
paste0(abnames, collapse = ", "))))
}
}
found[1L]
}

View File

@ -149,7 +149,7 @@ ab_selector <- function(ab_class, function_name) {
vars_vct <- peek_vars_tidyselect(fn = function_name)
vars_df <- data.frame(as.list(vars_vct))[0, , drop = FALSE]
colnames(vars_df) <- vars_vct
ab_in_data <- suppressMessages(get_column_abx(vars_df))
ab_in_data <- get_column_abx(vars_df, info = FALSE)
if (length(ab_in_data) == 0) {
message(font_blue("NOTE: no antimicrobial agents found."))

View File

@ -254,7 +254,7 @@ catalogue_of_life <- list(
#' - `antibiotic`\cr Name of the antibiotic drug
#' @details The repository of this `AMR` package contains a file comprising this exact data set: <https://github.com/msberends/AMR/blob/master/data-raw/intrinsic_resistant.txt>. This file **allows for machine reading EUCAST guidelines about intrinsic resistance**, which is almost impossible with the Excel and PDF files distributed by EUCAST. The file is updated automatically.
#'
#' This data set is based on 'EUCAST Expert Rules, Intrinsic Resistance and Exceptional Phenotypes', version `r EUCAST_VERSION_EXPERT_RULES`.
#' This data set is based on '`r EUCAST_VERSION_EXPERT_RULES[["3.2"]]$title`', `r EUCAST_VERSION_EXPERT_RULES[["3.2"]]$version_txt` from `r EUCAST_VERSION_EXPERT_RULES[["3.2"]]$year`.
#' @inheritSection AMR Reference data publicly available
#' @inheritSection AMR Read more on our website!
#' @examples

File diff suppressed because it is too large Load Diff

View File

@ -93,7 +93,7 @@ filter_ab_class <- function(x,
stop_ifnot(all(scope %in% c("any", "all")), "`scope` must be one of: 'any', 'all'")
# get all columns in data with names that resemble antibiotics
ab_in_data <- suppressMessages(get_column_abx(x))
ab_in_data <- get_column_abx(x, info = FALSE)
if (length(ab_in_data) == 0) {
message(font_blue("NOTE: no columns with class <rsi> found (see ?as.rsi), data left unchanged."))
return(x.bak)

View File

@ -110,16 +110,21 @@ get_column_abx <- function(x,
soft_dependencies = NULL,
hard_dependencies = NULL,
verbose = FALSE,
info = TRUE,
...) {
message(font_blue("NOTE: Auto-guessing columns suitable for analysis"), appendLF = FALSE)
if (info == TRUE) {
message(font_blue("NOTE: Auto-guessing columns suitable for analysis"), appendLF = FALSE)
}
x <- as.data.frame(x, stringsAsFactors = FALSE)
if (NROW(x) > 10000) {
# only test maximum of 10,000 values per column
message(font_blue(paste0(" (using only ", font_bold("the first 10,000 rows"), ")...")), appendLF = FALSE)
if (info == TRUE) {
message(font_blue(paste0(" (using only ", font_bold("the first 10,000 rows"), ")...")), appendLF = FALSE)
}
x <- x[1:10000, , drop = FALSE]
} else {
} else if (info == TRUE) {
message(font_blue("..."), appendLF = FALSE)
}
x_bak <- x
@ -130,8 +135,8 @@ get_column_abx <- function(x,
vectr_antibiotics <- vectr_antibiotics[!is.na(vectr_antibiotics) & nchar(vectr_antibiotics) >= 3]
x_columns <- sapply(colnames(x), function(col, df = x_bak) {
if (toupper(col) %in% vectr_antibiotics |
is.rsi(as.data.frame(df)[, col]) |
is.rsi.eligible(as.data.frame(df)[, col], threshold = 0.5)) {
is.rsi(as.data.frame(df)[, col, drop = TRUE]) |
is.rsi.eligible(as.data.frame(df)[, col, drop = TRUE], threshold = 0.5)) {
return(col)
} else {
return(NA_character_)
@ -142,7 +147,7 @@ get_column_abx <- function(x,
df_trans <- data.frame(colnames = colnames(x),
abcode = suppressWarnings(as.ab(colnames(x), info = FALSE)))
df_trans <- df_trans[!is.na(df_trans$abcode), ]
df_trans <- df_trans[!is.na(df_trans$abcode), , drop = FALSE]
x <- as.character(df_trans$colnames)
names(x) <- df_trans$abcode
@ -166,7 +171,9 @@ get_column_abx <- function(x,
}
if (length(x) == 0) {
message(font_blue("No columns found."))
if (info == TRUE) {
message(font_blue("No columns found."))
}
return(x)
}
@ -178,14 +185,16 @@ get_column_abx <- function(x,
x <- x[order(names(x), x)]
# succeeded with auto-guessing
message(font_blue("OK."))
if (info == TRUE) {
message(font_blue("OK."))
}
for (i in seq_len(length(x))) {
if (verbose == TRUE & !names(x[i]) %in% names(duplicates)) {
if (info == TRUE & verbose == TRUE & !names(x[i]) %in% names(duplicates)) {
message(font_blue(paste0("NOTE: Using column `", font_bold(x[i]), "` as input for `", names(x)[i],
"` (", ab_name(names(x)[i], tolower = TRUE, language = NULL), ").")))
}
if (names(x[i]) %in% names(duplicates)) {
if (info == TRUE & names(x[i]) %in% names(duplicates)) {
warning(font_red(paste0("Using column `", font_bold(x[i]), "` as input for `", names(x)[i],
"` (", ab_name(names(x)[i], tolower = TRUE, language = NULL),
"), although it was matched for multiple antibiotics or columns.")),
@ -206,14 +215,19 @@ get_column_abx <- function(x,
}
if (!is.null(soft_dependencies)) {
soft_dependencies <- unique(soft_dependencies)
if (!all(soft_dependencies %in% names(x))) {
if (info == TRUE & !all(soft_dependencies %in% names(x))) {
# missing a soft dependency may lower the reliability
missing <- soft_dependencies[!soft_dependencies %in% names(x)]
missing_txt <- paste(paste0(ab_name(missing, tolower = TRUE, language = NULL),
" (", font_bold(missing, collapse = NULL), ")"),
missing_msg <- paste(paste0(ab_name(missing, tolower = TRUE, language = NULL),
" (", missing, ")"),
collapse = ", ")
message(font_blue("NOTE: Reliability would be improved if these antimicrobial results would be available too:",
missing_txt))
missing_msg <- paste("NOTE: Reliability would be improved if these antimicrobial results would be available too:",
missing_msg)
wrapped <- strwrap(missing_msg,
width = 0.95 * getOption("width"),
exdent = 6)
wrapped <- gsub("\\((.*?)\\)", paste0("(", font_bold("\\1"), ")"), wrapped) # add bold abbreviations
message(font_blue(wrapped, collapse = "\n"))
}
}
x

View File

@ -64,7 +64,7 @@
#' }
like <- function(x, pattern, ignore.case = TRUE) {
# set to fixed if no regex found
fixed <- all(!grepl("[\\[$.^*?+-}{|)(]", pattern))
fixed <- !any(is_possibly_regex(pattern))
if (ignore.case == TRUE) {
# set here, otherwise if fixed = TRUE, this warning will be thrown: argument 'ignore.case = TRUE' will be ignored
x <- tolower(x)
@ -140,3 +140,13 @@ like <- function(x, pattern, ignore.case = TRUE) {
"%like_case%" <- function(x, pattern) {
like(x, pattern, ignore.case = FALSE)
}
# don't export his one, it's just for convenience in eucast_rules()
# match all Klebsiella and Raoultella, but not K. aerogenes: fullname %like_perl% "^(Klebsiella(?! aerogenes)|Raoultella)"
"%like_perl%" <- function(x, pattern) {
grepl(x = tolower(x),
pattern = tolower(pattern),
perl = TRUE,
fixed = FALSE,
ignore.case = TRUE)
}

View File

@ -141,7 +141,7 @@ mdro <- function(x,
# try to find columns based on type
# -- mo
if (is.null(col_mo)) {
col_mo <- search_type_in_df(x = x, type = "mo")
col_mo <- search_type_in_df(x = x, type = "mo", info = info)
}
if (is.null(col_mo) & guideline$code == "tb") {
message(font_blue("NOTE: No column found as input for `col_mo`,",
@ -303,7 +303,9 @@ mdro <- function(x,
"TCY",
"DOX",
"MNO"),
verbose = verbose, ...)
verbose = verbose,
info = info,
...)
} else if (guideline$code == "tb") {
cols_ab <- get_column_abx(x = x,
soft_dependencies = c("CAP",
@ -314,7 +316,9 @@ mdro <- function(x,
"RIF",
"RIB",
"RFP"),
verbose = verbose, ...)
info = info,
verbose = verbose,
...)
} else if (guideline$code == "mrgn") {
cols_ab <- get_column_abx(x = x,
soft_dependencies = c("PIP",
@ -323,9 +327,14 @@ mdro <- function(x,
"IPM",
"MEM",
"CIP"),
verbose = verbose, ...)
verbose = verbose,
info = info,
...)
} else {
cols_ab <- get_column_abx(x = x, verbose = verbose, ...)
cols_ab <- get_column_abx(x = x,
verbose = verbose,
info = info,
...)
}
AMC <- cols_ab["AMC"]

2
R/mo.R
View File

@ -109,7 +109,7 @@
#' 3. The level of uncertainty \eqn{U} needed to get to the result, as stated above (1 to 3);
#' 4. The [Levenshtein distance](https://en.wikipedia.org/wiki/Levenshtein_distance) \eqn{L} is the distance between the user input and all taxonomic full names, with the text length of the user input being the maximum distance. A modified version of the Levenshtein distance \eqn{L'} based on the text length of the full name \eqn{F} is calculated as:
#'
#' \deqn{L' = F - \frac{0.5L}{F}}{L' = (F - 0.5L) / F}
#' \deqn{L' = 1 - \frac{0.5L}{F}}{L' = 1 - ((0.5 * L) / F)}
#'
#' The final matching score \eqn{M} is calculated as:
#' \deqn{M = L' \times \frac{1}{P K U} = \frac{F - 0.5L}{F P K U}}{M = L' * (1 / (P * K * U)) = (F - 0.5L) / (F * P * K * U)}

View File

@ -32,7 +32,7 @@
#' 3. The level of uncertainty \eqn{U} that is needed to get to a result (1 to 3, see [as.mo()]);
#' 4. The [Levenshtein distance](https://en.wikipedia.org/wiki/Levenshtein_distance) \eqn{L} is the distance between the user input and all taxonomic full names, with the text length of the user input being the maximum distance. A modified version of the Levenshtein distance \eqn{L'} based on the text length of the full name \eqn{F} is calculated as:
#'
#' \deqn{L' = F - \frac{0.5L}{F}}{L' = (F - 0.5L) / F}
#' \deqn{L' = 1 - \frac{0.5L}{F}}{L' = 1 - ((0.5 * L) / F)}
#'
#' The final matching score \eqn{M} is calculated as:
#' \deqn{M = L' \times \frac{1}{P K U} = \frac{F - 0.5L}{F P K U}}{M = L' * (1 / (P * K * U)) = (F - 0.5L) / (F * P * K * U)}

View File

@ -35,7 +35,7 @@
#'
#' The created compressed data file `"~/.mo_source.rds"` will be used at default for MO determination (function [as.mo()] and consequently all `mo_*` functions like [mo_genus()] and [mo_gramstain()]). The location of the original file will be saved as an R option with `options(mo_source = path)`. Its timestamp will be saved with `options(mo_source_datetime = ...)`.
#'
#' The function [get_mo_source()] will return the data set by reading `"~/.mo_source.rds"` with [readRDS()]. If the original file has changed (by checking the aforementioned options `mo_source` and `mo_source_datetime`), it will call [set_mo_source()] to update the data file automatically.
#' The function [get_mo_source()] will return the data set by reading `"~/.mo_source.rds"` with [readRDS()]. If the original file has changed (by checking the aforementioned options `mo_source` and `mo_source_datetime`), it will call [set_mo_source()] to update the data file automatically if used in an interactive session.
#'
#' Reading an Excel file (`.xlsx`) with only one row has a size of 8-9 kB. The compressed file created with [set_mo_source()] will then have a size of 0.1 kB and can be read by [get_mo_source()] in only a couple of microseconds (millionths of a second).
#'
@ -224,7 +224,7 @@ get_mo_source <- function() {
set_mo_source("")
return(NULL)
}
if (new_time != old_time) {
if (interactive() && new_time != old_time) {
# set updated source
set_mo_source(getOption("mo_source"))
}

14
R/rsi.R
View File

@ -187,7 +187,6 @@ is.rsi <- function(x) {
#' @export
is.rsi.eligible <- function(x, threshold = 0.05) {
stop_if(NCOL(x) > 1, "`x` must be a one-dimensional vector.")
if (any(c("logical",
"numeric",
"integer",
@ -226,15 +225,12 @@ as.rsi.default <- function(x, ...) {
class = c("rsi", "ordered", "factor"))
} else {
ab <- deparse(substitute(x))
if (!any(x %like% "(R|S|I)", na.rm = TRUE)) {
if (!is.na(suppressWarnings(as.ab(ab)))) {
# check if they are actually MICs or disks now that the antibiotic name is valid
if (all_valid_mics(x)) {
as.rsi(as.mic(x), ab = ab, ...)
} else if (all_valid_disks(x)) {
as.rsi(as.disk(x), ab = ab, ...)
}
# check if they are actually MICs or disks now that the antibiotic name is valid
if (all_valid_mics(x)) {
warning("The input seems to be MIC values. Transform them with as.mic() before running as.rsi() to interpret them.")
} else if (all_valid_disks(x)) {
warning("The input seems to be disk diffusion values. Transform them with as.disk() before running as.rsi() to interpret them.")
}
}

Binary file not shown.