AMR/R/eucast_rules.R

1121 lines
53 KiB
R
Raw Normal View History

2018-02-21 11:52:31 +01:00
# ==================================================================== #
# TITLE #
2020-10-08 11:16:03 +02:00
# Antimicrobial Resistance (AMR) Analysis for R #
2018-02-21 11:52:31 +01:00
# #
2019-01-02 23:24:07 +01:00
# SOURCE #
2020-07-08 14:48:06 +02:00
# https://github.com/msberends/AMR #
2018-02-21 11:52:31 +01:00
# #
# LICENCE #
# (c) 2018-2020 Berends MS, Luz CF et al. #
2020-10-08 11:16:03 +02:00
# Developed at the University of Groningen, the Netherlands, in #
# collaboration with non-profit organisations Certe Medical #
# Diagnostics & Advice, and University Medical Center Groningen. #
2018-02-21 11:52:31 +01:00
# #
2019-01-02 23:24:07 +01:00
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
# the Free Software Foundation. #
# We created this package for both routine data analysis and academic #
# research and it was publicly released in the hope that it will be #
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
2020-10-08 11:16:03 +02:00
# #
# Visit our website for the full manual and a complete tutorial about #
# how to conduct AMR analysis: https://msberends.github.io/AMR/ #
2018-02-21 11:52:31 +01:00
# ==================================================================== #
2020-09-24 00:30:11 +02:00
# add new version numbers here, and add the rules themselves to "data-raw/eucast_rules.tsv"
# (running "data-raw/internals.R" will process that TSV file)
EUCAST_VERSION_BREAKPOINTS <- list("10.0" = list(version_txt = "v10.0",
year = 2020,
2020-11-12 11:07:23 +01:00
title = "'EUCAST Clinical Breakpoints'",
url = "https://www.eucast.org/clinical_breakpoints/"))
2020-09-24 00:30:11 +02:00
EUCAST_VERSION_EXPERT_RULES <- list("3.1" = list(version_txt = "v3.1",
year = 2016,
2020-11-12 11:07:23 +01:00
title = "'EUCAST Expert Rules, Intrinsic Resistance and Exceptional Phenotypes'",
url = "https://www.eucast.org/expert_rules_and_intrinsic_resistance/"),
2020-09-24 00:30:11 +02:00
"3.2" = list(version_txt = "v3.2",
year = 2020,
2020-11-12 11:07:23 +01:00
title = "'EUCAST Expert Rules' and 'EUCAST Intrinsic Resistance and Unusual Phenotypes'",
url = "https://www.eucast.org/expert_rules_and_intrinsic_resistance/"))
format_eucast_version_nr <- function(version) {
# for documentation - adds title, version number, year and url in markdown language
lst <- c(EUCAST_VERSION_BREAKPOINTS, EUCAST_VERSION_EXPERT_RULES)
version <- format(version, nsmall = 1)
paste0("[", lst[[version]]$title, " ", lst[[version]]$version_txt, "](", lst[[version]]$url, ")",
" from ", lst[[version]]$year)
}
2019-04-05 18:47:39 +02:00
2020-01-26 20:20:00 +01:00
#' Apply EUCAST rules
2019-11-15 15:25:03 +01:00
#'
#' @description
2020-09-24 00:30:11 +02:00
#' Apply rules for clinical breakpoints and intrinsic resistance as defined by the European Committee on Antimicrobial Susceptibility Testing (EUCAST, <https://eucast.org>), see *Source*.
2019-11-15 15:25:03 +01:00
#'
2020-09-24 00:30:11 +02:00
#' To improve the interpretation of the antibiogram before EUCAST rules are applied, some non-EUCAST rules can applied at default, see Details.
#' @inheritSection lifecycle Stable lifecycle
#' @param x data with antibiotic columns, such as `amox`, `AMX` and `AMC`
2020-10-04 19:26:43 +02:00
#' @param info a logical to indicate whether progress should be printed to the console, defaults to only print while in interactive sessions
2020-09-24 00:30:11 +02:00
#' @param rules a character vector that specifies which rules should be applied. Must be one or more of `"breakpoints"`, `"expert"`, `"other"`, `"all"`, and defaults to `c("breakpoints", "expert")`. The default value can be set to another value, e.g. using `options(AMR_eucastrules = "all")`.
#' @param verbose a [logical] to turn Verbose mode on and off (default is off). In Verbose mode, the function does not apply rules to the data, but instead returns a data set in logbook form with extensive info about which rows and columns would be effected and in which way. Using Verbose mode takes a lot more time.
2020-09-28 11:00:59 +02:00
#' @param version_breakpoints the version number to use for the EUCAST Clinical Breakpoints guideline. Currently supported: `r paste0(names(EUCAST_VERSION_BREAKPOINTS), collapse = ", ")`.
#' @param version_expertrules the version number to use for the EUCAST Expert Rules and Intrinsic Resistance guideline. Currently supported: `r paste0(names(EUCAST_VERSION_EXPERT_RULES), collapse = ", ")`.
#' @param ... column name of an antibiotic, please see section *Antibiotics* below
2018-11-01 20:50:10 +01:00
#' @inheritParams first_isolate
2019-04-05 18:47:39 +02:00
#' @details
#' **Note:** This function does not translate MIC values to RSI values. Use [as.rsi()] for that. \cr
#' **Note:** When ampicillin (AMP, J01CA01) is not available but amoxicillin (AMX, J01CA04) is, the latter will be used for all rules where there is a dependency on ampicillin. These drugs are interchangeable when it comes to expression of antimicrobial resistance.
2019-04-09 14:59:17 +02:00
#'
2020-09-24 00:30:11 +02:00
#' The file containing all EUCAST rules is located here: <https://github.com/msberends/AMR/blob/master/data-raw/eucast_rules.tsv>.
2020-01-26 20:20:00 +01:00
#'
2020-09-24 00:30:11 +02:00
#' ## 'Other' rules
#'
2020-09-24 00:30:11 +02:00
#' Before further processing, two non-EUCAST rules about drug combinations can be applied to improve the efficacy of the EUCAST rules, and the reliability of your data (analysis). These rules are:
#'
#' 1. A drug **with** enzyme inhibitor will be set to S if the same drug **without** enzyme inhibitor is S
#' 2. A drug **without** enzyme inhibitor will be set to R if the same drug **with** enzyme inhibitor is R
#'
#' Important examples include amoxicillin and amoxicillin/clavulanic acid, and trimethoprim and trimethoprim/sulfamethoxazole. Needless to say, for these rules to work, both drugs must be available in the data set.
#'
#' Since these rules are not officially approved by EUCAST, they are not applied at default. To use these rules, include `"other"` to the `rules` parameter, or use `eucast_rules(..., rules = "all")`.
2018-08-31 13:36:19 +02:00
#' @section Antibiotics:
#' To define antibiotics column names, leave as it is to determine it automatically with [guess_ab_col()] or input a text (case-insensitive), or use `NULL` to skip a column (e.g. `TIC = NULL` to skip ticarcillin). Manually defined but non-existing columns will be skipped with a warning.
2018-12-07 12:04:55 +01:00
#'
2020-09-24 00:30:11 +02:00
#' The following antibiotics are used for the functions [eucast_rules()] and [mdro()]. These are shown below in the format 'name (`antimicrobial ID`, [ATC code](https://www.whocc.no/atc/structure_and_principles/))', sorted alphabetically:
2018-07-26 16:30:42 +02:00
#'
#' `r create_ab_documentation(c("AMC", "AMK", "AMP", "AMX", "ATM", "AVO", "AZL", "AZM", "BAM", "BPR", "CAC", "CAP", "CAT", "CAZ", "CCV", "CDR", "CDZ", "CEC", "CED", "CEI", "CEP", "CFM", "CFM1", "CFP", "CFR", "CFS", "CHL", "CID", "CIP", "CLI", "CLR", "CMX", "CMZ", "CND", "COL", "CPD", "CPM", "CPO", "CPR", "CPT", "CRB", "CRD", "CRN", "CRO", "CSL", "CTB", "CTF", "CTL", "CTT", "CTX", "CTZ", "CXM", "CYC", "CZD", "CZO", "CZX", "DAL", "DAP", "DIR", "DIT", "DIZ", "DKB", "DOR", "DOX", "ENX", "EPC", "ERV", "ERY", "ETH", "ETP", "FDX", "FEP", "FLC", "FLE", "FLR1", "FOS", "FOX", "FOX1", "FUS", "GAT", "GEH", "GEM", "GEN", "GRX", "HAP", "HET", "INH", "IPM", "ISE", "JOS", "KAN", "LEX", "LIN", "LNZ", "LOM", "LOR", "LTM", "LVX", "MAN", "MCM", "MEC", "MEM", "MEV", "MEZ", "MFX", "MID", "MNO", "MTM", "MTR", "NAL", "NEO", "NET", "NIT", "NOR", "NOV", "NVA", "OFX", "OLE", "OMC", "ORI", "OXA", "PAZ", "PEF", "PEN", "PHN", "PIP", "PLB", "PME", "PRI", "PRL", "PRU", "PVM", "PZA", "QDA", "RAM", "RFL", "RFP", "RIB", "RID", "RIF", "ROK", "RST", "RXT", "SAM", "SBC", "SDI", "SDM", "SIS", "SLF", "SLF1", "SLF10", "SLF11", "SLF12", "SLF13", "SLF2", "SLF3", "SLF4", "SLF5", "SLF6", "SLF7", "SLF8", "SLF9", "SLT1", "SLT2", "SLT3", "SLT4", "SLT5", "SMX", "SPI", "SPT", "SPX", "STH", "STR", "STR1", "SUD", "SUT", "SXT", "SZO", "TAL", "TCC", "TCM", "TCY", "TEC", "TEM", "TGC", "THA", "TIC", "TLT", "TLV", "TMP", "TMX", "TOB", "TRL", "TVA", "TZD", "TZP", "VAN"))`
2019-11-06 14:43:23 +01:00
#' @aliases EUCAST
2018-11-16 20:50:50 +01:00
#' @rdname eucast_rules
2018-02-21 11:52:31 +01:00
#' @export
#' @return The input of `x`, possibly with edited values of antibiotics. Or, if `verbose = TRUE`, a [data.frame] with all original and new values of the affected bug-drug combinations.
2018-02-21 11:52:31 +01:00
#' @source
#' - EUCAST Expert Rules. Version 2.0, 2012.\cr
#' Leclercq et al. **EUCAST expert rules in antimicrobial susceptibility testing.** *Clin Microbiol Infect.* 2013;19(2):141-60. [(link)](https://doi.org/10.1111/j.1469-0691.2011.03703.x)
#' - EUCAST Expert Rules, Intrinsic Resistance and Exceptional Phenotypes Tables. Version 3.1, 2016. [(link)](https://www.eucast.org/fileadmin/src/media/PDFs/EUCAST_files/Expert_Rules/Expert_rules_intrinsic_exceptional_V3.1.pdf)
#' - EUCAST Intrinsic Resistance and Unusual Phenotypes. Version 3.2, 2020. [(link)](https://www.eucast.org/fileadmin/src/media/PDFs/EUCAST_files/Expert_Rules/2020/Intrinsic_Resistance_and_Unusual_Phenotypes_Tables_v3.2_20200225.pdf)
#' - EUCAST Breakpoint tables for interpretation of MICs and zone diameters. Version 9.0, 2019. [(link)](https://www.eucast.org/fileadmin/src/media/PDFs/EUCAST_files/Breakpoint_tables/v_9.0_Breakpoint_Tables.xlsx)
#' - EUCAST Breakpoint tables for interpretation of MICs and zone diameters. Version 10.0, 2020. [(link)](https://www.eucast.org/fileadmin/src/media/PDFs/EUCAST_files/Breakpoint_tables/v_10.0_Breakpoint_Tables.xlsx)
2020-08-21 11:40:13 +02:00
#' @inheritSection AMR Reference data publicly available
2019-01-02 23:24:07 +01:00
#' @inheritSection AMR Read more on our website!
2018-02-21 11:52:31 +01:00
#' @examples
#' \donttest{
#' a <- data.frame(mo = c("Staphylococcus aureus",
#' "Enterococcus faecalis",
#' "Escherichia coli",
#' "Klebsiella pneumoniae",
#' "Pseudomonas aeruginosa"),
2019-05-10 16:44:59 +02:00
#' VAN = "-", # Vancomycin
#' AMX = "-", # Amoxicillin
#' COL = "-", # Colistin
#' CAZ = "-", # Ceftazidime
#' CXM = "-", # Cefuroxime
2020-09-24 00:30:11 +02:00
#' PEN = "S", # Benzylpenicillin
2019-05-10 16:44:59 +02:00
#' FOX = "S", # Cefoxitin
2018-02-22 21:37:10 +01:00
#' stringsAsFactors = FALSE)
2018-10-18 12:10:10 +02:00
#'
2018-02-22 21:37:10 +01:00
#' a
2019-05-10 16:44:59 +02:00
#' # mo VAN AMX COL CAZ CXM PEN FOX
2018-10-18 12:10:10 +02:00
#' # 1 Staphylococcus aureus - - - - - S S
#' # 2 Enterococcus faecalis - - - - - S S
#' # 3 Escherichia coli - - - - - S S
#' # 4 Klebsiella pneumoniae - - - - - S S
#' # 5 Pseudomonas aeruginosa - - - - - S S
#'
2019-02-08 16:06:54 +01:00
#'
2020-09-24 00:30:11 +02:00
#' # apply EUCAST rules: some results wil be changed
2019-02-08 16:06:54 +01:00
#' b <- eucast_rules(a)
2018-04-02 16:05:09 +02:00
#'
2018-02-22 21:37:10 +01:00
#' b
2019-05-10 16:44:59 +02:00
#' # mo VAN AMX COL CAZ CXM PEN FOX
2018-10-18 12:10:10 +02:00
#' # 1 Staphylococcus aureus - S R R S S S
#' # 2 Enterococcus faecalis - - R R R S R
#' # 3 Escherichia coli R - - - - R S
#' # 4 Klebsiella pneumoniae R R - - - R S
#' # 5 Pseudomonas aeruginosa R R - - R R R
2019-02-08 16:06:54 +01:00
#'
#'
2019-07-30 13:12:40 +02:00
#' # do not apply EUCAST rules, but rather get a data.frame
2020-09-24 00:30:11 +02:00
#' # containing all details about the transformations:
2019-02-08 16:06:54 +01:00
#' c <- eucast_rules(a, verbose = TRUE)
#' }
2019-04-05 18:47:39 +02:00
eucast_rules <- function(x,
2018-11-01 20:23:33 +01:00
col_mo = NULL,
info = interactive(),
2020-09-24 00:30:11 +02:00
rules = getOption("AMR_eucastrules", default = c("breakpoints", "expert")),
2018-11-01 20:23:33 +01:00
verbose = FALSE,
2020-09-24 00:30:11 +02:00
version_breakpoints = 10.0,
version_expertrules = 3.2,
2019-04-05 18:47:39 +02:00
...) {
meet_criteria(x, allow_class = "data.frame")
meet_criteria(col_mo, allow_class = "character", has_length = 1, is_in = colnames(x), allow_NULL = TRUE)
meet_criteria(info, allow_class = "logical", has_length = 1)
meet_criteria(rules, allow_class = "character", has_length = c(1, 2, 3, 4), is_in = c("breakpoints", "expert", "other", "all"))
meet_criteria(verbose, allow_class = "logical", has_length = 1)
meet_criteria(version_breakpoints, allow_class = "numeric", has_length = 1)
meet_criteria(version_expertrules, allow_class = "numeric", has_length = 1)
2020-09-24 12:38:13 +02:00
x_deparsed <- deparse(substitute(x))
2020-10-04 19:26:43 +02:00
if (length(x_deparsed) > 1 || !all(x_deparsed %like% "[a-z]+")) {
2020-09-24 12:38:13 +02:00
x_deparsed <- "your_data"
}
2020-02-14 19:54:13 +01:00
check_dataset_integrity()
2020-09-28 11:00:59 +02:00
version_breakpoints <- as.double(gsub("[^0-9.]+", "", version_breakpoints))
version_expertrules <- as.double(gsub("[^0-9.]+", "", version_expertrules))
2020-09-24 00:30:11 +02:00
stop_ifnot(version_breakpoints %in% as.double(names(EUCAST_VERSION_BREAKPOINTS)),
"EUCAST version ", version_breakpoints, " for clinical breakpoints not found")
stop_ifnot(version_expertrules %in% as.double(names(EUCAST_VERSION_EXPERT_RULES)),
"EUCAST version ", version_expertrules, " for expert rules/intrinsic resistance not found")
breakpoints_info <- EUCAST_VERSION_BREAKPOINTS[[which(as.double(names(EUCAST_VERSION_BREAKPOINTS)) == version_breakpoints)]]
expertrules_info <- EUCAST_VERSION_EXPERT_RULES[[which(as.double(names(EUCAST_VERSION_EXPERT_RULES)) == version_expertrules)]]
# support old setting (until AMR v1.3.0)
if (missing(rules) & !is.null(getOption("AMR.eucast_rules", default = NULL))) {
rules <- getOption("AMR.eucast_rules")
}
2020-09-24 12:38:13 +02:00
if (interactive() & verbose == TRUE & info == TRUE) {
txt <- paste0("WARNING: In Verbose mode, the eucast_rules() function does not apply rules to the data, but instead returns a data set in logbook form with extensive info about which rows and columns would be effected and in which way.",
"\n\nThis may overwrite your existing data if you use e.g.:",
"\ndata <- eucast_rules(data, verbose = TRUE)\n\nDo you want to continue?")
2020-09-24 00:30:11 +02:00
showQuestion <- import_fn("showQuestion", "rstudioapi", error_on_fail = FALSE)
2020-09-24 12:38:13 +02:00
if (!is.null(showQuestion)) {
2020-05-16 21:40:50 +02:00
q_continue <- showQuestion("Using verbose = TRUE with eucast_rules()", txt)
} else {
2020-05-16 13:05:47 +02:00
q_continue <- utils::menu(choices = c("OK", "Cancel"), graphics = FALSE, title = txt)
}
if (q_continue %in% c(FALSE, 2)) {
2020-10-27 15:56:51 +01:00
message_("Cancelled, returning original data", add_fn = font_red, as_note = FALSE)
2019-08-20 11:40:54 +02:00
return(x)
}
}
2018-11-01 20:23:33 +01:00
# try to find columns based on type
# -- mo
2019-01-15 12:45:24 +01:00
if (is.null(col_mo)) {
2020-09-24 00:30:11 +02:00
col_mo <- search_type_in_df(x = x, type = "mo", info = info)
2018-12-22 22:39:34 +01:00
}
stop_if(is.null(col_mo), "`col_mo` must be set")
decimal.mark <- getOption("OutDec")
big.mark <- ifelse(decimal.mark != ",", ",", ".")
2020-05-16 13:05:47 +02:00
formatnr <- function(x, big = big.mark, dec = decimal.mark) {
trimws(format(x, big.mark = big, decimal.mark = dec))
}
2018-10-17 17:32:34 +02:00
warned <- FALSE
2020-10-04 19:26:43 +02:00
warn_lacking_rsi_class <- character(0)
2020-09-24 00:30:11 +02:00
txt_ok <- function(n_added, n_changed, warned = FALSE) {
2019-10-11 17:21:02 +02:00
if (warned == FALSE) {
2020-09-24 00:30:11 +02:00
if (n_added + n_changed == 0) {
2020-05-16 13:05:47 +02:00
cat(font_subtle(" (no changes)\n"))
2018-10-17 17:32:34 +02:00
} else {
# opening
2020-05-16 13:05:47 +02:00
cat(font_grey(" ("))
# additions
2020-09-24 00:30:11 +02:00
if (n_added > 0) {
if (n_added == 1) {
2020-05-16 13:05:47 +02:00
cat(font_green("1 value added"))
} else {
2020-09-24 00:30:11 +02:00
cat(font_green(formatnr(n_added), "values added"))
}
}
# separator
2020-09-24 00:30:11 +02:00
if (n_added > 0 & n_changed > 0) {
2020-05-16 13:05:47 +02:00
cat(font_grey(", "))
}
# changes
2020-09-24 00:30:11 +02:00
if (n_changed > 0) {
if (n_changed == 1) {
2020-05-16 13:05:47 +02:00
cat(font_blue("1 value changed"))
} else {
2020-09-24 00:30:11 +02:00
cat(font_blue(formatnr(n_changed), "values changed"))
}
}
# closing
2020-05-16 13:05:47 +02:00
cat(font_grey(")\n"))
2018-10-17 17:32:34 +02:00
}
warned <<- FALSE
}
}
cols_ab <- get_column_abx(x = x,
2019-05-10 16:44:59 +02:00
soft_dependencies = c("AMC",
"AMP",
2020-09-24 00:30:11 +02:00
"AMX",
2019-05-10 16:44:59 +02:00
"CIP",
"ERY",
2020-09-24 00:30:11 +02:00
"FOX1",
2019-05-10 16:44:59 +02:00
"GEN",
"MFX",
"NAL",
"NOR",
"PEN",
"PIP",
"TCY",
"TIC",
2020-09-24 00:30:11 +02:00
"TOB"),
2019-05-10 16:44:59 +02:00
hard_dependencies = NULL,
2019-05-20 19:12:41 +02:00
verbose = verbose,
2020-09-24 00:30:11 +02:00
info = info,
2019-05-20 19:12:41 +02:00
...)
2019-10-11 17:21:02 +02:00
AMC <- cols_ab["AMC"]
AMK <- cols_ab["AMK"]
AMP <- cols_ab["AMP"]
AMX <- cols_ab["AMX"]
ATM <- cols_ab["ATM"]
2020-09-24 00:30:11 +02:00
AVO <- cols_ab["AVO"]
2019-10-11 17:21:02 +02:00
AZL <- cols_ab["AZL"]
AZM <- cols_ab["AZM"]
2020-09-24 00:30:11 +02:00
BAM <- cols_ab["BAM"]
BPR <- cols_ab["BPR"]
CAC <- cols_ab["CAC"]
CAT <- cols_ab["CAT"]
2019-10-11 17:21:02 +02:00
CAZ <- cols_ab["CAZ"]
2020-09-24 00:30:11 +02:00
CCV <- cols_ab["CCV"]
CDR <- cols_ab["CDR"]
CDZ <- cols_ab["CDZ"]
CEC <- cols_ab["CEC"]
2019-10-11 17:21:02 +02:00
CED <- cols_ab["CED"]
2020-09-24 00:30:11 +02:00
CEI <- cols_ab["CEI"]
CEP <- cols_ab["CEP"]
CFM <- cols_ab["CFM"]
CFM1 <- cols_ab["CFM1"]
CFP <- cols_ab["CFP"]
CFR <- cols_ab["CFR"]
CFS <- cols_ab["CFS"]
2019-10-11 17:21:02 +02:00
CHL <- cols_ab["CHL"]
2020-09-24 00:30:11 +02:00
CID <- cols_ab["CID"]
2019-10-11 17:21:02 +02:00
CIP <- cols_ab["CIP"]
CLI <- cols_ab["CLI"]
2020-09-24 00:30:11 +02:00
CLI <- cols_ab["CLI"]
2019-10-11 17:21:02 +02:00
CLR <- cols_ab["CLR"]
2020-09-24 00:30:11 +02:00
CMX <- cols_ab["CMX"]
CMZ <- cols_ab["CMZ"]
CND <- cols_ab["CND"]
2019-10-11 17:21:02 +02:00
COL <- cols_ab["COL"]
2020-09-24 00:30:11 +02:00
CPD <- cols_ab["CPD"]
CPM <- cols_ab["CPM"]
CPO <- cols_ab["CPO"]
CPR <- cols_ab["CPR"]
CPT <- cols_ab["CPT"]
CRB <- cols_ab["CRB"]
CRD <- cols_ab["CRD"]
CRN <- cols_ab["CRN"]
2019-10-11 17:21:02 +02:00
CRO <- cols_ab["CRO"]
2020-09-24 00:30:11 +02:00
CSL <- cols_ab["CSL"]
CTB <- cols_ab["CTB"]
CTF <- cols_ab["CTF"]
CTL <- cols_ab["CTL"]
CTT <- cols_ab["CTT"]
2019-10-11 17:21:02 +02:00
CTX <- cols_ab["CTX"]
2020-09-24 00:30:11 +02:00
CTZ <- cols_ab["CTZ"]
2019-10-11 17:21:02 +02:00
CXM <- cols_ab["CXM"]
2020-09-24 00:30:11 +02:00
CYC <- cols_ab["CYC"]
CZD <- cols_ab["CZD"]
2019-10-11 17:21:02 +02:00
CZO <- cols_ab["CZO"]
2020-09-24 00:30:11 +02:00
CZX <- cols_ab["CZX"]
DAL <- cols_ab["DAL"]
2019-10-11 17:21:02 +02:00
DAP <- cols_ab["DAP"]
2020-09-24 00:30:11 +02:00
DIR <- cols_ab["DIR"]
DIT <- cols_ab["DIT"]
DIZ <- cols_ab["DIZ"]
DKB <- cols_ab["DKB"]
DOR <- cols_ab["DOR"]
2019-10-11 17:21:02 +02:00
DOX <- cols_ab["DOX"]
2020-09-24 00:30:11 +02:00
ENX <- cols_ab["ENX"]
EPC <- cols_ab["EPC"]
2019-10-11 17:21:02 +02:00
ERY <- cols_ab["ERY"]
ETP <- cols_ab["ETP"]
FEP <- cols_ab["FEP"]
FLC <- cols_ab["FLC"]
2020-09-24 00:30:11 +02:00
FLE <- cols_ab["FLE"]
FLR1 <- cols_ab["FLR1"]
2019-10-11 17:21:02 +02:00
FOS <- cols_ab["FOS"]
FOX <- cols_ab["FOX"]
2020-09-24 00:30:11 +02:00
FOX1 <- cols_ab["FOX1"]
2019-10-11 17:21:02 +02:00
FUS <- cols_ab["FUS"]
2020-09-24 00:30:11 +02:00
GAT <- cols_ab["GAT"]
GEM <- cols_ab["GEM"]
2019-10-11 17:21:02 +02:00
GEN <- cols_ab["GEN"]
2020-09-24 00:30:11 +02:00
GRX <- cols_ab["GRX"]
HAP <- cols_ab["HAP"]
HET <- cols_ab["HET"]
2019-10-11 17:21:02 +02:00
IPM <- cols_ab["IPM"]
2020-09-24 00:30:11 +02:00
ISE <- cols_ab["ISE"]
JOS <- cols_ab["JOS"]
2019-10-11 17:21:02 +02:00
KAN <- cols_ab["KAN"]
2020-09-24 00:30:11 +02:00
LEX <- cols_ab["LEX"]
2019-10-11 17:21:02 +02:00
LIN <- cols_ab["LIN"]
LNZ <- cols_ab["LNZ"]
2020-09-24 00:30:11 +02:00
LOM <- cols_ab["LOM"]
LOR <- cols_ab["LOR"]
LTM <- cols_ab["LTM"]
2019-10-11 17:21:02 +02:00
LVX <- cols_ab["LVX"]
2020-09-24 00:30:11 +02:00
MAN <- cols_ab["MAN"]
MCM <- cols_ab["MCM"]
MEC <- cols_ab["MEC"]
2019-10-11 17:21:02 +02:00
MEM <- cols_ab["MEM"]
2020-09-24 00:30:11 +02:00
MEV <- cols_ab["MEV"]
2019-10-11 17:21:02 +02:00
MEZ <- cols_ab["MEZ"]
MFX <- cols_ab["MFX"]
2020-09-24 00:30:11 +02:00
MID <- cols_ab["MID"]
2019-10-11 17:21:02 +02:00
MNO <- cols_ab["MNO"]
2020-09-24 00:30:11 +02:00
MTM <- cols_ab["MTM"]
2019-10-11 17:21:02 +02:00
NAL <- cols_ab["NAL"]
NEO <- cols_ab["NEO"]
NET <- cols_ab["NET"]
NIT <- cols_ab["NIT"]
NOR <- cols_ab["NOR"]
NOV <- cols_ab["NOV"]
2020-09-24 00:30:11 +02:00
NVA <- cols_ab["NVA"]
2019-10-11 17:21:02 +02:00
OFX <- cols_ab["OFX"]
2020-09-24 00:30:11 +02:00
OLE <- cols_ab["OLE"]
ORI <- cols_ab["ORI"]
2019-10-11 17:21:02 +02:00
OXA <- cols_ab["OXA"]
2020-09-24 00:30:11 +02:00
PAZ <- cols_ab["PAZ"]
PEF <- cols_ab["PEF"]
2019-10-11 17:21:02 +02:00
PEN <- cols_ab["PEN"]
2020-09-24 00:30:11 +02:00
PHN <- cols_ab["PHN"]
2019-10-11 17:21:02 +02:00
PIP <- cols_ab["PIP"]
PLB <- cols_ab["PLB"]
2020-09-24 00:30:11 +02:00
PME <- cols_ab["PME"]
2019-10-11 17:21:02 +02:00
PRI <- cols_ab["PRI"]
2020-09-24 00:30:11 +02:00
PRL <- cols_ab["PRL"]
PRU <- cols_ab["PRU"]
PVM <- cols_ab["PVM"]
QDA <- cols_ab["QDA"]
2019-10-11 17:21:02 +02:00
QDA <- cols_ab["QDA"]
2020-09-24 00:30:11 +02:00
RAM <- cols_ab["RAM"]
RFL <- cols_ab["RFL"]
2019-10-11 17:21:02 +02:00
RID <- cols_ab["RID"]
RIF <- cols_ab["RIF"]
2020-09-24 00:30:11 +02:00
ROK <- cols_ab["ROK"]
RST <- cols_ab["RST"]
2019-10-11 17:21:02 +02:00
RXT <- cols_ab["RXT"]
2020-05-27 16:37:49 +02:00
SAM <- cols_ab["SAM"]
2020-09-24 00:30:11 +02:00
SBC <- cols_ab["SBC"]
SDI <- cols_ab["SDI"]
SDM <- cols_ab["SDM"]
2019-10-11 17:21:02 +02:00
SIS <- cols_ab["SIS"]
2020-09-24 00:30:11 +02:00
SLF <- cols_ab["SLF"]
SLF1 <- cols_ab["SLF1"]
SLF10 <- cols_ab["SLF10"]
SLF11 <- cols_ab["SLF11"]
SLF12 <- cols_ab["SLF12"]
SLF13 <- cols_ab["SLF13"]
SLF2 <- cols_ab["SLF2"]
SLF3 <- cols_ab["SLF3"]
SLF4 <- cols_ab["SLF4"]
SLF5 <- cols_ab["SLF5"]
SLF6 <- cols_ab["SLF6"]
SLF7 <- cols_ab["SLF7"]
SLF8 <- cols_ab["SLF8"]
SLF9 <- cols_ab["SLF9"]
SLT1 <- cols_ab["SLT1"]
SLT2 <- cols_ab["SLT2"]
SLT3 <- cols_ab["SLT3"]
SLT4 <- cols_ab["SLT4"]
SLT5 <- cols_ab["SLT5"]
SMX <- cols_ab["SMX"]
SPI <- cols_ab["SPI"]
SPX <- cols_ab["SPX"]
STR <- cols_ab["STR"]
STR1 <- cols_ab["STR1"]
SUD <- cols_ab["SUD"]
SUT <- cols_ab["SUT"]
2019-10-11 17:21:02 +02:00
SXT <- cols_ab["SXT"]
2020-09-24 00:30:11 +02:00
SZO <- cols_ab["SZO"]
TAL <- cols_ab["TAL"]
TCC <- cols_ab["TCC"]
TCM <- cols_ab["TCM"]
2019-10-11 17:21:02 +02:00
TCY <- cols_ab["TCY"]
TEC <- cols_ab["TEC"]
2020-09-24 00:30:11 +02:00
TEM <- cols_ab["TEM"]
2019-10-11 17:21:02 +02:00
TGC <- cols_ab["TGC"]
2020-09-24 00:30:11 +02:00
THA <- cols_ab["THA"]
2019-10-11 17:21:02 +02:00
TIC <- cols_ab["TIC"]
2020-09-24 00:30:11 +02:00
TLT <- cols_ab["TLT"]
TLV <- cols_ab["TLV"]
2019-10-11 17:21:02 +02:00
TMP <- cols_ab["TMP"]
2020-09-24 00:30:11 +02:00
TMX <- cols_ab["TMX"]
2019-10-11 17:21:02 +02:00
TOB <- cols_ab["TOB"]
2020-09-24 00:30:11 +02:00
TRL <- cols_ab["TRL"]
TVA <- cols_ab["TVA"]
TZD <- cols_ab["TZD"]
2019-10-11 17:21:02 +02:00
TZP <- cols_ab["TZP"]
VAN <- cols_ab["VAN"]
2019-04-05 18:47:39 +02:00
ab_missing <- function(ab) {
all(ab %in% c(NULL, NA))
}
2019-11-15 15:25:03 +01:00
if (ab_missing(AMP) & !ab_missing(AMX)) {
# ampicillin column is missing, but amoxicillin is available
2020-09-24 00:30:11 +02:00
if (info == TRUE) {
2020-10-27 15:56:51 +01:00
message_("Using column `", font_bold(AMX), "` as input for ampicillin since many EUCAST rules depend on it.")
2020-09-24 00:30:11 +02:00
}
2019-05-10 16:44:59 +02:00
AMP <- AMX
2018-10-17 17:32:34 +02:00
}
2020-09-24 00:30:11 +02:00
# data preparation ----
if (info == TRUE & NROW(x) > 10000) {
2020-10-27 15:56:51 +01:00
message_("Preparing data...", appendLF = FALSE, as_note = FALSE)
2020-09-24 00:30:11 +02:00
}
2020-10-04 19:26:43 +02:00
2019-10-11 17:21:02 +02:00
# nolint start
2020-08-26 11:33:54 +02:00
# antibiotic classes ----
2020-09-24 00:30:11 +02:00
aminoglycosides <- c(AMK, DKB, GEN, ISE, KAN, NEO, NET, RST, SIS, STR, STR1, TOB)
2019-05-10 16:44:59 +02:00
aminopenicillins <- c(AMP, AMX)
2020-09-24 00:30:11 +02:00
carbapenems <- c(DOR, ETP, IPM, MEM, MEV)
cephalosporins <- c(CDZ, CAC, CEC, CFR, RID, MAN, CTZ, CZD, CZO, CDR, DIT, FEP, CAT, CFM, CMX, CMZ, DIZ, CID, CFP, CSL, CND, CTX, CTT, CTF, FOX, CPM, CPO, CPD, CPR, CRD, CFS, CPT, CAZ, CCV, CTL, CTB, CZX, BPR, CFM1, CEI, CRO, CXM, LEX, CEP, HAP, CED, LTM, LOR)
cephalosporins_1st <- c(CAC, CFR, RID, CTZ, CZD, CZO, CRD, CTL, LEX, CEP, HAP, CED)
cephalosporins_2nd <- c(CEC, MAN, CMZ, CID, CND, CTT, CTF, FOX, CPR, CXM, LOR)
cephalosporins_except_CAZ <- cephalosporins[cephalosporins != ifelse(is.null(CAZ), "", CAZ)]
2020-09-24 00:30:11 +02:00
fluoroquinolones <- c(CIP, ENX, FLE, GAT, GEM, GRX, LVX, LOM, MFX, NOR, OFX, PAZ, PEF, PRU, RFL, SPX, TMX, TVA)
glycopeptides <- c(AVO, NVA, RAM, TEC, TCM, VAN) # dalba/orita/tela are in lipoglycopeptides
lincosamides <- c(CLI, LIN, PRL)
lipoglycopeptides <- c(DAL, ORI, TLV)
macrolides <- c(AZM, CLR, DIR, ERY, FLR1, JOS, MID, MCM, OLE, ROK, RXT, SPI, TLT, TRL)
oxazolidinones <- c(CYC, LNZ, THA, TZD)
polymyxins <- c(PLB, COL)
streptogramins <- c(QDA, PRI)
tetracyclines <- c(DOX, MNO, TCY) # since EUCAST v3.1 tigecycline (TGC) is set apart
2019-05-10 16:44:59 +02:00
ureidopenicillins <- c(PIP, TZP, AZL, MEZ)
all_betalactams <- c(aminopenicillins, cephalosporins, carbapenems, ureidopenicillins, AMC, OXA, FLC, PEN)
2019-10-11 17:21:02 +02:00
# nolint end
2020-09-24 00:30:11 +02:00
# Some helper functions ---------------------------------------------------
2019-04-05 18:47:39 +02:00
get_antibiotic_columns <- function(x, df) {
x <- trimws(unlist(strsplit(x, ",", fixed = TRUE)))
y <- character(0)
2019-10-11 17:21:02 +02:00
for (i in seq_len(length(x))) {
2019-08-08 15:52:07 +02:00
if (is.function(get(x[i]))) {
stop("Column ", x[i], " is also a function. Please create an issue on github.com/msberends/AMR/issues.")
}
2019-04-05 18:47:39 +02:00
y <- c(y, tryCatch(get(x[i]), error = function(e) ""))
2018-10-18 12:10:10 +02:00
}
2019-04-05 18:47:39 +02:00
y[y != "" & y %in% colnames(df)]
}
2020-09-24 00:30:11 +02:00
markup_italics_where_needed <- function(x) {
# returns names found in family, genus or species as italics
if (!has_colour()) {
return(x)
}
x <- unlist(strsplit(x, " "))
ind <- gsub("[)(:]", "", x) %in% c(MO_lookup[which(MO_lookup$rank %in% c("family", "genus")), ]$fullname,
MO_lookup[which(MO_lookup$rank == "species"), ]$species)
x[ind] <- font_italic(x[ind], collapse = NULL)
paste(x, collapse = " ")
}
get_antibiotic_names <- function(x) {
x <- x %pm>%
strsplit(",") %pm>%
unlist() %pm>%
trimws() %pm>%
sapply(function(x) if (x %in% antibiotics$ab) ab_name(x, language = NULL, tolower = TRUE) else x) %pm>%
sort() %pm>%
paste(collapse = ", ")
x <- gsub("_", " ", x, fixed = TRUE)
x <- gsub("except CAZ", paste("except", ab_name("CAZ", language = NULL, tolower = TRUE)), x, fixed = TRUE)
2020-09-24 12:38:13 +02:00
x <- gsub("cephalosporins (1st|2nd|3rd|4th|5th)", "cephalosporins (\\1 gen.)", x)
x
}
format_antibiotic_names <- function(ab_names, ab_results) {
ab_names <- trimws(unlist(strsplit(ab_names, ",")))
ab_results <- trimws(unlist(strsplit(ab_results, ",")))
if (length(ab_results) == 1) {
if (length(ab_names) == 1) {
# like FOX S
x <- paste(ab_names, "is")
} else if (length(ab_names) == 2) {
# like PEN,FOX S
x <- paste(paste0(ab_names, collapse = " and "), "are both")
} else {
# like PEN,FOX,GEN S (although dependency on > 2 ABx does not exist at the moment)
x <- paste(paste0(ab_names, collapse = " and "), "are all")
}
return(paste0(x, " '", ab_results, "'"))
} else {
if (length(ab_names) == 2) {
# like PEN,FOX S,R
paste0(ab_names[1], " is '", ab_results[1], "' and ",
ab_names[2], " is '", ab_results[2], "'")
} else {
# like PEN,FOX,GEN S,R,R (although dependency on > 2 ABx does not exist at the moment)
paste0(ab_names[1], " is '", ab_results[1], "' and ",
ab_names[2], " is '", ab_results[2], "' and ",
ab_names[3], " is '", ab_results[3], "'")
}
}
}
2020-09-24 00:30:11 +02:00
as.rsi_no_warning <- function(x) {
if (is.rsi(x)) {
return(x)
}
suppressWarnings(as.rsi(x))
}
2020-09-29 10:40:25 +02:00
# Preparing the data ------------------------------------------------------
2020-09-24 00:30:11 +02:00
verbose_info <- data.frame(rowid = character(0),
col = character(0),
mo_fullname = character(0),
old = as.rsi(character(0)),
new = as.rsi(character(0)),
rule = character(0),
rule_group = character(0),
rule_name = character(0),
rule_source = character(0),
stringsAsFactors = FALSE)
old_cols <- colnames(x)
old_attributes <- attributes(x)
x <- as.data.frame(x, stringsAsFactors = FALSE) # no tibbles, data.tables, etc.
rownames(x) <- NULL # will later be restored with old_attributes
# create unique row IDs - combination of the MO and all ABx columns (so they will only run once per unique combination)
2020-11-10 16:35:56 +01:00
x$`.rowid` <- sapply(as.list(as.data.frame(t(x[, c(col_mo, cols_ab), drop = FALSE]),
stringsAsFactors = FALSE)),
function(x) {
x[is.na(x)] <- "."
paste0(x, collapse = "")
})
2020-09-24 00:30:11 +02:00
# save original table, with the new .rowid column
x.bak <- x
# keep only unique rows for MO and ABx
x <- x %pm>%
pm_arrange(`.rowid`) %pm>%
# big speed gain! only analyse unique rows:
pm_distinct(`.rowid`, .keep_all = TRUE) %pm>%
as.data.frame(stringsAsFactors = FALSE)
x[, col_mo] <- as.mo(as.character(x[, col_mo, drop = TRUE]))
2020-09-24 00:30:11 +02:00
x <- x %pm>%
left_join_microorganisms(by = col_mo, suffix = c("_oldcols", ""))
x$gramstain <- mo_gramstain(x[, col_mo, drop = TRUE], language = NULL)
x$genus_species <- paste(x$genus, x$species)
if (info == TRUE & NROW(x) > 10000) {
message_(" OK.", add_fn = list(font_green, font_bold), as_note = FALSE)
2020-09-24 00:30:11 +02:00
}
if (any(x$genus == "Staphylococcus", na.rm = TRUE)) {
all_staph <- MO_lookup[which(MO_lookup$genus == "Staphylococcus"), ]
all_staph$CNS_CPS <- suppressWarnings(mo_name(all_staph$mo, Becker = "all", language = NULL))
}
if (any(x$genus == "Streptococcus", na.rm = TRUE)) {
all_strep <- MO_lookup[which(MO_lookup$genus == "Streptococcus"), ]
all_strep$Lancefield <- suppressWarnings(mo_name(all_strep$mo, Lancefield = TRUE, language = NULL))
}
n_added <- 0
n_changed <- 0
2020-05-27 16:37:49 +02:00
# Other rules: enzyme inhibitors ------------------------------------------
if (any(c("all", "other") %in% rules)) {
if (info == TRUE) {
cat(font_bold(paste0("\nRules by this AMR package (",
font_red(paste0("v", utils::packageVersion("AMR"), ", ",
format(utils::packageDate("AMR"), "%Y"))), "), see ?eucast_rules\n")))
2020-05-27 16:37:49 +02:00
}
ab_enzyme <- subset(antibiotics, name %like% "/")[, c("ab", "name")]
ab_enzyme$base_name <- gsub("^([a-zA-Z0-9]+).*", "\\1", ab_enzyme$name)
ab_enzyme$base_ab <- as.ab(ab_enzyme$base_name)
for (i in seq_len(nrow(ab_enzyme))) {
if (all(c(ab_enzyme[i, ]$ab, ab_enzyme[i, ]$base_ab) %in% names(cols_ab), na.rm = TRUE)) {
ab_name_base <- ab_name(cols_ab[ab_enzyme[i, ]$base_ab], language = NULL, tolower = TRUE)
ab_name_enzyme <- ab_name(cols_ab[ab_enzyme[i, ]$ab], language = NULL, tolower = TRUE)
# Set base to R where base + enzyme inhibitor is R
rule_current <- paste0("Set ", ab_name_base, " (", cols_ab[ab_enzyme[i, ]$base_ab], ") = R where ",
ab_name_enzyme, " (", cols_ab[ab_enzyme[i, ]$ab], ") = R")
2020-10-04 19:26:43 +02:00
cat(rule_current)
2020-09-24 00:30:11 +02:00
run_changes <- edit_rsi(x = x,
col_mo = col_mo,
to = "R",
rule = c(rule_current, "Other rules", "", paste0("Non-EUCAST: AMR package v", utils::packageVersion("AMR"))),
2020-05-27 16:37:49 +02:00
rows = which(as.rsi_no_warning(x[, cols_ab[ab_enzyme[i, ]$ab]]) == "R"),
2020-09-24 00:30:11 +02:00
cols = cols_ab[ab_enzyme[i, ]$base_ab],
last_verbose_info = verbose_info,
original_data = x.bak,
warned = warned,
info = info)
n_added <- n_added + run_changes$added
n_changed <- n_changed + run_changes$changed
verbose_info <- run_changes$verbose_info
x <- run_changes$output
2020-10-04 19:26:43 +02:00
warn_lacking_rsi_class <- c(warn_lacking_rsi_class, run_changes$rsi_warn)
2020-05-27 16:37:49 +02:00
# Print number of new changes
if (info == TRUE) {
# print only on last one of rules in this group
2020-09-24 00:30:11 +02:00
txt_ok(n_added = n_added, n_changed = n_changed, warned = warned)
2020-05-27 16:37:49 +02:00
# and reset counters
2020-09-24 00:30:11 +02:00
n_added <- 0
n_changed <- 0
2020-05-27 16:37:49 +02:00
}
# Set base + enzyme inhibitor to S where base is S
rule_current <- paste0("Set ", ab_name_enzyme, " (", cols_ab[ab_enzyme[i, ]$ab], ") = S where ",
ab_name_base, " (", cols_ab[ab_enzyme[i, ]$base_ab], ") = S")
if (info == TRUE) {
cat(rule_current)
}
2020-09-24 00:30:11 +02:00
run_changes <- edit_rsi(x = x,
col_mo = col_mo,
to = "S",
rule = c(rule_current, "Other rules", "", paste0("Non-EUCAST: AMR package v", utils::packageVersion("AMR"))),
2020-05-27 16:37:49 +02:00
rows = which(as.rsi_no_warning(x[, cols_ab[ab_enzyme[i, ]$base_ab]]) == "S"),
2020-09-24 00:30:11 +02:00
cols = cols_ab[ab_enzyme[i, ]$ab],
last_verbose_info = verbose_info,
original_data = x.bak,
warned = warned,
info = info)
n_added <- n_added + run_changes$added
n_changed <- n_changed + run_changes$changed
verbose_info <- run_changes$verbose_info
x <- run_changes$output
2020-10-04 19:26:43 +02:00
warn_lacking_rsi_class <- c(warn_lacking_rsi_class, run_changes$rsi_warn)
2020-05-27 16:37:49 +02:00
# Print number of new changes
if (info == TRUE) {
# print only on last one of rules in this group
2020-09-24 00:30:11 +02:00
txt_ok(n_added = n_added, n_changed = n_changed, warned = warned)
2020-05-27 16:37:49 +02:00
# and reset counters
2020-09-24 00:30:11 +02:00
n_added <- 0
n_changed <- 0
2020-05-27 16:37:49 +02:00
}
}
}
} else {
if (info == TRUE) {
2020-11-10 16:35:56 +01:00
message_("\n\nSkipping inheritance rules defined by this package, such as setting trimethoprim (TMP) = R where trimethoprim/sulfamethoxazole (SXT) = R.",
as_note = FALSE,
add_fn = font_red)
message_("Use eucast_rules(..., rules = \"all\") to also apply those rules.",
as_note = FALSE,
add_fn = font_red)
2020-05-27 16:37:49 +02:00
}
2020-05-20 12:00:17 +02:00
}
2020-05-27 16:37:49 +02:00
# Official EUCAST rules ---------------------------------------------------
2019-11-15 15:25:03 +01:00
eucast_notification_shown <- FALSE
if (!is.null(list(...)$eucast_rules_df)) {
2020-09-24 00:30:11 +02:00
# this allows: eucast_rules(x, eucast_rules_df = AMR:::eucast_rules_file %pm>% filter(is.na(have_these_values)))
eucast_rules_df <- list(...)$eucast_rules_df
} else {
# otherwise internal data file, created in data-raw/internals.R
eucast_rules_df <- eucast_rules_file
}
2020-09-24 00:30:11 +02:00
# filter on user-set guideline versions ----
if (any(c("all", "breakpoints") %in% rules)) {
eucast_rules_df <- subset(eucast_rules_df,
!reference.rule_group %like% "breakpoint" |
2020-10-04 19:26:43 +02:00
(reference.rule_group %like% "breakpoint" & reference.version == version_breakpoints))
2020-09-24 00:30:11 +02:00
}
if (any(c("all", "expert") %in% rules)) {
eucast_rules_df <- subset(eucast_rules_df,
!reference.rule_group %like% "expert" |
(reference.rule_group %like% "expert" & reference.version == version_expertrules))
}
2019-10-11 17:21:02 +02:00
for (i in seq_len(nrow(eucast_rules_df))) {
2020-09-24 00:30:11 +02:00
rule_previous <- eucast_rules_df[max(1, i - 1), "reference.rule", drop = TRUE]
rule_current <- eucast_rules_df[i, "reference.rule", drop = TRUE]
rule_next <- eucast_rules_df[min(nrow(eucast_rules_df), i + 1), "reference.rule", drop = TRUE]
rule_group_previous <- eucast_rules_df[max(1, i - 1), "reference.rule_group", drop = TRUE]
rule_group_current <- eucast_rules_df[i, "reference.rule_group", drop = TRUE]
if (isFALSE(info) | isFALSE(verbose)) {
rule_text <- ""
2019-04-05 18:47:39 +02:00
} else {
2020-09-24 00:30:11 +02:00
if (is.na(eucast_rules_df[i, "and_these_antibiotics", drop = TRUE])) {
rule_text <- paste0("always report as '", eucast_rules_df[i, "to_value", drop = TRUE], "': ", get_antibiotic_names(eucast_rules_df[i, "then_change_these_antibiotics", drop = TRUE]))
} else {
rule_text <- paste0("report as '", eucast_rules_df[i, "to_value", drop = TRUE], "' when ",
format_antibiotic_names(ab_names = get_antibiotic_names(eucast_rules_df[i, "and_these_antibiotics", drop = TRUE]),
ab_results = eucast_rules_df[i, "have_these_values", drop = TRUE]), ": ",
get_antibiotic_names(eucast_rules_df[i, "then_change_these_antibiotics", drop = TRUE]))
}
2018-10-18 12:10:10 +02:00
}
2019-04-05 18:47:39 +02:00
if (i == 1) {
rule_previous <- ""
rule_group_previous <- ""
2018-10-18 12:10:10 +02:00
}
2019-04-05 18:47:39 +02:00
if (i == nrow(eucast_rules_df)) {
rule_next <- ""
2018-10-18 12:10:10 +02:00
}
2019-04-05 18:47:39 +02:00
# don't apply rules if user doesn't want to apply them
if (rule_group_current %like% "breakpoint" & !any(c("all", "breakpoints") %in% rules)) {
next
2018-10-18 12:10:10 +02:00
}
2019-04-05 18:47:39 +02:00
if (rule_group_current %like% "expert" & !any(c("all", "expert") %in% rules)) {
next
2018-10-18 12:10:10 +02:00
}
2018-10-18 12:10:10 +02:00
if (info == TRUE) {
2020-09-24 00:30:11 +02:00
# Print EUCAST intro ------------------------------------------------------
if (!rule_group_current %like% "other" & eucast_notification_shown == FALSE) {
cat(paste0("\n", font_grey(strrep("-", 0.95 * options()$width)),
"\nRules by the ", font_bold("European Committee on Antimicrobial Susceptibility Testing (EUCAST)"),
"\n", font_blue("https://eucast.org/"), "\n"))
eucast_notification_shown <- TRUE
}
2019-04-05 18:47:39 +02:00
# Print rule (group) ------------------------------------------------------
if (rule_group_current != rule_group_previous) {
# is new rule group, one of Breakpoints, Expert Rules and Other
2020-05-16 13:05:47 +02:00
cat(font_bold(
ifelse(
rule_group_current %like% "breakpoint",
2020-09-24 00:30:11 +02:00
paste0("\n", breakpoints_info$title, " (",
font_red(paste0(breakpoints_info$version_txt, ", ", breakpoints_info$year)), ")\n"),
2020-05-16 13:05:47 +02:00
ifelse(
rule_group_current %like% "expert",
2020-09-24 00:30:11 +02:00
paste0("\n", expertrules_info$title, " (",
font_red(paste0(expertrules_info$version_txt, ", ", expertrules_info$year)), ")\n"),
2020-05-27 16:37:49 +02:00
""))))
2019-04-05 18:47:39 +02:00
}
# Print rule -------------------------------------------------------------
if (rule_current != rule_previous) {
# is new rule within group, print its name
2020-11-10 16:35:56 +01:00
cat(markup_italics_where_needed(word_wrap(rule_current,
width = getOption("width") - 30,
extra_indent = 4)))
2019-04-05 18:47:39 +02:00
warned <- FALSE
}
2018-10-18 12:10:10 +02:00
}
2019-04-05 18:47:39 +02:00
# Get rule from file ------------------------------------------------------
2020-09-24 00:30:11 +02:00
if_mo_property <- trimws(eucast_rules_df[i, "if_mo_property", drop = TRUE])
like_is_one_of <- trimws(eucast_rules_df[i, "like.is.one_of", drop = TRUE])
mo_value <- trimws(eucast_rules_df[i, "this_value", drop = TRUE])
# be sure to comprise all coagulase-negative/-positive staphylococci when they are mentioned
2020-09-24 00:30:11 +02:00
if (mo_value %like% "coagulase" && any(x$genus == "Staphylococcus", na.rm = TRUE)) {
if (mo_value %like% "negative") {
eucast_rules_df[i, "this_value"] <- paste0("^(", paste0(all_staph[which(all_staph$CNS_CPS %like% "negative"),
"fullname",
drop = TRUE],
collapse = "|"),
")$")
2019-04-05 18:47:39 +02:00
} else {
2020-09-24 00:30:11 +02:00
eucast_rules_df[i, "this_value"] <- paste0("^(", paste0(all_staph[which(all_staph$CNS_CPS %like% "positive"),
"fullname",
drop = TRUE],
collapse = "|"),
")$")
2019-04-05 18:47:39 +02:00
}
like_is_one_of <- "like"
}
2020-09-24 00:30:11 +02:00
# be sure to comprise all beta-haemolytic Streptococci (Lancefield groups A, B, C and G) when they are mentioned
if (mo_value %like% "group [ABCG]" && any(x$genus == "Streptococcus", na.rm = TRUE)) {
eucast_rules_df[i, "this_value"] <- paste0("^(", paste0(all_strep[which(all_strep$Lancefield %like% "group [ABCG]"),
"fullname",
drop = TRUE],
collapse = "|"),
")$")
like_is_one_of <- "like"
}
2019-04-05 18:47:39 +02:00
if (like_is_one_of == "is") {
2020-05-18 11:09:02 +02:00
# so e.g. 'Enterococcus' will turn into '^Enterococcus$'
2020-09-24 00:30:11 +02:00
mo_value <- paste0("^", mo_value, "$")
2019-04-05 18:47:39 +02:00
} else if (like_is_one_of == "one_of") {
2019-10-11 17:21:02 +02:00
# so 'Clostridium, Actinomyces, ...' will turn into '^(Clostridium|Actinomyces|...)$'
2019-04-05 18:47:39 +02:00
mo_value <- paste0("^(",
2020-09-24 00:30:11 +02:00
paste(trimws(unlist(strsplit(mo_value, ",", fixed = TRUE))),
2019-04-05 18:47:39 +02:00
collapse = "|"),
")$")
2020-09-24 00:30:11 +02:00
} else if (like_is_one_of != "like") {
2019-11-15 15:25:03 +01:00
stop("invalid value for column 'like.is.one_of'", call. = FALSE)
2018-10-18 12:10:10 +02:00
}
2020-09-24 00:30:11 +02:00
source_antibiotics <- eucast_rules_df[i, "and_these_antibiotics", drop = TRUE]
source_value <- trimws(unlist(strsplit(eucast_rules_df[i, "have_these_values", drop = TRUE], ",", fixed = TRUE)))
target_antibiotics <- eucast_rules_df[i, "then_change_these_antibiotics", drop = TRUE]
target_value <- eucast_rules_df[i, "to_value", drop = TRUE]
2019-04-05 18:47:39 +02:00
if (is.na(source_antibiotics)) {
2020-09-24 00:30:11 +02:00
rows <- tryCatch(which(x[, if_mo_property, drop = TRUE] %like_perl% mo_value),
2019-04-05 18:47:39 +02:00
error = function(e) integer(0))
} else {
2019-05-23 16:58:59 +02:00
source_antibiotics <- get_antibiotic_columns(source_antibiotics, x)
2019-04-05 18:47:39 +02:00
if (length(source_value) == 1 & length(source_antibiotics) > 1) {
source_value <- rep(source_value, length(source_antibiotics))
}
if (length(source_antibiotics) == 0) {
rows <- integer(0)
} else if (length(source_antibiotics) == 1) {
2020-09-24 00:30:11 +02:00
rows <- tryCatch(which(x[, if_mo_property, drop = TRUE] %like_perl% mo_value
2020-05-27 16:37:49 +02:00
& as.rsi_no_warning(x[, source_antibiotics[1L]]) == source_value[1L]),
2019-04-05 18:47:39 +02:00
error = function(e) integer(0))
} else if (length(source_antibiotics) == 2) {
2020-09-24 00:30:11 +02:00
rows <- tryCatch(which(x[, if_mo_property, drop = TRUE] %like_perl% mo_value
2020-05-27 16:37:49 +02:00
& as.rsi_no_warning(x[, source_antibiotics[1L]]) == source_value[1L]
& as.rsi_no_warning(x[, source_antibiotics[2L]]) == source_value[2L]),
2019-04-05 18:47:39 +02:00
error = function(e) integer(0))
} else if (length(source_antibiotics) == 3) {
2020-09-24 00:30:11 +02:00
rows <- tryCatch(which(x[, if_mo_property, drop = TRUE] %like_perl% mo_value
2020-05-27 16:37:49 +02:00
& as.rsi_no_warning(x[, source_antibiotics[1L]]) == source_value[1L]
& as.rsi_no_warning(x[, source_antibiotics[2L]]) == source_value[2L]
& as.rsi_no_warning(x[, source_antibiotics[3L]]) == source_value[3L]),
2019-04-05 18:47:39 +02:00
error = function(e) integer(0))
} else {
2020-09-24 00:30:11 +02:00
stop_("only 3 antibiotics supported for source_antibiotics")
2019-04-05 18:47:39 +02:00
}
2018-10-18 12:10:10 +02:00
}
2019-05-23 16:58:59 +02:00
cols <- get_antibiotic_columns(target_antibiotics, x)
2020-10-04 19:26:43 +02:00
2019-04-05 18:47:39 +02:00
# Apply rule on data ------------------------------------------------------
# this will return the unique number of changes
2020-09-24 00:30:11 +02:00
run_changes <- edit_rsi(x = x,
col_mo = col_mo,
to = target_value,
rule = c(rule_text, rule_group_current, rule_current,
ifelse(rule_group_current %like% "breakpoint",
paste0(breakpoints_info$title, " ", breakpoints_info$version_txt, ", ", breakpoints_info$year),
paste0(expertrules_info$title, " ", expertrules_info$version_txt, ", ", expertrules_info$year))),
rows = rows,
2020-09-24 00:30:11 +02:00
cols = cols,
last_verbose_info = verbose_info,
original_data = x.bak,
warned = warned,
info = info)
n_added <- n_added + run_changes$added
n_changed <- n_changed + run_changes$changed
verbose_info <- run_changes$verbose_info
x <- run_changes$output
2020-10-04 19:26:43 +02:00
warn_lacking_rsi_class <- c(warn_lacking_rsi_class, run_changes$rsi_warn)
2019-04-05 18:47:39 +02:00
# Print number of new changes ---------------------------------------------
if (info == TRUE & rule_next != rule_current) {
# print only on last one of rules in this group
2020-09-24 00:30:11 +02:00
txt_ok(n_added = n_added, n_changed = n_changed, warned = warned)
# and reset counters
2020-09-24 00:30:11 +02:00
n_added <- 0
n_changed <- 0
2018-11-01 17:06:08 +01:00
}
2019-04-05 18:47:39 +02:00
}
2019-04-05 18:47:39 +02:00
# Print overview ----------------------------------------------------------
2020-10-04 19:26:43 +02:00
if (info == TRUE | verbose == TRUE) {
2020-09-29 10:40:25 +02:00
verbose_info <- x.bak %pm>%
pm_mutate(row = pm_row_number()) %pm>%
pm_select(`.rowid`, row) %pm>%
pm_right_join(verbose_info,
by = c(".rowid" = "rowid")) %pm>%
pm_select(-`.rowid`) %pm>%
2020-09-24 12:38:13 +02:00
pm_select(row, pm_everything()) %pm>%
2020-09-24 00:30:11 +02:00
pm_filter(!is.na(new)) %pm>%
pm_arrange(row, rule_group, rule_name, col)
2020-09-29 10:40:25 +02:00
rownames(verbose_info) <- NULL
2020-10-04 19:26:43 +02:00
}
if (info == TRUE) {
2020-09-24 00:30:11 +02:00
2018-10-19 00:57:10 +02:00
if (verbose == TRUE) {
wouldve <- "would have "
} else {
wouldve <- ""
}
2020-09-24 00:30:11 +02:00
cat(paste0("\n", font_grey(strrep("-", 0.95 * options()$width)), "\n"))
2020-11-10 16:35:56 +01:00
cat(word_wrap(paste0("The rules ", paste0(wouldve, "affected "),
font_bold(formatnr(pm_n_distinct(verbose_info$row)),
"out of", formatnr(nrow(x.bak)),
"rows"),
", making a total of ",
font_bold(formatnr(nrow(verbose_info)), "edits\n"))))
2020-10-04 19:26:43 +02:00
total_n_added <- verbose_info %pm>% pm_filter(is.na(old)) %pm>% nrow()
total_n_changed <- verbose_info %pm>% pm_filter(!is.na(old)) %pm>% nrow()
# print added values
2020-09-24 00:30:11 +02:00
if (total_n_added == 0) {
2019-04-05 18:47:39 +02:00
colour <- cat # is function
} else {
2020-05-16 13:05:47 +02:00
colour <- font_green # is function
2019-02-08 16:06:54 +01:00
}
2019-04-05 18:47:39 +02:00
cat(colour(paste0("=> ", wouldve, "added ",
font_bold(formatnr(verbose_info %pm>%
pm_filter(is.na(old)) %pm>%
2020-07-13 09:17:24 +02:00
nrow()), "test results"),
2019-02-08 16:06:54 +01:00
"\n")))
2020-09-24 00:30:11 +02:00
if (total_n_added > 0) {
added_summary <- verbose_info %pm>%
pm_filter(is.na(old)) %pm>%
2020-09-24 00:30:11 +02:00
pm_count(new, name = "n")
2020-05-16 13:05:47 +02:00
cat(paste(" -",
paste0(formatnr(added_summary$n), " test result", ifelse(added_summary$n > 1, "s", ""),
2020-09-24 12:38:13 +02:00
" added as ", paste0('"', added_summary$new, '"')), collapse = "\n"))
2019-04-05 18:47:39 +02:00
}
2020-09-24 00:30:11 +02:00
# print changed values
if (total_n_changed == 0) {
2019-04-05 18:47:39 +02:00
colour <- cat # is function
} else {
2020-05-16 13:05:47 +02:00
colour <- font_blue # is function
2019-04-05 18:47:39 +02:00
}
2020-09-24 00:30:11 +02:00
if (total_n_added + total_n_changed > 0) {
cat("\n")
}
cat(colour(paste0("=> ", wouldve, "changed ",
font_bold(formatnr(verbose_info %pm>%
pm_filter(!is.na(old)) %pm>%
2020-07-13 09:17:24 +02:00
nrow()), "test results"),
2019-02-08 16:06:54 +01:00
"\n")))
2020-09-24 00:30:11 +02:00
if (total_n_changed > 0) {
changed_summary <- verbose_info %pm>%
pm_filter(!is.na(old)) %pm>%
2020-09-24 00:30:11 +02:00
pm_count(old, new, name = "n")
2020-05-16 13:05:47 +02:00
cat(paste(" -",
paste0(formatnr(changed_summary$n), " test result", ifelse(changed_summary$n > 1, "s", ""), " changed from ",
2020-09-24 12:38:13 +02:00
paste0('"', changed_summary$old, '"'), " to ", paste0('"', changed_summary$new, '"')), collapse = "\n"))
2019-04-05 18:47:39 +02:00
cat("\n")
}
2020-09-24 00:30:11 +02:00
cat(paste0(font_grey(strrep("-", 0.95 * options()$width)), "\n"))
if (verbose == FALSE & total_n_added + total_n_changed > 0) {
2020-11-10 16:35:56 +01:00
cat("\n", word_wrap("Use ", font_bold("eucast_rules(..., verbose = TRUE)"), " (on your original data) to get a data.frame with all specified edits instead."), "\n\n", sep = "")
} else if (verbose == TRUE) {
2020-11-10 16:35:56 +01:00
cat("\n", word_wrap("Used 'Verbose mode' (", font_bold("verbose = TRUE"), "), which returns a data.frame with all specified edits.\nUse ", font_bold("verbose = FALSE"), " to apply the rules on your data."), "\n\n", sep = "")
2019-03-28 21:33:28 +01:00
}
2018-10-17 17:32:34 +02:00
}
2020-10-04 19:26:43 +02:00
if (length(warn_lacking_rsi_class) > 0) {
warn_lacking_rsi_class <- unique(warn_lacking_rsi_class)
2020-11-10 16:35:56 +01:00
warning_("Not all columns with antimicrobial results are of class <rsi>. Transform them on beforehand, with e.g.:\n",
" ", x_deparsed, " %>% mutate_if(is.rsi.eligible, as.rsi)\n",
" ", x_deparsed, " %>% as.rsi(", ifelse(length(warn_lacking_rsi_class) == 1,
warn_lacking_rsi_class,
paste0(warn_lacking_rsi_class[1], ":", warn_lacking_rsi_class[length(warn_lacking_rsi_class)])),
")",
call = FALSE)
2020-05-27 16:37:49 +02:00
}
2019-04-05 18:47:39 +02:00
# Return data set ---------------------------------------------------------
2018-10-19 00:17:03 +02:00
if (verbose == TRUE) {
2019-04-05 18:47:39 +02:00
verbose_info
} else {
2020-09-24 00:30:11 +02:00
# x was analysed with only unique rows, so join everything together again
x <- x[, c(cols_ab, ".rowid"), drop = FALSE]
x.bak <- x.bak[, setdiff(colnames(x.bak), cols_ab), drop = FALSE]
x.bak <- x.bak %pm>%
pm_left_join(x, by = ".rowid")
x.bak <- x.bak[, old_cols, drop = FALSE]
# reset original attributes
2020-09-24 00:30:11 +02:00
attributes(x.bak) <- old_attributes
x.bak
}
}
# helper function for editing the table ----
edit_rsi <- function(x,
col_mo,
to,
rule,
rows,
cols,
last_verbose_info,
original_data,
warned,
info) {
cols <- unique(cols[!is.na(cols) & !is.null(cols)])
# for Verbose Mode, keep track of all changes and return them
track_changes <- list(added = 0,
changed = 0,
output = x,
verbose_info = last_verbose_info,
2020-10-04 19:26:43 +02:00
rsi_warn = character(0))
2020-09-24 00:30:11 +02:00
txt_error <- function() {
if (info == TRUE) cat("", font_red_bg(font_white(" ERROR ")), "\n\n")
}
txt_warning <- function() {
if (warned == FALSE) {
if (info == TRUE) cat("", font_yellow_bg(font_black(" WARNING ")))
}
warned <<- TRUE
}
if (length(rows) > 0 & length(cols) > 0) {
new_edits <- x
if (any(!sapply(x[, cols, drop = FALSE], is.rsi), na.rm = TRUE)) {
2020-10-04 19:26:43 +02:00
track_changes$rsi_warn <- cols[!sapply(x[, cols, drop = FALSE], is.rsi)]
2020-09-24 00:30:11 +02:00
}
tryCatch(
# insert into original table
new_edits[rows, cols] <- to,
warning = function(w) {
if (w$message %like% "invalid factor level") {
xyz <- sapply(cols, function(col) {
2020-11-10 16:35:56 +01:00
new_edits[, col] <<- factor(x = as.character(pm_pull(new_edits, col)),
levels = unique(c(to, levels(pm_pull(new_edits, col)))))
2020-09-24 00:30:11 +02:00
invisible()
})
2020-11-10 16:35:56 +01:00
suppressWarnings(new_edits[rows, cols] <<- to)
warning_('Value "', to, '" added to the factor levels of column(s) `', paste(cols, collapse = "`, `"), "` because this value was not an existing factor level. A better way is to use as.rsi() on beforehand on antimicrobial columns to guarantee the right structure.", call = FALSE)
2020-09-24 00:30:11 +02:00
txt_warning()
warned <- FALSE
} else {
2020-11-10 16:35:56 +01:00
warning_(w$message, call = FALSE)
2020-09-24 00:30:11 +02:00
txt_warning()
cat("\n") # txt_warning() does not append a "\n" on itself
}
},
error = function(e) {
txt_error()
stop(paste0("In row(s) ", paste(rows[1:min(length(rows), 10)], collapse = ","),
ifelse(length(rows) > 10, "...", ""),
" while writing value '", to,
"' to column(s) `", paste(cols, collapse = "`, `"),
"`:\n", e$message),
call. = FALSE)
}
)
track_changes$output <- new_edits
2020-09-24 12:38:13 +02:00
if (isTRUE(info) && !isTRUE(all.equal(x, track_changes$output))) {
2020-09-24 00:30:11 +02:00
get_original_rows <- function(rowids) {
as.integer(rownames(original_data[which(original_data$.rowid %in% rowids), , drop = FALSE]))
}
for (i in seq_len(length(cols))) {
verbose_new <- data.frame(rowid = new_edits[rows, ".rowid", drop = TRUE],
col = cols[i],
mo_fullname = new_edits[rows, "fullname", drop = TRUE],
old = x[rows, cols[i], drop = TRUE],
new = to,
rule = font_stripstyle(rule[1]),
rule_group = font_stripstyle(rule[2]),
rule_name = font_stripstyle(rule[3]),
rule_source = font_stripstyle(rule[4]),
stringsAsFactors = FALSE)
colnames(verbose_new) <- c("rowid", "col", "mo_fullname", "old", "new",
"rule", "rule_group", "rule_name", "rule_source")
verbose_new <- verbose_new %pm>% pm_filter(old != new | is.na(old))
# save changes to data set 'verbose_info'
track_changes$verbose_info <- rbind(track_changes$verbose_info,
verbose_new,
stringsAsFactors = FALSE)
2020-09-24 00:30:11 +02:00
# count adds and changes
track_changes$added <- track_changes$added + verbose_new %pm>%
pm_filter(is.na(old)) %pm>%
pm_pull(rowid) %pm>%
get_original_rows() %pm>%
length()
track_changes$changed <- track_changes$changed + verbose_new %pm>%
pm_filter(!is.na(old)) %pm>%
pm_pull(rowid) %pm>%
get_original_rows() %pm>%
length()
}
}
2018-02-21 11:52:31 +01:00
}
2020-09-24 00:30:11 +02:00
return(track_changes)
2018-02-21 11:52:31 +01:00
}