Date: Mon, 20 May 2019 19:12:41 +0200
Subject: [PATCH] faster determination of abx
---
DESCRIPTION | 2 +-
R/eucast_rules.R | 3 +-
R/mdro.R | 314 +++++------
R/misc.R | 43 +-
docs/LICENSE-text.html | 2 +-
docs/articles/AMR.html | 527 +++++++++---------
.../AMR_files/figure-html/plot 1-1.png | Bin 36095 -> 36026 bytes
.../AMR_files/figure-html/plot 3-1.png | Bin 19279 -> 19260 bytes
.../AMR_files/figure-html/plot 4-1.png | Bin 69010 -> 69007 bytes
.../AMR_files/figure-html/plot 5-1.png | Bin 50773 -> 50772 bytes
docs/articles/EUCAST.html | 2 +-
docs/articles/SPSS.html | 2 +-
docs/articles/ab_property.html | 2 +-
docs/articles/benchmarks.html | 72 +--
.../figure-html/unnamed-chunk-5-1.png | Bin 27517 -> 27113 bytes
docs/articles/index.html | 2 +-
docs/articles/mo_property.html | 2 +-
docs/articles/resistance_predict.html | 2 +-
docs/authors.html | 2 +-
docs/index.html | 2 +-
docs/news/index.html | 2 +-
docs/reference/as.mo.html | 2 +-
docs/reference/index.html | 2 +-
tests/testthat/test-mdro.R | 2 +-
tests/testthat/test-misc.R | 2 +
25 files changed, 491 insertions(+), 498 deletions(-)
diff --git a/DESCRIPTION b/DESCRIPTION
index 4ba3534f..331974c1 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,5 +1,5 @@
Package: AMR
-Version: 0.6.1.9033
+Version: 0.6.1.9034
Date: 2019-05-20
Title: Antimicrobial Resistance Analysis
Authors@R: c(
diff --git a/R/eucast_rules.R b/R/eucast_rules.R
index 9bd9f59b..b92fae3a 100755
--- a/R/eucast_rules.R
+++ b/R/eucast_rules.R
@@ -291,7 +291,8 @@ eucast_rules <- function(x,
"SXT",
"VAN"),
hard_dependencies = NULL,
- verbose = verbose)
+ verbose = verbose,
+ ...)
AMC <- cols_ab['AMC']
AMK <- cols_ab['AMK']
diff --git a/R/mdro.R b/R/mdro.R
index eba2b7cc..175bf947 100755
--- a/R/mdro.R
+++ b/R/mdro.R
@@ -64,41 +64,41 @@ mdro <- function(x,
}
if (length(country) > 1) {
- stop('`country` must be a length one character string.', call. = FALSE)
+ stop("`country` must be a length one character string.", call. = FALSE)
}
if (is.null(country)) {
- country <- 'EUCAST'
+ country <- "EUCAST"
}
country <- trimws(country)
- if (tolower(country) != 'eucast' & !country %like% '^[a-z]{2}$') {
- stop('This is not a valid ISO 3166-1 alpha-2 country code: "', country, '". Please see ?mdro.', call. = FALSE)
+ if (tolower(country) != "eucast" & !country %like% "^[a-z]{2}$") {
+ stop("This is not a valid ISO 3166-1 alpha-2 country code: '", country, "'. Please see ?mdro.", call. = FALSE)
}
# create list and make country code case-independent
guideline <- list(country = list(code = tolower(country)))
- if (guideline$country$code == 'eucast') {
- guideline$country$name <- '(European guidelines)'
- guideline$name <- 'EUCAST Expert Rules, "Intrinsic Resistance and Exceptional Phenotypes Tables"'
- guideline$version <- 'Version 3.1'
- guideline$source <- 'http://www.eucast.org/fileadmin/src/media/PDFs/EUCAST_files/Expert_Rules/Expert_rules_intrinsic_exceptional_V3.1.pdf'
+ if (guideline$country$code == "eucast") {
+ guideline$country$name <- "(European guidelines)"
+ guideline$name <- "EUCAST Expert Rules, \"Intrinsic Resistance and Exceptional Phenotypes Tables\""
+ guideline$version <- "Version 3.1"
+ guideline$source <- "http://www.eucast.org/fileadmin/src/media/PDFs/EUCAST_files/Expert_Rules/Expert_rules_intrinsic_exceptional_V3.1.pdf"
# support per country:
- } else if (guideline$country$code == 'de') {
- guideline$country$name <- 'Germany'
- guideline$name <- ''
- guideline$version <- ''
- guideline$source <- ''
- } else if (guideline$country$code == 'nl') {
- guideline$country$name <- 'The Netherlands'
- guideline$name <- 'WIP-Richtlijn BRMO'
- guideline$version <- 'Revision as of December 2017'
- guideline$source <- 'https://www.rivm.nl/Documenten_en_publicaties/Professioneel_Praktisch/Richtlijnen/Infectieziekten/WIP_Richtlijnen/WIP_Richtlijnen/Ziekenhuizen/WIP_richtlijn_BRMO_Bijzonder_Resistente_Micro_Organismen_ZKH'
+ } else if (guideline$country$code == "de") {
+ guideline$country$name <- "Germany"
+ guideline$name <- ""
+ guideline$version <- ""
+ guideline$source <- ""
+ } else if (guideline$country$code == "nl") {
+ guideline$country$name <- "The Netherlands"
+ guideline$name <- "WIP-Richtlijn BRMO"
+ guideline$version <- "Revision as of December 2017"
+ guideline$source <- "https://www.rivm.nl/Documenten_en_publicaties/Professioneel_Praktisch/Richtlijnen/Infectieziekten/WIP_Richtlijnen/WIP_Richtlijnen/Ziekenhuizen/WIP_richtlijn_BRMO_Bijzonder_Resistente_Micro_Organismen_ZKH"
# add here more countries like this:
- # } else if (country$code == 'xx') {
- # country$name <- 'country name'
+ # } else if (country$code == "xx") {
+ # country$name <- "country name"
} else {
- stop('This country code is currently unsupported: ', guideline$country$code, call. = FALSE)
+ stop("This country code is currently unsupported: ", guideline$country$code, call. = FALSE)
}
if (info == TRUE) {
@@ -110,71 +110,71 @@ mdro <- function(x,
}
- cols_ab <- get_column_abx(x = x, verbose = verbose)
+ cols_ab <- get_column_abx(x = x, verbose = verbose, ...)
- AMC <- cols_ab['AMC']
- AMK <- cols_ab['AMK']
- AMP <- cols_ab['AMP']
- AMX <- cols_ab['AMX']
- ATM <- cols_ab['ATM']
- AZL <- cols_ab['AZL']
- AZM <- cols_ab['AZM']
- CAZ <- cols_ab['CAZ']
- CED <- cols_ab['CED']
- CHL <- cols_ab['CHL']
- CIP <- cols_ab['CIP']
- CLI <- cols_ab['CLI']
- CLR <- cols_ab['CLR']
- COL <- cols_ab['COL']
- CRO <- cols_ab['CRO']
- CTX <- cols_ab['CTX']
- CXM <- cols_ab['CXM']
- CZO <- cols_ab['CZO']
- DAP <- cols_ab['DAP']
- DOX <- cols_ab['DOX']
- ERY <- cols_ab['ERY']
- ETP <- cols_ab['ETP']
- FEP <- cols_ab['FEP']
- FLC <- cols_ab['FLC']
- FOS <- cols_ab['FOS']
- FOX <- cols_ab['FOX']
- FUS <- cols_ab['FUS']
- GEN <- cols_ab['GEN']
- IPM <- cols_ab['IPM']
- KAN <- cols_ab['KAN']
- LIN <- cols_ab['LIN']
- LNZ <- cols_ab['LNZ']
- LVX <- cols_ab['LVX']
- MEM <- cols_ab['MEM']
- MEZ <- cols_ab['MEZ']
- MTR <- cols_ab['MTR']
- MFX <- cols_ab['MFX']
- MNO <- cols_ab['MNO']
- NAL <- cols_ab['NAL']
- NEO <- cols_ab['NEO']
- NET <- cols_ab['NET']
- NIT <- cols_ab['NIT']
- NOR <- cols_ab['NOR']
- NOV <- cols_ab['NOV']
- OFX <- cols_ab['OFX']
- PEN <- cols_ab['PEN']
- PIP <- cols_ab['PIP']
- PLB <- cols_ab['PLB']
- PRI <- cols_ab['PRI']
- QDA <- cols_ab['QDA']
- RID <- cols_ab['RID']
- RIF <- cols_ab['RIF']
- RXT <- cols_ab['RXT']
- SIS <- cols_ab['SIS']
- SXT <- cols_ab['SXT']
- TCY <- cols_ab['TCY']
- TEC <- cols_ab['TEC']
- TGC <- cols_ab['TGC']
- TIC <- cols_ab['TIC']
- TMP <- cols_ab['TMP']
- TOB <- cols_ab['TOB']
- TZP <- cols_ab['TZP']
- VAN <- cols_ab['VAN']
+ AMC <- cols_ab["AMC"]
+ AMK <- cols_ab["AMK"]
+ AMP <- cols_ab["AMP"]
+ AMX <- cols_ab["AMX"]
+ ATM <- cols_ab["ATM"]
+ AZL <- cols_ab["AZL"]
+ AZM <- cols_ab["AZM"]
+ CAZ <- cols_ab["CAZ"]
+ CED <- cols_ab["CED"]
+ CHL <- cols_ab["CHL"]
+ CIP <- cols_ab["CIP"]
+ CLI <- cols_ab["CLI"]
+ CLR <- cols_ab["CLR"]
+ COL <- cols_ab["COL"]
+ CRO <- cols_ab["CRO"]
+ CTX <- cols_ab["CTX"]
+ CXM <- cols_ab["CXM"]
+ CZO <- cols_ab["CZO"]
+ DAP <- cols_ab["DAP"]
+ DOX <- cols_ab["DOX"]
+ ERY <- cols_ab["ERY"]
+ ETP <- cols_ab["ETP"]
+ FEP <- cols_ab["FEP"]
+ FLC <- cols_ab["FLC"]
+ FOS <- cols_ab["FOS"]
+ FOX <- cols_ab["FOX"]
+ FUS <- cols_ab["FUS"]
+ GEN <- cols_ab["GEN"]
+ IPM <- cols_ab["IPM"]
+ KAN <- cols_ab["KAN"]
+ LIN <- cols_ab["LIN"]
+ LNZ <- cols_ab["LNZ"]
+ LVX <- cols_ab["LVX"]
+ MEM <- cols_ab["MEM"]
+ MEZ <- cols_ab["MEZ"]
+ MTR <- cols_ab["MTR"]
+ MFX <- cols_ab["MFX"]
+ MNO <- cols_ab["MNO"]
+ NAL <- cols_ab["NAL"]
+ NEO <- cols_ab["NEO"]
+ NET <- cols_ab["NET"]
+ NIT <- cols_ab["NIT"]
+ NOR <- cols_ab["NOR"]
+ NOV <- cols_ab["NOV"]
+ OFX <- cols_ab["OFX"]
+ PEN <- cols_ab["PEN"]
+ PIP <- cols_ab["PIP"]
+ PLB <- cols_ab["PLB"]
+ PRI <- cols_ab["PRI"]
+ QDA <- cols_ab["QDA"]
+ RID <- cols_ab["RID"]
+ RIF <- cols_ab["RIF"]
+ RXT <- cols_ab["RXT"]
+ SIS <- cols_ab["SIS"]
+ SXT <- cols_ab["SXT"]
+ TCY <- cols_ab["TCY"]
+ TEC <- cols_ab["TEC"]
+ TGC <- cols_ab["TGC"]
+ TIC <- cols_ab["TIC"]
+ TMP <- cols_ab["TMP"]
+ TOB <- cols_ab["TOB"]
+ TZP <- cols_ab["TZP"]
+ VAN <- cols_ab["VAN"]
ab_missing <- function(ab) {
@@ -194,15 +194,15 @@ mdro <- function(x,
cols <- cols[!is.na(cols)]
if (length(rows) > 0 & length(cols) > 0) {
if (any_all == "any") {
- col_filter <- which(tbl_[, cols] == 'R')
+ row_filter <- which(tbl_[, cols] == "R")
} else if (any_all == "all") {
- col_filter <- tbl_ %>%
+ row_filter <- tbl_ %>%
mutate(index = 1:nrow(.)) %>%
filter_at(vars(cols), all_vars(. == "R")) %>%
pull((index))
}
- rows <- rows[rows %in% col_filter]
- tbl_[rows, 'MDRO'] <<- to
+ rows <- rows[rows %in% row_filter]
+ tbl_[rows, "MDRO"] <<- to
}
}
@@ -213,105 +213,117 @@ mdro <- function(x,
# add unconfirmed to where genus is available
mutate(MDRO = ifelse(!is.na(genus), 1, NA_integer_))
- if (guideline$country$code == 'eucast') {
+ if (guideline$country$code == "eucast") {
# EUCAST ------------------------------------------------------------------
# Table 5
trans_tbl(3,
- which(tbl_$family == 'Enterobacteriaceae'
- | tbl_$fullname %like% '^Pseudomonas aeruginosa'
- | tbl_$genus == 'Acinetobacter'),
+ which(tbl_$family == "Enterobacteriaceae"
+ | tbl_$fullname %like% "^Pseudomonas aeruginosa"
+ | tbl_$genus == "Acinetobacter"),
COL,
"all")
trans_tbl(3,
- which(tbl_$fullname %like% '^Salmonella Typhi'),
+ which(tbl_$fullname %like% "^Salmonella Typhi"),
c(carbapenems, fluoroquinolones),
"any")
trans_tbl(3,
- which(tbl_$fullname %like% '^Haemophilus influenzae'),
+ which(tbl_$fullname %like% "^Haemophilus influenzae"),
c(cephalosporins_3rd, carbapenems, fluoroquinolones),
"any")
trans_tbl(3,
- which(tbl_$fullname %like% '^Moraxella catarrhalis'),
+ which(tbl_$fullname %like% "^Moraxella catarrhalis"),
c(cephalosporins_3rd, fluoroquinolones),
"any")
trans_tbl(3,
- which(tbl_$fullname %like% '^Neisseria meningitidis'),
+ which(tbl_$fullname %like% "^Neisseria meningitidis"),
c(cephalosporins_3rd, fluoroquinolones),
"any")
trans_tbl(3,
- which(tbl_$fullname %like% '^Neisseria gonorrhoeae'),
+ which(tbl_$fullname %like% "^Neisseria gonorrhoeae"),
AZM,
"any")
# Table 6
trans_tbl(3,
- which(tbl_$fullname %like% '^Staphylococcus (aureus|epidermidis|coagulase negatief|hominis|haemolyticus|intermedius|pseudointermedius)'),
+ which(tbl_$fullname %like% "^Staphylococcus (aureus|epidermidis|coagulase negatief|hominis|haemolyticus|intermedius|pseudointermedius)"),
c(VAN, TEC, DAP, LNZ, QDA, TGC),
"any")
trans_tbl(3,
- which(tbl_$genus == 'Corynebacterium'),
+ which(tbl_$genus == "Corynebacterium"),
c(VAN, TEC, DAP, LNZ, QDA, TGC),
"any")
trans_tbl(3,
- which(tbl_$fullname %like% '^Streptococcus pneumoniae'),
+ which(tbl_$fullname %like% "^Streptococcus pneumoniae"),
c(carbapenems, VAN, TEC, DAP, LNZ, QDA, TGC, RIF),
"any")
trans_tbl(3, # Sr. groups A/B/C/G
- which(tbl_$fullname %like% '^Streptococcus (pyogenes|agalactiae|equisimilis|equi|zooepidemicus|dysgalactiae|anginosus)'),
+ which(tbl_$fullname %like% "^Streptococcus (pyogenes|agalactiae|equisimilis|equi|zooepidemicus|dysgalactiae|anginosus)"),
c(PEN, cephalosporins, VAN, TEC, DAP, LNZ, QDA, TGC),
"any")
trans_tbl(3,
- which(tbl_$genus == 'Enterococcus'),
+ which(tbl_$genus == "Enterococcus"),
c(DAP, LNZ, TGC, TEC),
"any")
trans_tbl(3,
- which(tbl_$fullname %like% '^Enterococcus faecalis'),
+ which(tbl_$fullname %like% "^Enterococcus faecalis"),
c(AMP, AMX),
"any")
# Table 7
trans_tbl(3,
- which(tbl_$genus == 'Bacteroides'),
+ which(tbl_$genus == "Bacteroides"),
MTR,
"any")
trans_tbl(3,
- which(tbl_$fullname %like% '^Clostridium difficile'),
- c( MTR, VAN),
+ which(tbl_$fullname %like% "^Clostridium difficile"),
+ c(MTR, VAN),
"any")
}
- if (guideline$country$code == 'de') {
+ if (guideline$country$code == "de") {
# Germany -----------------------------------------------------------------
stop("We are still working on German guidelines in this beta version.", call. = FALSE)
}
- if (guideline$country$code == 'nl') {
+ if (guideline$country$code == "nl") {
# Netherlands -------------------------------------------------------------
- aminoglycosides <- aminoglycosides[!ab_missing(aminoglycosides)]
- fluoroquinolones <- fluoroquinolones[!ab_missing(fluoroquinolones)]
- carbapenems <- carbapenems[!ab_missing(carbapenems)]
+ aminoglycosides <- aminoglycosides[!is.na(aminoglycosides)]
+ fluoroquinolones <- fluoroquinolones[!is.na(fluoroquinolones)]
+ carbapenems <- carbapenems[!is.na(carbapenems)]
+ amino <- AMX %or% AMP
+ third <- CAZ %or% CTX
+ ESBLs <- c(amino, third)
+ ESBLs <- ESBLs[!is.na(ESBLs)]
+ if (length(ESBLs) != 2) {
+ ESBLs <- character(0)
+ }
# Table 1
trans_tbl(3,
- which(tbl_$family == 'Enterobacteriaceae'),
+ which(tbl_$family == "Enterobacteriaceae"),
c(aminoglycosides, fluoroquinolones),
"all")
trans_tbl(2,
- which(tbl_$family == 'Enterobacteriaceae'),
- c(carbapenems),
+ which(tbl_$family == "Enterobacteriaceae"),
+ carbapenems,
"any")
+ trans_tbl(2,
+ which(tbl_$family == "Enterobacteriaceae"),
+ ESBLs,
+ "all")
+
# Table 2
trans_tbl(2,
- which(tbl_$genus == 'Acinetobacter'),
+ which(tbl_$genus == "Acinetobacter"),
c(carbapenems),
"any")
trans_tbl(3,
- which(tbl_$genus == 'Acinetobacter'),
+ which(tbl_$genus == "Acinetobacter"),
c(aminoglycosides, fluoroquinolones),
"all")
trans_tbl(3,
- which(tbl_$fullname %like% '^Stenotrophomonas maltophilia'),
+ which(tbl_$fullname %like% "^Stenotrophomonas maltophilia"),
SXT,
"all")
@@ -330,28 +342,28 @@ mdro <- function(x,
tbl_$psae <- 0
}
tbl_[which(
- tbl_$fullname %like% 'Pseudomonas aeruginosa'
+ tbl_$fullname %like% "Pseudomonas aeruginosa"
& tbl_$psae >= 3
- ), 'MDRO'] <- 3
+ ), "MDRO"] <- 3
# Table 3
trans_tbl(3,
- which(tbl_$fullname %like% 'Streptococcus pneumoniae'),
+ which(tbl_$fullname %like% "Streptococcus pneumoniae"),
PEN,
"all")
trans_tbl(3,
- which(tbl_$fullname %like% 'Streptococcus pneumoniae'),
+ which(tbl_$fullname %like% "Streptococcus pneumoniae"),
VAN,
"all")
trans_tbl(3,
- which(tbl_$fullname %like% 'Enterococcus faecium'),
+ which(tbl_$fullname %like% "Enterococcus faecium"),
c(PEN, VAN),
"all")
}
factor(x = tbl_$MDRO,
levels = 1:3,
- labels = c('Negative', 'Positive, unconfirmed', 'Positive'),
+ labels = c("Negative", "Positive, unconfirmed", "Positive"),
ordered = TRUE)
}
@@ -372,55 +384,3 @@ mrgn <- function(x, country = "de", ...) {
eucast_exceptional_phenotypes <- function(x, country = "EUCAST", ...) {
mdro(x = x, country = "EUCAST", ...)
}
-
-# is_ESBL <- function(x, col_mo = NULL, ...) {
-# get_ab_col <- function(columns, ab) {
-# columns[names(columns) == ab]
-# }
-# col_mo <- get_column_mo(tbl = x, col_mo = col_mo)
-# cols_ab <- get_column_abx(tbl = x,
-# soft_dependencies = c("AMX", "AMP"),
-# hard_dependencies = c("CAZ"),
-# ...)
-#
-# if (!any(c("AMX", "AMP") %in% names(cols_ab))) {
-# # both ampicillin and amoxicillin are missing
-# generate_warning_abs_missing(c("AMX", "AMP"), any = TRUE)
-# return(rep(NA, nrow(x)))
-# }
-#
-# ESBLs <- rep(NA, nrow(x))
-#
-# # first make all eligible cases FALSE
-# ESBLs[which(mo_family(x[, col_mo]) == "Enterobacteriaceae"
-# & x[, get_ab_col(cols_ab, "AMX")] %in% c("R", "I", "S")
-# & x[, get_ab_col(cols_ab, "AMX")] %in% c("R", "I", "S")
-# & x[, get_ab_col(cols_ab, "AMX")] %in% c("R", "I", "S")
-# )] <- FALSE
-# # now make the positives cases TRUE
-# ESBLs[which(!is.na(ESBLs)
-# & x[, get_ab_col(cols_ab, "AMX")] == "R"
-# & x[, get_ab_col(cols_ab, "CAZ")] == "R")] <- TRUE
-# ESBLs
-#
-# }
-#
-# is_3MRGN <- function(x, ...) {
-#
-# }
-#
-# is_4MRGN <- function(x, ...) {
-#
-# }
-
-get_column_mo <- function(tbl, col_mo = NULL) {
- # throws a blue note about which column will be used if guessed
- if (is.null(col_mo)) {
- col_mo <- search_type_in_df(tbl = tbl, type = "mo")
- }
- if (is.null(col_mo)) {
- stop("`col_mo` must be set.", call. = FALSE)
- }
- col_mo
-}
-
diff --git a/R/misc.R b/R/misc.R
index 2739c5b1..8c4eb0f8 100755
--- a/R/misc.R
+++ b/R/misc.R
@@ -157,23 +157,56 @@ search_type_in_df <- function(tbl, type) {
get_column_abx <- function(x,
soft_dependencies = NULL,
hard_dependencies = NULL,
- verbose = FALSE) {
+ verbose = FALSE,
+ ...) {
+ # determine from given data set
df_trans <- data.frame(colnames = colnames(x),
abcode = suppressWarnings(as.ab(colnames(x))))
df_trans <- df_trans[!is.na(df_trans$abcode),]
x <- as.character(df_trans$colnames)
names(x) <- df_trans$abcode
+
+ # add from self-defined dots (...):
+ # get_column_abx(septic_patients %>% rename(thisone = AMX), amox = "thisone")
+ dots <- list(...)
+ if (length(dots) > 0) {
+ dots <- unlist(dots)
+ newnames <- suppressWarnings(as.ab(names(dots)))
+ if (any(is.na(newnames))) {
+ warning("Invalid antibiotic reference(s): ", toString(names(dots)[is.na(newnames)]),
+ call. = FALSE, immediate. = TRUE)
+ }
+ names(dots) <- newnames
+ dots <- dots[!is.na(names(dots))]
+ # merge, but overwrite automatically determined ones by 'dots'
+ x <- c(x[!x %in% dots & !names(x) %in% names(dots)], dots)
+ }
+
# sort on name
x <- x[sort(names(x))]
+ duplies <- x[base::duplicated(x)]
if (verbose == TRUE) {
for (i in 1:length(x)) {
- message(blue(paste0("NOTE: Using column `", bold(x[i]), "` as input for ", names(x)[i],
- " (", ab_name(names(x)[i], language = "en", tolower = TRUE), ").")))
+ if (x[i] %in% duplies) {
+ message(red(paste0("NOTE: Using column `", bold(x[i]), "` as input for ", names(x)[i],
+ " (", ab_name(names(x)[i], language = "en", tolower = TRUE), ") [DUPLICATED USE].")))
+ } else {
+ message(blue(paste0("NOTE: Using column `", bold(x[i]), "` as input for ", names(x)[i],
+ " (", ab_name(names(x)[i], language = "en", tolower = TRUE), ").")))
+ }
}
}
+ if (n_distinct(x) != length(x)) {
+ msg_txt <- paste("Column(s)", paste0("'", duplies, "'", collapse = "'"), "used for more than one antibiotic.")
+ if (verbose == FALSE) {
+ msg_txt <- paste(msg_txt, "Use verbose = TRUE to see which antibiotics are used by which columns.")
+ }
+ stop(msg_txt, call. = FALSE)
+ }
+
if (!is.null(hard_dependencies)) {
if (!all(hard_dependencies %in% names(x))) {
# missing a hard dependency will return NA and consequently the data will not be analysed
@@ -275,3 +308,7 @@ t <- function(from, language = get_locale()) {
base::enc2utf8(from)
}
+
+"%or%" <- function(x, y) { # infix fallback: elementwise, yields x where x is not NA, otherwise y
+ ifelse(!is.na(x), x, ifelse(!is.na(y), y, NA)) # result is NA only where both x and y are NA
+}
diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html
index 545bc27f..f314972c 100644
--- a/docs/LICENSE-text.html
+++ b/docs/LICENSE-text.html
@@ -78,7 +78,7 @@
AMR (for R)
- 0.6.1.9033
+ 0.6.1.9034
diff --git a/docs/articles/AMR.html b/docs/articles/AMR.html
index 6c353812..43dbe901 100644
--- a/docs/articles/AMR.html
+++ b/docs/articles/AMR.html
@@ -40,7 +40,7 @@
AMR (for R)
- 0.6.1.9003
+ 0.6.1.9034
@@ -327,20 +327,9 @@
-2014-11-27 |
-T8 |
-Hospital B |
-Escherichia coli |
-R |
-S |
-R |
-S |
-F |
-
-
-2016-12-03 |
-R9 |
-Hospital B |
+2013-08-26 |
+Q7 |
+Hospital A |
Staphylococcus aureus |
S |
S |
@@ -348,42 +337,20 @@
S |
F |
-
-2013-11-11 |
-M6 |
-Hospital D |
-Staphylococcus aureus |
-S |
-S |
-S |
-S |
-M |
-
-2014-03-17 |
-O9 |
-Hospital B |
+2012-10-04 |
+O2 |
+Hospital A |
Escherichia coli |
-S |
R |
S |
S |
+S |
F |
-2012-10-08 |
-R4 |
-Hospital C |
-Streptococcus pneumoniae |
-S |
-S |
-S |
-S |
-F |
-
-
-2015-10-15 |
-C5 |
+2012-10-23 |
+C4 |
Hospital C |
Escherichia coli |
S |
@@ -392,6 +359,39 @@
S |
M |
+
+2011-08-17 |
+R4 |
+Hospital B |
+Escherichia coli |
+R |
+S |
+R |
+S |
+F |
+
+
+2016-01-18 |
+L2 |
+Hospital B |
+Escherichia coli |
+S |
+S |
+S |
+S |
+M |
+
+
+2011-09-06 |
+O1 |
+Hospital A |
+Escherichia coli |
+R |
+S |
+S |
+S |
+F |
+
Now, let’s start the cleaning and the analysis!
@@ -411,8 +411,8 @@
#
# Item Count Percent Cum. Count Cum. Percent
# --- ----- ------- -------- ----------- -------------
-# 1 M 10,370 51.8% 10,370 51.8%
-# 2 F 9,630 48.2% 20,000 100.0%
+# 1 M 10,378 51.9% 10,378 51.9%
+# 2 F 9,622 48.1% 20,000 100.0%
So, we can draw at least two conclusions immediately. From a data scientist perspective, the data looks clean: only values M
and F
. From a researcher perspective: there are slightly more men. Nothing we didn’t already know.
The data is already quite clean, but we still need to transform some variables. The bacteria
column now consists of text, and we want to add more variables based on microbial IDs later on. So, we will transform this column to valid IDs. The mutate()
function of the dplyr
package makes this really easy:
data <- data %>%
@@ -442,14 +442,14 @@
# Pasteurella multocida (no new changes)
# Staphylococcus (no new changes)
# Streptococcus groups A, B, C, G (no new changes)
-# Streptococcus pneumoniae (1414 new changes)
+# Streptococcus pneumoniae (1443 new changes)
# Viridans group streptococci (no new changes)
#
# EUCAST Expert Rules, Intrinsic Resistance and Exceptional Phenotypes (v3.1, 2016)
-# Table 01: Intrinsic resistance in Enterobacteriaceae (1367 new changes)
+# Table 01: Intrinsic resistance in Enterobacteriaceae (1303 new changes)
# Table 02: Intrinsic resistance in non-fermentative Gram-negative bacteria (no new changes)
# Table 03: Intrinsic resistance in other Gram-negative bacteria (no new changes)
-# Table 04: Intrinsic resistance in Gram-positive bacteria (2649 new changes)
+# Table 04: Intrinsic resistance in Gram-positive bacteria (2746 new changes)
# Table 08: Interpretive rules for B-lactam agents and Gram-positive cocci (no new changes)
# Table 09: Interpretive rules for B-lactam agents and Gram-negative rods (no new changes)
# Table 11: Interpretive rules for macrolides, lincosamides, and streptogramins (no new changes)
@@ -457,24 +457,24 @@
# Table 13: Interpretive rules for quinolones (no new changes)
#
# Other rules
-# Non-EUCAST: amoxicillin/clav acid = S where ampicillin = S (2211 new changes)
-# Non-EUCAST: ampicillin = R where amoxicillin/clav acid = R (107 new changes)
+# Non-EUCAST: amoxicillin/clav acid = S where ampicillin = S (2313 new changes)
+# Non-EUCAST: ampicillin = R where amoxicillin/clav acid = R (115 new changes)
# Non-EUCAST: piperacillin = R where piperacillin/tazobactam = R (no new changes)
# Non-EUCAST: piperacillin/tazobactam = S where piperacillin = S (no new changes)
# Non-EUCAST: trimethoprim = R where trimethoprim/sulfa = R (no new changes)
# Non-EUCAST: trimethoprim/sulfa = S where trimethoprim = S (no new changes)
#
# --------------------------------------------------------------------------
-# EUCAST rules affected 6,442 out of 20,000 rows, making a total of 7,748 edits
+# EUCAST rules affected 6,579 out of 20,000 rows, making a total of 7,920 edits
# => added 0 test results
#
-# => changed 7,748 test results
-# - 94 test results changed from S to I
-# - 4,687 test results changed from S to R
-# - 1,084 test results changed from I to S
-# - 313 test results changed from I to R
-# - 1,548 test results changed from R to S
-# - 22 test results changed from R to I
+# => changed 7,920 test results
+# - 123 test results changed from S to I
+# - 4,680 test results changed from S to R
+# - 1,131 test results changed from I to S
+# - 306 test results changed from I to R
+# - 1,657 test results changed from R to S
+# - 23 test results changed from R to I
# --------------------------------------------------------------------------
#
# Use verbose = TRUE to get a data.frame with all specified edits instead.
@@ -502,8 +502,8 @@
# NOTE: Using column `bacteria` as input for `col_mo`.
# NOTE: Using column `date` as input for `col_date`.
# NOTE: Using column `patient_id` as input for `col_patient_id`.
-# => Found 5,692 first isolates (28.5% of total)
-So only 28.5% is suitable for resistance analysis! We can now filter on it with the filter()
function, also from the dplyr
package:
+# => Found 5,674 first isolates (28.4% of total)
+So only 28.4% is suitable for resistance analysis! We can now filter on it with the filter()
function, also from the dplyr
package:
For future use, the above two syntaxes can be shortened with the filter_first_isolate()
function:
@@ -529,8 +529,8 @@
1 |
-2010-01-18 |
-F5 |
+2010-03-19 |
+N8 |
B_ESCHR_COL |
S |
S |
@@ -540,10 +540,10 @@
2 |
-2010-02-13 |
-F5 |
+2010-05-06 |
+N8 |
B_ESCHR_COL |
-S |
+R |
S |
S |
S |
@@ -551,8 +551,8 @@
3 |
-2010-04-04 |
-F5 |
+2010-05-20 |
+N8 |
B_ESCHR_COL |
R |
S |
@@ -562,10 +562,10 @@
4 |
-2010-04-28 |
-F5 |
+2010-07-20 |
+N8 |
B_ESCHR_COL |
-R |
+S |
S |
S |
S |
@@ -573,21 +573,21 @@
5 |
-2010-08-22 |
-F5 |
+2010-09-04 |
+N8 |
B_ESCHR_COL |
S |
S |
-R |
+S |
S |
FALSE |
6 |
-2010-10-07 |
-F5 |
+2010-09-13 |
+N8 |
B_ESCHR_COL |
-S |
+R |
S |
S |
S |
@@ -595,30 +595,8 @@
7 |
-2010-10-15 |
-F5 |
-B_ESCHR_COL |
-R |
-R |
-R |
-S |
-FALSE |
-
-
-8 |
-2010-11-24 |
-F5 |
-B_ESCHR_COL |
-R |
-I |
-S |
-R |
-FALSE |
-
-
-9 |
-2011-05-09 |
-F5 |
+2011-04-01 |
+N8 |
B_ESCHR_COL |
S |
S |
@@ -627,14 +605,36 @@
TRUE |
-10 |
-2011-05-17 |
-F5 |
+8 |
+2011-10-15 |
+N8 |
B_ESCHR_COL |
R |
-R |
-R |
-R |
+S |
+S |
+S |
+FALSE |
+
+
+9 |
+2011-11-14 |
+N8 |
+B_ESCHR_COL |
+S |
+S |
+S |
+S |
+FALSE |
+
+
+10 |
+2012-02-11 |
+N8 |
+B_ESCHR_COL |
+S |
+S |
+S |
+S |
FALSE |
@@ -650,7 +650,7 @@
# NOTE: Using column `patient_id` as input for `col_patient_id`.
# NOTE: Using column `keyab` as input for `col_keyantibiotics`. Use col_keyantibiotics = FALSE to prevent this.
# [Criterion] Inclusion based on key antibiotics, ignoring I.
-# => Found 15,100 first weighted isolates (75.5% of total)
+# => Found 14,968 first weighted isolates (74.8% of total)
1 |
-2010-01-18 |
-F5 |
+2010-03-19 |
+N8 |
B_ESCHR_COL |
S |
S |
@@ -679,119 +679,119 @@
2 |
-2010-02-13 |
-F5 |
+2010-05-06 |
+N8 |
B_ESCHR_COL |
-S |
+R |
S |
S |
S |
FALSE |
-FALSE |
+TRUE |
3 |
-2010-04-04 |
-F5 |
+2010-05-20 |
+N8 |
B_ESCHR_COL |
R |
S |
S |
S |
FALSE |
-TRUE |
+FALSE |
4 |
-2010-04-28 |
-F5 |
-B_ESCHR_COL |
-R |
-S |
-S |
-S |
-FALSE |
-FALSE |
-
-
-5 |
-2010-08-22 |
-F5 |
+2010-07-20 |
+N8 |
B_ESCHR_COL |
S |
S |
-R |
+S |
S |
FALSE |
TRUE |
-
-6 |
-2010-10-07 |
-F5 |
+
+5 |
+2010-09-04 |
+N8 |
B_ESCHR_COL |
S |
S |
S |
S |
FALSE |
+FALSE |
+
+
+6 |
+2010-09-13 |
+N8 |
+B_ESCHR_COL |
+R |
+S |
+S |
+S |
+FALSE |
TRUE |
7 |
-2010-10-15 |
-F5 |
+2011-04-01 |
+N8 |
B_ESCHR_COL |
-R |
-R |
-R |
S |
-FALSE |
+S |
+S |
+S |
+TRUE |
TRUE |
8 |
-2010-11-24 |
-F5 |
+2011-10-15 |
+N8 |
B_ESCHR_COL |
R |
-I |
S |
-R |
+S |
+S |
FALSE |
TRUE |
9 |
-2011-05-09 |
-F5 |
+2011-11-14 |
+N8 |
B_ESCHR_COL |
S |
S |
S |
S |
-TRUE |
+FALSE |
TRUE |
10 |
-2011-05-17 |
-F5 |
+2012-02-11 |
+N8 |
B_ESCHR_COL |
-R |
-R |
-R |
-R |
+S |
+S |
+S |
+S |
+FALSE |
FALSE |
-TRUE |
-Instead of 2, now 8 isolates are flagged. In total, 75.5% of all isolates are marked ‘first weighted’ - 47% more than when using the CLSI guideline. In real life, this novel algorithm will yield 5-10% more isolates than the classic CLSI guideline.
+Instead of 2, now 7 isolates are flagged. In total, 74.8% of all isolates are marked ‘first weighted’ - 46.5% more than when using the CLSI guideline. In real life, this novel algorithm will yield 5-10% more isolates than the classic CLSI guideline.
As with filter_first_isolate()
, there’s a shortcut for this new algorithm too:
-So we end up with 15,100 isolates for analysis.
+So we end up with 14,968 isolates for analysis.
We can remove unneeded columns:
@@ -799,7 +799,6 @@
-1 |
-2014-11-27 |
-T8 |
-Hospital B |
-B_ESCHR_COL |
-R |
-S |
-R |
-S |
-F |
-Gram negative |
-Escherichia |
-coli |
-TRUE |
-
-
-4 |
-2014-03-17 |
-O9 |
-Hospital B |
-B_ESCHR_COL |
-S |
-S |
-S |
-S |
-F |
-Gram negative |
-Escherichia |
-coli |
-TRUE |
-
-
-5 |
-2012-10-08 |
-R4 |
-Hospital C |
-B_STRPT_PNE |
-S |
-S |
-S |
-R |
-F |
-Gram positive |
-Streptococcus |
-pneumoniae |
-TRUE |
-
-
-8 |
-2016-05-04 |
-V6 |
+2013-08-26 |
+Q7 |
Hospital A |
B_STPHY_AUR |
-R |
S |
-R |
-R |
+S |
+S |
+S |
F |
Gram positive |
Staphylococcus |
aureus |
TRUE |
-
-9 |
-2016-06-27 |
-Z9 |
-Hospital B |
+
+2012-10-04 |
+O2 |
+Hospital A |
B_ESCHR_COL |
R |
S |
@@ -895,20 +844,64 @@
coli |
TRUE |
-
-10 |
-2015-06-28 |
-K1 |
-Hospital D |
-B_STPHY_AUR |
+
+2012-10-23 |
+C4 |
+Hospital C |
+B_ESCHR_COL |
S |
S |
S |
S |
M |
-Gram positive |
-Staphylococcus |
-aureus |
+Gram negative |
+Escherichia |
+coli |
+TRUE |
+
+
+2011-08-17 |
+R4 |
+Hospital B |
+B_ESCHR_COL |
+R |
+S |
+R |
+S |
+F |
+Gram negative |
+Escherichia |
+coli |
+TRUE |
+
+
+2016-01-18 |
+L2 |
+Hospital B |
+B_ESCHR_COL |
+S |
+S |
+S |
+S |
+M |
+Gram negative |
+Escherichia |
+coli |
+TRUE |
+
+
+2011-09-06 |
+O1 |
+Hospital A |
+B_ESCHR_COL |
+R |
+S |
+S |
+S |
+F |
+Gram negative |
+Escherichia |
+coli |
TRUE |
@@ -928,9 +921,9 @@
Or can be used like the dplyr
way, which is easier to read:
-Frequency table of genus
and species
from a data.frame
(15,100 x 13)
+Frequency table of genus
and species
from a data.frame
(14,968 x 13)
Columns: 2
-Length: 15,100 (of which NA: 0 = 0.00%)
+Length: 14,968 (of which NA: 0 = 0.00%)
Unique: 4
Shortest: 16
Longest: 24
@@ -947,33 +940,33 @@ Longest: 24
1 |
Escherichia coli |
-7,486 |
-49.6% |
-7,486 |
-49.6% |
+7,394 |
+49.4% |
+7,394 |
+49.4% |
2 |
Staphylococcus aureus |
-3,730 |
-24.7% |
-11,216 |
-74.3% |
+3,716 |
+24.8% |
+11,110 |
+74.2% |
3 |
Streptococcus pneumoniae |
-2,257 |
-14.9% |
-13,473 |
-89.2% |
+2,291 |
+15.3% |
+13,401 |
+89.5% |
4 |
Klebsiella pneumoniae |
-1,627 |
-10.8% |
-15,100 |
+1,567 |
+10.5% |
+14,968 |
100.0% |
@@ -984,7 +977,7 @@ Longest: 24
Resistance percentages
The functions portion_S()
, portion_SI()
, portion_I()
, portion_IR()
and portion_R()
can be used to determine the portion of a specific antimicrobial outcome. As per the EUCAST guideline of 2019, we calculate resistance as the portion of R (portion_R()
) and susceptibility as the portion of S and I (portion_SI()
). These functions can be used on their own:
+# [1] 0.4659941
Or can be used in conjunction with group_by()
and summarise()
, both from the dplyr
package:
@@ -1027,23 +1020,23 @@ Longest: 24