diff --git a/DESCRIPTION b/DESCRIPTION index 4ba3534f..331974c1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: AMR -Version: 0.6.1.9033 +Version: 0.6.1.9034 Date: 2019-05-20 Title: Antimicrobial Resistance Analysis Authors@R: c( diff --git a/R/eucast_rules.R b/R/eucast_rules.R index 9bd9f59b..b92fae3a 100755 --- a/R/eucast_rules.R +++ b/R/eucast_rules.R @@ -291,7 +291,8 @@ eucast_rules <- function(x, "SXT", "VAN"), hard_dependencies = NULL, - verbose = verbose) + verbose = verbose, + ...) AMC <- cols_ab['AMC'] AMK <- cols_ab['AMK'] diff --git a/R/mdro.R b/R/mdro.R index eba2b7cc..175bf947 100755 --- a/R/mdro.R +++ b/R/mdro.R @@ -64,41 +64,41 @@ mdro <- function(x, } if (length(country) > 1) { - stop('`country` must be a length one character string.', call. = FALSE) + stop("`country` must be a length one character string.", call. = FALSE) } if (is.null(country)) { - country <- 'EUCAST' + country <- "EUCAST" } country <- trimws(country) - if (tolower(country) != 'eucast' & !country %like% '^[a-z]{2}$') { - stop('This is not a valid ISO 3166-1 alpha-2 country code: "', country, '". Please see ?mdro.', call. = FALSE) + if (tolower(country) != "eucast" & !country %like% "^[a-z]{2}$") { + stop("This is not a valid ISO 3166-1 alpha-2 country code: '", country, "'. Please see ?mdro.", call. 
= FALSE) } # create list and make country code case-independent guideline <- list(country = list(code = tolower(country))) - if (guideline$country$code == 'eucast') { - guideline$country$name <- '(European guidelines)' - guideline$name <- 'EUCAST Expert Rules, "Intrinsic Resistance and Exceptional Phenotypes Tables"' - guideline$version <- 'Version 3.1' - guideline$source <- 'http://www.eucast.org/fileadmin/src/media/PDFs/EUCAST_files/Expert_Rules/Expert_rules_intrinsic_exceptional_V3.1.pdf' + if (guideline$country$code == "eucast") { + guideline$country$name <- "(European guidelines)" + guideline$name <- "EUCAST Expert Rules, \"Intrinsic Resistance and Exceptional Phenotypes Tables\"" + guideline$version <- "Version 3.1" + guideline$source <- "http://www.eucast.org/fileadmin/src/media/PDFs/EUCAST_files/Expert_Rules/Expert_rules_intrinsic_exceptional_V3.1.pdf" # support per country: - } else if (guideline$country$code == 'de') { - guideline$country$name <- 'Germany' - guideline$name <- '' - guideline$version <- '' - guideline$source <- '' - } else if (guideline$country$code == 'nl') { - guideline$country$name <- 'The Netherlands' - guideline$name <- 'WIP-Richtlijn BRMO' - guideline$version <- 'Revision as of December 2017' - guideline$source <- 'https://www.rivm.nl/Documenten_en_publicaties/Professioneel_Praktisch/Richtlijnen/Infectieziekten/WIP_Richtlijnen/WIP_Richtlijnen/Ziekenhuizen/WIP_richtlijn_BRMO_Bijzonder_Resistente_Micro_Organismen_ZKH' + } else if (guideline$country$code == "de") { + guideline$country$name <- "Germany" + guideline$name <- "" + guideline$version <- "" + guideline$source <- "" + } else if (guideline$country$code == "nl") { + guideline$country$name <- "The Netherlands" + guideline$name <- "WIP-Richtlijn BRMO" + guideline$version <- "Revision as of December 2017" + guideline$source <- 
"https://www.rivm.nl/Documenten_en_publicaties/Professioneel_Praktisch/Richtlijnen/Infectieziekten/WIP_Richtlijnen/WIP_Richtlijnen/Ziekenhuizen/WIP_richtlijn_BRMO_Bijzonder_Resistente_Micro_Organismen_ZKH" # add here more countries like this: - # } else if (country$code == 'xx') { - # country$name <- 'country name' + # } else if (country$code == "xx") { + # country$name <- "country name" } else { - stop('This country code is currently unsupported: ', guideline$country$code, call. = FALSE) + stop("This country code is currently unsupported: ", guideline$country$code, call. = FALSE) } if (info == TRUE) { @@ -110,71 +110,71 @@ mdro <- function(x, } - cols_ab <- get_column_abx(x = x, verbose = verbose) + cols_ab <- get_column_abx(x = x, verbose = verbose, ...) - AMC <- cols_ab['AMC'] - AMK <- cols_ab['AMK'] - AMP <- cols_ab['AMP'] - AMX <- cols_ab['AMX'] - ATM <- cols_ab['ATM'] - AZL <- cols_ab['AZL'] - AZM <- cols_ab['AZM'] - CAZ <- cols_ab['CAZ'] - CED <- cols_ab['CED'] - CHL <- cols_ab['CHL'] - CIP <- cols_ab['CIP'] - CLI <- cols_ab['CLI'] - CLR <- cols_ab['CLR'] - COL <- cols_ab['COL'] - CRO <- cols_ab['CRO'] - CTX <- cols_ab['CTX'] - CXM <- cols_ab['CXM'] - CZO <- cols_ab['CZO'] - DAP <- cols_ab['DAP'] - DOX <- cols_ab['DOX'] - ERY <- cols_ab['ERY'] - ETP <- cols_ab['ETP'] - FEP <- cols_ab['FEP'] - FLC <- cols_ab['FLC'] - FOS <- cols_ab['FOS'] - FOX <- cols_ab['FOX'] - FUS <- cols_ab['FUS'] - GEN <- cols_ab['GEN'] - IPM <- cols_ab['IPM'] - KAN <- cols_ab['KAN'] - LIN <- cols_ab['LIN'] - LNZ <- cols_ab['LNZ'] - LVX <- cols_ab['LVX'] - MEM <- cols_ab['MEM'] - MEZ <- cols_ab['MEZ'] - MTR <- cols_ab['MTR'] - MFX <- cols_ab['MFX'] - MNO <- cols_ab['MNO'] - NAL <- cols_ab['NAL'] - NEO <- cols_ab['NEO'] - NET <- cols_ab['NET'] - NIT <- cols_ab['NIT'] - NOR <- cols_ab['NOR'] - NOV <- cols_ab['NOV'] - OFX <- cols_ab['OFX'] - PEN <- cols_ab['PEN'] - PIP <- cols_ab['PIP'] - PLB <- cols_ab['PLB'] - PRI <- cols_ab['PRI'] - QDA <- cols_ab['QDA'] - RID <- cols_ab['RID'] - RIF <- 
cols_ab['RIF'] - RXT <- cols_ab['RXT'] - SIS <- cols_ab['SIS'] - SXT <- cols_ab['SXT'] - TCY <- cols_ab['TCY'] - TEC <- cols_ab['TEC'] - TGC <- cols_ab['TGC'] - TIC <- cols_ab['TIC'] - TMP <- cols_ab['TMP'] - TOB <- cols_ab['TOB'] - TZP <- cols_ab['TZP'] - VAN <- cols_ab['VAN'] + AMC <- cols_ab["AMC"] + AMK <- cols_ab["AMK"] + AMP <- cols_ab["AMP"] + AMX <- cols_ab["AMX"] + ATM <- cols_ab["ATM"] + AZL <- cols_ab["AZL"] + AZM <- cols_ab["AZM"] + CAZ <- cols_ab["CAZ"] + CED <- cols_ab["CED"] + CHL <- cols_ab["CHL"] + CIP <- cols_ab["CIP"] + CLI <- cols_ab["CLI"] + CLR <- cols_ab["CLR"] + COL <- cols_ab["COL"] + CRO <- cols_ab["CRO"] + CTX <- cols_ab["CTX"] + CXM <- cols_ab["CXM"] + CZO <- cols_ab["CZO"] + DAP <- cols_ab["DAP"] + DOX <- cols_ab["DOX"] + ERY <- cols_ab["ERY"] + ETP <- cols_ab["ETP"] + FEP <- cols_ab["FEP"] + FLC <- cols_ab["FLC"] + FOS <- cols_ab["FOS"] + FOX <- cols_ab["FOX"] + FUS <- cols_ab["FUS"] + GEN <- cols_ab["GEN"] + IPM <- cols_ab["IPM"] + KAN <- cols_ab["KAN"] + LIN <- cols_ab["LIN"] + LNZ <- cols_ab["LNZ"] + LVX <- cols_ab["LVX"] + MEM <- cols_ab["MEM"] + MEZ <- cols_ab["MEZ"] + MTR <- cols_ab["MTR"] + MFX <- cols_ab["MFX"] + MNO <- cols_ab["MNO"] + NAL <- cols_ab["NAL"] + NEO <- cols_ab["NEO"] + NET <- cols_ab["NET"] + NIT <- cols_ab["NIT"] + NOR <- cols_ab["NOR"] + NOV <- cols_ab["NOV"] + OFX <- cols_ab["OFX"] + PEN <- cols_ab["PEN"] + PIP <- cols_ab["PIP"] + PLB <- cols_ab["PLB"] + PRI <- cols_ab["PRI"] + QDA <- cols_ab["QDA"] + RID <- cols_ab["RID"] + RIF <- cols_ab["RIF"] + RXT <- cols_ab["RXT"] + SIS <- cols_ab["SIS"] + SXT <- cols_ab["SXT"] + TCY <- cols_ab["TCY"] + TEC <- cols_ab["TEC"] + TGC <- cols_ab["TGC"] + TIC <- cols_ab["TIC"] + TMP <- cols_ab["TMP"] + TOB <- cols_ab["TOB"] + TZP <- cols_ab["TZP"] + VAN <- cols_ab["VAN"] ab_missing <- function(ab) { @@ -194,15 +194,15 @@ mdro <- function(x, cols <- cols[!is.na(cols)] if (length(rows) > 0 & length(cols) > 0) { if (any_all == "any") { - col_filter <- which(tbl_[, cols] == 'R') 
+ row_filter <- which(tbl_[, cols] == "R") } else if (any_all == "all") { - col_filter <- tbl_ %>% + row_filter <- tbl_ %>% mutate(index = 1:nrow(.)) %>% filter_at(vars(cols), all_vars(. == "R")) %>% pull((index)) } - rows <- rows[rows %in% col_filter] - tbl_[rows, 'MDRO'] <<- to + rows <- rows[rows %in% row_filter] + tbl_[rows, "MDRO"] <<- to } } @@ -213,105 +213,117 @@ mdro <- function(x, # add unconfirmed to where genus is available mutate(MDRO = ifelse(!is.na(genus), 1, NA_integer_)) - if (guideline$country$code == 'eucast') { + if (guideline$country$code == "eucast") { # EUCAST ------------------------------------------------------------------ # Table 5 trans_tbl(3, - which(tbl_$family == 'Enterobacteriaceae' - | tbl_$fullname %like% '^Pseudomonas aeruginosa' - | tbl_$genus == 'Acinetobacter'), + which(tbl_$family == "Enterobacteriaceae" + | tbl_$fullname %like% "^Pseudomonas aeruginosa" + | tbl_$genus == "Acinetobacter"), COL, "all") trans_tbl(3, - which(tbl_$fullname %like% '^Salmonella Typhi'), + which(tbl_$fullname %like% "^Salmonella Typhi"), c(carbapenems, fluoroquinolones), "any") trans_tbl(3, - which(tbl_$fullname %like% '^Haemophilus influenzae'), + which(tbl_$fullname %like% "^Haemophilus influenzae"), c(cephalosporins_3rd, carbapenems, fluoroquinolones), "any") trans_tbl(3, - which(tbl_$fullname %like% '^Moraxella catarrhalis'), + which(tbl_$fullname %like% "^Moraxella catarrhalis"), c(cephalosporins_3rd, fluoroquinolones), "any") trans_tbl(3, - which(tbl_$fullname %like% '^Neisseria meningitidis'), + which(tbl_$fullname %like% "^Neisseria meningitidis"), c(cephalosporins_3rd, fluoroquinolones), "any") trans_tbl(3, - which(tbl_$fullname %like% '^Neisseria gonorrhoeae'), + which(tbl_$fullname %like% "^Neisseria gonorrhoeae"), AZM, "any") # Table 6 trans_tbl(3, - which(tbl_$fullname %like% '^Staphylococcus (aureus|epidermidis|coagulase negatief|hominis|haemolyticus|intermedius|pseudointermedius)'), + which(tbl_$fullname %like% "^Staphylococcus 
(aureus|epidermidis|coagulase negatief|hominis|haemolyticus|intermedius|pseudointermedius)"), c(VAN, TEC, DAP, LNZ, QDA, TGC), "any") trans_tbl(3, - which(tbl_$genus == 'Corynebacterium'), + which(tbl_$genus == "Corynebacterium"), c(VAN, TEC, DAP, LNZ, QDA, TGC), "any") trans_tbl(3, - which(tbl_$fullname %like% '^Streptococcus pneumoniae'), + which(tbl_$fullname %like% "^Streptococcus pneumoniae"), c(carbapenems, VAN, TEC, DAP, LNZ, QDA, TGC, RIF), "any") trans_tbl(3, # Sr. groups A/B/C/G - which(tbl_$fullname %like% '^Streptococcus (pyogenes|agalactiae|equisimilis|equi|zooepidemicus|dysgalactiae|anginosus)'), + which(tbl_$fullname %like% "^Streptococcus (pyogenes|agalactiae|equisimilis|equi|zooepidemicus|dysgalactiae|anginosus)"), c(PEN, cephalosporins, VAN, TEC, DAP, LNZ, QDA, TGC), "any") trans_tbl(3, - which(tbl_$genus == 'Enterococcus'), + which(tbl_$genus == "Enterococcus"), c(DAP, LNZ, TGC, TEC), "any") trans_tbl(3, - which(tbl_$fullname %like% '^Enterococcus faecalis'), + which(tbl_$fullname %like% "^Enterococcus faecalis"), c(AMP, AMX), "any") # Table 7 trans_tbl(3, - which(tbl_$genus == 'Bacteroides'), + which(tbl_$genus == "Bacteroides"), MTR, "any") trans_tbl(3, - which(tbl_$fullname %like% '^Clostridium difficile'), - c( MTR, VAN), + which(tbl_$fullname %like% "^Clostridium difficile"), + c(MTR, VAN), "any") } - if (guideline$country$code == 'de') { + if (guideline$country$code == "de") { # Germany ----------------------------------------------------------------- stop("We are still working on German guidelines in this beta version.", call. 
= FALSE) } - if (guideline$country$code == 'nl') { + if (guideline$country$code == "nl") { # Netherlands ------------------------------------------------------------- - aminoglycosides <- aminoglycosides[!ab_missing(aminoglycosides)] - fluoroquinolones <- fluoroquinolones[!ab_missing(fluoroquinolones)] - carbapenems <- carbapenems[!ab_missing(carbapenems)] + aminoglycosides <- aminoglycosides[!is.na(aminoglycosides)] + fluoroquinolones <- fluoroquinolones[!is.na(fluoroquinolones)] + carbapenems <- carbapenems[!is.na(carbapenems)] + amino <- AMX %or% AMP + third <- CAZ %or% CTX + ESBLs <- c(amino, third) + ESBLs <- ESBLs[!is.na(ESBLs)] + if (length(ESBLs) != 2) { + ESBLs <- character(0) + } # Table 1 trans_tbl(3, - which(tbl_$family == 'Enterobacteriaceae'), + which(tbl_$family == "Enterobacteriaceae"), c(aminoglycosides, fluoroquinolones), "all") trans_tbl(2, - which(tbl_$family == 'Enterobacteriaceae'), - c(carbapenems), + which(tbl_$family == "Enterobacteriaceae"), + carbapenems, "any") + trans_tbl(2, + which(tbl_$family == "Enterobacteriaceae"), + ESBLs, + "all") + # Table 2 trans_tbl(2, - which(tbl_$genus == 'Acinetobacter'), + which(tbl_$genus == "Acinetobacter"), c(carbapenems), "any") trans_tbl(3, - which(tbl_$genus == 'Acinetobacter'), + which(tbl_$genus == "Acinetobacter"), c(aminoglycosides, fluoroquinolones), "all") trans_tbl(3, - which(tbl_$fullname %like% '^Stenotrophomonas maltophilia'), + which(tbl_$fullname %like% "^Stenotrophomonas maltophilia"), SXT, "all") @@ -330,28 +342,28 @@ mdro <- function(x, tbl_$psae <- 0 } tbl_[which( - tbl_$fullname %like% 'Pseudomonas aeruginosa' + tbl_$fullname %like% "Pseudomonas aeruginosa" & tbl_$psae >= 3 - ), 'MDRO'] <- 3 + ), "MDRO"] <- 3 # Table 3 trans_tbl(3, - which(tbl_$fullname %like% 'Streptococcus pneumoniae'), + which(tbl_$fullname %like% "Streptococcus pneumoniae"), PEN, "all") trans_tbl(3, - which(tbl_$fullname %like% 'Streptococcus pneumoniae'), + which(tbl_$fullname %like% "Streptococcus pneumoniae"), 
VAN, "all") trans_tbl(3, - which(tbl_$fullname %like% 'Enterococcus faecium'), + which(tbl_$fullname %like% "Enterococcus faecium"), c(PEN, VAN), "all") } factor(x = tbl_$MDRO, levels = 1:3, - labels = c('Negative', 'Positive, unconfirmed', 'Positive'), + labels = c("Negative", "Positive, unconfirmed", "Positive"), ordered = TRUE) } @@ -372,55 +384,3 @@ mrgn <- function(x, country = "de", ...) { eucast_exceptional_phenotypes <- function(x, country = "EUCAST", ...) { mdro(x = x, country = "EUCAST", ...) } - -# is_ESBL <- function(x, col_mo = NULL, ...) { -# get_ab_col <- function(columns, ab) { -# columns[names(columns) == ab] -# } -# col_mo <- get_column_mo(tbl = x, col_mo = col_mo) -# cols_ab <- get_column_abx(tbl = x, -# soft_dependencies = c("AMX", "AMP"), -# hard_dependencies = c("CAZ"), -# ...) -# -# if (!any(c("AMX", "AMP") %in% names(cols_ab))) { -# # both ampicillin and amoxicillin are missing -# generate_warning_abs_missing(c("AMX", "AMP"), any = TRUE) -# return(rep(NA, nrow(x))) -# } -# -# ESBLs <- rep(NA, nrow(x)) -# -# # first make all eligible cases FALSE -# ESBLs[which(mo_family(x[, col_mo]) == "Enterobacteriaceae" -# & x[, get_ab_col(cols_ab, "AMX")] %in% c("R", "I", "S") -# & x[, get_ab_col(cols_ab, "AMX")] %in% c("R", "I", "S") -# & x[, get_ab_col(cols_ab, "AMX")] %in% c("R", "I", "S") -# )] <- FALSE -# # now make the positives cases TRUE -# ESBLs[which(!is.na(ESBLs) -# & x[, get_ab_col(cols_ab, "AMX")] == "R" -# & x[, get_ab_col(cols_ab, "CAZ")] == "R")] <- TRUE -# ESBLs -# -# } -# -# is_3MRGN <- function(x, ...) { -# -# } -# -# is_4MRGN <- function(x, ...) { -# -# } - -get_column_mo <- function(tbl, col_mo = NULL) { - # throws a blue note about which column will be used if guessed - if (is.null(col_mo)) { - col_mo <- search_type_in_df(tbl = tbl, type = "mo") - } - if (is.null(col_mo)) { - stop("`col_mo` must be set.", call. 
= FALSE) - } - col_mo -} - diff --git a/R/misc.R b/R/misc.R index 2739c5b1..8c4eb0f8 100755 --- a/R/misc.R +++ b/R/misc.R @@ -157,23 +157,56 @@ search_type_in_df <- function(tbl, type) { get_column_abx <- function(x, soft_dependencies = NULL, hard_dependencies = NULL, - verbose = FALSE) { + verbose = FALSE, + ...) { + # determine from given data set df_trans <- data.frame(colnames = colnames(x), abcode = suppressWarnings(as.ab(colnames(x)))) df_trans <- df_trans[!is.na(df_trans$abcode),] x <- as.character(df_trans$colnames) names(x) <- df_trans$abcode + + # add from self-defined dots (...): + # get_column_abx(septic_patients %>% rename(thisone = AMX), amox = "thisone") + dots <- list(...) + if (length(dots) > 0) { + dots <- unlist(dots) + newnames <- suppressWarnings(as.ab(names(dots))) + if (any(is.na(newnames))) { + warning("Invalid antibiotic reference(s): ", toString(names(dots)[is.na(newnames)]), + call. = FALSE, immediate. = TRUE) + } + names(dots) <- newnames + dots <- dots[!is.na(names(dots))] + # merge, but overwrite automatically determined ones by 'dots' + x <- c(x[!x %in% dots & !names(x) %in% names(dots)], dots) + } + # sort on name x <- x[sort(names(x))] + duplies <- x[base::duplicated(x)] if (verbose == TRUE) { for (i in 1:length(x)) { - message(blue(paste0("NOTE: Using column `", bold(x[i]), "` as input for ", names(x)[i], - " (", ab_name(names(x)[i], language = "en", tolower = TRUE), ")."))) + if (x[i] %in% duplies) { + message(red(paste0("NOTE: Using column `", bold(x[i]), "` as input for ", names(x)[i], + " (", ab_name(names(x)[i], language = "en", tolower = TRUE), ") [DUPLICATED USE]."))) + } else { + message(blue(paste0("NOTE: Using column `", bold(x[i]), "` as input for ", names(x)[i], + " (", ab_name(names(x)[i], language = "en", tolower = TRUE), ")."))) + } } } + if (n_distinct(x) != length(x)) { + msg_txt <- paste("Column(s)", paste0("'", duplies, "'", collapse = "'"), "used for more than one antibiotic.") + if (verbose == FALSE) { + 
msg_txt <- paste(msg_txt, "Use verbose = TRUE to see which antibiotics are used by which columns.") + } + stop(msg_txt, call. = FALSE) + } + if (!is.null(hard_dependencies)) { if (!all(hard_dependencies %in% names(x))) { # missing a hard dependency will return NA and consequently the data will not be analysed @@ -275,3 +308,7 @@ t <- function(from, language = get_locale()) { base::enc2utf8(from) } + +"%or%" <- function(x, y) { + ifelse(!is.na(x), x, ifelse(!is.na(y), y, NA)) +} diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index 545bc27f..f314972c 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -78,7 +78,7 @@
diff --git a/docs/articles/AMR.html b/docs/articles/AMR.html index 6c353812..43dbe901 100644 --- a/docs/articles/AMR.html +++ b/docs/articles/AMR.html @@ -40,7 +40,7 @@ @@ -327,20 +327,9 @@Now, let’s start the cleaning and the analysis!
@@ -411,8 +411,8 @@ # # Item Count Percent Cum. Count Cum. Percent # --- ----- ------- -------- ----------- ------------- -# 1 M 10,370 51.8% 10,370 51.8% -# 2 F 9,630 48.2% 20,000 100.0% +# 1 M 10,378 51.9% 10,378 51.9% +# 2 F 9,622 48.1% 20,000 100.0%So, we can draw at least two conclusions immediately. From a data scientist perspective, the data looks clean: only values M
and F
. From a researcher perspective: there are slightly more men. Nothing we didn’t already know.
The data is already quite clean, but we still need to transform some variables. The bacteria
column now consists of text, and we want to add more variables based on microbial IDs later on. So, we will transform this column to valid IDs. The mutate()
function of the dplyr
package makes this really easy:
data <- data %>%
@@ -442,14 +442,14 @@
# Pasteurella multocida (no new changes)
# Staphylococcus (no new changes)
# Streptococcus groups A, B, C, G (no new changes)
-# Streptococcus pneumoniae (1414 new changes)
+# Streptococcus pneumoniae (1443 new changes)
# Viridans group streptococci (no new changes)
#
# EUCAST Expert Rules, Intrinsic Resistance and Exceptional Phenotypes (v3.1, 2016)
-# Table 01: Intrinsic resistance in Enterobacteriaceae (1367 new changes)
+# Table 01: Intrinsic resistance in Enterobacteriaceae (1303 new changes)
# Table 02: Intrinsic resistance in non-fermentative Gram-negative bacteria (no new changes)
# Table 03: Intrinsic resistance in other Gram-negative bacteria (no new changes)
-# Table 04: Intrinsic resistance in Gram-positive bacteria (2649 new changes)
+# Table 04: Intrinsic resistance in Gram-positive bacteria (2746 new changes)
# Table 08: Interpretive rules for B-lactam agents and Gram-positive cocci (no new changes)
# Table 09: Interpretive rules for B-lactam agents and Gram-negative rods (no new changes)
# Table 11: Interpretive rules for macrolides, lincosamides, and streptogramins (no new changes)
@@ -457,24 +457,24 @@
# Table 13: Interpretive rules for quinolones (no new changes)
#
# Other rules
-# Non-EUCAST: amoxicillin/clav acid = S where ampicillin = S (2211 new changes)
-# Non-EUCAST: ampicillin = R where amoxicillin/clav acid = R (107 new changes)
+# Non-EUCAST: amoxicillin/clav acid = S where ampicillin = S (2313 new changes)
+# Non-EUCAST: ampicillin = R where amoxicillin/clav acid = R (115 new changes)
# Non-EUCAST: piperacillin = R where piperacillin/tazobactam = R (no new changes)
# Non-EUCAST: piperacillin/tazobactam = S where piperacillin = S (no new changes)
# Non-EUCAST: trimethoprim = R where trimethoprim/sulfa = R (no new changes)
# Non-EUCAST: trimethoprim/sulfa = S where trimethoprim = S (no new changes)
#
# --------------------------------------------------------------------------
-# EUCAST rules affected 6,442 out of 20,000 rows, making a total of 7,748 edits
+# EUCAST rules affected 6,579 out of 20,000 rows, making a total of 7,920 edits
# => added 0 test results
#
-# => changed 7,748 test results
-# - 94 test results changed from S to I
-# - 4,687 test results changed from S to R
-# - 1,084 test results changed from I to S
-# - 313 test results changed from I to R
-# - 1,548 test results changed from R to S
-# - 22 test results changed from R to I
+# => changed 7,920 test results
+# - 123 test results changed from S to I
+# - 4,680 test results changed from S to R
+# - 1,131 test results changed from I to S
+# - 306 test results changed from I to R
+# - 1,657 test results changed from R to S
+# - 23 test results changed from R to I
# --------------------------------------------------------------------------
#
# Use verbose = TRUE to get a data.frame with all specified edits instead.
So only 28.5% is suitable for resistance analysis! We can now filter on it with the filter()
function, also from the dplyr
package:
So only 28.4% is suitable for resistance analysis! We can now filter on it with the filter()
function, also from the dplyr
package:
For future use, the above two syntaxes can be shortened with the filter_first_isolate()
function:
isolate | @@ -667,8 +667,8 @@||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | -2010-01-18 | -F5 | +2010-03-19 | +N8 | B_ESCHR_COL | S | S | @@ -679,119 +679,119 @@|||||||||
2 | -2010-02-13 | -F5 | +2010-05-06 | +N8 | B_ESCHR_COL | -S | +R | S | S | S | FALSE | -FALSE | +TRUE | |||
3 | -2010-04-04 | -F5 | +2010-05-20 | +N8 | B_ESCHR_COL | R | S | S | S | FALSE | -TRUE | +FALSE | ||||
4 | -2010-04-28 | -F5 | -B_ESCHR_COL | -R | -S | -S | -S | -FALSE | -FALSE | -|||||||
5 | -2010-08-22 | -F5 | +2010-07-20 | +N8 | B_ESCHR_COL | S | S | -R | +S | S | FALSE | TRUE | ||||
6 | -2010-10-07 | -F5 | +||||||||||||||
5 | +2010-09-04 | +N8 | B_ESCHR_COL | S | S | S | S | FALSE | +FALSE | +|||||||
6 | +2010-09-13 | +N8 | +B_ESCHR_COL | +R | +S | +S | +S | +FALSE | TRUE | |||||||
7 | -2010-10-15 | -F5 | +2011-04-01 | +N8 | B_ESCHR_COL | -R | -R | -R | S | -FALSE | +S | +S | +S | +TRUE | TRUE | |
8 | -2010-11-24 | -F5 | +2011-10-15 | +N8 | B_ESCHR_COL | R | -I | S | -R | +S | +S | FALSE | TRUE | |||
9 | -2011-05-09 | -F5 | +2011-11-14 | +N8 | B_ESCHR_COL | S | S | S | S | -TRUE | +FALSE | TRUE | ||||
10 | -2011-05-17 | -F5 | +2012-02-11 | +N8 | B_ESCHR_COL | -R | -R | -R | -R | +S | +S | +S | +S | +FALSE | FALSE | -TRUE |
Instead of 2, now 8 isolates are flagged. In total, 75.5% of all isolates are marked ‘first weighted’ - 47% more than when using the CLSI guideline. In real life, this novel algorithm will yield 5-10% more isolates than the classic CLSI guideline.
+Instead of 2, now 7 isolates are flagged. In total, 74.8% of all isolates are marked ‘first weighted’ - 46.5% more than when using the CLSI guideline. In real life, this novel algorithm will yield 5-10% more isolates than the classic CLSI guideline.
As with filter_first_isolate()
, there’s a shortcut for this new algorithm too:
So we end up with 15,100 isolates for analysis.
+So we end up with 14,968 isolates for analysis.
We can remove unneeded columns:
@@ -799,7 +799,6 @@date | patient_id | hospital | @@ -816,74 +815,24 @@||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | -2014-11-27 | -T8 | -Hospital B | -B_ESCHR_COL | -R | -S | -R | -S | -F | -Gram negative | -Escherichia | -coli | -TRUE | -|||||
4 | -2014-03-17 | -O9 | -Hospital B | -B_ESCHR_COL | -S | -S | -S | -S | -F | -Gram negative | -Escherichia | -coli | -TRUE | -|||||
5 | -2012-10-08 | -R4 | -Hospital C | -B_STRPT_PNE | -S | -S | -S | -R | -F | -Gram positive | -Streptococcus | -pneumoniae | -TRUE | -|||||
8 | -2016-05-04 | -V6 | +2013-08-26 | +Q7 | Hospital A | B_STPHY_AUR | -R | S | -R | -R | +S | +S | +S | F | Gram positive | Staphylococcus | aureus | TRUE |
9 | -2016-06-27 | -Z9 | -Hospital B | +|||||||||||||||
2012-10-04 | +O2 | +Hospital A | B_ESCHR_COL | R | S | @@ -895,20 +844,64 @@coli | TRUE | |||||||||||
10 | -2015-06-28 | -K1 | -Hospital D | -B_STPHY_AUR | +||||||||||||||
2012-10-23 | +C4 | +Hospital C | +B_ESCHR_COL | S | S | S | S | M | -Gram positive | -Staphylococcus | -aureus | +Gram negative | +Escherichia | +coli | +TRUE | +|||
2011-08-17 | +R4 | +Hospital B | +B_ESCHR_COL | +R | +S | +R | +S | +F | +Gram negative | +Escherichia | +coli | +TRUE | +||||||
2016-01-18 | +L2 | +Hospital B | +B_ESCHR_COL | +S | +S | +S | +S | +M | +Gram negative | +Escherichia | +coli | +TRUE | +||||||
2011-09-06 | +O1 | +Hospital A | +B_ESCHR_COL | +R | +S | +S | +S | +F | +Gram negative | +Escherichia | +coli | TRUE | ||||||
1 | Escherichia coli | -7,486 | -49.6% | -7,486 | -49.6% | +7,394 | +49.4% | +7,394 | +49.4% | |||||||||
2 | Staphylococcus aureus | -3,730 | -24.7% | -11,216 | -74.3% | +3,716 | +24.8% | +11,110 | +74.2% | |||||||||
3 | Streptococcus pneumoniae | -2,257 | -14.9% | -13,473 | -89.2% | +2,291 | +15.3% | +13,401 | +89.5% | |||||||||
4 | Klebsiella pneumoniae | -1,627 | -10.8% | -15,100 | +1,567 | +10.5% | +14,968 | 100.0% | ||||||||||
Hospital A | -0.4588551 | +0.4708540 | ||||||||||||||||
Hospital B | -0.4665288 | +0.4617585 | ||||||||||||||||
Hospital C | -0.4804318 | +0.4622030 | ||||||||||||||||
Hospital D | -0.4791059 | +0.4691689 |
In the table above, all measurements are shown in milliseconds (thousandths of a second). A value of 5 milliseconds means it can determine 200 input values per second. In case of 100 milliseconds, this is only 10 input values per second. The second input is the only one that has to be looked up thoroughly. All the others are known codes (the first one is a WHONET code) or common laboratory codes, or common full organism names like the last one. Full organism names are always preferred.
To achieve this speed, the as.mo
function also takes into account the prevalence of human pathogenic microorganisms. The downside is of course that less prevalent microorganisms will be determined less fast. See this example for the ID of Thermus islandicus (B_THERMS_ISL
), a bug probably never found before in humans:
T.islandicus <- microbenchmark(as.mo("theisl"),
@@ -236,12 +236,12 @@
print(T.islandicus, unit = "ms", signif = 2)
# Unit: milliseconds
# expr min lq mean median uq max neval
-# as.mo("theisl") 470 470 500 510 520 530 10
-# as.mo("THEISL") 470 470 490 470 520 520 10
-# as.mo("T. islandicus") 74 74 87 74 120 120 10
-# as.mo("T. islandicus") 74 74 89 74 120 140 10
-# as.mo("Thermus islandicus") 73 73 93 74 120 120 10
That takes 7.8 times as much time on average. A value of 100 milliseconds means it can only determine ~10 different input values per second. We can conclude that looking up arbitrary codes of less prevalent microorganisms is the worst way to go, in terms of calculation performance. Full names (like Thermus islandicus) are almost fast - these are the most probable input from most data sets.
+# as.mo("theisl") 470 480 510 520 530 530 10 +# as.mo("THEISL") 470 470 500 490 520 530 10 +# as.mo("T. islandicus") 74 75 85 76 81 120 10 +# as.mo("T. islandicus") 74 75 92 76 120 140 10 +# as.mo("Thermus islandicus") 73 73 74 74 74 75 10 +That takes 8.2 times as much time on average. A value of 100 milliseconds means it can only determine ~10 different input values per second. We can conclude that looking up arbitrary codes of less prevalent microorganisms is the worst way to go, in terms of calculation performance. Full names (like Thermus islandicus) are almost fast - these are the most probable input from most data sets.
In the figure below, we compare Escherichia coli (which is very common) with Prevotella brevis (which is moderately common) and with Thermus islandicus (which is very uncommon):
par(mar = c(5, 16, 4, 2)) # set more space for left margin text (16)
@@ -287,8 +287,8 @@
print(run_it, unit = "ms", signif = 3)
# Unit: milliseconds
# expr min lq mean median uq max neval
-# mo_fullname(x) 639 681 745 720 770 910 10
So transforming 500,000 values (!!) of 50 unique values only takes 0.72 seconds (719 ms). You only lose time on your unique input values.
+# mo_fullname(x) 677 770 812 790 884 988 10 +So transforming 500,000 values (!!) of 50 unique values only takes 0.79 seconds (789 ms). You only lose time on your unique input values.
So going from mo_fullname("Staphylococcus aureus")
to "Staphylococcus aureus"
takes 0.0017 seconds - it doesn’t even start calculating if the result would be the same as the expected resulting value. That goes for all helper functions:
run_it <- microbenchmark(A = mo_species("aureus"),
B = mo_genus("Staphylococcus"),
@@ -317,14 +317,14 @@
print(run_it, unit = "ms", signif = 3)
# Unit: milliseconds
# expr min lq mean median uq max neval
-# A 0.392 0.485 0.577 0.580 0.628 0.808 10
-# B 0.444 0.521 0.566 0.567 0.609 0.710 10
-# C 1.380 1.680 1.750 1.760 1.820 2.160 10
-# D 0.422 0.522 0.564 0.553 0.640 0.644 10
-# E 0.362 0.443 0.520 0.543 0.595 0.684 10
-# F 0.347 0.432 0.515 0.495 0.585 0.728 10
-# G 0.377 0.417 0.479 0.463 0.495 0.681 10
-# H 0.263 0.264 0.310 0.282 0.350 0.436 10
Of course, when running mo_phylum("Firmicutes")
the function has zero knowledge about the actual microorganism, namely S. aureus. But since the result would be "Firmicutes"
too, there is no point in calculating the result. And because this package ‘knows’ all phyla of all known bacteria (according to the Catalogue of Life), it can just return the initial value immediately.
Currently supported are German, Dutch, Spanish, Italian, French and Portuguese.
diff --git a/docs/articles/benchmarks_files/figure-html/unnamed-chunk-5-1.png b/docs/articles/benchmarks_files/figure-html/unnamed-chunk-5-1.png index d4a59c07..f1fd99d1 100644 Binary files a/docs/articles/benchmarks_files/figure-html/unnamed-chunk-5-1.png and b/docs/articles/benchmarks_files/figure-html/unnamed-chunk-5-1.png differ diff --git a/docs/articles/index.html b/docs/articles/index.html index 2b66d591..1ce938da 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -78,7 +78,7 @@ diff --git a/docs/articles/mo_property.html b/docs/articles/mo_property.html index 2db6b9e7..bc2daca7 100644 --- a/docs/articles/mo_property.html +++ b/docs/articles/mo_property.html @@ -40,7 +40,7 @@ diff --git a/docs/articles/resistance_predict.html b/docs/articles/resistance_predict.html index 9caa6ed7..224ba3ba 100644 --- a/docs/articles/resistance_predict.html +++ b/docs/articles/resistance_predict.html @@ -40,7 +40,7 @@ diff --git a/docs/authors.html b/docs/authors.html index c9a82572..e1b4b482 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -78,7 +78,7 @@ diff --git a/docs/index.html b/docs/index.html index 4bfd1484..e7b5bbda 100644 --- a/docs/index.html +++ b/docs/index.html @@ -42,7 +42,7 @@ diff --git a/docs/news/index.html b/docs/news/index.html index c2b6a3c1..ca6d8444 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -78,7 +78,7 @@ diff --git a/docs/reference/as.mo.html b/docs/reference/as.mo.html index 2f5c2b20..511d07d4 100644 --- a/docs/reference/as.mo.html +++ b/docs/reference/as.mo.html @@ -80,7 +80,7 @@ diff --git a/docs/reference/index.html b/docs/reference/index.html index c0e9f866..a2b74a4d 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -78,7 +78,7 @@ diff --git a/tests/testthat/test-mdro.R b/tests/testthat/test-mdro.R index 7521eba6..831a758e 100755 --- a/tests/testthat/test-mdro.R +++ b/tests/testthat/test-mdro.R @@ -40,7 +40,7 @@ test_that("mdro works", { # septic_patients should have 
these finding using Dutch guidelines expect_equal(outcome %>% freq() %>% pull(count), - c(1989, 9, 2)) # 1989 neg, 9 pos, 2 unconfirmed + c(1969, 25, 6)) # 1969 neg, 25 unconfirmed, 6 pos expect_equal( suppressWarnings( diff --git a/tests/testthat/test-misc.R b/tests/testthat/test-misc.R index 1403da12..e617aa3a 100755 --- a/tests/testthat/test-misc.R +++ b/tests/testthat/test-misc.R @@ -48,4 +48,6 @@ test_that("looking up ab columns works", { expect_warning(generate_warning_abs_missing(c("AMP", "AMX"), any = TRUE)) expect_warning(get_column_abx(septic_patients, hard_dependencies = "FUS")) expect_warning(get_column_abx(septic_patients, soft_dependencies = "FUS")) + expect_error(get_column_abx(dplyr::rename(septic_patients, thisone = AMX), amox = "thisone", tmp = "thisone", verbose = TRUE)) + expect_error(get_column_abx(dplyr::rename(septic_patients, thisone = AMX), amox = "thisone", tmp = "thisone", verbose = FALSE)) })