diff --git a/NEWS.md b/NEWS.md index bf1c4ef7..4b3ff0ac 100755 --- a/NEWS.md +++ b/NEWS.md @@ -17,17 +17,20 @@ * Fix for `portion_*(..., as_percent = TRUE)` when minimal amount of isolates would not be met * Added parameter `also_single_tested` for `portion_*` and `count_*` functions to also include cases where not all antibiotics were tested but at least one of the tested antibiotics includes the target antimicribial interpretation, see `?portion` * Using `portion_*` functions now throws a warning when total available isolate is below parameter `minimum` -* Functions `as.mo`, `as.rsi`, `as.mic` and `as.atc` will not set package name as attribute anymore +* Functions `as.mo`, `as.rsi`, `as.mic`, `as.atc` and `freq` will not set package name as attribute anymore +* Frequency tables - `freq()`: + * Check for `hms::is.hms` in frequency tables (`freq()`) + * Now prints in markdown at default in non-interactive sessions + * No longer adds the factor level column and sorts factors on count again + * Gained `na` parameter, to choose with character to print for empty values + * Support for class `difftime` + * New parameter `header` to turn it off (default when `markdown = TRUE`) +* `first_isolate` now tries to find columns to use as input when parameters are left blank +* Improvement for MDRO algorithm * Data set `septic_patients` is now a `data.frame`, not a tibble anymore -* Check for `hms::is.hms` in frequency tables (`freq()`) -* New parameter `header` for frequency tables to turn them off (default when `markdown = TRUE`) -* Freq now prints in markdown at default in non-interactive sessions -* Freq no longer add the factor level column and sorts factors on count again * Removed diacritics from all authors (columns `microorganisms$ref` and `microorganisms.old$ref`) to comply with CRAN policy to only allow ASCII characters -* Freq gained `na` parameter, to choose with character to print for empty values * Fix for `mo_property` not working properly * Fix for `EUCAST_rules` where some Streptococci would become ceftazidime R in EUCAST rule 4.5 -* Support for class `difftime` in frequency tables * Support for named vectors of class `mo`, useful for `top_freq()` * `ggplot_rsi` and `scale_y_percent` have `breaks` parameter * AI improvements for `as.mo`: diff --git a/R/first_isolate.R b/R/first_isolate.R index 5fda01e2..400f3ece 100755 --- a/R/first_isolate.R +++ b/R/first_isolate.R @@ -20,9 +20,9 @@ #' #' Determine first (weighted) isolates of all microorganisms of every patient per episode and (if needed) per specimen type. #' @param tbl a \code{data.frame} containing isolates. -#' @param col_date column name of the result date (or date that is was received on the lab) -#' @param col_patient_id column name of the unique IDs of the patients -#' @param col_mo column name of the unique IDs of the microorganisms, see \code{\link{mo}}. If this column has another class than \code{"mo"}, values will be coerced using \code{\link{as.mo}}. +#' @param col_date column name of the result date (or date that is was received on the lab), defaults to the first column of class \code{Date} +#' @param col_patient_id column name of the unique IDs of the patients, defaults to the first column that starts with 'patient' (case insensitive) +#' @param col_mo column name of the unique IDs of the microorganisms (see \code{\link{mo}}), defaults to the first column of class \code{mo}. If this column has another class than \code{"mo"}, values will be coerced using \code{\link{as.mo}}. #' @param col_testcode column name of the test codes. Use \code{col_testcode = NA} to \strong{not} exclude certain test codes (like test codes for screening). In that case \code{testcodes_exclude} will be ignored. Supports tidyverse-like quotation. #' @param col_specimen column name of the specimen type or group #' @param col_icu column name of the logicals (\code{TRUE}/\code{FALSE}) whether a ward or department is an Intensive Care Unit (ICU) @@ -125,42 +125,63 @@ #' col_keyantibiotics = 'keyab') #' } first_isolate <- function(tbl, - col_date, - col_patient_id, - col_mo = NA, - col_testcode = NA, - col_specimen = NA, - col_icu = NA, - col_keyantibiotics = NA, + col_date = NULL, + col_patient_id = NULL, + col_mo = NULL, + col_testcode = NULL, + col_specimen = NULL, + col_icu = NULL, + col_keyantibiotics = NULL, episode_days = 365, - testcodes_exclude = '', + testcodes_exclude = NULL, icu_exclude = FALSE, - filter_specimen = NA, + filter_specimen = NULL, output_logical = TRUE, type = "keyantibiotics", ignore_I = TRUE, points_threshold = 2, info = TRUE, - col_bactid = NA, - col_genus = NA, - col_species = NA) { + col_bactid = NULL, + col_genus = NULL, + col_species = NULL) { - if (!is.na(col_bactid)) { + if (!is.data.frame(tbl)) { + stop("`tbl` must be a data frame.", call. = FALSE) + } + + # try to find columns based on type + # -- mo + if (!is.null(col_bactid)) { col_mo <- col_bactid warning("Use of `col_bactid` is deprecated. Use `col_mo` instead.") + } else if (is.null(col_mo) & "mo" %in% lapply(tbl, class)) { + col_mo <- colnames(tbl)[lapply(tbl, class) == "mo"] + message("NOTE: Using column `", col_mo, "` as input for `col_mo`.") } + # -- date + if (is.null(col_date) & "Date" %in% lapply(tbl, class)) { + col_date <- colnames(tbl)[lapply(tbl, class) == "Date"] + message("NOTE: Using column `", col_date, "` as input for `col_date`.") + } + # -- patient id + if (is.null(col_patient_id) & any(colnames(tbl) %like% "^patient")) { + col_patient_id <- colnames(tbl)[colnames(tbl) %like% "^patient"][1] + message("NOTE: Using column `", col_patient_id, "` as input for `col_patient_id`.") + } + # bactid OR genus+species must be available - if (is.na(col_mo) & (is.na(col_genus) | is.na(col_species))) { + if (is.null(col_mo) & (is.null(col_genus) | is.null(col_species))) { stop('`col_mo` or both `col_genus` and `col_species` must be available.') } + # check if columns exist check_columns_existance <- function(column, tblname = tbl) { if (NROW(tblname) <= 1 | NCOL(tblname) <= 1) { stop('Please check tbl for existance.') } - if (!is.na(column)) { + if (!is.null(column)) { if (!(column %in% colnames(tblname))) { stop('Column `', column, '` not found.') } @@ -176,7 +197,7 @@ first_isolate <- function(tbl, check_columns_existance(col_icu) check_columns_existance(col_keyantibiotics) - if (!is.na(col_mo)) { + if (!is.null(col_mo)) { if (!tbl %>% pull(col_mo) %>% is.mo()) { tbl[, col_mo] <- as.mo(tbl[, col_mo]) } @@ -186,41 +207,37 @@ first_isolate <- function(tbl, col_species <- "species" } - if (is.na(col_testcode)) { - testcodes_exclude <- NA + if (is.null(col_testcode)) { + testcodes_exclude <- NULL } # remove testcodes - if (!is.na(testcodes_exclude[1]) & testcodes_exclude[1] != '' & info == TRUE) { + if (!is.null(testcodes_exclude) & info == TRUE) { cat('[Criteria] Excluded test codes:\n', toString(testcodes_exclude), '\n') } - if (is.na(col_icu)) { + if (is.null(col_icu)) { icu_exclude <- FALSE } else { tbl <- tbl %>% mutate(col_icu = tbl %>% pull(col_icu) %>% as.logical()) } - if (is.na(col_specimen)) { - filter_specimen <- '' + if (is.null(col_specimen)) { + filter_specimen <- NULL } # filter on specimen group and keyantibiotics when they are filled in - if (!is.na(filter_specimen) & filter_specimen != '') { + if (!is.null(filter_specimen)) { check_columns_existance(col_specimen, tbl) if (info == TRUE) { cat('[Criteria] Excluded other than specimen group \'', filter_specimen, '\'\n', sep = '') } - } else { - filter_specimen <- '' } - if (col_keyantibiotics %in% c(NA, '')) { - col_keyantibiotics <- '' - } else { + if (!is.null(col_keyantibiotics)) { tbl <- tbl %>% mutate(key_ab = tbl %>% pull(col_keyantibiotics)) } - if (is.na(testcodes_exclude[1])) { + if (is.null(testcodes_exclude)) { testcodes_exclude <- '' } @@ -234,10 +251,10 @@ first_isolate <- function(tbl, mutate(species = if_else(is.na(species) | species == "(no MO)", "", species), genus = if_else(is.na(genus) | genus == "(no MO)", "", genus)) - if (filter_specimen == '') { - + if (is.null(filter_specimen)) { + # not filtering on specimen if (icu_exclude == FALSE) { - if (info == TRUE & !is.na(col_icu)) { + if (info == TRUE & !is.null(col_icu)) { cat('[Criteria] Included isolates from ICU.\n') } tbl <- tbl %>% @@ -267,9 +284,9 @@ first_isolate <- function(tbl, } } else { - # sort on specimen and only analyse these row to save time + # filtering on specimen and only analyse these row to save time if (icu_exclude == FALSE) { - if (info == TRUE & !is.na(col_icu)) { + if (info == TRUE & !is.null(col_icu)) { cat('[Criteria] Included isolates from ICU.\n') } tbl <- tbl %>% @@ -344,7 +361,7 @@ first_isolate <- function(tbl, 0)) weighted.notice <- '' - if (col_keyantibiotics != '') { + if (!is.null(col_keyantibiotics)) { weighted.notice <- 'weighted ' if (info == TRUE) { if (type == 'keyantibiotics') { @@ -402,7 +419,7 @@ first_isolate <- function(tbl, # first one as TRUE all_first[row.start, 'real_first_isolate'] <- TRUE # no tests that should be included, or ICU - if (!is.na(col_testcode)) { + if (!is.null(col_testcode)) { all_first[which(all_first[, col_testcode] %in% tolower(testcodes_exclude)), 'real_first_isolate'] <- FALSE } if (icu_exclude == TRUE) { diff --git a/R/mdro.R b/R/mdro.R index 01818568..07480a05 100755 --- a/R/mdro.R +++ b/R/mdro.R @@ -29,6 +29,8 @@ #' @details When \code{country} will be left blank, guidelines will be taken from EUCAST Expert Rules Version 3.1 "Intrinsic Resistance and Exceptional Phenotypes Tables" (\url{http://www.eucast.org/fileadmin/src/media/PDFs/EUCAST_files/Expert_Rules/Expert_rules_intrinsic_exceptional_V3.1.pdf}). #' @return Ordered factor with levels \code{Unknown < Negative < Unconfirmed < Positive}. #' @rdname MDRO +#' @importFrom dplyr %>% +#' @importFrom crayon red blue #' @export #' @examples #' library(dplyr) @@ -38,7 +40,7 @@ #' BRMO = BRMO(.)) MDRO <- function(tbl, country = NULL, - col_mo = 'mo', + col_mo = NULL, info = TRUE, amcl = 'amcl', amik = 'amik', @@ -99,13 +101,21 @@ MDRO <- function(tbl, trim = 'trim', trsu = 'trsu', vanc = 'vanc', - col_bactid = 'bactid') { + col_bactid = NULL) { - if (col_bactid %in% colnames(tbl)) { + if (!is.data.frame(tbl)) { + stop("`tbl` must be a data frame.", call. = FALSE) + } + + # try to find columns based on type + # -- mo + if (!is.null(col_bactid)) { col_mo <- col_bactid warning("Use of `col_bactid` is deprecated. Use `col_mo` instead.") - } - if (!col_mo %in% colnames(tbl)) { + } else if (is.null(col_mo) & "mo" %in% lapply(tbl, class)) { + col_mo <- colnames(tbl)[lapply(tbl, class) == "mo"] + message("NOTE: Using column `", col_mo, "` as input for `col_mo`.") + } else if (!col_mo %in% colnames(tbl)) { stop('Column ', col_mo, ' not found.', call. = FALSE) } @@ -142,23 +152,17 @@ MDRO <- function(tbl, guideline$version <- 'Revision of December 2017' guideline$source <- 'https://www.rivm.nl/Documenten_en_publicaties/Professioneel_Praktisch/Richtlijnen/Infectieziekten/WIP_Richtlijnen/WIP_Richtlijnen/Ziekenhuizen/WIP_richtlijn_BRMO_Bijzonder_Resistente_Micro_Organismen_ZKH' # add here more countries like this: - # } else if (country$code == 'AA') { + # } else if (country$code == 'xx') { # country$name <- 'country name' } else { stop('This country code is currently unsupported: ', guideline$country$code, call. = FALSE) } - # Console colours - # source: http://www.tldp.org/HOWTO/Bash-Prompt-HOWTO/x329.html - ANSI_red <- "\033[31m" - ANSI_blue <- "\033[34m" - ANSI_reset <- "\033[0m" - if (info == TRUE) { cat("Determining multidrug-resistant organisms (MDRO), according to:\n", - "Guideline: ", ANSI_red, guideline$name, ", ", guideline$version, ANSI_reset, "\n", - "Country : ", ANSI_red, guideline$country$name, ANSI_reset, "\n", - "Source : ", ANSI_blue, guideline$source, ANSI_reset, "\n", + "Guideline: ", red(paste0(guideline$name, ", ", guideline$version, "\n")), + "Country : ", red(paste0(guideline$country$name, "\n")), + "Source : ", blue(paste0(guideline$source, "\n")), "\n", sep = "") } @@ -231,18 +235,11 @@ MDRO <- function(tbl, vanc <- col.list[vanc] # antibiotic classes - aminoglycosides <- c(tobr, gent, kana, neom, neti, siso) - tetracyclines <- c(doxy, mino, tetr) # since EUCAST v3.1 tige(cycline) is set apart - polymyxins <- c(poly, coli) - macrolides <- c(eryt, azit, roxi, clar) # since EUCAST v3.1 clin(damycin) is set apart - glycopeptides <- c(vanc, teic) - streptogramins <- qida # should officially also be pristinamycin and quinupristin/dalfopristin + aminoglycosides <- c(tobr, gent) # can also be kana but that one is often intrinsic R cephalosporins <- c(cfep, cfot, cfox, cfra, cfta, cftr, cfur, czol) cephalosporins_3rd <- c(cfot, cftr, cfta) carbapenems <- c(erta, imip, mero) - aminopenicillins <- c(ampi, amox) - ureidopenicillins <- pita # should officially also be azlo and mezlo - fluoroquinolones <- c(oflo, cipr, norf, levo, moxi) + fluoroquinolones <- c(oflo, cipr, levo, moxi) # helper function for editing the table trans_tbl <- function(to, rows, cols) { @@ -254,10 +251,15 @@ MDRO <- function(tbl, } } - # join microorganisms - tbl <- tbl %>% left_join_microorganisms(col_mo) + if (!tbl %>% pull(col_mo) %>% is.mo()) { + tbl[, col_mo] <- as.mo(tbl[, col_mo]) + } - tbl$MDRO <- NA_integer_ + tbl <- tbl %>% + # join to microorganisms data set + left_join_microorganisms(by = col_mo) %>% + # add unconfirmed to where genus is available + mutate(MDRO = ifelse(!is.na(genus), 1, NA_integer_)) if (guideline$country$code == 'eucast') { # EUCAST ------------------------------------------------------------------ @@ -327,6 +329,11 @@ MDRO <- function(tbl, & rowSums(tbl[, aminoglycosides] == 'R', na.rm = TRUE) >= 1 & rowSums(tbl[, fluoroquinolones] == 'R', na.rm = TRUE) >= 1 ), 'MDRO'] <- 4 + a <<- tbl[which( + tbl$family == 'Enterobacteriaceae' + & rowSums(tbl[, aminoglycosides] == 'R', na.rm = TRUE) >= 1 + & rowSums(tbl[, fluoroquinolones] == 'R', na.rm = TRUE) >= 1 + ), ] tbl[which( tbl$family == 'Enterobacteriaceae' & rowSums(tbl[, carbapenems] == 'R', na.rm = TRUE) >= 1 @@ -363,13 +370,18 @@ MDRO <- function(tbl, & tbl$MDRO == 1 ), 'MDRO'] <- 2 + tbl <- tbl %>% mutate( + psae = 0, + psae = ifelse(mero == "R" | imip == "R", psae + 1, psae), + psae = ifelse(gent == "R" & tobr == "R", psae + 1, psae), + psae = ifelse(cipr == "R", psae + 1, psae), + psae = ifelse(cfta == "R", psae + 1, psae), + psae = ifelse(pita == "R", psae + 1, psae), + psae = ifelse(is.na(psae), 0, psae) + ) tbl[which( tbl$fullname %like% 'Pseudomonas aeruginosa' - & sum(rowSums(tbl[, carbapenems] == 'R', na.rm = TRUE) >= 1, - rowSums(tbl[, aminoglycosides] == 'R', na.rm = TRUE) >= 1, - rowSums(tbl[, fluoroquinolones] == 'R', na.rm = TRUE) >= 1, - tbl[, cfta] == 'R', - tbl[, pita] == 'R') >= 3 + & tbl$psae >= 3 ), 'MDRO'] <- 4 # rest of Pseudomonas is negative tbl[which( @@ -405,7 +417,7 @@ MDRO <- function(tbl, factor(x = tbl$MDRO, levels = c(1:4), - labels = c('Unknown', 'Negative', 'Unconfirmed', 'Positive'), + labels = c('Not evaluated', 'Negative', 'Unconfirmed', 'Positive'), ordered = TRUE) } diff --git a/man/first_isolate.Rd b/man/first_isolate.Rd index 7e002f9c..6ce3cb5a 100755 --- a/man/first_isolate.Rd +++ b/man/first_isolate.Rd @@ -7,22 +7,23 @@ Methodology of this function is based on: \strong{M39 Analysis and Presentation of Cumulative Antimicrobial Susceptibility Test Data, 4th Edition}, 2014, \emph{Clinical and Laboratory Standards Institute (CLSI)}. \url{https://clsi.org/standards/products/microbiology/documents/m39/}. } \usage{ -first_isolate(tbl, col_date, col_patient_id, col_mo = NA, - col_testcode = NA, col_specimen = NA, col_icu = NA, - col_keyantibiotics = NA, episode_days = 365, - testcodes_exclude = "", icu_exclude = FALSE, filter_specimen = NA, - output_logical = TRUE, type = "keyantibiotics", ignore_I = TRUE, - points_threshold = 2, info = TRUE, col_bactid = NA, - col_genus = NA, col_species = NA) +first_isolate(tbl, col_date = NULL, col_patient_id = NULL, + col_mo = NULL, col_testcode = NULL, col_specimen = NULL, + col_icu = NULL, col_keyantibiotics = NULL, episode_days = 365, + testcodes_exclude = NULL, icu_exclude = FALSE, + filter_specimen = NULL, output_logical = TRUE, + type = "keyantibiotics", ignore_I = TRUE, points_threshold = 2, + info = TRUE, col_bactid = NULL, col_genus = NULL, + col_species = NULL) } \arguments{ \item{tbl}{a \code{data.frame} containing isolates.} -\item{col_date}{column name of the result date (or date that is was received on the lab)} +\item{col_date}{column name of the result date (or date that is was received on the lab), defaults to the first column of class \code{Date}} -\item{col_patient_id}{column name of the unique IDs of the patients} +\item{col_patient_id}{column name of the unique IDs of the patients, defaults to the first column that starts with 'patient' (case insensitive)} -\item{col_mo}{column name of the unique IDs of the microorganisms, see \code{\link{mo}}. If this column has another class than \code{"mo"}, values will be coerced using \code{\link{as.mo}}.} +\item{col_mo}{column name of the unique IDs of the microorganisms (see \code{\link{mo}}), defaults to the first column of class \code{mo}. If this column has another class than \code{"mo"}, values will be coerced using \code{\link{as.mo}}.} \item{col_testcode}{column name of the test codes. Use \code{col_testcode = NA} to \strong{not} exclude certain test codes (like test codes for screening). In that case \code{testcodes_exclude} will be ignored. Supports tidyverse-like quotation.} diff --git a/man/freq.Rd b/man/freq.Rd index 1dbec7be..06ed0b58 100755 --- a/man/freq.Rd +++ b/man/freq.Rd @@ -10,11 +10,12 @@ frequency_tbl(x, ..., sort.count = TRUE, nmax = getOption("max.print.freq"), na.rm = TRUE, row.names = TRUE, markdown = !interactive(), digits = 2, quote = FALSE, - header = !markdown, sep = " ") + header = !markdown, na = "", sep = " ") freq(x, ..., sort.count = TRUE, nmax = getOption("max.print.freq"), na.rm = TRUE, row.names = TRUE, markdown = !interactive(), - digits = 2, quote = FALSE, header = !markdown, sep = " ") + digits = 2, quote = FALSE, header = !markdown, na = "", + sep = " ") top_freq(f, n) @@ -30,7 +31,7 @@ top_freq(f, n) \item{nmax}{number of row to print. The default, \code{15}, uses \code{\link{getOption}("max.print.freq")}. Use \code{nmax = 0}, \code{nmax = Inf}, \code{nmax = NULL} or \code{nmax = NA} to print all rows.} -\item{na.rm}{a logical value indicating whether \code{NA} values should be removed from the frequency table. The header will always print the amount of \code{NA}s.} +\item{na.rm}{a logical value indicating whether \code{NA} values should be removed from the frequency table. The header (if set) will always print the amount of \code{NA}s.} \item{row.names}{a logical value indicating whether row indices should be printed as \code{1:nrow(x)}} @@ -42,6 +43,8 @@ top_freq(f, n) \item{header}{a logical value indicating whether an informative header should be printed} +\item{na}{a character string to should be used to show empty (\code{NA}) values (only useful when \code{na.rm = FALSE})} + \item{sep}{a character string to separate the terms when selecting multiple columns} \item{f}{a frequency table} diff --git a/man/key_antibiotics.Rd b/man/key_antibiotics.Rd index 386e3b3f..498a1ab2 100755 --- a/man/key_antibiotics.Rd +++ b/man/key_antibiotics.Rd @@ -20,7 +20,7 @@ key_antibiotics_equal(x, y, type = c("keyantibiotics", "points"), \arguments{ \item{tbl}{table with antibiotics coloms, like \code{amox} and \code{amcl}.} -\item{col_mo}{column name of the unique IDs of the microorganisms, see \code{\link{mo}}. If this column has another class than \code{"mo"}, values will be coerced using \code{\link{as.mo}}.} +\item{col_mo}{column name of the unique IDs of the microorganisms (see \code{\link{mo}}), defaults to the first column of class \code{mo}. If this column has another class than \code{"mo"}, values will be coerced using \code{\link{as.mo}}.} \item{universal_1, universal_2, universal_3, universal_4, universal_5, universal_6}{column names of \strong{broad-spectrum} antibiotics, case-insensitive} diff --git a/tests/testthat/test-first_isolate.R b/tests/testthat/test-first_isolate.R index c3a5cab6..f9b2a5a7 100755 --- a/tests/testthat/test-first_isolate.R +++ b/tests/testthat/test-first_isolate.R @@ -119,11 +119,18 @@ test_that("first isolates work", { # errors expect_error(first_isolate("date", "patient_id", col_mo = "mo")) - expect_error(first_isolate(septic_patients)) expect_error(first_isolate(septic_patients, col_date = "non-existing col", col_mo = "mo")) + # look for columns itself + expect_message(first_isolate(septic_patients)) + expect_message(first_isolate(septic_patients %>% + mutate(mo = as.character(mo)) %>% + left_join_microorganisms(), + col_genus = "genus", + col_species = "species")) + # if mo is not an mo class, result should be the same expect_identical(septic_patients %>% mutate(mo = as.character(mo)) %>% diff --git a/tests/testthat/test-mdro.R b/tests/testthat/test-mdro.R index 7e7e721f..9a5b0339 100755 --- a/tests/testthat/test-mdro.R +++ b/tests/testthat/test-mdro.R @@ -20,7 +20,7 @@ test_that("MDRO works", { # septic_patients should have these finding using Dutch guidelines expect_equal(outcome %>% freq() %>% pull(count), - c(2, 14)) # 2 unconfirmed, 14 positive + c(1167, 817, 14, 2)) # 1167 not eval., 817 neg, 14 pos, 2 unconfirmed expect_equal(BRMO(septic_patients, info = FALSE), MDRO(septic_patients, "nl", info = FALSE))