From 0b8084871dff8554588da30eedabad746b074bf8 Mon Sep 17 00:00:00 2001 From: "Matthijs S. Berends" Date: Sat, 22 Dec 2018 22:39:34 +0100 Subject: [PATCH] dplyr 0.8.0 support, fixes #7 --- DESCRIPTION | 4 +- NAMESPACE | 12 +- NEWS.md | 23 ++- R/age.R | 32 +-- R/deprecated.R | 51 ----- R/eucast_rules.R | 24 +-- R/first_isolate.R | 225 +++++++++++++-------- R/freq.R | 236 +++++++++++++---------- R/key_antibiotics.R | 42 ++-- R/mdro.R | 22 +-- R/misc.R | 10 +- R/mo.R | 4 +- R/resistance_predict.R | 17 +- R/rsi_calc.R | 2 +- man/AMR-deprecated.Rd | 9 - man/age.Rd | 8 +- man/age_groups.Rd | 6 +- man/eucast_rules.Rd | 4 +- man/first_isolate.Rd | 80 +++++--- man/freq.Rd | 14 +- man/key_antibiotics.Rd | 18 +- man/mdro.Rd | 5 +- man/resistance_predict.Rd | 8 +- tests/testthat/test-age.R | 8 +- tests/testthat/test-deprecated.R | 14 -- tests/testthat/test-first_isolate.R | 31 ++- tests/testthat/test-mdro.R | 4 +- tests/testthat/test-portion.R | 50 ----- tests/testthat/test-resistance_predict.R | 71 +++++++ 29 files changed, 555 insertions(+), 479 deletions(-) create mode 100644 tests/testthat/test-resistance_predict.R diff --git a/DESCRIPTION b/DESCRIPTION index d76ea4a0..e4009bc4 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR -Version: 0.5.0.9005 -Date: 2018-12-15 +Version: 0.5.0.9007 +Date: 2018-12-22 Title: Antimicrobial Resistance Analysis Authors@R: c( person( diff --git a/NAMESPACE b/NAMESPACE index 1f82d17b..11acfd14 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,7 +1,6 @@ # Generated by roxygen2: do not edit by hand S3method(as.data.frame,atc) -S3method(as.data.frame,bactid) S3method(as.data.frame,frequency_tbl) S3method(as.data.frame,mo) S3method(as.double,mic) @@ -21,13 +20,11 @@ S3method(plot,frequency_tbl) S3method(plot,mic) S3method(plot,rsi) S3method(print,atc) -S3method(print,bactid) S3method(print,frequency_tbl) S3method(print,mic) S3method(print,mo) S3method(print,rsi) S3method(pull,atc) -S3method(pull,bactid) S3method(pull,mo) S3method(skewness,data.frame) S3method(skewness,default) @@ -50,7 +47,6 @@ export(age) export(age_groups) export(anti_join_microorganisms) export(as.atc) -export(as.bactid) export(as.mic) export(as.mo) export(as.rsi) @@ -68,6 +64,8 @@ export(count_df) export(eucast_exceptional_phenotypes) export(eucast_rules) export(facet_rsi) +export(filter_first_isolate) +export(filter_first_weighted_isolate) export(first_isolate) export(freq) export(frequency_tbl) @@ -77,12 +75,10 @@ export(geom_rsi) export(get_locale) export(ggplot_rsi) export(guess_atc) -export(guess_bactid) export(guess_mo) export(inner_join_microorganisms) export(interpretive_reading) export(is.atc) -export(is.bactid) export(is.mic) export(is.mo) export(is.rsi) @@ -137,7 +133,6 @@ export(skewness) export(theme_rsi) export(top_freq) exportMethods(as.data.frame.atc) -exportMethods(as.data.frame.bactid) exportMethods(as.data.frame.frequency_tbl) exportMethods(as.data.frame.mo) exportMethods(as.double.mic) @@ -158,13 +153,11 @@ exportMethods(plot.frequency_tbl) exportMethods(plot.mic) exportMethods(plot.rsi) exportMethods(print.atc) -exportMethods(print.bactid) exportMethods(print.frequency_tbl) exportMethods(print.mic) exportMethods(print.mo) exportMethods(print.rsi) exportMethods(pull.atc) -exportMethods(pull.bactid) exportMethods(pull.mo) exportMethods(skewness) exportMethods(skewness.data.frame) @@ -214,6 +207,7 @@ importFrom(dplyr,left_join) importFrom(dplyr,mutate) importFrom(dplyr,mutate_all) importFrom(dplyr,mutate_at) +importFrom(dplyr,n) importFrom(dplyr,n_distinct) importFrom(dplyr,progress_estimated) importFrom(dplyr,pull) diff --git a/NEWS.md b/NEWS.md index 6f70e83f..96b766ed 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,10 +1,25 @@ # 0.5.0.90xx (latest development version) #### New +* **BREAKING**: removed deprecated functions, parameters and references to 'bactid'. Use `as.mo` to identify an MO code. +* Support for `dplyr` version 0.8.0 * Function `mo_failures` to review values that could not be coerced to a valid MO code, using `as.mo`. This latter function will now only show a maximum of 25 uncoerced values. * Function `mo_renamed` to get a list of all returned values from `as.mo` that have had taxonomic renaming * Function `age` to calculate the (patients) age in years -* Function `age_groups` to split ages into custom or predefined groups (like children or elderly). This allows for easier demographic antimicrobial resistance analysis (per age group). +* Function `age_groups` to split ages into custom or predefined groups (like children or elderly). This allows for easier demographic antimicrobial resistance analysis per age group. +* Functions `filter_first_isolate` and `filter_first_weighted_isolate()` to shorten and fasten filtering on data sets with antimicrobial results, e.g.: + ```r + septic_patients %>% filter_first_isolate() + # or + filter_first_isolate(septic_patients) + ``` + is the same as: + ```r + septic_patients %>% + mutate(only_firsts = first_isolate(septic_patients, ...)) %>% + filter(only_firsts == TRUE) %>% + select(-only_firsts) + ``` #### Changed * Improvements for `as.mo`: @@ -18,6 +33,8 @@ * Function `first_isolate`: * Will now use a column named like "patid" for the patient ID (parameter `col_patientid`), when this parameter was left blank * Will now use a column named like "key(...)ab" or "key(...)antibiotics" for the key antibiotics (parameter `col_keyantibiotics`), when this parameter was left blank + * Removed parameter `output_logical`, the function will now always return a logical value + * Renamed parameter `filter_specimen` to `specimen_group`, although using `filter_specimen` will still work * A note to the manual pages of the `portion` functions, that low counts can influence the outcome and that the `portion` functions may camouflage this, since they only return the portion (albeit being dependent on the `minimum` parameter) * Function `mo_taxonomy` now contains the kingdom too * Function `first_isolate` will now use a column named like "patid" for the patient ID, when this parameter was left blank @@ -29,7 +46,11 @@ * Now honours the `decimal.mark` setting, which just like `format` defaults to `getOption("OutDec")` * The new `big.mark` parameter will at default be `","` when `decimal.mark = "."` and `"."` otherwise * Fix for header text where all observations are `NA` + * New parameter `droplevels` to exclude empty factor levels when input is a factor + * Factor levels will be in header when present * Function `scale_y_percent` now has the `limits` parameter +* Automatic parameter filling for `mdro`, `key_antibiotics` and `eucast_rules` +* Updated examples for resistance prediction (`resistance_predict` function) #### Other * Updated licence text to emphasise GPL 2.0 and that this is an R package. diff --git a/R/age.R b/R/age.R index b1268344..3ab523cc 100644 --- a/R/age.R +++ b/R/age.R @@ -19,28 +19,28 @@ #' Age in years of individuals #' #' Calculates age in years based on a reference date, which is the sytem time at default. -#' @param x date(s) - will be coerced with \code{\link{as.POSIXlt}} -#' @param y reference date(s) - defaults to \code{\link{Sys.Date}} - will be coerced with \code{\link{as.POSIXlt}} +#' @param x date(s), will be coerced with \code{\link{as.POSIXlt}} +#' @param reference reference date(s) (defaults to today), will be coerced with \code{\link{as.POSIXlt}} #' @return Integer (no decimals) -#' @seealso age_groups +#' @seealso \code{\link{age_groups}} to splits age into groups #' @importFrom dplyr if_else #' @export -age <- function(x, y = Sys.Date()) { - if (length(x) != length(y)) { - if (length(y) == 1) { - y <- rep(y, length(x)) +age <- function(x, reference = Sys.Date()) { + if (length(x) != length(reference)) { + if (length(reference) == 1) { + reference <- rep(reference, length(x)) } else { - stop("`x` and `y` must be of same length, or `y` must be of length 1.") + stop("`x` and `reference` must be of same length, or `reference` must be of length 1.") } } x <- base::as.POSIXlt(x) - y <- base::as.POSIXlt(y) - if (any(y < x)) { - stop("`y` cannot be lower (older) than `x`.") + reference <- base::as.POSIXlt(reference) + if (any(reference < x)) { + stop("`reference` cannot be lower (older) than `x`.") } - years_gap <- y$year - x$year + years_gap <- reference$year - x$year # from https://stackoverflow.com/a/25450756/4575331 - ages <- if_else(y$mon < x$mon | (y$mon == x$mon & y$mday < x$mday), + ages <- if_else(reference$mon < x$mon | (reference$mon == x$mon & reference$mday < x$mday), as.integer(years_gap - 1), as.integer(years_gap)) if (any(ages > 120)) { @@ -51,9 +51,9 @@ age <- function(x, y = Sys.Date()) { #' Split ages into age groups #' -#' Splits ages into groups defined by the \code{split} parameter. +#' Split ages into age groups defined by the \code{split} parameter. This allows for easier demographic (antimicrobial resistance) analysis. #' @param x age, e.g. calculated with \code{\link{age}} -#' @param split_at values to split \code{x}, defaults to 0-11, 12-24, 26-54, 55-74 and 75+. See Details. +#' @param split_at values to split \code{x} at, defaults to age groups 0-11, 12-24, 26-54, 55-74 and 75+. See Details. #' @details To split ages, the input can be: #' \itemize{ #' \item{A numeric vector. A vector of \code{c(10, 20)} will split on 0-9, 10-19 and 20+. A value of only \code{50} will split on 0-49 and 50+. @@ -68,7 +68,7 @@ age <- function(x, y = Sys.Date()) { #' } #' @keywords age_group age #' @return Ordered \code{\link{factor}} -#' @seealso age +#' @seealso \code{\link{age}} to determine ages based on one or more reference dates #' @export #' @examples #' ages <- c(3, 8, 16, 54, 31, 76, 101, 43, 21) diff --git a/R/deprecated.R b/R/deprecated.R index 88e0b91f..f26f6119 100644 --- a/R/deprecated.R +++ b/R/deprecated.R @@ -23,57 +23,6 @@ #' @keywords internal #' @name AMR-deprecated #' @rdname AMR-deprecated -as.bactid <- function(...) { - .Deprecated("as.mo", package = "AMR") - as.mo(...) -} - -#' @rdname AMR-deprecated -#' @export -is.bactid <- function(...) { - .Deprecated(new = "is.mo", package = "AMR") - is.mo(...) -} - -#' @rdname AMR-deprecated -#' @export -guess_bactid <- function(...) { - .Deprecated(new = "guess_mo", package = "AMR") - guess_mo(...) -} - -#' @exportMethod print.bactid -#' @export -#' @noRd -print.bactid <- function(x, ...) { - cat("Class 'bactid'\n") - print.default(as.character(x), quote = FALSE) -} - -#' @exportMethod as.data.frame.bactid -#' @export -#' @noRd -as.data.frame.bactid <- function (x, ...) { - # same as as.data.frame.character but with removed stringsAsFactors - nm <- paste(deparse(substitute(x), width.cutoff = 500L), - collapse = " ") - if (!"nm" %in% names(list(...))) { - as.data.frame.vector(x, ..., nm = nm) - } else { - as.data.frame.vector(x, ...) - } -} - -#' @exportMethod pull.bactid -#' @export -#' @importFrom dplyr pull -#' @noRd -pull.bactid <- function(.data, ...) { - pull(as.data.frame(.data), ...) -} - -#' @rdname AMR-deprecated -#' @export ratio <- function(x, ratio) { .Deprecated(package = "AMR") diff --git a/R/eucast_rules.R b/R/eucast_rules.R index 8040bd2f..38167f36 100755 --- a/R/eucast_rules.R +++ b/R/eucast_rules.R @@ -24,7 +24,6 @@ #' @param rules a character vector that specifies which rules should be applied - one or more of \code{c("breakpoints", "expert", "other", "all")} #' @param verbose a logical to indicate whether extensive info should be returned as a \code{data.frame} with info about which rows and columns are effected #' @param amcl,amik,amox,ampi,azit,azlo,aztr,cefa,cfep,cfot,cfox,cfra,cfta,cftr,cfur,chlo,cipr,clar,clin,clox,coli,czol,dapt,doxy,erta,eryt,fosf,fusi,gent,imip,kana,levo,linc,line,mero,mezl,mino,moxi,nali,neom,neti,nitr,norf,novo,oflo,oxac,peni,pipe,pita,poly,pris,qida,rifa,roxi,siso,teic,tetr,tica,tige,tobr,trim,trsu,vanc column name of an antibiotic, see Antibiotics -#' @param col_bactid deprecated, use \code{col_mo} instead. #' @param ... parameters that are passed on to \code{eucast_rules} #' @inheritParams first_isolate #' @section Antibiotics: @@ -217,8 +216,7 @@ eucast_rules <- function(tbl, tobr = 'tobr', trim = 'trim', trsu = 'trsu', - vanc = 'vanc', - col_bactid = NULL) { + vanc = 'vanc') { EUCAST_VERSION_BREAKPOINTS <- "8.1, 2018" EUCAST_VERSION_EXPERT_RULES <- "3.1, 2016" @@ -229,12 +227,12 @@ eucast_rules <- function(tbl, # try to find columns based on type # -- mo - if (!is.null(col_bactid)) { - col_mo <- col_bactid - warning("Use of `col_bactid` is deprecated. Use `col_mo` instead.") - } else if (is.null(col_mo) & "mo" %in% lapply(tbl, class)) { - col_mo <- colnames(tbl)[lapply(tbl, class) == "mo"] - message("NOTE: Using column `", col_mo, "` as input for `col_mo`.") + if (is.null(col_mo) & "mo" %in% lapply(tbl, class)) { + col_mo <- colnames(tbl)[lapply(tbl, class) == "mo"][1] + message(blue(paste0("NOTE: Using column `", bold(col_mo), "` as input for `col_mo`."))) + } + if (is.null(col_mo)) { + stop("`col_mo` must be set.", call. = FALSE) } if (!all(rules %in% c("breakpoints", "expert", "other", "all"))) { @@ -1731,12 +1729,14 @@ eucast_rules <- function(tbl, } else { colour <- blue } + decimal.mark <- getOption("OutDec") + big.mark <- ifelse(decimal.mark != ",", ",", ".") cat(bold(paste('\n=> EUCAST rules', paste0(wouldve, 'affected'), - amount_affected_rows %>% length() %>% format(big.mark = ","), - 'out of', nrow(tbl_original) %>% format(big.mark = ","), + amount_affected_rows %>% length() %>% format(big.mark = big.mark, decimal.mark = decimal.mark), + 'out of', nrow(tbl_original) %>% format(big.mark = big.mark, decimal.mark = decimal.mark), 'rows ->', colour(paste0(wouldve, 'changed'), - amount_changed %>% format(big.mark = ","), 'test results.\n\n')))) + amount_changed %>% format(big.mark = big.mark, decimal.mark = decimal.mark), 'test results.\n\n')))) } if (verbose == TRUE) { diff --git a/R/first_isolate.R b/R/first_isolate.R index 9866b733..1b78ddc3 100755 --- a/R/first_isolate.R +++ b/R/first_isolate.R @@ -29,18 +29,32 @@ #' @param col_keyantibiotics column name of the key antibiotics to determine first \emph{weighted} isolates, see \code{\link{key_antibiotics}}. Defaults to the first column that starts with 'key' followed by 'ab' or 'antibiotics' (case insensitive). Use \code{col_keyantibiotics = FALSE} to prevent this. #' @param episode_days episode in days after which a genus/species combination will be determined as 'first isolate' again #' @param testcodes_exclude character vector with test codes that should be excluded (case-insensitive) -#' @param icu_exclude logical whether ICU isolates should be excluded -#' @param filter_specimen specimen group or type that should be excluded -#' @param output_logical return output as \code{logical} (will else be the values \code{0} or \code{1}) +#' @param icu_exclude logical whether ICU isolates should be excluded (rows with value \code{TRUE} in column \code{col_icu}) +#' @param specimen_group value in column \code{col_specimen} to filter on #' @param type type to determine weighed isolates; can be \code{"keyantibiotics"} or \code{"points"}, see Details #' @param ignore_I logical to determine whether antibiotic interpretations with \code{"I"} will be ignored when \code{type = "keyantibiotics"}, see Details #' @param points_threshold points until the comparison of key antibiotics will lead to inclusion of an isolate when \code{type = "points"}, see Details #' @param info print progress -#' @param col_bactid (deprecated, use \code{col_mo} instead) -#' @param col_genus (deprecated, use \code{col_mo} instead) column name of the genus of the microorganisms -#' @param col_species (deprecated, use \code{col_mo} instead) column name of the species of the microorganisms +#' @param ... parameters passed on to the \code{first_isolate} function #' @details \strong{WHY THIS IS SO IMPORTANT} \cr -#' To conduct an analysis of antimicrobial resistance, you should only include the first isolate of every patient per episode \href{https://www.ncbi.nlm.nih.gov/pubmed/17304462}{[1]}. If you would not do this, you could easily get an overestimate or underestimate of the resistance of an antibiotic. Imagine that a patient was admitted with an MRSA and that it was found in 5 different blood cultures the following week. The resistance percentage of oxacillin of all \emph{S. aureus} isolates would be overestimated, because you included this MRSA more than once. It would be \href{https://en.wikipedia.org/wiki/Selection_bias}{selection bias}. +#' To conduct an analysis of antimicrobial resistance, you should only include the first isolate of every patient per episode \href{https://www.ncbi.nlm.nih.gov/pubmed/17304462}{[1]}. If you would not do this, you could easily get an overestimate or underestimate of the resistance of an antibiotic. Imagine that a patient was admitted with an MRSA and that it was found in 5 different blood cultures the following week. The resistance percentage of oxacillin of all \emph{S. aureus} isolates would be overestimated, because you included this MRSA more than once. It would be \href{https://en.wikipedia.org/wiki/Selection_bias}{selection bias}. +#' +#' The function \code{filter_first_isolate} is essentially equal to: +#' \preformatted{ +#' tbl \%>\% +#' mutate(only_firsts = first_isolate(tbl, ...)) \%>\% +#' filter(only_firsts == TRUE) \%>\% +#' select(-only_firsts) +#' } +#' The function \code{filter_first_weighted_isolate} is essentially equal to: +#' \preformatted{ +#' tbl \%>\% +#' mutate(keyab = key_antibiotics(.)) \%>\% +#' mutate(only_weighted_firsts = first_isolate(tbl, +#' col_keyantibiotics = "keyab", ...)) \%>\% +#' filter(only_weighted_firsts == TRUE) \%>\% +#' select(-only_weighted_firsts) +#' } #' @section Key antibiotics: #' There are two ways to determine whether isolates can be included as first \emph{weighted} isolates which will give generally the same results: \cr #' @@ -49,31 +63,42 @@ #' #' \strong{2. Using} \code{type = "points"} \strong{and parameter} \code{points_threshold} \cr #' A difference from I to S|R (or vice versa) means 0.5 points, a difference from S to R (or vice versa) means 1 point. When the sum of points exceeds \code{points_threshold}, an isolate will be (re)selected as a first weighted isolate. +#' @rdname first_isolate #' @keywords isolate isolates first #' @seealso \code{\link{key_antibiotics}} #' @export #' @importFrom dplyr arrange_at lag between row_number filter mutate arrange -#' @return A vector to add to table, see Examples. +#' @importFrom crayon blue bold silver +#' @return Logical vector #' @source Methodology of this function is based on: \strong{M39 Analysis and Presentation of Cumulative Antimicrobial Susceptibility Test Data, 4th Edition}, 2014, \emph{Clinical and Laboratory Standards Institute (CLSI)}. \url{https://clsi.org/standards/products/microbiology/documents/m39/}. #' @examples #' # septic_patients is a dataset available in the AMR package. It is true, genuine data. #' ?septic_patients #' #' library(dplyr) -#' my_patients <- septic_patients %>% +#' # Filter on first isolates: +#' septic_patients %>% #' mutate(first_isolate = first_isolate(., #' col_date = "date", #' col_patient_id = "patient_id", -#' col_mo = "mo")) +#' col_mo = "mo")) %>% +#' filter(first_isolate == TRUE) +#' +#' # Which can be shortened to: +#' septic_patients %>% +#' filter_first_isolate() +#' # or for first weighted isolates: +#' septic_patients %>% +#' filter_first_weighted_isolate() #' #' # Now let's see if first isolates matter: -#' A <- my_patients %>% +#' A <- septic_patients %>% #' group_by(hospital_id) %>% #' summarise(count = n_rsi(gent), # gentamicin availability #' resistance = portion_IR(gent)) # gentamicin resistance #' -#' B <- my_patients %>% -#' filter(first_isolate == TRUE) %>% # the 1st isolate filter +#' B <- septic_patients %>% +#' filter_first_weighted_isolate() %>% # the 1st isolate filter #' group_by(hospital_id) %>% #' summarise(count = n_rsi(gent), # gentamicin availability #' resistance = portion_IR(gent)) # gentamicin resistance @@ -83,6 +108,7 @@ #' # Gentamicin resitance in hospital D appears to be 5.4% higher than #' # when you (erroneously) would have used all isolates! #' +#' #' ## OTHER EXAMPLES: #' #' \dontrun{ @@ -99,29 +125,29 @@ #' #' tbl$first_blood_isolate <- #' first_isolate(tbl, -#' filter_specimen = 'Blood') +#' specimen_group = 'Blood') #' #' tbl$first_blood_isolate_weighed <- #' first_isolate(tbl, -#' filter_specimen = 'Blood', +#' specimen_group = 'Blood', #' col_keyantibiotics = 'keyab') #' #' tbl$first_urine_isolate <- #' first_isolate(tbl, -#' filter_specimen = 'Urine') +#' specimen_group = 'Urine') #' #' tbl$first_urine_isolate_weighed <- #' first_isolate(tbl, -#' filter_specimen = 'Urine', +#' specimen_group = 'Urine', #' col_keyantibiotics = 'keyab') #' #' tbl$first_resp_isolate <- #' first_isolate(tbl, -#' filter_specimen = 'Respiratory') +#' specimen_group = 'Respiratory') #' #' tbl$first_resp_isolate_weighed <- #' first_isolate(tbl, -#' filter_specimen = 'Respiratory', +#' specimen_group = 'Respiratory', #' col_keyantibiotics = 'keyab') #' } first_isolate <- function(tbl, @@ -135,28 +161,34 @@ first_isolate <- function(tbl, episode_days = 365, testcodes_exclude = NULL, icu_exclude = FALSE, - filter_specimen = NULL, - output_logical = TRUE, + specimen_group = NULL, type = "keyantibiotics", ignore_I = TRUE, points_threshold = 2, info = TRUE, - col_bactid = NULL, - col_genus = NULL, - col_species = NULL) { + ...) { if (!is.data.frame(tbl)) { - stop("`tbl` must be a data frame.", call. = FALSE) + stop("`tbl` must be a data.frame.", call. = FALSE) + } + + dots <- unlist(list(...)) + if (length(dots) != 0) { + # backwards compatibility with old parameters + dots.names <- dots %>% names() + if ('filter_specimen' %in% dots.names) { + specimen_group <- dots[which(dots.names == 'filter_specimen')] + } } # try to find columns based on type # -- mo - if (!is.null(col_bactid)) { - col_mo <- col_bactid - warning("Use of `col_bactid` is deprecated. Use `col_mo` instead.") - } else if (is.null(col_mo) & "mo" %in% lapply(tbl, class)) { + if (is.null(col_mo) & "mo" %in% lapply(tbl, class)) { col_mo <- colnames(tbl)[lapply(tbl, class) == "mo"][1] - message("NOTE: Using column `", col_mo, "` as input for `col_mo`.") + message(blue(paste0("NOTE: Using column `", bold(col_mo), "` as input for `col_mo`."))) + } + if (is.null(col_mo)) { + stop("`col_mo` must be set.", call. = FALSE) } # -- date @@ -164,7 +196,7 @@ first_isolate <- function(tbl, for (i in 1:ncol(tbl)) { if ("Date" %in% class(tbl %>% pull(i)) | "POSIXct" %in% class(tbl %>% pull(i))) { col_date <- colnames(tbl)[i] - message("NOTE: Using column `", col_date, "` as input for `col_date`.") + message(blue(paste0("NOTE: Using column `", bold(col_date), "` as input for `col_date`."))) break } } @@ -178,7 +210,7 @@ first_isolate <- function(tbl, # -- patient id if (is.null(col_patient_id) & any(colnames(tbl) %like% "^(patient|patid)")) { col_patient_id <- colnames(tbl)[colnames(tbl) %like% "^(patient|patid)"][1] - message("NOTE: Using column `", col_patient_id, "` as input for `col_patient_id`.") + message(blue(paste0("NOTE: Using column `", bold(col_patient_id), "` as input for `col_patient_id`."))) } if (is.null(col_patient_id)) { stop("`col_patient_id` must be set.", call. = FALSE) @@ -187,18 +219,12 @@ first_isolate <- function(tbl, # -- key antibiotics if (is.null(col_keyantibiotics) & any(colnames(tbl) %like% "^key.*(ab|antibiotics)")) { col_keyantibiotics <- colnames(tbl)[colnames(tbl) %like% "^key.*(ab|antibiotics)"][1] - message("NOTE: Using column `", col_keyantibiotics, "` as input for `col_keyantibiotics`. Use col_keyantibiotics = FALSE to prevent this.") + message(blue(paste0("NOTE: Using column `", bold(col_keyantibiotics), "` as input for `col_keyantibiotics`. Use ", bold("col_keyantibiotics = FALSE"), " to prevent this."))) } if (isFALSE(col_keyantibiotics)) { col_keyantibiotics <- NULL } - # col_mo OR col_genus+col_species must be available - if (is.null(col_mo) & (is.null(col_genus) | is.null(col_species))) { - stop("`col_mo` or both `col_genus` and `col_species` must be set.", call. = FALSE) - } - - # check if columns exist check_columns_existance <- function(column, tblname = tbl) { if (NROW(tblname) <= 1 | NCOL(tblname) <= 1) { @@ -215,27 +241,23 @@ first_isolate <- function(tbl, check_columns_existance(col_date) check_columns_existance(col_patient_id) check_columns_existance(col_mo) - check_columns_existance(col_genus) - check_columns_existance(col_species) check_columns_existance(col_testcode) check_columns_existance(col_icu) check_columns_existance(col_keyantibiotics) - if (!is.null(col_mo)) { - # join to microorganisms data set - tbl <- tbl %>% - mutate_at(vars(col_mo), as.mo) %>% - left_join_microorganisms(by = col_mo) - col_genus <- "genus" - col_species <- "species" - } + # join to microorganisms data set + tbl <- tbl %>% + mutate_at(vars(col_mo), as.mo) %>% + left_join_microorganisms(by = col_mo) + col_genus <- "genus" + col_species <- "species" if (is.null(col_testcode)) { testcodes_exclude <- NULL } # remove testcodes if (!is.null(testcodes_exclude) & info == TRUE) { - cat('[Criteria] Excluded test codes:\n', toString(testcodes_exclude), '\n') + cat('[Criterion] Excluded test codes:\n', toString(testcodes_exclude), '\n') } if (is.null(col_icu)) { @@ -246,14 +268,14 @@ first_isolate <- function(tbl, } if (is.null(col_specimen)) { - filter_specimen <- NULL + specimen_group <- NULL } # filter on specimen group and keyantibiotics when they are filled in - if (!is.null(filter_specimen)) { + if (!is.null(specimen_group)) { check_columns_existance(col_specimen, tbl) if (info == TRUE) { - cat('[Criteria] Excluded other than specimen group \'', filter_specimen, '\'\n', sep = '') + cat('[Criterion] Excluded other than specimen group \'', specimen_group, '\'\n', sep = '') } } if (!is.null(col_keyantibiotics)) { @@ -274,11 +296,11 @@ first_isolate <- function(tbl, mutate(species = if_else(is.na(species) | species == "(no MO)", "", species), genus = if_else(is.na(genus) | genus == "(no MO)", "", genus)) - if (is.null(filter_specimen)) { + if (is.null(specimen_group)) { # not filtering on specimen if (icu_exclude == FALSE) { if (info == TRUE & !is.null(col_icu)) { - cat('[Criteria] Included isolates from ICU.\n') + cat('[Criterion] Included isolates from ICU.\n') } tbl <- tbl %>% arrange_at(c(col_patient_id, @@ -289,7 +311,7 @@ first_isolate <- function(tbl, row.end <- nrow(tbl) } else { if (info == TRUE) { - cat('[Criteria] Excluded isolates from ICU.\n') + cat('[Criterion] Excluded isolates from ICU.\n') } tbl <- tbl %>% arrange_at(c(col_icu, @@ -310,7 +332,7 @@ first_isolate <- function(tbl, # filtering on specimen and only analyse these row to save time if (icu_exclude == FALSE) { if (info == TRUE & !is.null(col_icu)) { - cat('[Criteria] Included isolates from ICU.\n') + cat('[Criterion] Included isolates from ICU.\n') } tbl <- tbl %>% arrange_at(c(col_specimen, @@ -319,14 +341,14 @@ first_isolate <- function(tbl, col_species, col_date)) suppressWarnings( - row.start <- which(tbl %>% pull(col_specimen) == filter_specimen) %>% min(na.rm = TRUE) + row.start <- which(tbl %>% pull(col_specimen) == specimen_group) %>% min(na.rm = TRUE) ) suppressWarnings( - row.end <- which(tbl %>% pull(col_specimen) == filter_specimen) %>% max(na.rm = TRUE) + row.end <- which(tbl %>% pull(col_specimen) == specimen_group) %>% max(na.rm = TRUE) ) } else { if (info == TRUE) { - cat('[Criteria] Excluded isolates from ICU.\n') + cat('[Criterion] Excluded isolates from ICU.\n') } tbl <- tbl %>% arrange_at(c(col_icu, @@ -336,11 +358,11 @@ first_isolate <- function(tbl, col_species, col_date)) suppressWarnings( - row.start <- which(tbl %>% pull(col_specimen) == filter_specimen + row.start <- which(tbl %>% pull(col_specimen) == specimen_group & tbl %>% pull(col_icu) == FALSE) %>% min(na.rm = TRUE) ) suppressWarnings( - row.end <- which(tbl %>% pull(col_specimen) == filter_specimen + row.end <- which(tbl %>% pull(col_specimen) == specimen_group & tbl %>% pull(col_icu) == FALSE) %>% max(na.rm = TRUE) ) } @@ -352,12 +374,10 @@ first_isolate <- function(tbl, message('No isolates found.') } # NAs where genus is unavailable - tbl <- tbl %>% - mutate(real_first_isolate = if_else(genus == '', NA, FALSE)) - if (output_logical == FALSE) { - tbl$real_first_isolate <- tbl %>% pull(real_first_isolate) %>% as.integer() - } - return(tbl %>% pull(real_first_isolate)) + return(tbl %>% + mutate(real_first_isolate = if_else(genus == '', NA, FALSE)) %>% + pull(real_first_isolate) + ) } # suppress warnings because dplyr want us to use library(dplyr) when using filter(row_number()) @@ -388,14 +408,14 @@ first_isolate <- function(tbl, weighted.notice <- 'weighted ' if (info == TRUE) { if (type == 'keyantibiotics') { - cat('[Criteria] Inclusion based on key antibiotics, ') + cat('[Criterion] Inclusion based on key antibiotics, ') if (ignore_I == FALSE) { cat('not ') } cat('ignoring I.\n') } if (type == 'points') { - cat(paste0('[Criteria] Inclusion based on key antibiotics, using points threshold of ' + cat(paste0('[Criterion] Inclusion based on key antibiotics, using points threshold of ' , points_threshold, '.\n')) } } @@ -458,19 +478,62 @@ first_isolate <- function(tbl, pull(real_first_isolate) if (info == TRUE) { - message(paste0('Found ', - all_first %>% sum(na.rm = TRUE), - ' first ', weighted.notice, 'isolates (', - (all_first %>% sum(na.rm = TRUE) / scope.size) %>% percent(), - ' of isolates in scope [where genus was not empty] and ', - (all_first %>% sum(na.rm = TRUE) / tbl %>% nrow()) %>% percent(), - ' of total)')) - } - - if (output_logical == FALSE) { - all_first <- all_first %>% as.integer() + decimal.mark <- getOption("OutDec") + big.mark <- ifelse(decimal.mark != ",", ",", ".") + n_found <- base::sum(all_first, na.rm = TRUE) + p_found_total <- percent(n_found / nrow(tbl), force_zero = TRUE) + p_found_scope <- percent(n_found / scope.size, force_zero = TRUE) + # mark up number of found + n_found <- base::format(n_found, big.mark = big.mark, decimal.mark = decimal.mark) + if (p_found_total != p_found_scope) { + msg_txt <- paste0("=> Found ", + bold(paste0(n_found, " first ", weighted.notice, "isolates")), + " (", p_found_scope, " within scope and ", p_found_total, " of total)") + } else { + msg_txt <- paste0("=> Found ", + bold(paste0(n_found, " first ", weighted.notice, "isolates")), + " (", p_found_total, " of total)") + } + base::message(msg_txt) } all_first } + +#' @rdname first_isolate +#' @importFrom dplyr filter +#' @export +filter_first_isolate <- function(tbl, + col_date = NULL, + col_patient_id = NULL, + col_mo = NULL, + ...) { + filter(tbl, first_isolate(tbl = tbl, + col_date = col_date, + col_patient_id = col_patient_id, + col_mo = col_mo, + ...)) +} + +#' @rdname first_isolate +#' @importFrom dplyr %>% mutate filter +#' @export +filter_first_weighted_isolate <- function(tbl, + col_date = NULL, + col_patient_id = NULL, + col_mo = NULL, + col_keyantibiotics = NULL, + ...) { + tbl_keyab <- tbl %>% + mutate(keyab = suppressMessages(key_antibiotics(., + col_mo = col_mo, + ...))) %>% + mutate(firsts = first_isolate(., + col_date = col_date, + col_patient_id = col_patient_id, + col_mo = col_mo, + col_keyantibiotics = "keyab", + ...)) + tbl[which(tbl_keyab$firsts == TRUE),] +} diff --git a/R/freq.R b/R/freq.R index 9d8e2819..cbafcaf0 100755 --- a/R/freq.R +++ b/R/freq.R @@ -31,6 +31,7 @@ #' @param header a logical value indicating whether an informative header should be printed #' @param title text to show above frequency table, at default to tries to coerce from the variables passed to \code{x} #' @param na a character string to should be used to show empty (\code{NA}) values (only useful when \code{na.rm = FALSE}) +#' @param droplevels a logical value indicating whether in factors empty levels should be dropped #' @param sep a character string to separate the terms when selecting multiple columns #' @inheritParams base::format #' @param f a frequency table @@ -56,11 +57,12 @@ #' \item{Median, using \code{\link[stats]{median}}, with percentage since oldest} #' } #' +#' In factors, all factor levels that are not existing in the input data will be dropped. #' #' The function \code{top_freq} uses \code{\link[dplyr]{top_n}} internally and will include more than \code{n} rows if there are ties. #' @importFrom stats fivenum sd mad #' @importFrom grDevices boxplot.stats -#' @importFrom dplyr %>% arrange arrange_at desc filter_at funs group_by mutate mutate_at n_distinct pull select summarise tibble ungroup vars all_vars +#' @importFrom dplyr %>% arrange arrange_at desc filter_at funs group_by mutate mutate_at n n_distinct pull select summarise tibble ungroup vars all_vars #' @importFrom utils browseVignettes #' @importFrom hms is.hms #' @importFrom crayon red green silver @@ -183,6 +185,7 @@ frequency_tbl <- function(x, header = !markdown, title = NULL, na = "", + droplevels = TRUE, sep = " ", decimal.mark = getOption("OutDec"), big.mark = ifelse(decimal.mark != ",", ",", ".")) { @@ -190,23 +193,23 @@ frequency_tbl <- function(x, mult.columns <- 0 x.group = character(0) df <- NULL - + # x_haslevels <- !is.null(levels(x)) x.name <- NULL cols <- NULL - if (any(class(x) == 'list')) { + if (any(class(x) == "list")) { cols <- names(x) x <- as.data.frame(x, stringsAsFactors = FALSE) x.name <- "a list" - } else if (any(class(x) == 'matrix')) { + } else if (any(class(x) == "matrix")) { x <- as.data.frame(x, stringsAsFactors = FALSE) x.name <- "a matrix" cols <- colnames(x) - if (all(cols %like% 'V[0-9]')) { + if (all(cols %like% "V[0-9]")) { cols <- NULL } } - if (any(class(x) == 'data.frame')) { + if (any(class(x) == "data.frame")) { x.group <- group_vars(x) if (length(x.group) > 1) { x.group <- x.group[1L] @@ -225,13 +228,18 @@ frequency_tbl <- function(x, if (ndots < 10) { cols <- as.character(dots) if (!all(cols %in% colnames(x))) { - stop("one or more columns not found: `", paste(cols, collapse = "`, `"), '`', call. = FALSE) + stop("one or more columns not found: `", paste(cols, collapse = "`, `"), "`", call. = FALSE) } if (length(x.group) > 0) { x.group_cols <- c(x.group, cols) - df <- x %>% - group_by_at(vars(x.group_cols)) %>% - summarise(count = n()) + # if (droplevels == TRUE) { + # x <- x %>% mutate_at(vars(x.group_cols), droplevels) + # } + suppressWarnings( + df <- x %>% + group_by_at(vars(x.group_cols)) %>% + summarise(count = n()) + ) if (na.rm == TRUE) { df <- df %>% filter_at(vars(cols), all_vars(!is.na(.))) } @@ -250,16 +258,21 @@ frequency_tbl <- function(x, mutate_at(vars(x.group), funs(ifelse(lag(.) == ., "", .))) df[1, 1] <- df.topleft colnames(df)[1:2] <- c("group", "item") + + if (!is.null(levels(df$item)) & droplevels == TRUE) { + # is factor + df <- df %>% filter(count != 0) + } } if (length(cols) > 0) { x <- x[, cols] } } else if (ndots >= 10) { - stop('A maximum of 9 columns can be analysed at the same time.', call. = FALSE) + stop("A maximum of 9 columns can be analysed at the same time.", call. = FALSE) } else { cols <- NULL } - } else if (any(class(x) == 'table')) { + } else if (any(class(x) == "table")) { x <- as.data.frame(x, stringsAsFactors = FALSE) # now this DF contains 3 columns: the 2 vars and a Freq column # paste the first 2 cols and repeat them Freq times: @@ -274,18 +287,18 @@ frequency_tbl <- function(x, } if (!is.null(ncol(x))) { - if (ncol(x) == 1 & any(class(x) == 'data.frame')) { + if (ncol(x) == 1 & any(class(x) == "data.frame")) { x <- x %>% pull(1) } else if (ncol(x) < 10) { mult.columns <- ncol(x) x <- do.call(paste, c(x[colnames(x)], sep = sep)) } else { - stop('A maximum of 9 columns can be analysed at the same time.', call. = FALSE) + stop("A maximum of 9 columns can be analysed at the same time.", call. = FALSE) } } if (mult.columns > 1) { - NAs <- x[is.na(x) | x == trimws(strrep('NA ', mult.columns))] + NAs <- x[is.na(x) | x == trimws(strrep("NA ", mult.columns))] } else { NAs <- x[is.na(x)] } @@ -296,91 +309,109 @@ frequency_tbl <- function(x, class(x) <- x_class } - # if (sort.count == FALSE & 'factor' %in% class(x)) { - # warning("Sorting a factor sorts on factor level, not necessarily alphabetically.", call. = FALSE) - # } header_txt <- character(0) - markdown_line <- '' + markdown_line <- "" if (markdown == TRUE) { - markdown_line <- '\n' + markdown_line <- "\n" } - x_align <- 'l' + x_align <- "l" if (mult.columns > 0) { - header_txt <- header_txt %>% paste0(markdown_line, 'Columns: ', mult.columns) + header_txt <- header_txt %>% paste0(markdown_line, "Columns: ", mult.columns) } else { - header_txt <- header_txt %>% paste0(markdown_line, 'Class: ', class(x) %>% rev() %>% paste(collapse = " > ")) + header_txt <- header_txt %>% paste0(markdown_line, "Class: ", class(x) %>% rev() %>% paste(collapse = " > ")) if (!mode(x) %in% class(x)) { header_txt <- header_txt %>% paste0(silver(paste0(" (", mode(x), ")"))) } } if ((length(NAs) + length(x) > 0) > 0) { - na_txt <- paste0(NAs %>% length() %>% format(decimal.mark = decimal.mark, big.mark = big.mark), ' = ', + na_txt <- paste0(NAs %>% length() %>% format(decimal.mark = decimal.mark, big.mark = big.mark), " = ", (NAs %>% length() / (NAs %>% length() + x %>% length())) %>% percent(force_zero = TRUE, round = digits, decimal.mark = decimal.mark) %>% - sub('NaN', '0', ., fixed = TRUE)) + sub("NaN", "0", ., fixed = TRUE)) if (!na_txt %like% "^0 =") { na_txt <- red(na_txt) } else { na_txt <- green(na_txt) } - na_txt <- paste0('(of which NA: ', na_txt, ')') + na_txt <- paste0("(of which NA: ", na_txt, ")") } else { na_txt <- "" } - header_txt <- header_txt %>% paste0(markdown_line, '\nLength: ', (NAs %>% length() + x %>% length()) %>% format(decimal.mark = decimal.mark, big.mark = big.mark), - ' ', na_txt) - header_txt <- header_txt %>% paste0(markdown_line, '\nUnique: ', x %>% n_distinct() %>% format(decimal.mark = decimal.mark, big.mark = big.mark)) + if (!is.null(levels(x))) { + n_levels <- x %>% levels() %>% length() + n_levels_empty <- n_levels - x %>% droplevels() %>% levels() %>% length() + n_levels_list <- levels(x) + if (n_levels > 5) { + n_levels_list <- c(n_levels_list[1:5], "...") + } + if (is.ordered(x)) { + n_levels_list <- paste0(levels(x), collapse = " < ") + } else { + n_levels_list <- paste0(levels(x), collapse = ", ") + } + + header_txt <- header_txt %>% paste0(markdown_line, "\nLevels: ", n_levels_list) + # drop levels of non-existing factor values, + # since dplyr >= 0.8.0 does not do this anymore in group_by + if (droplevels == TRUE) { + x <- droplevels(x) + } + } + + header_txt <- header_txt %>% paste0(markdown_line, "\nLength: ", (NAs %>% length() + x %>% length()) %>% format(decimal.mark = decimal.mark, big.mark = big.mark), + " ", na_txt) + header_txt <- header_txt %>% paste0(markdown_line, "\nUnique: ", x %>% n_distinct() %>% format(decimal.mark = decimal.mark, big.mark = big.mark)) if (NROW(x) > 0 & any(class(x) == "character")) { - header_txt <- header_txt %>% paste0('\n') - header_txt <- header_txt %>% paste0(markdown_line, '\nShortest: ', x %>% base::nchar() %>% base::min(na.rm = TRUE) %>% format(decimal.mark = decimal.mark, big.mark = big.mark)) - header_txt <- header_txt %>% paste0(markdown_line, '\nLongest: ', x %>% base::nchar() %>% base::max(na.rm = TRUE) %>% format(decimal.mark = decimal.mark, big.mark = big.mark)) + header_txt <- header_txt %>% paste0("\n") + header_txt <- header_txt %>% paste0(markdown_line, "\nShortest: ", x %>% base::nchar() %>% base::min(na.rm = TRUE) %>% format(decimal.mark = decimal.mark, big.mark = big.mark)) + header_txt <- header_txt %>% paste0(markdown_line, "\nLongest: ", x %>% base::nchar() %>% base::max(na.rm = TRUE) %>% format(decimal.mark = decimal.mark, big.mark = big.mark)) } if (NROW(x) > 0 & any(class(x) == "mo")) { - header_txt <- header_txt %>% paste0('\n') - header_txt <- header_txt %>% paste0(markdown_line, '\nFamilies: ', x %>% mo_family() %>% n_distinct() %>% format(decimal.mark = decimal.mark, big.mark = big.mark)) - header_txt <- header_txt %>% paste0(markdown_line, '\nGenera: ', x %>% mo_genus() %>% n_distinct() %>% format(decimal.mark = decimal.mark, big.mark = big.mark)) - header_txt <- header_txt %>% paste0(markdown_line, '\nSpecies: ', x %>% mo_species() %>% n_distinct() %>% format(decimal.mark = decimal.mark, big.mark = big.mark)) + header_txt <- header_txt %>% paste0("\n") + header_txt <- header_txt %>% paste0(markdown_line, "\nFamilies: ", x %>% mo_family() %>% n_distinct() %>% format(decimal.mark = decimal.mark, big.mark = big.mark)) + header_txt <- header_txt %>% paste0(markdown_line, "\nGenera: ", x %>% mo_genus() %>% n_distinct() %>% format(decimal.mark = decimal.mark, big.mark = big.mark)) + header_txt <- header_txt %>% paste0(markdown_line, "\nSpecies: ", x %>% mo_species() %>% n_distinct() %>% format(decimal.mark = decimal.mark, big.mark = big.mark)) } if (NROW(x) > 0 & any(class(x) == "difftime") & !is.hms(x)) { - header_txt <- header_txt %>% paste0('\n') - header_txt <- header_txt %>% paste(markdown_line, '\nUnits: ', attributes(x)$units) + header_txt <- header_txt %>% paste0("\n") + header_txt <- header_txt %>% paste(markdown_line, "\nUnits: ", attributes(x)$units) x <- as.double(x) # after this, the numeric header_txt continues } - if (NROW(x) > 0 & any(class(x) %in% c('double', 'integer', 'numeric', 'raw', 'single'))) { + if (NROW(x) > 0 & any(class(x) %in% c("double", "integer", "numeric", "raw", "single"))) { # right align number Tukey_five <- stats::fivenum(x, na.rm = TRUE) - x_align <- 'r' - header_txt <- header_txt %>% paste0('\n') - header_txt <- header_txt %>% paste(markdown_line, '\nMean: ', x %>% base::mean(na.rm = TRUE) %>% format(digits = digits, decimal.mark = decimal.mark, big.mark = big.mark)) - header_txt <- header_txt %>% paste0(markdown_line, '\nStd. dev.: ', x %>% stats::sd(na.rm = TRUE) %>% format(digits = digits, decimal.mark = decimal.mark, big.mark = big.mark), - ' (CV: ', x %>% cv(na.rm = TRUE) %>% format(digits = digits, decimal.mark = decimal.mark, big.mark = big.mark), - ', MAD: ', x %>% stats::mad(na.rm = TRUE) %>% format(digits = digits, decimal.mark = decimal.mark, big.mark = big.mark), ')') - header_txt <- header_txt %>% paste0(markdown_line, '\nFive-Num: ', Tukey_five %>% format(digits = digits, decimal.mark = decimal.mark, big.mark = big.mark) %>% trimws() %>% paste(collapse = ' | '), - ' (IQR: ', (Tukey_five[4] - Tukey_five[2]) %>% format(digits = digits, decimal.mark = decimal.mark, big.mark = big.mark), - ', CQV: ', x %>% cqv(na.rm = TRUE) %>% format(digits = digits, decimal.mark = decimal.mark, big.mark = big.mark), ')') + x_align <- "r" + header_txt <- header_txt %>% paste0("\n") + header_txt <- header_txt %>% paste(markdown_line, "\nMean: ", x %>% base::mean(na.rm = TRUE) %>% format(digits = digits, decimal.mark = decimal.mark, big.mark = big.mark)) + header_txt <- header_txt %>% paste0(markdown_line, "\nStd. dev.: ", x %>% stats::sd(na.rm = TRUE) %>% format(digits = digits, decimal.mark = decimal.mark, big.mark = big.mark), + " (CV: ", x %>% cv(na.rm = TRUE) %>% format(digits = digits, decimal.mark = decimal.mark, big.mark = big.mark), + ", MAD: ", x %>% stats::mad(na.rm = TRUE) %>% format(digits = digits, decimal.mark = decimal.mark, big.mark = big.mark), ")") + header_txt <- header_txt %>% paste0(markdown_line, "\nFive-Num: ", Tukey_five %>% format(digits = digits, decimal.mark = decimal.mark, big.mark = big.mark) %>% trimws() %>% paste(collapse = " | "), + " (IQR: ", (Tukey_five[4] - Tukey_five[2]) %>% format(digits = digits, decimal.mark = decimal.mark, big.mark = big.mark), + ", CQV: ", x %>% cqv(na.rm = TRUE) %>% format(digits = digits, decimal.mark = decimal.mark, big.mark = big.mark), ")") outlier_length <- length(boxplot.stats(x)$out) - header_txt <- header_txt %>% paste0(markdown_line, '\nOutliers: ', outlier_length) + header_txt <- header_txt %>% paste0(markdown_line, "\nOutliers: ", outlier_length) if (outlier_length > 0) { - header_txt <- header_txt %>% paste0(' (unique count: ', boxplot.stats(x)$out %>% n_distinct(), ')') + header_txt <- header_txt %>% paste0(" (unique count: ", boxplot.stats(x)$out %>% n_distinct(), ")") } } if (NROW(x) > 0 & any(class(x) == "rsi")) { - header_txt <- header_txt %>% paste0('\n') + header_txt <- header_txt %>% paste0("\n") cnt_S <- sum(x == "S", na.rm = TRUE) cnt_IR <- sum(x %in% c("I", "R"), na.rm = TRUE) - header_txt <- header_txt %>% paste(markdown_line, '\n%IR: ', + header_txt <- header_txt %>% paste(markdown_line, "\n%IR: ", (cnt_IR / sum(!is.na(x), na.rm = TRUE)) %>% percent(force_zero = TRUE, round = digits, decimal.mark = decimal.mark), - paste0('(ratio S : IR = 1.0 : ', (cnt_IR / cnt_S) %>% format(digits = 1, nsmall = 1, decimal.mark = decimal.mark, big.mark = big.mark), ")")) + paste0("(ratio S : IR = 1.0 : ", (cnt_IR / cnt_S) %>% format(digits = 1, nsmall = 1, decimal.mark = decimal.mark, big.mark = big.mark), ")")) if (NROW(x) < 30) { - header_txt <- header_txt %>% paste(markdown_line, red('\nToo few isolates for reliable resistance interpretation.')) + header_txt <- header_txt %>% paste(markdown_line, red("\nToo few isolates for reliable resistance interpretation.")) } } @@ -389,29 +420,29 @@ frequency_tbl <- function(x, x <- x %>% as.POSIXlt() formatdates <- "%H:%M:%S" } - if (NROW(x) > 0 & any(class(x) %in% c('Date', 'POSIXct', 'POSIXlt'))) { - header_txt <- header_txt %>% paste0('\n') + if (NROW(x) > 0 & any(class(x) %in% c("Date", "POSIXct", "POSIXlt"))) { + header_txt <- header_txt %>% paste0("\n") mindate <- x %>% min(na.rm = TRUE) maxdate <- x %>% max(na.rm = TRUE) - maxdate_days <- difftime(maxdate, mindate, units = 'auto') %>% as.double() + maxdate_days <- difftime(maxdate, mindate, units = "auto") %>% as.double() mediandate <- x %>% median(na.rm = TRUE) - median_days <- difftime(mediandate, mindate, units = 'auto') %>% as.double() + median_days <- difftime(mediandate, mindate, units = "auto") %>% as.double() if (formatdates == "%H:%M:%S") { # hms - header_txt <- header_txt %>% paste0(markdown_line, '\nEarliest: ', mindate %>% format(formatdates) %>% trimws()) - header_txt <- header_txt %>% paste0(markdown_line, '\nLatest: ', maxdate %>% format(formatdates) %>% trimws(), - ' (+', difftime(maxdate, mindate, units = 'mins') %>% as.double() %>% format(digits = digits, decimal.mark = decimal.mark, big.mark = big.mark), ' min.)') + header_txt <- header_txt %>% paste0(markdown_line, "\nEarliest: ", mindate %>% format(formatdates) %>% trimws()) + header_txt <- header_txt %>% paste0(markdown_line, "\nLatest: ", maxdate %>% format(formatdates) %>% trimws(), + " (+", difftime(maxdate, mindate, units = "mins") %>% as.double() %>% format(digits = digits, decimal.mark = decimal.mark, big.mark = big.mark), " min.)") } else { # other date formats - header_txt <- header_txt %>% paste0(markdown_line, '\nOldest: ', mindate %>% format(formatdates) %>% trimws()) - header_txt <- header_txt %>% paste0(markdown_line, '\nNewest: ', maxdate %>% format(formatdates) %>% trimws(), - ' (+', difftime(maxdate, mindate, units = 'auto') %>% as.double() %>% format(digits = digits, decimal.mark = decimal.mark, big.mark = big.mark), ')') + header_txt <- header_txt %>% paste0(markdown_line, "\nOldest: ", mindate %>% format(formatdates) %>% trimws()) + header_txt <- header_txt %>% paste0(markdown_line, "\nNewest: ", maxdate %>% format(formatdates) %>% trimws(), + " (+", difftime(maxdate, mindate, units = "auto") %>% as.double() %>% format(digits = digits, decimal.mark = decimal.mark, big.mark = big.mark), ")") } - header_txt <- header_txt %>% paste0(markdown_line, '\nMedian: ', mediandate %>% format(formatdates) %>% trimws(), - ' (~', percent(median_days / maxdate_days, round = 0, decimal.mark = decimal.mark), ')') + header_txt <- header_txt %>% paste0(markdown_line, "\nMedian: ", mediandate %>% format(formatdates) %>% trimws(), + " (~", percent(median_days / maxdate_days, round = 0, decimal.mark = decimal.mark), ")") } - if (any(class(x) == 'POSIXlt')) { + if (any(class(x) == "POSIXlt")) { x <- x %>% format(formatdates) } @@ -427,9 +458,9 @@ frequency_tbl <- function(x, nmax <- length(x) } - column_names <- c('Item', 'Count', 'Percent', 'Cum. Count', 'Cum. Percent') - column_names_df <- c('item', 'count', 'percent', 'cum_count', 'cum_percent') - column_align <- c(x_align, 'r', 'r', 'r', 'r') + column_names <- c("Item", "Count", "Percent", "Cum. Count", "Cum. Percent") + column_names_df <- c("item", "count", "percent", "cum_count", "cum_percent") + column_align <- c(x_align, "r", "r", "r", "r") if (is.null(df)) { # create table with counts and percentages @@ -449,10 +480,10 @@ frequency_tbl <- function(x, column_align <- c("l", column_align) } - if (df$item %>% paste(collapse = ',') %like% '\033') { + if (df$item %>% paste(collapse = ",") %like% "\033") { # remove escape char # see https://en.wikipedia.org/wiki/Escape_character#ASCII_escape_character - df <- df %>% mutate(item = item %>% gsub('\033', ' ', ., fixed = TRUE)) + df <- df %>% mutate(item = item %>% gsub("\033", " ", ., fixed = TRUE)) } if (quote == TRUE) { @@ -475,9 +506,9 @@ frequency_tbl <- function(x, } if (markdown == TRUE) { - tbl_format <- 'markdown' + tbl_format <- "markdown" } else { - tbl_format <- 'pandoc' + tbl_format <- "pandoc" } if (!is.null(title)) { @@ -485,7 +516,7 @@ frequency_tbl <- function(x, } structure(.Data = df, - class = c('frequency_tbl', class(df)), + class = c("frequency_tbl", class(df)), opt = list(title = title, data = x.name, vars = cols, @@ -511,11 +542,11 @@ freq <- frequency_tbl #' @export #' @importFrom dplyr top_n pull top_freq <- function(f, n) { - if (!'frequency_tbl' %in% class(f)) { - stop('top_freq can only be applied to frequency tables', call. = FALSE) + if (!"frequency_tbl" %in% class(f)) { + stop("top_freq can only be applied to frequency tables", call. = FALSE) } if (!is.numeric(n) | length(n) != 1L) { - stop('For top_freq, `nmax` must be a number of length 1', call. = FALSE) + stop("For top_freq, `nmax` must be a number of length 1", call. = FALSE) } top <- f %>% top_n(n, count) vect <- top %>% pull(item) @@ -562,10 +593,10 @@ diff.frequency_tbl <- function(x, y, ...) { diff.percent = percent( diff / count.x, force_zero = TRUE)) %>% - mutate(diff = ifelse(diff %like% '^-', + mutate(diff = ifelse(diff %like% "^-", diff, paste0("+", diff)), - diff.percent = ifelse(diff.percent %like% '^-', + diff.percent = ifelse(diff.percent %like% "^-", diff.percent, paste0("+", diff.percent))) @@ -590,7 +621,7 @@ print.frequency_tbl <- function(x, nmax = getOption("max.print.freq", default = big.mark = ifelse(decimal.mark != ",", ",", "."), ...) { - opt <- attr(x, 'opt') + opt <- attr(x, "opt") if (length(opt$vars) == 0) { opt$vars <- NULL @@ -666,7 +697,7 @@ print.frequency_tbl <- function(x, nmax = getOption("max.print.freq", default = } if (NROW(x) == 0) { - cat('\n\nNo observations.\n') + cat("\n\nNo observations.\n") return(invisible()) } @@ -680,7 +711,7 @@ print.frequency_tbl <- function(x, nmax = getOption("max.print.freq", default = if (nrow(x) > opt$nmax & opt$tbl_format != "markdown") { x.rows <- nrow(x) - x.unprinted <- base::sum(x[(opt$nmax + 1):nrow(x), 'count'], na.rm = TRUE) + x.unprinted <- base::sum(x[(opt$nmax + 1):nrow(x), "count"], na.rm = TRUE) x.printed <- base::sum(x$count) - x.unprinted if (opt$nmax.set == TRUE) { @@ -692,18 +723,18 @@ print.frequency_tbl <- function(x, nmax = getOption("max.print.freq", default = x <- x[1:nmax,] if (opt$nmax.set == TRUE) { - footer <- paste('[ reached `nmax = ', opt$nmax, '`', sep = '') + footer <- paste("[ reached `nmax = ", opt$nmax, "`", sep = "") } else { footer <- '[ reached getOption("max.print.freq")' } footer <- paste(footer, - ' -- omitted ', + " -- omitted ", format(x.rows - opt$nmax, big.mark = opt$big.mark), - ' entries, n = ', + " entries, n = ", format(x.unprinted, big.mark = opt$big.mark), - ' (', + " (", (x.unprinted / (x.unprinted + x.printed)) %>% percent(force_zero = TRUE, decimal.mark = opt$decimal.mark), - ') ]\n', sep = '') + ") ]\n", sep = "") if (opt$tbl_format == "pandoc") { footer <- silver(footer) # only silver in regular printing } @@ -712,7 +743,7 @@ print.frequency_tbl <- function(x, nmax = getOption("max.print.freq", default = } if ("item" %in% colnames(x)) { - if (any(class(x$item) %in% c('double', 'integer', 'numeric', 'raw', 'single'))) { + if (any(class(x$item) %in% c("double", "integer", "numeric", "raw", "single"))) { x$item <- format(x$item, decimal.mark = opt$decimal.mark, big.mark = opt$big.mark) } } else { @@ -720,7 +751,7 @@ print.frequency_tbl <- function(x, nmax = getOption("max.print.freq", default = } if ("count" %in% colnames(x)) { if (all(x$count == 1)) { - warning('All observations are unique.', call. = FALSE) + warning("All observations are unique.", call. = FALSE) } x$count <- format(x$count, decimal.mark = opt$decimal.mark, big.mark = opt$big.mark) } else { @@ -762,7 +793,7 @@ print.frequency_tbl <- function(x, nmax = getOption("max.print.freq", default = if (opt$tbl_format == "markdown") { cat("\n\n") } else { - cat('\n') + cat("\n") } # reset old kable setting @@ -775,8 +806,8 @@ print.frequency_tbl <- function(x, nmax = getOption("max.print.freq", default = #' @exportMethod as.data.frame.frequency_tbl #' @export as.data.frame.frequency_tbl <- function(x, ...) { - attr(x, 'package') <- NULL - attr(x, 'opt') <- NULL + attr(x, "package") <- NULL + attr(x, "opt") <- NULL as.data.frame.data.frame(x, ...) } @@ -785,8 +816,8 @@ as.data.frame.frequency_tbl <- function(x, ...) { #' @export #' @importFrom dplyr as_tibble as_tibble.frequency_tbl <- function(x, validate = TRUE, ..., rownames = NA) { - attr(x, 'package') <- NULL - attr(x, 'opt') <- NULL + attr(x, "package") <- NULL + attr(x, "opt") <- NULL as_tibble(x = as.data.frame(x), validate = validate, ..., rownames = rownames) } @@ -794,10 +825,10 @@ as_tibble.frequency_tbl <- function(x, validate = TRUE, ..., rownames = NA) { #' @exportMethod hist.frequency_tbl #' @export #' @importFrom graphics hist -hist.frequency_tbl <- function(x, breaks = "Sturges", main = NULL, ...) { - opt <- attr(x, 'opt') +hist.frequency_tbl <- function(x, breaks = "Sturges", main = NULL, xlab = NULL, ...) { + opt <- attr(x, "opt") if (!class(x$item) %in% c("numeric", "double", "integer", "Date")) { - stop("'x' must be numeric or Date.", call. = FALSE) + stop("`x` must be numeric or Date.", call. = FALSE) } if (!is.null(opt$vars)) { title <- opt$vars @@ -814,14 +845,17 @@ hist.frequency_tbl <- function(x, breaks = "Sturges", main = NULL, ...) { if (is.null(main)) { main <- paste("Histogram of", title) } - hist(x, main = main, xlab = title, ...) + if (is.null(xlab)) { + xlab <- title + } + hist(x, main = main, xlab = xlab, breaks = breaks, ...) } #' @noRd #' @exportMethod plot.frequency_tbl #' @export plot.frequency_tbl <- function(x, y, ...) { - opt <- attr(x, 'opt') + opt <- attr(x, "opt") if (!is.null(opt$vars)) { title <- opt$vars } else { @@ -841,7 +875,7 @@ as.vector.frequency_tbl <- function(x, mode = "any") { #' @exportMethod format.frequency_tbl #' @export format.frequency_tbl <- function(x, digits = 1, ...) { - opt <- attr(x, 'opt') + opt <- attr(x, "opt") if (opt$nmax.set == TRUE) { nmax <- opt$nmax } else { diff --git a/R/key_antibiotics.R b/R/key_antibiotics.R index b23cfe6d..ab524357 100644 --- a/R/key_antibiotics.R +++ b/R/key_antibiotics.R @@ -26,6 +26,7 @@ #' @param GramPos_1,GramPos_2,GramPos_3,GramPos_4,GramPos_5,GramPos_6 column names of antibiotics for \strong{Gram positives}, case-insensitive #' @param GramNeg_1,GramNeg_2,GramNeg_3,GramNeg_4,GramNeg_5,GramNeg_6 column names of antibiotics for \strong{Gram negatives}, case-insensitive #' @param warnings give warning about missing antibiotic columns, they will anyway be ignored +#' @param ... other parameters passed on to function #' @details The function \code{key_antibiotics} returns a character vector with 12 antibiotic results for every isolate. These isolates can then be compared using \code{key_antibiotics_equal}, to check if two isolates have generally the same antibiogram. Missing and invalid values are replaced with a dot (\code{"."}). The \code{\link{first_isolate}} function only uses this function on the same microbial species from the same patient. Using this, an MRSA will be included after a susceptible \emph{S. aureus} (MSSA) found within the same episode (see \code{episode} parameter of \code{\link{first_isolate}}). Without key antibiotic comparison it wouldn't. #' #' At default, the antibiotics that are used for \strong{Gram positive bacteria} are (colum names): \cr @@ -40,22 +41,21 @@ #' @rdname key_antibiotics #' @export #' @importFrom dplyr %>% mutate if_else +#' @importFrom crayon blue bold #' @seealso \code{\link{first_isolate}} #' @examples #' # septic_patients is a dataset available in the AMR package #' ?septic_patients -#' my_patients <- septic_patients -#' + #' library(dplyr) #' # set key antibiotics to a new variable -#' my_patients <- my_patients %>% +#' my_patients <- septic_patients %>% #' mutate(keyab = key_antibiotics(.)) %>% #' mutate( #' # now calculate first isolates -#' first_regular = first_isolate(., "date", "patient_id", "mo"), +#' first_regular = first_isolate(., col_keyantibiotics = FALSE), #' # and first WEIGHTED isolates -#' first_weighted = first_isolate(., "date", "patient_id", "mo", -#' col_keyantibiotics = "keyab") +#' first_weighted = first_isolate(., col_keyantibiotics = "keyab") #' ) #' #' # Check the difference, in this data set it results in 7% more isolates: @@ -68,12 +68,12 @@ #' strainB <- "SSSIRSSSRSSS" #' #' key_antibiotics_equal(strainA, strainB) -#' # TRUE, because I is ignored (as are missing values) +#' # TRUE, because I is ignored (as well as missing values) #' #' key_antibiotics_equal(strainA, strainB, ignore_I = FALSE) #' # FALSE, because I is not ignored and so the 4th value differs key_antibiotics <- function(tbl, - col_mo = "mo", + col_mo = NULL, universal_1 = "amox", universal_2 = "amcl", universal_3 = "cfur", @@ -93,14 +93,16 @@ key_antibiotics <- function(tbl, GramNeg_5 = "cfta", GramNeg_6 = "mero", warnings = TRUE, - col_bactid = "bactid") { + ...) { - if (col_bactid %in% colnames(tbl)) { - col_mo <- col_bactid - warning("Use of `col_bactid` is deprecated. Use `col_mo` instead.") + # try to find columns based on type + # -- mo + if (is.null(col_mo) & "mo" %in% lapply(tbl, class)) { + col_mo <- colnames(tbl)[lapply(tbl, class) == "mo"][1] + message(blue(paste0("NOTE: Using column `", bold(col_mo), "` as input for `col_mo`."))) } - if (!col_mo %in% colnames(tbl)) { - stop('Column ', col_mo, ' not found.', call. = FALSE) + if (is.null(col_mo)) { + stop("`col_mo` must be set.", call. = FALSE) } # check columns @@ -140,13 +142,11 @@ key_antibiotics <- function(tbl, GramNeg_4, GramNeg_5, GramNeg_6) gram_negative <- gram_negative[!is.na(gram_negative)] - if (!tbl %>% pull(col_mo) %>% is.mo()) { - tbl[, col_mo] <- as.mo(tbl[, col_mo]) - } - # join microorganisms - tbl <- tbl %>% left_join_microorganisms(col_mo) - - tbl$key_ab <- NA_character_ + # join to microorganisms data set + tbl <- tbl %>% + mutate_at(vars(col_mo), as.mo) %>% + left_join_microorganisms(by = col_mo) %>% + mutate(key_ab = NA_character_) # Gram + tbl <- tbl %>% mutate(key_ab = diff --git a/R/mdro.R b/R/mdro.R index d922a370..7bf1e526 100755 --- a/R/mdro.R +++ b/R/mdro.R @@ -30,7 +30,7 @@ #' @return Ordered factor with levels \code{Negative < Positive, unconfirmed < Positive}. #' @rdname mdro #' @importFrom dplyr %>% -#' @importFrom crayon red blue +#' @importFrom crayon red blue bold #' @export #' @examples #' library(dplyr) @@ -101,8 +101,7 @@ mdro <- function(tbl, tobr = 'tobr', trim = 'trim', trsu = 'trsu', - vanc = 'vanc', - col_bactid = NULL) { + vanc = 'vanc') { if (!is.data.frame(tbl)) { stop("`tbl` must be a data frame.", call. = FALSE) @@ -110,14 +109,12 @@ mdro <- function(tbl, # try to find columns based on type # -- mo - if (!is.null(col_bactid)) { - col_mo <- col_bactid - warning("Use of `col_bactid` is deprecated. Use `col_mo` instead.") - } else if (is.null(col_mo) & "mo" %in% lapply(tbl, class)) { + if (is.null(col_mo) & "mo" %in% lapply(tbl, class)) { col_mo <- colnames(tbl)[lapply(tbl, class) == "mo"][1] - message("NOTE: Using column `", col_mo, "` as input for `col_mo`.") - } else if (!col_mo %in% colnames(tbl)) { - stop('Column ', col_mo, ' not found.', call. = FALSE) + message(blue(paste0("NOTE: Using column `", bold(col_mo), "` as input for `col_mo`."))) + } + if (is.null(col_mo)) { + stop("`col_mo` must be set.", call. = FALSE) } # strip whitespaces @@ -259,11 +256,8 @@ mdro <- function(tbl, } } - if (!tbl %>% pull(col_mo) %>% is.mo()) { - tbl[, col_mo] <- as.mo(tbl[, col_mo]) - } - tbl <- tbl %>% + mutate_at(vars(col_mo), as.mo) %>% # join to microorganisms data set left_join_microorganisms(by = col_mo) %>% # add unconfirmed to where genus is available diff --git a/R/misc.R b/R/misc.R index d10c0fd1..65925d0c 100755 --- a/R/misc.R +++ b/R/misc.R @@ -26,15 +26,19 @@ addin_insert_like <- function() { rstudioapi::insertText(" %like% ") } +# No export, no Rd +# works exactly like round(), but rounds `round(0.55, 1)` as 0.6 +round2 <- function(x, digits = 0) { + # https://stackoverflow.com/a/12688836/4575331 + (trunc((abs(x) * 10 ^ digits) + 0.5) / 10 ^ digits) * sign(x) +} + # No export, no Rd percent <- function(x, round = 1, force_zero = FALSE, decimal.mark = getOption("OutDec"), ...) { decimal.mark.options <- getOption("OutDec") options(OutDec = ".") - # https://stackoverflow.com/a/12688836/4575331 - round2 <- function(x, n) (trunc((abs(x) * 10 ^ n) + 0.5) / 10 ^ n) * sign(x) - val <- round2(x, round + 2) # round up 0.5 val <- round(x = val * 100, digits = round) # remove floating point error diff --git a/R/mo.R b/R/mo.R index 8c6a096c..d9669550 100644 --- a/R/mo.R +++ b/R/mo.R @@ -154,9 +154,7 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE, #' @rdname as.mo #' @export is.mo <- function(x) { - # bactid for older releases - # remove when is.bactid will be removed - identical(class(x), "mo") | identical(class(x), "bactid") + identical(class(x), "mo") } #' @rdname as.mo diff --git a/R/resistance_predict.R b/R/resistance_predict.R index 7d8933ad..b88a0b3b 100644 --- a/R/resistance_predict.R +++ b/R/resistance_predict.R @@ -44,7 +44,7 @@ #' @rdname resistance_predict #' @export #' @importFrom stats predict glm lm -#' @importFrom dplyr %>% pull mutate group_by_at summarise filter n_distinct arrange case_when +#' @importFrom dplyr %>% pull mutate mutate_at n group_by_at summarise filter filter_at all_vars n_distinct arrange case_when # @importFrom tidyr spread #' @examples #' \dontrun{ @@ -83,11 +83,11 @@ #' if (!require(ggplot2)) { #' #' data <- septic_patients %>% -#' filter(mo == "ESCCOL") %>% +#' filter(mo == as.mo("E. coli")) %>% #' resistance_predict(col_ab = "amox", -#' col_date = "date", -#' info = FALSE, -#' minimum = 15) +#' col_date = "date", +#' info = FALSE, +#' minimum = 15) #' #' ggplot(data, #' aes(x = year)) + @@ -137,9 +137,10 @@ resistance_predict <- function(tbl, tbl[, col_ab] <- gsub('I', 'R', tbl %>% pull(col_ab)) } - if (!tbl %>% pull(col_ab) %>% is.rsi()) { - tbl[, col_ab] <- tbl %>% pull(col_ab) %>% as.rsi() - } + tbl <- tbl %>% + mutate_at(col_ab, as.rsi) %>% + filter_at(col_ab, all_vars(!is.na(.))) + tbl[, col_ab] <- droplevels(tbl[, col_ab]) year <- function(x) { if (all(grepl('^[0-9]{4}$', x))) { diff --git a/R/rsi_calc.R b/R/rsi_calc.R index 24799870..c9b17a7c 100644 --- a/R/rsi_calc.R +++ b/R/rsi_calc.R @@ -73,7 +73,7 @@ rsi_calc <- function(..., print_warning <- FALSE type_trans <- as.integer(as.rsi(type)) - type_others <- setdiff(1:3, type_trans) + type_others <- base::setdiff(1:3, type_trans) if (is.data.frame(x)) { rsi_integrity_check <- character(0) diff --git a/man/AMR-deprecated.Rd b/man/AMR-deprecated.Rd index 85a24ded..fee1cb82 100644 --- a/man/AMR-deprecated.Rd +++ b/man/AMR-deprecated.Rd @@ -2,18 +2,9 @@ % Please edit documentation in R/deprecated.R \name{AMR-deprecated} \alias{AMR-deprecated} -\alias{as.bactid} -\alias{is.bactid} -\alias{guess_bactid} \alias{ratio} \title{Deprecated functions} \usage{ -as.bactid(...) - -is.bactid(...) - -guess_bactid(...) - ratio(x, ratio) } \description{ diff --git a/man/age.Rd b/man/age.Rd index d0ef60a1..4371b883 100644 --- a/man/age.Rd +++ b/man/age.Rd @@ -4,12 +4,12 @@ \alias{age} \title{Age in years of individuals} \usage{ -age(x, y = Sys.Date()) +age(x, reference = Sys.Date()) } \arguments{ -\item{x}{date(s) - will be coerced with \code{\link{as.POSIXlt}}} +\item{x}{date(s), will be coerced with \code{\link{as.POSIXlt}}} -\item{y}{reference date(s) - defaults to \code{\link{Sys.Date}} - will be coerced with \code{\link{as.POSIXlt}}} +\item{reference}{reference date(s) (defaults to today), will be coerced with \code{\link{as.POSIXlt}}} } \value{ Integer (no decimals) @@ -18,5 +18,5 @@ Integer (no decimals) Calculates age in years based on a reference date, which is the sytem time at default. } \seealso{ -age_groups +\code{\link{age_groups}} to splits age into groups } diff --git a/man/age_groups.Rd b/man/age_groups.Rd index e00e8c16..8c837ae8 100644 --- a/man/age_groups.Rd +++ b/man/age_groups.Rd @@ -9,13 +9,13 @@ age_groups(x, split_at = c(12, 25, 55, 75)) \arguments{ \item{x}{age, e.g. calculated with \code{\link{age}}} -\item{split_at}{values to split \code{x}, defaults to 0-11, 12-24, 26-54, 55-74 and 75+. See Details.} +\item{split_at}{values to split \code{x} at, defaults to age groups 0-11, 12-24, 26-54, 55-74 and 75+. See Details.} } \value{ Ordered \code{\link{factor}} } \description{ -Splits ages into groups defined by the \code{split} parameter. +Split ages into age groups defined by the \code{split} parameter. This allows for easier demographic (antimicrobial resistance) analysis. } \details{ To split ages, the input can be: @@ -65,7 +65,7 @@ septic_patients \%>\% ggplot_rsi(x = "age_group") } \seealso{ -age +\code{\link{age}} to determine ages based on one or more reference dates } \keyword{age} \keyword{age_group} diff --git a/man/eucast_rules.Rd b/man/eucast_rules.Rd index 9199f001..adddf147 100644 --- a/man/eucast_rules.Rd +++ b/man/eucast_rules.Rd @@ -40,7 +40,7 @@ eucast_rules(tbl, col_mo = NULL, info = TRUE, pita = "pita", poly = "poly", pris = "pris", qida = "qida", rifa = "rifa", roxi = "roxi", siso = "siso", teic = "teic", tetr = "tetr", tica = "tica", tige = "tige", tobr = "tobr", - trim = "trim", trsu = "trsu", vanc = "vanc", col_bactid = NULL) + trim = "trim", trsu = "trsu", vanc = "vanc") EUCAST_rules(...) @@ -59,8 +59,6 @@ interpretive_reading(...) \item{amcl, amik, amox, ampi, azit, azlo, aztr, cefa, cfep, cfot, cfox, cfra, cfta, cftr, cfur, chlo, cipr, clar, clin, clox, coli, czol, dapt, doxy, erta, eryt, fosf, fusi, gent, imip, kana, levo, linc, line, mero, mezl, mino, moxi, nali, neom, neti, nitr, norf, novo, oflo, oxac, peni, pipe, pita, poly, pris, qida, rifa, roxi, siso, teic, tetr, tica, tige, tobr, trim, trsu, vanc}{column name of an antibiotic, see Antibiotics} -\item{col_bactid}{deprecated, use \code{col_mo} instead.} - \item{...}{parameters that are passed on to \code{eucast_rules}} } \value{ diff --git a/man/first_isolate.Rd b/man/first_isolate.Rd index 68239b91..fb0698ce 100755 --- a/man/first_isolate.Rd +++ b/man/first_isolate.Rd @@ -2,6 +2,8 @@ % Please edit documentation in R/first_isolate.R \name{first_isolate} \alias{first_isolate} +\alias{filter_first_isolate} +\alias{filter_first_weighted_isolate} \title{Determine first (weighted) isolates} \source{ Methodology of this function is based on: \strong{M39 Analysis and Presentation of Cumulative Antimicrobial Susceptibility Test Data, 4th Edition}, 2014, \emph{Clinical and Laboratory Standards Institute (CLSI)}. \url{https://clsi.org/standards/products/microbiology/documents/m39/}. @@ -11,10 +13,15 @@ first_isolate(tbl, col_date = NULL, col_patient_id = NULL, col_mo = NULL, col_testcode = NULL, col_specimen = NULL, col_icu = NULL, col_keyantibiotics = NULL, episode_days = 365, testcodes_exclude = NULL, icu_exclude = FALSE, - filter_specimen = NULL, output_logical = TRUE, - type = "keyantibiotics", ignore_I = TRUE, points_threshold = 2, - info = TRUE, col_bactid = NULL, col_genus = NULL, - col_species = NULL) + specimen_group = NULL, type = "keyantibiotics", ignore_I = TRUE, + points_threshold = 2, info = TRUE, ...) + +filter_first_isolate(tbl, col_date = NULL, col_patient_id = NULL, + col_mo = NULL, ...) + +filter_first_weighted_isolate(tbl, col_date = NULL, + col_patient_id = NULL, col_mo = NULL, col_keyantibiotics = NULL, + ...) } \arguments{ \item{tbl}{a \code{data.frame} containing isolates.} @@ -37,11 +44,9 @@ first_isolate(tbl, col_date = NULL, col_patient_id = NULL, \item{testcodes_exclude}{character vector with test codes that should be excluded (case-insensitive)} -\item{icu_exclude}{logical whether ICU isolates should be excluded} +\item{icu_exclude}{logical whether ICU isolates should be excluded (rows with value \code{TRUE} in column \code{col_icu})} -\item{filter_specimen}{specimen group or type that should be excluded} - -\item{output_logical}{return output as \code{logical} (will else be the values \code{0} or \code{1})} +\item{specimen_group}{value in column \code{col_specimen} to filter on} \item{type}{type to determine weighed isolates; can be \code{"keyantibiotics"} or \code{"points"}, see Details} @@ -51,21 +56,34 @@ first_isolate(tbl, col_date = NULL, col_patient_id = NULL, \item{info}{print progress} -\item{col_bactid}{(deprecated, use \code{col_mo} instead)} - -\item{col_genus}{(deprecated, use \code{col_mo} instead) column name of the genus of the microorganisms} - -\item{col_species}{(deprecated, use \code{col_mo} instead) column name of the species of the microorganisms} +\item{...}{parameters passed on to the \code{first_isolate} function} } \value{ -A vector to add to table, see Examples. +Logical vector } \description{ Determine first (weighted) isolates of all microorganisms of every patient per episode and (if needed) per specimen type. } \details{ \strong{WHY THIS IS SO IMPORTANT} \cr - To conduct an analysis of antimicrobial resistance, you should only include the first isolate of every patient per episode \href{https://www.ncbi.nlm.nih.gov/pubmed/17304462}{[1]}. If you would not do this, you could easily get an overestimate or underestimate of the resistance of an antibiotic. Imagine that a patient was admitted with an MRSA and that it was found in 5 different blood cultures the following week. The resistance percentage of oxacillin of all \emph{S. aureus} isolates would be overestimated, because you included this MRSA more than once. It would be \href{https://en.wikipedia.org/wiki/Selection_bias}{selection bias}. +To conduct an analysis of antimicrobial resistance, you should only include the first isolate of every patient per episode \href{https://www.ncbi.nlm.nih.gov/pubmed/17304462}{[1]}. If you would not do this, you could easily get an overestimate or underestimate of the resistance of an antibiotic. Imagine that a patient was admitted with an MRSA and that it was found in 5 different blood cultures the following week. The resistance percentage of oxacillin of all \emph{S. aureus} isolates would be overestimated, because you included this MRSA more than once. It would be \href{https://en.wikipedia.org/wiki/Selection_bias}{selection bias}. + +The function \code{filter_first_isolate} is essentially equal to: +\preformatted{ + tbl \%>\% + mutate(only_firsts = first_isolate(tbl, ...)) \%>\% + filter(only_firsts == TRUE) \%>\% + select(-only_firsts) +} +The function \code{filter_first_weighted_isolate} is essentially equal to: +\preformatted{ + tbl \%>\% + mutate(keyab = key_antibiotics(.)) \%>\% + mutate(only_weighted_firsts = first_isolate(tbl, + col_keyantibiotics = "keyab", ...)) \%>\% + filter(only_weighted_firsts == TRUE) \%>\% + select(-only_weighted_firsts) +} } \section{Key antibiotics}{ @@ -83,20 +101,29 @@ Determine first (weighted) isolates of all microorganisms of every patient per e ?septic_patients library(dplyr) -my_patients <- septic_patients \%>\% +# Filter on first isolates: +septic_patients \%>\% mutate(first_isolate = first_isolate(., col_date = "date", col_patient_id = "patient_id", - col_mo = "mo")) + col_mo = "mo")) \%>\% + filter(first_isolate == TRUE) + +# Which can be shortened to: +septic_patients \%>\% + filter_first_isolate() +# or for first weighted isolates: +septic_patients \%>\% + filter_first_weighted_isolate() # Now let's see if first isolates matter: -A <- my_patients \%>\% +A <- septic_patients \%>\% group_by(hospital_id) \%>\% summarise(count = n_rsi(gent), # gentamicin availability resistance = portion_IR(gent)) # gentamicin resistance -B <- my_patients \%>\% - filter(first_isolate == TRUE) \%>\% # the 1st isolate filter +B <- septic_patients \%>\% + filter_first_weighted_isolate() \%>\% # the 1st isolate filter group_by(hospital_id) \%>\% summarise(count = n_rsi(gent), # gentamicin availability resistance = portion_IR(gent)) # gentamicin resistance @@ -106,6 +133,7 @@ B <- my_patients \%>\% # Gentamicin resitance in hospital D appears to be 5.4\% higher than # when you (erroneously) would have used all isolates! + ## OTHER EXAMPLES: \dontrun{ @@ -122,29 +150,29 @@ tbl$first_isolate_weighed <- tbl$first_blood_isolate <- first_isolate(tbl, - filter_specimen = 'Blood') + specimen_group = 'Blood') tbl$first_blood_isolate_weighed <- first_isolate(tbl, - filter_specimen = 'Blood', + specimen_group = 'Blood', col_keyantibiotics = 'keyab') tbl$first_urine_isolate <- first_isolate(tbl, - filter_specimen = 'Urine') + specimen_group = 'Urine') tbl$first_urine_isolate_weighed <- first_isolate(tbl, - filter_specimen = 'Urine', + specimen_group = 'Urine', col_keyantibiotics = 'keyab') tbl$first_resp_isolate <- first_isolate(tbl, - filter_specimen = 'Respiratory') + specimen_group = 'Respiratory') tbl$first_resp_isolate_weighed <- first_isolate(tbl, - filter_specimen = 'Respiratory', + specimen_group = 'Respiratory', col_keyantibiotics = 'keyab') } } diff --git a/man/freq.Rd b/man/freq.Rd index cd478b27..e2b792eb 100755 --- a/man/freq.Rd +++ b/man/freq.Rd @@ -10,15 +10,16 @@ frequency_tbl(x, ..., sort.count = TRUE, nmax = getOption("max.print.freq"), na.rm = TRUE, row.names = TRUE, markdown = !interactive(), digits = 2, quote = FALSE, - header = !markdown, title = NULL, na = "", sep = " ", - decimal.mark = getOption("OutDec"), big.mark = ifelse(decimal.mark != - ",", ",", ".")) + header = !markdown, title = NULL, na = "", droplevels = TRUE, + sep = " ", decimal.mark = getOption("OutDec"), + big.mark = ifelse(decimal.mark != ",", ",", ".")) freq(x, ..., sort.count = TRUE, nmax = getOption("max.print.freq"), na.rm = TRUE, row.names = TRUE, markdown = !interactive(), digits = 2, quote = FALSE, header = !markdown, title = NULL, - na = "", sep = " ", decimal.mark = getOption("OutDec"), - big.mark = ifelse(decimal.mark != ",", ",", ".")) + na = "", droplevels = TRUE, sep = " ", + decimal.mark = getOption("OutDec"), big.mark = ifelse(decimal.mark != + ",", ",", ".")) top_freq(f, n) @@ -52,6 +53,8 @@ top_freq(f, n) \item{na}{a character string to should be used to show empty (\code{NA}) values (only useful when \code{na.rm = FALSE})} +\item{droplevels}{a logical value indicating whether in factors empty levels should be dropped} + \item{sep}{a character string to separate the terms when selecting multiple columns} \item{decimal.mark}{% @@ -94,6 +97,7 @@ For dates and times of any class, these additional values will be calculated wit \item{Median, using \code{\link[stats]{median}}, with percentage since oldest} } +In factors, all factor levels that are not existing in the input data will be dropped. The function \code{top_freq} uses \code{\link[dplyr]{top_n}} internally and will include more than \code{n} rows if there are ties. } diff --git a/man/key_antibiotics.Rd b/man/key_antibiotics.Rd index 70891a0d..a8382380 100755 --- a/man/key_antibiotics.Rd +++ b/man/key_antibiotics.Rd @@ -5,14 +5,13 @@ \alias{key_antibiotics_equal} \title{Key antibiotics for first \emph{weighted} isolates} \usage{ -key_antibiotics(tbl, col_mo = "mo", universal_1 = "amox", +key_antibiotics(tbl, col_mo = NULL, universal_1 = "amox", universal_2 = "amcl", universal_3 = "cfur", universal_4 = "pita", universal_5 = "cipr", universal_6 = "trsu", GramPos_1 = "vanc", GramPos_2 = "teic", GramPos_3 = "tetr", GramPos_4 = "eryt", GramPos_5 = "oxac", GramPos_6 = "rifa", GramNeg_1 = "gent", GramNeg_2 = "tobr", GramNeg_3 = "coli", GramNeg_4 = "cfot", - GramNeg_5 = "cfta", GramNeg_6 = "mero", warnings = TRUE, - col_bactid = "bactid") + GramNeg_5 = "cfta", GramNeg_6 = "mero", warnings = TRUE, ...) key_antibiotics_equal(x, y, type = c("keyantibiotics", "points"), ignore_I = TRUE, points_threshold = 2, info = FALSE) @@ -30,7 +29,7 @@ key_antibiotics_equal(x, y, type = c("keyantibiotics", "points"), \item{warnings}{give warning about missing antibiotic columns, they will anyway be ignored} -\item{col_bactid}{(deprecated, use \code{col_mo} instead)} +\item{...}{other parameters passed on to function} \item{x, y}{characters to compare} @@ -71,18 +70,15 @@ The function \code{key_antibiotics} returns a character vector with 12 antibioti \examples{ # septic_patients is a dataset available in the AMR package ?septic_patients -my_patients <- septic_patients - library(dplyr) # set key antibiotics to a new variable -my_patients <- my_patients \%>\% +my_patients <- septic_patients \%>\% mutate(keyab = key_antibiotics(.)) \%>\% mutate( # now calculate first isolates - first_regular = first_isolate(., "date", "patient_id", "mo"), + first_regular = first_isolate(., col_keyantibiotics = FALSE), # and first WEIGHTED isolates - first_weighted = first_isolate(., "date", "patient_id", "mo", - col_keyantibiotics = "keyab") + first_weighted = first_isolate(., col_keyantibiotics = "keyab") ) # Check the difference, in this data set it results in 7\% more isolates: @@ -95,7 +91,7 @@ strainA <- "SSSRR.S.R..S" strainB <- "SSSIRSSSRSSS" key_antibiotics_equal(strainA, strainB) -# TRUE, because I is ignored (as are missing values) +# TRUE, because I is ignored (as well as missing values) key_antibiotics_equal(strainA, strainB, ignore_I = FALSE) # FALSE, because I is not ignored and so the 4th value differs diff --git a/man/mdro.Rd b/man/mdro.Rd index 2e374a10..32291ab6 100644 --- a/man/mdro.Rd +++ b/man/mdro.Rd @@ -22,8 +22,7 @@ mdro(tbl, country = NULL, col_mo = NULL, info = TRUE, peni = "peni", pipe = "pipe", pita = "pita", poly = "poly", qida = "qida", rifa = "rifa", roxi = "roxi", siso = "siso", teic = "teic", tetr = "tetr", tica = "tica", tige = "tige", - tobr = "tobr", trim = "trim", trsu = "trsu", vanc = "vanc", - col_bactid = NULL) + tobr = "tobr", trim = "trim", trsu = "trsu", vanc = "vanc") brmo(..., country = "nl") @@ -160,8 +159,6 @@ eucast_exceptional_phenotypes(tbl, country = "EUCAST", ...) \item{vanc}{column name of an antibiotic, see Antibiotics} -\item{col_bactid}{deprecated, use \code{col_mo} instead.} - \item{...}{parameters that are passed on to methods} } \value{ diff --git a/man/resistance_predict.Rd b/man/resistance_predict.Rd index 7f9e872e..65849223 100644 --- a/man/resistance_predict.Rd +++ b/man/resistance_predict.Rd @@ -89,11 +89,11 @@ septic_patients \%>\% if (!require(ggplot2)) { data <- septic_patients \%>\% - filter(mo == "ESCCOL") \%>\% + filter(mo == as.mo("E. coli")) \%>\% resistance_predict(col_ab = "amox", - col_date = "date", - info = FALSE, - minimum = 15) + col_date = "date", + info = FALSE, + minimum = 15) ggplot(data, aes(x = year)) + diff --git a/tests/testthat/test-age.R b/tests/testthat/test-age.R index e42311ad..04264963 100644 --- a/tests/testthat/test-age.R +++ b/tests/testthat/test-age.R @@ -20,17 +20,17 @@ context("age.R") test_that("age works", { expect_equal(age(x = c("1980-01-01", "1985-01-01", "1990-01-01"), - y = "2019-01-01"), + reference = "2019-01-01"), c(39, 34, 29)) expect_error(age(x = c("1980-01-01", "1985-01-01", "1990-01-01"), - y = c("2019-01-01", "2019-01-01"))) + reference = c("2019-01-01", "2019-01-01"))) expect_error(age(x = c("1980-01-01", "1985-01-01", "1990-01-01"), - y = "1975-01-01")) + reference = "1975-01-01")) expect_warning(age(x = c("1800-01-01", "1805-01-01", "1810-01-01"), - y = "2019-01-01")) + reference = "2019-01-01")) }) test_that("age_groups works", { diff --git a/tests/testthat/test-deprecated.R b/tests/testthat/test-deprecated.R index ef08be81..49b4c2f0 100644 --- a/tests/testthat/test-deprecated.R +++ b/tests/testthat/test-deprecated.R @@ -20,11 +20,6 @@ context("deprecated.R") test_that("deprecated functions work", { - expect_identical(is.mo(as.mo("esco")), suppressWarnings(is.bactid(as.bactid("esco")))) - expect_warning(identical(is.mo(as.mo("esco")), is.bactid(as.bactid("esco")))) - - expect_identical(as.mo("esco"), suppressWarnings(guess_bactid("esco"))) - expect_error(suppressWarnings(ratio("A"))) expect_error(suppressWarnings(ratio(1, ratio = "abc"))) expect_error(suppressWarnings(ratio(c(1, 2), ratio = c(1, 2, 3)))) @@ -32,13 +27,4 @@ test_that("deprecated functions work", { expect_identical(suppressWarnings(ratio(c(772, 1611, 737), ratio = "1:2:1")), c(780, 1560, 780)) expect_identical(suppressWarnings(ratio(c(1752, 1895), ratio = c(1, 1))), c(1823.5, 1823.5)) - old_mo <- "ESCCOL" - class(old_mo) <- "bactid" - df_oldmo <- data.frame(test = old_mo) - # print - expect_output(print(old_mo)) - # test pull - library(dplyr) - expect_identical(df_oldmo %>% pull(test), old_mo) - }) diff --git a/tests/testthat/test-first_isolate.R b/tests/testthat/test-first_isolate.R index f580b61b..80196198 100755 --- a/tests/testthat/test-first_isolate.R +++ b/tests/testthat/test-first_isolate.R @@ -19,7 +19,7 @@ context("first_isolate.R") test_that("first isolates work", { - # septic_patients contains 1331 out of 2000 first isolates + # septic_patients contains 1315 out of 2000 first isolates expect_equal( sum( first_isolate(tbl = septic_patients, @@ -139,16 +139,15 @@ test_that("first isolates work", { mutate(first = first_isolate(., "date", "patient_id", col_mo = "mo", col_specimen = "specimen", - filter_specimen = "something_unexisting", - output_logical = FALSE))) + filter_specimen = "something_unexisting"))) # printing of exclusion message expect_output(septic_patients %>% - first_isolate(col_date = "date", - col_mo = "mo", - col_patient_id = "patient_id", - col_testcode = "gender", - testcodes_exclude = "M")) + first_isolate(col_date = "date", + col_mo = "mo", + col_patient_id = "patient_id", + col_testcode = "gender", + testcodes_exclude = "M")) # errors expect_error(first_isolate("date", "patient_id", col_mo = "mo")) @@ -158,18 +157,16 @@ test_that("first isolates work", { # look for columns itself expect_message(first_isolate(septic_patients)) - expect_message(first_isolate(septic_patients %>% - mutate(mo = as.character(mo)) %>% - left_join_microorganisms(), - col_genus = "genus", - col_species = "species")) + expect_error(first_isolate(septic_patients %>% + mutate(mo = as.character(mo)) %>% + left_join_microorganisms())) # if mo is not an mo class, result should be the same expect_identical(septic_patients %>% - mutate(mo = as.character(mo)) %>% - first_isolate(col_date = "date", - col_mo = "mo", - col_patient_id = "patient_id"), + mutate(mo = as.character(mo)) %>% + first_isolate(col_date = "date", + col_mo = "mo", + col_patient_id = "patient_id"), septic_patients %>% first_isolate(col_date = "date", col_mo = "mo", diff --git a/tests/testthat/test-mdro.R b/tests/testthat/test-mdro.R index d53bd301..ef4e334e 100755 --- a/tests/testthat/test-mdro.R +++ b/tests/testthat/test-mdro.R @@ -21,8 +21,8 @@ context("mdro.R") test_that("mdro works", { library(dplyr) - expect_error(suppressWarnings(mdro(septic_patients, "invalid", col_bactid = "mo", info = TRUE))) - expect_error(suppressWarnings(mdro(septic_patients, "fr", col_bactid = "mo", info = TRUE))) + expect_error(suppressWarnings(mdro(septic_patients, "invalid", col_mo = "mo", info = TRUE))) + expect_error(suppressWarnings(mdro(septic_patients, "fr", info = TRUE))) expect_error(suppressWarnings(mdro(septic_patients, country = c("de", "nl"), info = TRUE))) expect_error(suppressWarnings(mdro(septic_patients, col_mo = "invalid", info = TRUE))) diff --git a/tests/testthat/test-portion.R b/tests/testthat/test-portion.R index 9dfd85a6..a62a79e2 100755 --- a/tests/testthat/test-portion.R +++ b/tests/testthat/test-portion.R @@ -147,53 +147,3 @@ test_that("old rsi works", { }) - -test_that("prediction of rsi works", { - amox_R <- septic_patients %>% - filter(mo == "B_ESCHR_COL") %>% - rsi_predict(col_ab = "amox", - col_date = "date", - minimum = 10, - info = TRUE) %>% - pull("value") - # amox resistance will increase according to data set `septic_patients` - expect_true(amox_R[3] < amox_R[20]) - - expect_output(rsi_predict(tbl = filter(septic_patients, mo == "B_ESCHR_COL"), - model = "binomial", - col_ab = "amox", - col_date = "date", - info = TRUE)) - expect_output(rsi_predict(tbl = filter(septic_patients, mo == "B_ESCHR_COL"), - model = "loglin", - col_ab = "amox", - col_date = "date", - info = TRUE)) - expect_output(rsi_predict(tbl = filter(septic_patients, mo == "B_ESCHR_COL"), - model = "lin", - col_ab = "amox", - col_date = "date", - info = TRUE)) - - expect_error(rsi_predict(tbl = filter(septic_patients, mo == "B_ESCHR_COL"), - model = "INVALID MODEL", - col_ab = "amox", - col_date = "date", - info = TRUE)) - expect_error(rsi_predict(tbl = filter(septic_patients, mo == "B_ESCHR_COL"), - col_ab = "NOT EXISTING COLUMN", - col_date = "date", - info = TRUE)) - expect_error(rsi_predict(tbl = filter(septic_patients, mo == "B_ESCHR_COL"), - col_ab = "amox", - col_date = "NOT EXISTING COLUMN", - info = TRUE)) - # almost all E. coli are mero S in the Netherlands :) - expect_error(resistance_predict(tbl = filter(septic_patients, mo == "B_ESCHR_COL"), - col_ab = "mero", - col_date = "date", - info = TRUE)) - - expect_error(portion_df(c("A", "B", "C"))) - expect_error(portion_df(septic_patients[,"date"])) -}) diff --git a/tests/testthat/test-resistance_predict.R b/tests/testthat/test-resistance_predict.R new file mode 100644 index 00000000..dbdc1a6f --- /dev/null +++ b/tests/testthat/test-resistance_predict.R @@ -0,0 +1,71 @@ +# ==================================================================== # +# TITLE # +# Antimicrobial Resistance (AMR) Analysis # +# # +# AUTHORS # +# Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl) # +# # +# LICENCE # +# This package is free software; you can redistribute it and/or modify # +# it under the terms of the GNU General Public License version 2.0, # +# as published by the Free Software Foundation. # +# # +# This R package is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License version 2.0 for more details. # +# ==================================================================== # + +context("portion.R") + +test_that("prediction of rsi works", { + amox_R <- septic_patients %>% + filter(mo == "B_ESCHR_COL") %>% + rsi_predict(col_ab = "amox", + col_date = "date", + minimum = 10, + info = TRUE) %>% + pull("value") + # amox resistance will increase according to data set `septic_patients` + expect_true(amox_R[3] < amox_R[20]) + + library(dplyr) + + expect_output(rsi_predict(tbl = filter(septic_patients, mo == "B_ESCHR_COL"), + model = "binomial", + col_ab = "amox", + col_date = "date", + info = TRUE)) + expect_output(rsi_predict(tbl = filter(septic_patients, mo == "B_ESCHR_COL"), + model = "loglin", + col_ab = "amox", + col_date = "date", + info = TRUE)) + expect_output(rsi_predict(tbl = filter(septic_patients, mo == "B_ESCHR_COL"), + model = "lin", + col_ab = "amox", + col_date = "date", + info = TRUE)) + + expect_error(rsi_predict(tbl = filter(septic_patients, mo == "B_ESCHR_COL"), + model = "INVALID MODEL", + col_ab = "amox", + col_date = "date", + info = TRUE)) + expect_error(rsi_predict(tbl = filter(septic_patients, mo == "B_ESCHR_COL"), + col_ab = "NOT EXISTING COLUMN", + col_date = "date", + info = TRUE)) + expect_error(rsi_predict(tbl = filter(septic_patients, mo == "B_ESCHR_COL"), + col_ab = "amox", + col_date = "NOT EXISTING COLUMN", + info = TRUE)) + # almost all E. coli are mero S in the Netherlands :) + expect_error(resistance_predict(tbl = filter(septic_patients, mo == "B_ESCHR_COL"), + col_ab = "mero", + col_date = "date", + info = TRUE)) + + expect_error(portion_df(c("A", "B", "C"))) + expect_error(portion_df(septic_patients[,"date"])) +})