diff --git a/DESCRIPTION b/DESCRIPTION index 06b1d567..a531a7e1 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR -Version: 0.4.0.9001 -Date: 2018-10-02 +Version: 0.4.0.9002 +Date: 2018-10-12 Title: Antimicrobial Resistance Analysis Authors@R: c( person( diff --git a/NAMESPACE b/NAMESPACE index b42e9d62..c99c60e9 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -63,6 +63,7 @@ export(count_IR) export(count_R) export(count_S) export(count_SI) +export(count_all) export(count_df) export(facet_rsi) export(first_isolate) diff --git a/NEWS.md b/NEWS.md index c31a682e..a263e706 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,11 +1,23 @@ # 0.4.0.90xx (latest development version) #### New +* Function `count_all` to get all available isolates (that like all `portion_*` and `count_*` functions also supports `summarise` and `group_by`), the old `n_rsi` is now an alias of `count_all` #### Changed +* Fix for `portion_*(..., as_percent = TRUE)` when minimal amount of isolates would not be met +* Using `portion_*` functions now throws a warning when total available isolate is below parameter `minimum` +* `as.mo` will not set package name as attribute anymore * Check for `hms::is.hms` in frequency tables * Removed diacritics from all authors (columns `microorganisms$ref` and `microorganisms.old$ref`) to comply with CRAN policy to only allow ASCII characters * Fix for `mo_property` not working properly +* Support for class `difftime` in frequency tables +* Support for named vectors of class `mo`, useful for `top_freq()` +* AI improvements for `as.mo`: + * `"CRS"` -> *Stenotrophomonas maltophilia* + * `"CRSM"` -> *Stenotrophomonas maltophilia* + * `"MSSA"` -> *Staphylococcus aureus* + * `"MSSE"` -> *Staphylococcus epidermidis* +* Fix for `join` functions #### Other * Updated vignettes to comply with README diff --git a/R/count.R b/R/count.R index 10594fd0..3a22392e 100644 --- a/R/count.R +++ b/R/count.R @@ -21,15 +21,15 @@ #' @description These functions can be used to count resistant/susceptible microbial isolates. All functions support quasiquotation with pipes, can be used in \code{dplyr}s \code{\link[dplyr]{summarise}} and support grouped variables, see \emph{Examples}. #' #' \code{count_R} and \code{count_IR} can be used to count resistant isolates, \code{count_S} and \code{count_SI} can be used to count susceptible isolates.\cr +#' @param ... one or more vectors (or columns) with antibiotic interpretations. They will be transformed internally with \code{\link{as.rsi}} if needed. #' @inheritParams portion -#' @details \strong{Remember that you should filter your table to let it contain only first isolates!} Use \code{\link{first_isolate}} to determine them in your data set. +#' @details These functions are meant to count isolates. Use the \code{\link{portion}_*} functions to calculate microbial resistance. #' -#' These functions are meant to count isolates. Use the \code{\link{portion}_*} functions to calculate microbial resistance. +#' \code{n_rsi} is an alias of \code{count_all}. They can be used to count all available isolates, i.e. where all input antibiotics have an available result (S, I or R). Their use is equal to \code{\link{n_distinct}}. Their function is equal to \code{count_S(...) + count_IR(...)}. #' #' \code{count_df} takes any variable from \code{data} that has an \code{"rsi"} class (created with \code{\link{as.rsi}}) and counts the amounts of R, I and S. The resulting \emph{tidy data} (see Source) \code{data.frame} will have three rows (S/I/R) and a column for each variable with class \code{"rsi"}. #' @source Wickham H. \strong{Tidy Data.} The Journal of Statistical Software, vol. 59, 2014. \url{http://vita.had.co.nz/papers/tidy-data.html} -#' @seealso \code{\link{portion}_*} to calculate microbial resistance and susceptibility.\cr -#' \code{\link{n_rsi}} to count all cases where antimicrobial results are available. +#' @seealso \code{\link{portion}_*} to calculate microbial resistance and susceptibility. #' @keywords resistance susceptibility rsi antibiotics isolate isolates #' @return Integer #' @rdname count @@ -47,6 +47,10 @@ #' count_S(septic_patients$amox) #' count_SI(septic_patients$amox) #' +#' # Count all available isolates +#' count_all(septic_patients$amox) +#' n_rsi(septic_patients$amox) +#' #' # Since n_rsi counts available isolates, you can #' # calculate back to count e.g. non-susceptible isolates. #' # This results in the same: @@ -56,24 +60,25 @@ #' library(dplyr) #' septic_patients %>% #' group_by(hospital_id) %>% -#' summarise(R = count_R(cipr), -#' I = count_I(cipr), -#' S = count_S(cipr), -#' n = n_rsi(cipr), # the actual total; sum of all three -#' total = n()) # NOT the amount of tested isolates! +#' summarise(R = count_R(cipr), +#' I = count_I(cipr), +#' S = count_S(cipr), +#' n1 = count_all(cipr), # the actual total; sum of all three +#' n2 = n_rsi(cipr), # same - analogous to n_distinct +#' total = n()) # NOT the amount of tested isolates! #' #' # Count co-resistance between amoxicillin/clav acid and gentamicin, #' # so we can see that combination therapy does a lot more than mono therapy. #' # Please mind that `portion_S` calculates percentages right away instead. -#' count_S(septic_patients$amcl) # S = 1056 (67.3%) -#' n_rsi(septic_patients$amcl) # n = 1570 +#' count_S(septic_patients$amcl) # S = 1057 (67.1%) +#' count_all(septic_patients$amcl) # n = 1576 #' -#' count_S(septic_patients$gent) # S = 1363 (74.0%) -#' n_rsi(septic_patients$gent) # n = 1842 +#' count_S(septic_patients$gent) # S = 1372 (74.0%) +#' count_all(septic_patients$gent) # n = 1855 #' #' with(septic_patients, -#' count_S(amcl, gent)) # S = 1385 (92.1%) -#' with(septic_patients, # n = 1504 +#' count_S(amcl, gent)) # S = 1396 (92.0%) +#' with(septic_patients, # n = 1517 #' n_rsi(amcl, gent)) #' #' # Get portions S/I/R immediately of all rsi columns @@ -140,6 +145,20 @@ count_S <- function(...) { only_count = TRUE) } +#' @rdname count +#' @export +count_all <- function(...) { + # only print warnings once, if needed + count_S(...) + suppressWarnings(count_IR(...)) +} + +#' @rdname count +#' @export +n_rsi <- function(...) { + # only print warnings once, if needed + count_S(...) + suppressWarnings(count_IR(...)) +} + #' @rdname count #' @importFrom dplyr %>% select_if bind_rows summarise_if mutate group_vars select everything #' @export diff --git a/R/eucast.R b/R/eucast.R index 8709eed4..69a7cf8a 100755 --- a/R/eucast.R +++ b/R/eucast.R @@ -507,10 +507,10 @@ EUCAST_rules <- function(tbl, # overig edit_rsi(to = 'R', rows = which(tbl$genus %in% c('Leuconostoc', 'Pediococcus')), - cols = c(vanc, teic)) + cols = glycopeptides) edit_rsi(to = 'R', rows = which(tbl$genus == 'Lactobacillus'), - cols = c(vanc, teic)) + cols = glycopeptides) edit_rsi(to = 'R', rows = which(tbl$fullname %like% '^Clostridium (ramosum|innocuum)'), cols = vanc) diff --git a/R/freq.R b/R/freq.R index dccb4783..be8410bd 100755 --- a/R/freq.R +++ b/R/freq.R @@ -336,6 +336,13 @@ frequency_tbl <- function(x, header <- header %>% paste0(markdown_line, '\nLongest: ', x %>% base::nchar() %>% base::max(na.rm = TRUE)) } + if (NROW(x) > 0 & any(class(x) == "difftime")) { + header <- header %>% paste0('\n') + header <- header %>% paste(markdown_line, '\nUnits: ', attributes(x)$units) + x <- as.double(x) + # after this, the numeric header continues + } + if (NROW(x) > 0 & any(class(x) %in% c('double', 'integer', 'numeric', 'raw', 'single'))) { # right align number Tukey_five <- stats::fivenum(x, na.rm = TRUE) @@ -351,7 +358,7 @@ frequency_tbl <- function(x, outlier_length <- length(boxplot.stats(x)$out) header <- header %>% paste0(markdown_line, '\nOutliers: ', outlier_length) if (outlier_length > 0) { - header <- header %>% paste0(' (unique: ', boxplot.stats(x)$out %>% n_distinct(), ')') + header <- header %>% paste0(' (unique count: ', boxplot.stats(x)$out %>% n_distinct(), ')') } } if (NROW(x) > 0 & any(class(x) == "rsi")) { diff --git a/R/join_microorganisms.R b/R/join_microorganisms.R index 6e54d110..35081092 100755 --- a/R/join_microorganisms.R +++ b/R/join_microorganisms.R @@ -4,14 +4,15 @@ #' @rdname join #' @name join #' @aliases join inner_join -#' @param x existing table to join, also supports character vectors -#' @param by a variable to join by - could be a column name of \code{x} with values that exist in \code{microorganisms$mo} (like \code{by = "bacteria_id"}), or another column in \code{\link{microorganisms}} (but then it should be named, like \code{by = c("my_genus_species" = "fullname")}) +#' @param x existing table to join, or character vector +#' @param by a variable to join by - if left empty will search for a column with class \code{mo} (created with \code{\link{as.mo}}) or will be \code{"mo"} if that column name exists in \code{x}, could otherwise be a column name of \code{x} with values that exist in \code{microorganisms$mo} (like \code{by = "bacteria_id"}), or another column in \code{\link{microorganisms}} (but then it should be named, like \code{by = c("my_genus_species" = "fullname")}) #' @param suffix if there are non-joined duplicate variables in \code{x} and \code{y}, these suffixes will be added to the output to disambiguate them. Should be a character vector of length 2. #' @param ... other parameters to pass on to \code{dplyr::\link[dplyr]{join}}. -#' @details As opposed to the \code{\link[dplyr]{join}} functions of \code{dplyr}, characters vectors are supported and at default existing columns will get a suffix \code{"2"} and the newly joined columns will not get a suffix. See \code{\link[dplyr]{join}} for more information. +#' @details \strong{Note:} As opposed to the \code{\link[dplyr]{join}} functions of \code{dplyr}, characters vectors are supported and at default existing columns will get a suffix \code{"2"} and the newly joined columns will not get a suffix. See \code{\link[dplyr]{join}} for more information. #' @export #' @examples -#' left_join_microorganisms("STAAUR") +#' left_join_microorganisms(as.mo("K. pneumoniae")) +#' left_join_microorganisms("B_KLBSL_PNE") #' #' library(dplyr) #' septic_patients %>% left_join_microorganisms() @@ -19,130 +20,117 @@ #' df <- data.frame(date = seq(from = as.Date("2018-01-01"), #' to = as.Date("2018-01-07"), #' by = 1), -#' bacteria_id = c("STAAUR", "STAAUR", "STAAUR", "STAAUR", -#' "ESCCOL", "ESCCOL", "ESCCOL"), +#' bacteria = as.mo(c("S. aureus", "MRSA", "MSSA", "STAAUR", +#' "E. coli", "E. coli", "E. coli")), #' stringsAsFactors = FALSE) #' colnames(df) -#' df2 <- left_join_microorganisms(df, "bacteria_id") -#' colnames(df2) -inner_join_microorganisms <- function(x, by = 'mo', suffix = c("2", ""), ...) { - if (!any(class(x) %in% c("data.frame", "matrix"))) { - x <- data.frame(mo = as.character(x), stringsAsFactors = FALSE) - } - # no name set to `by` parameter - if (is.null(names(by))) { - joinby <- colnames(AMR::microorganisms)[1] - names(joinby) <- by - } else { - joinby <- by - } +#' df_joined <- left_join_microorganisms(df, "bacteria") +#' colnames(df_joined) +inner_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) { + checked <- joins_check_df(x, by) + x <- checked$x + by <- checked$by join <- suppressWarnings( - dplyr::inner_join(x = x, y = AMR::microorganisms, by = joinby, suffix = suffix, ...) + dplyr::inner_join(x = x, y = AMR::microorganisms, by = by, suffix = suffix, ...) ) if (nrow(join) > nrow(x)) { - warning('the newly joined tbl contains ', nrow(join) - nrow(x), ' rows more that its original') + warning('The newly joined tbl contains ', nrow(join) - nrow(x), ' rows more that its original.') } join } #' @rdname join #' @export -left_join_microorganisms <- function(x, by = 'mo', suffix = c("2", ""), ...) { - if (!any(class(x) %in% c("data.frame", "matrix"))) { - x <- data.frame(mo = as.character(x), stringsAsFactors = FALSE) - } - # no name set to `by` parameter - if (is.null(names(by))) { - joinby <- colnames(AMR::microorganisms)[1] - names(joinby) <- by - } else { - joinby <- by - } +left_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) { + checked <- joins_check_df(x, by) + x <- checked$x + by <- checked$by join <- suppressWarnings( - dplyr::left_join(x = x, y = AMR::microorganisms, by = joinby, suffix = suffix, ...) + dplyr::left_join(x = x, y = AMR::microorganisms, by = by, suffix = suffix, ...) ) if (nrow(join) > nrow(x)) { - warning('the newly joined tbl contains ', nrow(join) - nrow(x), ' rows more that its original') + warning('The newly joined tbl contains ', nrow(join) - nrow(x), ' rows more that its original.') } join } #' @rdname join #' @export -right_join_microorganisms <- function(x, by = 'mo', suffix = c("2", ""), ...) { - if (!any(class(x) %in% c("data.frame", "matrix"))) { - x <- data.frame(mo = as.character(x), stringsAsFactors = FALSE) - } - # no name set to `by` parameter - if (is.null(names(by))) { - joinby <- colnames(AMR::microorganisms)[1] - names(joinby) <- by - } else { - joinby <- by - } +right_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) { + checked <- joins_check_df(x, by) + x <- checked$x + by <- checked$by join <- suppressWarnings( - dplyr::right_join(x = x, y = AMR::microorganisms, by = joinby, suffix = suffix, ...) + dplyr::right_join(x = x, y = AMR::microorganisms, by = by, suffix = suffix, ...) ) if (nrow(join) > nrow(x)) { - warning('the newly joined tbl contains ', nrow(join) - nrow(x), ' rows more that its original') + warning('The newly joined tbl contains ', nrow(join) - nrow(x), ' rows more that its original.') } join } #' @rdname join #' @export -full_join_microorganisms <- function(x, by = 'mo', suffix = c("2", ""), ...) { - if (!any(class(x) %in% c("data.frame", "matrix"))) { - x <- data.frame(mo = as.character(x), stringsAsFactors = FALSE) - } - # no name set to `by` parameter - if (is.null(names(by))) { - joinby <- colnames(AMR::microorganisms)[1] - names(joinby) <- by - } else { - joinby <- by - } +full_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) { + checked <- joins_check_df(x, by) + x <- checked$x + by <- checked$by join <- suppressWarnings( - dplyr::full_join(x = x, y = AMR::microorganisms, by = joinby, suffix = suffix, ...) + dplyr::full_join(x = x, y = AMR::microorganisms, by = by, suffix = suffix, ...) ) if (nrow(join) > nrow(x)) { - warning('the newly joined tbl contains ', nrow(join) - nrow(x), ' rows more that its original') + warning('The newly joined tbl contains ', nrow(join) - nrow(x), ' rows more that its original.') } join } #' @rdname join #' @export -semi_join_microorganisms <- function(x, by = 'mo', ...) { - if (!any(class(x) %in% c("data.frame", "matrix"))) { - x <- data.frame(mo = as.character(x), stringsAsFactors = FALSE) - } - # no name set to `by` parameter - if (is.null(names(by))) { - joinby <- colnames(AMR::microorganisms)[1] - names(joinby) <- by - } else { - joinby <- by - } +semi_join_microorganisms <- function(x, by = NULL, ...) { + checked <- joins_check_df(x, by) + x <- checked$x + by <- checked$by suppressWarnings( - dplyr::semi_join(x = x, y = AMR::microorganisms, by = joinby, ...) + dplyr::semi_join(x = x, y = AMR::microorganisms, by = by, ...) ) } #' @rdname join #' @export -anti_join_microorganisms <- function(x, by = 'mo', ...) { +anti_join_microorganisms <- function(x, by = NULL, ...) { + checked <- joins_check_df(x, by) + x <- checked$x + by <- checked$by + suppressWarnings( + dplyr::anti_join(x = x, y = AMR::microorganisms, by = by, ...) + ) +} + +joins_check_df <- function(x, by) { if (!any(class(x) %in% c("data.frame", "matrix"))) { x <- data.frame(mo = as.character(x), stringsAsFactors = FALSE) + if (is.null(by)) { + by <- "mo" + } + } + if (is.null(by)) { + # search for column with class `mo` and return first one found + by <- colnames(x)[lapply(x, is.mo) == TRUE][1] + if (is.na(by)) { + if ("mo" %in% colnames(x)) { + by <- "mo" + } else { + stop("Cannot join - no column found with name or class `mo`.", call. = FALSE) + } + } + message('Joining, by = "', by, '"') # message same as dplyr::join functions } - # no name set to `by` parameter if (is.null(names(by))) { joinby <- colnames(AMR::microorganisms)[1] names(joinby) <- by } else { joinby <- by } - suppressWarnings( - dplyr::anti_join(x = x, y = AMR::microorganisms, by = joinby, ...) - ) + list(x = x, + by = joinby) } diff --git a/R/mo.R b/R/mo.R index 84303048..270fa0bf 100644 --- a/R/mo.R +++ b/R/mo.R @@ -272,12 +272,14 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = # translate known trivial abbreviations to genus + species ---- if (!is.na(x_trimmed[i])) { if (toupper(x_trimmed[i]) == 'MRSA' + | toupper(x_trimmed[i]) == 'MSSA' | toupper(x_trimmed[i]) == 'VISA' | toupper(x_trimmed[i]) == 'VRSA') { x[i] <- MOs[mo == 'B_STPHY_AUR', ..property][[1]][1L] next } - if (toupper(x_trimmed[i]) == 'MRSE') { + if (toupper(x_trimmed[i]) == 'MRSE' + | toupper(x_trimmed[i]) == 'MSSE') { x[i] <- MOs[mo == 'B_STPHY_EPI', ..property][[1]][1L] next } @@ -290,6 +292,12 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = x[i] <- MOs[mo == 'B_PDMNS_AER', ..property][[1]][1L] next } + if (toupper(x_trimmed[i]) == 'CRS' + | toupper(x_trimmed[i]) == 'CRSM') { + # co-trim resistant S. maltophilia + x[i] <- MOs[mo == 'B_STNTR_MAL', ..property][[1]][1L] + next + } if (toupper(x_trimmed[i]) %in% c('PISP', 'PRSP', 'VISP', 'VRSP')) { # peni I, peni R, vanco I, vanco R: S. pneumoniae x[i] <- MOs[mo == 'B_STRPTC_PNE', ..property][[1]][1L] @@ -578,7 +586,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = failures <- failures[!failures %in% c(NA, NULL, NaN)] if (length(failures) > 0) { - warning("These ", length(failures) , " values could not be coerced (try again with allow_uncertain = TRUE):\n", + warning("These ", length(failures) , " values could not be coerced (try again with allow_uncertain = TRUE): ", paste('"', unique(failures), '"', sep = "", collapse = ', '), ".", call. = FALSE) @@ -658,8 +666,6 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = if (property == "mo") { class(x) <- "mo" - attr(x, 'package') <- 'AMR' - attr(x, 'ITIS') <- TRUE } else if (property == "tsn") { x <- as.integer(x) } @@ -667,7 +673,6 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = x } -#' @importFrom dplyr case_when renamed_note <- function(name_old, name_new, ref_old = "", ref_new = "") { if (!is.na(ref_old)) { ref_old <- paste0(" (", ref_old, ")") @@ -687,7 +692,10 @@ renamed_note <- function(name_old, name_new, ref_old = "", ref_new = "") { #' @noRd print.mo <- function(x, ...) { cat("Class 'mo'\n") - print.default(as.character(x), quote = FALSE) + x_names <- names(x) + x <- as.character(x) + names(x) <- x_names + print.default(x, quote = FALSE) } #' @exportMethod as.data.frame.mo diff --git a/R/n_rsi.R b/R/n_rsi.R deleted file mode 100644 index eeffcdfa..00000000 --- a/R/n_rsi.R +++ /dev/null @@ -1,40 +0,0 @@ -# ==================================================================== # -# TITLE # -# Antimicrobial Resistance (AMR) Analysis # -# # -# AUTHORS # -# Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl) # -# # -# LICENCE # -# This program is free software; you can redistribute it and/or modify # -# it under the terms of the GNU General Public License version 2.0, # -# as published by the Free Software Foundation. # -# # -# This program is distributed in the hope that it will be useful, # -# but WITHOUT ANY WARRANTY; without even the implied warranty of # -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # -# GNU General Public License for more details. # -# ==================================================================== # - -#' Count cases with antimicrobial results -#' -#' This counts all cases where antimicrobial interpretations are available. The way it can be used is equal to \code{\link{n_distinct}}. Its function is equal to \code{count_S(...) + count_IR(...)}. -#' @inheritParams portion -#' @export -#' @seealso \code{\link[AMR]{count}_*} to count resistant and susceptibile isolates per interpretation type.\cr -#' \code{\link{portion}_*} to calculate microbial resistance and susceptibility. -#' @examples -#' library(dplyr) -#' -#' septic_patients %>% -#' group_by(hospital_id) %>% -#' summarise(cipro_p = portion_S(cipr, as_percent = TRUE), -#' cipro_n = n_rsi(cipr), -#' genta_p = portion_S(gent, as_percent = TRUE), -#' genta_n = n_rsi(gent), -#' combination_p = portion_S(cipr, gent, as_percent = TRUE), -#' combination_n = n_rsi(cipr, gent)) -n_rsi <- function(...) { - # only print warnings once, if needed - count_S(...) + suppressWarnings(count_IR(...)) -} diff --git a/R/portion.R b/R/portion.R index 7bcc229e..05f80fc7 100755 --- a/R/portion.R +++ b/R/portion.R @@ -22,7 +22,7 @@ #' #' \code{portion_R} and \code{portion_IR} can be used to calculate resistance, \code{portion_S} and \code{portion_SI} can be used to calculate susceptibility.\cr #' @param ... one or more vectors (or columns) with antibiotic interpretations. They will be transformed internally with \code{\link{as.rsi}} if needed. Use multiple columns to calculate (the lack of) co-resistance: the probability where one of two drugs have a resistant or susceptible result. See Examples. -#' @param minimum minimal amount of available isolates. Any number lower than \code{minimum} will return \code{NA}. The default number of \code{30} isolates is advised by the CLSI as best practice, see Source. +#' @param minimum minimal amount of available isolates. Any number lower than \code{minimum} will return \code{NA} with a warning. The default number of \code{30} isolates is advised by the Clinical and Laboratory Standards Institute (CLSI) as best practice, see Source. #' @param as_percent logical to indicate whether the output must be returned as a hundred fold with \% sign (a character). A value of \code{0.123456} will then be returned as \code{"12.3\%"}. #' @param data a \code{data.frame} containing columns with class \code{rsi} (see \code{\link{as.rsi}}) #' @param translate_ab a column name of the \code{\link{antibiotics}} data set to translate the antibiotic abbreviations to, using \code{\link{abname}}. This can be set with \code{\link{getOption}("get_antibiotic_names")}. @@ -50,8 +50,7 @@ #' @source \strong{M39 Analysis and Presentation of Cumulative Antimicrobial Susceptibility Test Data, 4th Edition}, 2014, \emph{Clinical and Laboratory Standards Institute (CLSI)}. \url{https://clsi.org/standards/products/microbiology/documents/m39/}. #' #' Wickham H. \strong{Tidy Data.} The Journal of Statistical Software, vol. 59, 2014. \url{http://vita.had.co.nz/papers/tidy-data.html} -#' @seealso \code{\link[AMR]{count}_*} to count resistant and susceptibile isolates.\cr -#' \code{\link{n_rsi}} to count all cases where antimicrobial results are available. +#' @seealso \code{\link[AMR]{count}_*} to count resistant and susceptibile isolates. #' @keywords resistance susceptibility rsi_df rsi antibiotics isolate isolates #' @return Double or, when \code{as_percent = TRUE}, a character. #' @rdname portion @@ -92,24 +91,24 @@ #' #' # Calculate co-resistance between amoxicillin/clav acid and gentamicin, #' # so we can see that combination therapy does a lot more than mono therapy: -#' septic_patients %>% portion_S(amcl) # S = 67.3% -#' septic_patients %>% n_rsi(amcl) # n = 1570 +#' septic_patients %>% portion_S(amcl) # S = 67.1% +#' septic_patients %>% count_all(amcl) # n = 1576 #' #' septic_patients %>% portion_S(gent) # S = 74.0% -#' septic_patients %>% n_rsi(gent) # n = 1842 +#' septic_patients %>% count_all(gent) # n = 1855 #' -#' septic_patients %>% portion_S(amcl, gent) # S = 92.1% -#' septic_patients %>% n_rsi(amcl, gent) # n = 1504 +#' septic_patients %>% portion_S(amcl, gent) # S = 92.0% +#' septic_patients %>% count_all(amcl, gent) # n = 1517 #' #' #' septic_patients %>% #' group_by(hospital_id) %>% #' summarise(cipro_p = portion_S(cipr, as_percent = TRUE), -#' cipro_n = n_rsi(cipr), +#' cipro_n = count_all(cipr), #' genta_p = portion_S(gent, as_percent = TRUE), -#' genta_n = n_rsi(gent), +#' genta_n = count_all(gent), #' combination_p = portion_S(cipr, gent, as_percent = TRUE), -#' combination_n = n_rsi(cipr, gent)) +#' combination_n = count_all(cipr, gent)) #' #' # Get portions S/I/R immediately of all rsi columns #' septic_patients %>% @@ -130,7 +129,7 @@ #' filter(first_isolate == TRUE, #' genus == "Helicobacter") %>% #' summarise(p = portion_S(amox, metr), # amoxicillin with metronidazole -#' n = n_rsi(amox, metr)) +#' n = count_all(amox, metr)) #' } portion_R <- function(..., minimum = 30, @@ -273,6 +272,8 @@ rsi <- function(ab1, as_percent = FALSE, ...) { + .Deprecated(new = paste0("portion_", interpretation)) + if (all(is.null(ab2))) { df <- tibble(ab1 = ab1) } else { @@ -280,19 +281,16 @@ rsi <- function(ab1, ab2 = ab2) } + if (!interpretation %in% c("S", "SI", "IS", "I", "RI", "IR", "R")) { + stop("invalid interpretation") + } + result <- case_when( interpretation == "S" ~ portion_S(df, minimum = minimum, as_percent = FALSE), interpretation %in% c("SI", "IS") ~ portion_SI(df, minimum = minimum, as_percent = FALSE), interpretation == "I" ~ portion_I(df, minimum = minimum, as_percent = FALSE), interpretation %in% c("RI", "IR") ~ portion_IR(df, minimum = minimum, as_percent = FALSE), - interpretation == "R" ~ portion_R(df, minimum = minimum, as_percent = FALSE), - TRUE ~ -1 - ) - if (result == -1) { - stop("invalid interpretation") - } - - .Deprecated(new = paste0("portion_", interpretation)) + interpretation == "R" ~ portion_R(df, minimum = minimum, as_percent = FALSE)) if (as_percent == TRUE) { percent(result, force_zero = TRUE) diff --git a/R/rsi_calc.R b/R/rsi_calc.R index a89a31ba..83addfe9 100644 --- a/R/rsi_calc.R +++ b/R/rsi_calc.R @@ -107,12 +107,15 @@ rsi_calc <- function(..., total <- length(x) - sum(is.na(x)) if (total < minimum) { - return(NA) + warning("Introducing NA: only ", total, " results available (minimum set to ", minimum, ").", call. = FALSE) + result <- NA + } else { + result <- found / total } if (as_percent == TRUE) { - percent(found / total, force_zero = TRUE) + percent(result, force_zero = TRUE) } else { - found / total + result } } diff --git a/data/septic_patients.rda b/data/septic_patients.rda index 8879b548..c39cdaea 100755 Binary files a/data/septic_patients.rda and b/data/septic_patients.rda differ diff --git a/man/count.Rd b/man/count.Rd index 38507440..89091806 100644 --- a/man/count.Rd +++ b/man/count.Rd @@ -7,6 +7,8 @@ \alias{count_I} \alias{count_SI} \alias{count_S} +\alias{count_all} +\alias{n_rsi} \alias{count_df} \title{Count isolates} \source{ @@ -23,11 +25,15 @@ count_SI(...) count_S(...) +count_all(...) + +n_rsi(...) + count_df(data, translate_ab = getOption("get_antibiotic_names", "official")) } \arguments{ -\item{...}{one or more vectors (or columns) with antibiotic interpretations. They will be transformed internally with \code{\link{as.rsi}} if needed. Use multiple columns to calculate (the lack of) co-resistance: the probability where one of two drugs have a resistant or susceptible result. See Examples.} +\item{...}{one or more vectors (or columns) with antibiotic interpretations. They will be transformed internally with \code{\link{as.rsi}} if needed.} \item{data}{a \code{data.frame} containing columns with class \code{rsi} (see \code{\link{as.rsi}})} @@ -42,10 +48,10 @@ These functions can be used to count resistant/susceptible microbial isolates. A \code{count_R} and \code{count_IR} can be used to count resistant isolates, \code{count_S} and \code{count_SI} can be used to count susceptible isolates.\cr } \details{ -\strong{Remember that you should filter your table to let it contain only first isolates!} Use \code{\link{first_isolate}} to determine them in your data set. - These functions are meant to count isolates. Use the \code{\link{portion}_*} functions to calculate microbial resistance. +\code{n_rsi} is an alias of \code{count_all}. They can be used to count all available isolates, i.e. where all input antibiotics have an available result (S, I or R). Their use is equal to \code{\link{n_distinct}}. Their function is equal to \code{count_S(...) + count_IR(...)}. + \code{count_df} takes any variable from \code{data} that has an \code{"rsi"} class (created with \code{\link{as.rsi}}) and counts the amounts of R, I and S. The resulting \emph{tidy data} (see Source) \code{data.frame} will have three rows (S/I/R) and a column for each variable with class \code{"rsi"}. } \examples{ @@ -60,6 +66,10 @@ count_IR(septic_patients$amox) count_S(septic_patients$amox) count_SI(septic_patients$amox) +# Count all available isolates +count_all(septic_patients$amox) +n_rsi(septic_patients$amox) + # Since n_rsi counts available isolates, you can # calculate back to count e.g. non-susceptible isolates. # This results in the same: @@ -69,24 +79,25 @@ portion_IR(septic_patients$amox) * n_rsi(septic_patients$amox) library(dplyr) septic_patients \%>\% group_by(hospital_id) \%>\% - summarise(R = count_R(cipr), - I = count_I(cipr), - S = count_S(cipr), - n = n_rsi(cipr), # the actual total; sum of all three - total = n()) # NOT the amount of tested isolates! + summarise(R = count_R(cipr), + I = count_I(cipr), + S = count_S(cipr), + n1 = count_all(cipr), # the actual total; sum of all three + n2 = n_rsi(cipr), # same - analogous to n_distinct + total = n()) # NOT the amount of tested isolates! # Count co-resistance between amoxicillin/clav acid and gentamicin, # so we can see that combination therapy does a lot more than mono therapy. # Please mind that `portion_S` calculates percentages right away instead. -count_S(septic_patients$amcl) # S = 1056 (67.3\%) -n_rsi(septic_patients$amcl) # n = 1570 +count_S(septic_patients$amcl) # S = 1057 (67.1\%) +count_all(septic_patients$amcl) # n = 1576 -count_S(septic_patients$gent) # S = 1363 (74.0\%) -n_rsi(septic_patients$gent) # n = 1842 +count_S(septic_patients$gent) # S = 1372 (74.0\%) +count_all(septic_patients$gent) # n = 1855 with(septic_patients, - count_S(amcl, gent)) # S = 1385 (92.1\%) -with(septic_patients, # n = 1504 + count_S(amcl, gent)) # S = 1396 (92.0\%) +with(septic_patients, # n = 1517 n_rsi(amcl, gent)) # Get portions S/I/R immediately of all rsi columns @@ -102,8 +113,7 @@ septic_patients \%>\% } \seealso{ -\code{\link{portion}_*} to calculate microbial resistance and susceptibility.\cr -\code{\link{n_rsi}} to count all cases where antimicrobial results are available. +\code{\link{portion}_*} to calculate microbial resistance and susceptibility. } \keyword{antibiotics} \keyword{isolate} diff --git a/man/ggplot_rsi.Rd b/man/ggplot_rsi.Rd index d5d9c286..9dc03650 100644 --- a/man/ggplot_rsi.Rd +++ b/man/ggplot_rsi.Rd @@ -68,7 +68,7 @@ At default, the names of antibiotics will be shown on the plots using \code{\lin \code{scale_y_percent} transforms the y axis to a 0 to 100\% range using \code{\link[ggplot2]{scale_continuous}}. -\code{scale_rsi_colours} sets colours to the bars: green for S, yellow for I and red for R, using \code{\link[ggplot2]{scale_colour_brewer}}. +\code{scale_rsi_colours} sets colours to the bars: green for S, yellow for I and red for R, using \code{\link[ggplot2]{scale_brewer}}. \code{theme_rsi} is a \code{ggplot \link[ggplot2]{theme}} with minimal distraction. diff --git a/man/join.Rd b/man/join.Rd index 32eedc22..b8a1979c 100755 --- a/man/join.Rd +++ b/man/join.Rd @@ -11,22 +11,22 @@ \alias{anti_join_microorganisms} \title{Join a table with \code{microorganisms}} \usage{ -inner_join_microorganisms(x, by = "mo", suffix = c("2", ""), ...) +inner_join_microorganisms(x, by = NULL, suffix = c("2", ""), ...) -left_join_microorganisms(x, by = "mo", suffix = c("2", ""), ...) +left_join_microorganisms(x, by = NULL, suffix = c("2", ""), ...) -right_join_microorganisms(x, by = "mo", suffix = c("2", ""), ...) +right_join_microorganisms(x, by = NULL, suffix = c("2", ""), ...) -full_join_microorganisms(x, by = "mo", suffix = c("2", ""), ...) +full_join_microorganisms(x, by = NULL, suffix = c("2", ""), ...) -semi_join_microorganisms(x, by = "mo", ...) +semi_join_microorganisms(x, by = NULL, ...) -anti_join_microorganisms(x, by = "mo", ...) +anti_join_microorganisms(x, by = NULL, ...) } \arguments{ -\item{x}{existing table to join, also supports character vectors} +\item{x}{existing table to join, or character vector} -\item{by}{a variable to join by - could be a column name of \code{x} with values that exist in \code{microorganisms$mo} (like \code{by = "bacteria_id"}), or another column in \code{\link{microorganisms}} (but then it should be named, like \code{by = c("my_genus_species" = "fullname")})} +\item{by}{a variable to join by - if left empty will search for a column with class \code{mo} (created with \code{\link{as.mo}}) or will be \code{"mo"} if that column name exists in \code{x}, could otherwise be a column name of \code{x} with values that exist in \code{microorganisms$mo} (like \code{by = "bacteria_id"}), or another column in \code{\link{microorganisms}} (but then it should be named, like \code{by = c("my_genus_species" = "fullname")})} \item{suffix}{if there are non-joined duplicate variables in \code{x} and \code{y}, these suffixes will be added to the output to disambiguate them. Should be a character vector of length 2.} @@ -36,10 +36,11 @@ anti_join_microorganisms(x, by = "mo", ...) Join the dataset \code{\link{microorganisms}} easily to an existing table or character vector. } \details{ -As opposed to the \code{\link[dplyr]{join}} functions of \code{dplyr}, characters vectors are supported and at default existing columns will get a suffix \code{"2"} and the newly joined columns will not get a suffix. See \code{\link[dplyr]{join}} for more information. +\strong{Note:} As opposed to the \code{\link[dplyr]{join}} functions of \code{dplyr}, characters vectors are supported and at default existing columns will get a suffix \code{"2"} and the newly joined columns will not get a suffix. See \code{\link[dplyr]{join}} for more information. } \examples{ -left_join_microorganisms("STAAUR") +left_join_microorganisms(as.mo("K. pneumoniae")) +left_join_microorganisms("B_KLBSL_PNE") library(dplyr) septic_patients \%>\% left_join_microorganisms() @@ -47,10 +48,10 @@ septic_patients \%>\% left_join_microorganisms() df <- data.frame(date = seq(from = as.Date("2018-01-01"), to = as.Date("2018-01-07"), by = 1), - bacteria_id = c("STAAUR", "STAAUR", "STAAUR", "STAAUR", - "ESCCOL", "ESCCOL", "ESCCOL"), + bacteria = as.mo(c("S. aureus", "MRSA", "MSSA", "STAAUR", + "E. coli", "E. coli", "E. coli")), stringsAsFactors = FALSE) colnames(df) -df2 <- left_join_microorganisms(df, "bacteria_id") -colnames(df2) +df_joined <- left_join_microorganisms(df, "bacteria") +colnames(df_joined) } diff --git a/man/n_rsi.Rd b/man/n_rsi.Rd deleted file mode 100644 index ab5fc099..00000000 --- a/man/n_rsi.Rd +++ /dev/null @@ -1,30 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/n_rsi.R -\name{n_rsi} -\alias{n_rsi} -\title{Count cases with antimicrobial results} -\usage{ -n_rsi(...) -} -\arguments{ -\item{...}{one or more vectors (or columns) with antibiotic interpretations. They will be transformed internally with \code{\link{as.rsi}} if needed. Use multiple columns to calculate (the lack of) co-resistance: the probability where one of two drugs have a resistant or susceptible result. See Examples.} -} -\description{ -This counts all cases where antimicrobial interpretations are available. The way it can be used is equal to \code{\link{n_distinct}}. Its function is equal to \code{count_S(...) + count_IR(...)}. -} -\examples{ -library(dplyr) - -septic_patients \%>\% - group_by(hospital_id) \%>\% - summarise(cipro_p = portion_S(cipr, as_percent = TRUE), - cipro_n = n_rsi(cipr), - genta_p = portion_S(gent, as_percent = TRUE), - genta_n = n_rsi(gent), - combination_p = portion_S(cipr, gent, as_percent = TRUE), - combination_n = n_rsi(cipr, gent)) -} -\seealso{ -\code{\link[AMR]{count}_*} to count resistant and susceptibile isolates per interpretation type.\cr -\code{\link{portion}_*} to calculate microbial resistance and susceptibility. -} diff --git a/man/portion.Rd b/man/portion.Rd index a2cb59cd..a5420e64 100644 --- a/man/portion.Rd +++ b/man/portion.Rd @@ -31,7 +31,7 @@ portion_df(data, translate_ab = getOption("get_antibiotic_names", \arguments{ \item{...}{one or more vectors (or columns) with antibiotic interpretations. They will be transformed internally with \code{\link{as.rsi}} if needed. Use multiple columns to calculate (the lack of) co-resistance: the probability where one of two drugs have a resistant or susceptible result. See Examples.} -\item{minimum}{minimal amount of available isolates. Any number lower than \code{minimum} will return \code{NA}. The default number of \code{30} isolates is advised by the CLSI as best practice, see Source.} +\item{minimum}{minimal amount of available isolates. Any number lower than \code{minimum} will return \code{NA} with a warning. The default number of \code{30} isolates is advised by the Clinical and Laboratory Standards Institute (CLSI) as best practice, see Source.} \item{as_percent}{logical to indicate whether the output must be returned as a hundred fold with \% sign (a character). A value of \code{0.123456} will then be returned as \code{"12.3\%"}.} @@ -104,24 +104,24 @@ septic_patients \%>\% # Calculate co-resistance between amoxicillin/clav acid and gentamicin, # so we can see that combination therapy does a lot more than mono therapy: -septic_patients \%>\% portion_S(amcl) # S = 67.3\% -septic_patients \%>\% n_rsi(amcl) # n = 1570 +septic_patients \%>\% portion_S(amcl) # S = 67.1\% +septic_patients \%>\% count_all(amcl) # n = 1576 septic_patients \%>\% portion_S(gent) # S = 74.0\% -septic_patients \%>\% n_rsi(gent) # n = 1842 +septic_patients \%>\% count_all(gent) # n = 1855 -septic_patients \%>\% portion_S(amcl, gent) # S = 92.1\% -septic_patients \%>\% n_rsi(amcl, gent) # n = 1504 +septic_patients \%>\% portion_S(amcl, gent) # S = 92.0\% +septic_patients \%>\% count_all(amcl, gent) # n = 1517 septic_patients \%>\% group_by(hospital_id) \%>\% summarise(cipro_p = portion_S(cipr, as_percent = TRUE), - cipro_n = n_rsi(cipr), + cipro_n = count_all(cipr), genta_p = portion_S(gent, as_percent = TRUE), - genta_n = n_rsi(gent), + genta_n = count_all(gent), combination_p = portion_S(cipr, gent, as_percent = TRUE), - combination_n = n_rsi(cipr, gent)) + combination_n = count_all(cipr, gent)) # Get portions S/I/R immediately of all rsi columns septic_patients \%>\% @@ -142,12 +142,11 @@ my_table \%>\% filter(first_isolate == TRUE, genus == "Helicobacter") \%>\% summarise(p = portion_S(amox, metr), # amoxicillin with metronidazole - n = n_rsi(amox, metr)) + n = count_all(amox, metr)) } } \seealso{ -\code{\link[AMR]{count}_*} to count resistant and susceptibile isolates.\cr -\code{\link{n_rsi}} to count all cases where antimicrobial results are available. +\code{\link[AMR]{count}_*} to count resistant and susceptibile isolates. } \keyword{antibiotics} \keyword{isolate} diff --git a/man/rsi.Rd b/man/rsi.Rd index d925892b..404842a3 100644 --- a/man/rsi.Rd +++ b/man/rsi.Rd @@ -12,7 +12,7 @@ rsi(ab1, ab2 = NULL, interpretation = "IR", minimum = 30, \item{interpretation}{antimicrobial interpretation to check for} -\item{minimum}{minimal amount of available isolates. Any number lower than \code{minimum} will return \code{NA}. The default number of \code{30} isolates is advised by the CLSI as best practice, see Source.} +\item{minimum}{minimal amount of available isolates. Any number lower than \code{minimum} will return \code{NA} with a warning. The default number of \code{30} isolates is advised by the Clinical and Laboratory Standards Institute (CLSI) as best practice, see Source.} \item{as_percent}{logical to indicate whether the output must be returned as a hundred fold with \% sign (a character). A value of \code{0.123456} will then be returned as \code{"12.3\%"}.} diff --git a/tests/testthat/test-atc.R b/tests/testthat/test-atc.R index 342aed7f..fe6f3de4 100755 --- a/tests/testthat/test-atc.R +++ b/tests/testthat/test-atc.R @@ -10,8 +10,8 @@ test_that("atc_property works", { expect_equal(atc_property("J01CA04", property = "DDD"), atc_ddd("J01CA04")) - expect_identical(atc_property("J01CA04", property = "Groups"), - atc_groups("J01CA04")) + # expect_identical(atc_property("J01CA04", property = "Groups"), + # atc_groups("J01CA04")) expect_warning(atc_property("ABCDEFG", property = "DDD")) diff --git a/tests/testthat/test-count.R b/tests/testthat/test-count.R index 099baa15..cb08de25 100644 --- a/tests/testthat/test-count.R +++ b/tests/testthat/test-count.R @@ -10,8 +10,13 @@ test_that("counts work", { expect_equal(count_S(septic_patients$amox) + count_I(septic_patients$amox), count_SI(septic_patients$amox)) + library(dplyr) expect_equal(septic_patients %>% count_S(amcl), 1057) expect_equal(septic_patients %>% count_S(amcl, gent), 1396) + expect_equal(septic_patients %>% count_all(amcl, gent), 1517) + expect_identical(septic_patients %>% count_all(amcl, gent), + septic_patients %>% count_S(amcl, gent) + + septic_patients %>% count_IR(amcl, gent)) # count of cases expect_equal(septic_patients %>% diff --git a/tests/testthat/test-freq.R b/tests/testthat/test-freq.R index 4940fb54..5393cf97 100755 --- a/tests/testthat/test-freq.R +++ b/tests/testthat/test-freq.R @@ -43,6 +43,11 @@ test_that("frequency table works", { # list expect_output(print(freq(list(age = septic_patients$age)))) expect_output(print(freq(list(age = septic_patients$age, gender = septic_patients$gender)))) + # difftime + expect_output(suppressWarnings(print( + freq(difftime(Sys.time(), + Sys.time() - runif(5, min = 0, max = 60 * 60 * 24), + units = "hours"))))) library(dplyr) expect_output(septic_patients %>% select(1:2) %>% freq() %>% print()) @@ -119,7 +124,7 @@ test_that("frequency table works", { )) expect_output(print( diff(freq(septic_patients$age), - freq(septic_patients$age)) # same + freq(septic_patients$age)) # "No differences found." )) expect_error(print( diff(freq(septic_patients$amcl), diff --git a/tests/testthat/test-portion.R b/tests/testthat/test-portion.R index a86c4640..87bdaa82 100755 --- a/tests/testthat/test-portion.R +++ b/tests/testthat/test-portion.R @@ -68,11 +68,11 @@ test_that("portions works", { expect_error(portion_S("test", as_percent = "test")) # check too low amount of isolates - expect_identical(portion_R(septic_patients$amox, minimum = nrow(septic_patients) + 1), + expect_identical(suppressWarnings(portion_R(septic_patients$amox, minimum = nrow(septic_patients) + 1)), NA) - expect_identical(portion_I(septic_patients$amox, minimum = nrow(septic_patients) + 1), + expect_identical(suppressWarnings(portion_I(septic_patients$amox, minimum = nrow(septic_patients) + 1)), NA) - expect_identical(portion_S(septic_patients$amox, minimum = nrow(septic_patients) + 1), + expect_identical(suppressWarnings(portion_S(septic_patients$amox, minimum = nrow(septic_patients) + 1)), NA) # warning for speed loss