diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index afecc0cc9..6a9f0c00b 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -54,7 +54,9 @@ before_script: - echo 'LANG="en_US.utf8"' >> .Renviron - echo 'LANGUAGE="en_US.utf8"' > ~/.Renviron -R-release-test-only: +# ---- TEST + +R-release: stage: test when: always allow_failure: false @@ -68,6 +70,23 @@ R-release-test-only: paths: - installed_deps/ +R-devel: + stage: test + when: always + image: rocker/r-devel + allow_failure: false + script: + - Rscriptdevel -e 'sessionInfo()' + # install missing and outdated packages + - Rscriptdevel -e 'source(".gitlab-ci.R"); gl_update_pkg_all(repos = "https://cran.rstudio.com", quiet = TRUE, install_pkgdown = TRUE, install_lintr = TRUE)' + - Rscriptdevel -e 'devtools::test(stop_on_failure = FALSE)' + cache: + key: devel + paths: + - installed_deps/ + +# ---- CHECK + R-release: stage: check when: on_success @@ -120,6 +139,8 @@ R-devel: key: devel paths: - installed_deps/ + +# ---- OTHER lintr: stage: lint diff --git a/DESCRIPTION b/DESCRIPTION index a6f94aa75..f17d41563 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR -Version: 1.1.0.9003 -Date: 2020-05-01 +Version: 1.1.0.9004 +Date: 2020-05-16 Title: Antimicrobial Resistance Analysis Authors@R: c( person(role = c("aut", "cre"), @@ -36,30 +36,24 @@ Description: Functions to simplify the analysis and prediction of Antimicrobial Standards Institute (2014) . Depends: R (>= 3.1.0) +Enhances: + ggplot2 Imports: backports, cleaner, - crayon (>= 1.3.0), - data.table (>= 1.9.0), - dplyr (>= 0.7.0), - ggplot2, - knitr (>= 1.0.0), - microbenchmark, pillar, - R6, - rlang (>= 0.3.1), tidyr (>= 1.0.0), - vctrs (>= 0.2.4) + vctrs Suggests: - covr (>= 3.0.1), - curl, - readxl, + covr, + dplyr, + knitr, + microbenchmark, rmarkdown, - rstudioapi, - rvest (>= 0.3.2), - testthat (>= 1.0.2), - xml2 (>= 1.0.0) -VignetteBuilder: knitr + rvest, + testthat, + utils +VignetteBuilder: knitr,rmarkdown URL: https://msberends.gitlab.io/AMR, https://gitlab.com/msberends/AMR BugReports: https://gitlab.com/msberends/AMR/issues License: GPL-2 | file LICENSE diff --git a/NAMESPACE b/NAMESPACE index 7d92f0857..b6511af91 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -66,11 +66,11 @@ S3method(skewness,matrix) S3method(summary,mic) S3method(summary,mo) S3method(summary,rsi) -S3method(vec_cast,character.ab) -S3method(vec_cast,character.mo) S3method(vec_cast.ab,ab) S3method(vec_cast.ab,character) S3method(vec_cast.ab,default) +S3method(vec_cast.character,ab) +S3method(vec_cast.character,mo) S3method(vec_cast.mo,character) S3method(vec_cast.mo,default) S3method(vec_cast.mo,mo) @@ -201,7 +201,6 @@ export(mo_url) export(mo_year) export(mrgn) export(n_rsi) -export(p.symbol) export(p_symbol) export(pca) export(portion_I) @@ -297,6 +296,8 @@ exportMethods(summary.mo) exportMethods(summary.rsi) exportMethods(vec_cast.character.ab) exportMethods(vec_cast.character.mo) +exportMethods(vec_ptype2.character.ab) +exportMethods(vec_ptype2.character.mo) exportMethods(vec_ptype_abbr.ab) exportMethods(vec_ptype_abbr.disk) exportMethods(vec_ptype_abbr.mic) @@ -307,69 +308,8 @@ exportMethods(vec_ptype_full.disk) exportMethods(vec_ptype_full.mic) exportMethods(vec_ptype_full.mo) exportMethods(vec_ptype_full.rsi) -importFrom(R6,R6Class) importFrom(cleaner,freq) importFrom(cleaner,freq.default) -importFrom(cleaner,percentage) -importFrom(cleaner,top_freq) -importFrom(crayon,bgGreen) -importFrom(crayon,bgRed) -importFrom(crayon,bgYellow) -importFrom(crayon,black) -importFrom(crayon,blue) -importFrom(crayon,bold) -importFrom(crayon,green) -importFrom(crayon,italic) -importFrom(crayon,magenta) -importFrom(crayon,make_style) -importFrom(crayon,red) -importFrom(crayon,silver) -importFrom(crayon,strip_style) -importFrom(crayon,underline) -importFrom(crayon,white) -importFrom(crayon,yellow) -importFrom(data.table,as.data.table) -importFrom(data.table,data.table) -importFrom(data.table,setkey) -importFrom(dplyr,"%>%") -importFrom(dplyr,all_vars) -importFrom(dplyr,any_vars) -importFrom(dplyr,arrange) -importFrom(dplyr,arrange_at) -importFrom(dplyr,between) -importFrom(dplyr,bind_rows) -importFrom(dplyr,case_when) -importFrom(dplyr,desc) -importFrom(dplyr,distinct) -importFrom(dplyr,everything) -importFrom(dplyr,filter) -importFrom(dplyr,filter_all) -importFrom(dplyr,filter_at) -importFrom(dplyr,funs) -importFrom(dplyr,group_by) -importFrom(dplyr,group_by_at) -importFrom(dplyr,group_vars) -importFrom(dplyr,if_else) -importFrom(dplyr,lag) -importFrom(dplyr,left_join) -importFrom(dplyr,mutate) -importFrom(dplyr,mutate_all) -importFrom(dplyr,mutate_at) -importFrom(dplyr,n) -importFrom(dplyr,n_distinct) -importFrom(dplyr,n_groups) -importFrom(dplyr,pull) -importFrom(dplyr,rename) -importFrom(dplyr,row_number) -importFrom(dplyr,select) -importFrom(dplyr,select_if) -importFrom(dplyr,slice) -importFrom(dplyr,summarise) -importFrom(dplyr,summarise_if) -importFrom(dplyr,tibble) -importFrom(dplyr,transmute) -importFrom(dplyr,ungroup) -importFrom(dplyr,vars) importFrom(graphics,arrows) importFrom(graphics,axis) importFrom(graphics,barplot) @@ -377,12 +317,7 @@ importFrom(graphics,par) importFrom(graphics,plot) importFrom(graphics,points) importFrom(graphics,text) -importFrom(knitr,kable) -importFrom(microbenchmark,microbenchmark) importFrom(pillar,pillar_shaft) -importFrom(rlang,as_label) -importFrom(rlang,enquos) -importFrom(rlang,eval_tidy) importFrom(stats,complete.cases) importFrom(stats,glm) importFrom(stats,lm) @@ -393,10 +328,8 @@ importFrom(stats,qchisq) importFrom(stats,var) importFrom(tidyr,pivot_longer) importFrom(tidyr,pivot_wider) -importFrom(utils,adist) -importFrom(utils,browseURL) -importFrom(utils,menu) importFrom(vctrs,vec_cast) +importFrom(vctrs,vec_cast.character) importFrom(vctrs,vec_default_cast) importFrom(vctrs,vec_ptype2.character) importFrom(vctrs,vec_ptype_abbr) diff --git a/NEWS.md b/NEWS.md index 1bab1f02b..f5bc4abd1 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,10 +1,18 @@ -# AMR 1.1.0.9003 -## Last updated: 01-May-2020 +# AMR 1.1.0.9004 +## Last updated: 16-May-2020 + +### Breaking +* Removed previously deprecated function `p.symbol()` - it was replaced with `p_symbol()` ### Changed * Small fix for some text input that could not be coerced as valid MIC values -* Better support for the tidyverse. The tidyverse now heavily relies on the `vctrs` package for data transformation and data joining. In newer versions of e.g. the `dplyr` package, a function like `bind_rows()` would not preserve the right class for microorganisms (class `mo`) and antibiotics (class `ab`). This is fixed in this version. +* Fix for cases where some functions of newer versions of the `dplyr` package (such as `bind_rows()`) would not preserve the right class for microorganisms (class `mo`) and antibiotics (class `ab`) * Fixed interpretation of generic CLSI interpretation rules (thanks to Anthony Underwood) +* Added official drug names to verbose output of `eucast_rules()` + +### Other +* Removed dependency on **all** packages that were needed for the `AMR` package to work properly: `crayon`, `data.table`, `dplyr`, `ggplot2`, `R6`, `rlang` and `tidyr`. This is a major code change, but will probably not be noticeable by users. Making this package independent on especially the tidyverse (packages `dplyr`, `ggplot2` and `tidyr`) tremendously increases sustainability on the long term, since tidyverse functions change quite often. Most of our functions are replaced with versions that only rely on base R, which keeps this package fully functional for many years to come, without requiring a lot of maintenance to keep up with other packages anymore. The only dependencies that remained are for extending methods of other packages, like `pillar` and `vctrs` for printing and working with tibbles using our classes `mo` and `ab`. + # AMR 1.1.0 diff --git a/R/aa_helper_functions.R b/R/aa_helper_functions.R index ca8796024..dc07bfad9 100755 --- a/R/aa_helper_functions.R +++ b/R/aa_helper_functions.R @@ -19,6 +19,48 @@ # Visit our website for more info: https://msberends.gitlab.io/AMR. # # ==================================================================== # +# functions from dplyr, will perhaps become poorman +distinct <- function(.data, ..., .keep_all = FALSE) { + check_is_dataframe(.data) + if ("grouped_data" %in% class(.data)) { + distinct.grouped_data(.data, ..., .keep_all = .keep_all) + } else { + distinct.default(.data, ..., .keep_all = .keep_all) + } +} +distinct.default <- function(.data, ..., .keep_all = FALSE) { + names <- rownames(.data) + rownames(.data) <- NULL + if (length(deparse_dots(...)) == 0) { + selected <- .data + } else { + selected <- select(.data, ...) + } + rows <- as.integer(rownames(unique(selected))) + if (isTRUE(.keep_all)) { + res <- .data[rows, , drop = FALSE] + } else { + res <- selected[rows, , drop = FALSE] + } + rownames(res) <- names[rows] + res +} +distinct.grouped_data <- function(.data, ..., .keep_all = FALSE) { + apply_grouped_function(.data, "distinct", ..., .keep_all = .keep_all) +} +filter_join_worker <- function(x, y, by = NULL, type = c("anti", "semi")) { + type <- match.arg(type, choices = c("anti", "semi"), several.ok = FALSE) + if (is.null(by)) { + by <- intersect(names(x), names(y)) + join_message(by) + } + rows <- interaction(x[, by]) %in% interaction(y[, by]) + if (type == "anti") rows <- !rows + res <- x[rows, , drop = FALSE] + rownames(res) <- NULL + res +} + # No export, no Rd addin_insert_in <- function() { rstudioapi::insertText(" %in% ") @@ -36,7 +78,7 @@ check_dataset_integrity <- function() { "species", "subspecies", "rank", "col_id", "species_id", "source", "ref", "prevalence", "snomed") %in% colnames(microorganisms), - na.rm = TRUE) & NROW(microorganisms) == NROW(microorganismsDT) + na.rm = TRUE) & NROW(microorganisms) == NROW(MO_lookup) check_antibiotics <- all(c("ab", "atc", "cid", "name", "group", "atc_group1", "atc_group2", "abbreviations", "synonyms", "oral_ddd", "oral_units", @@ -51,12 +93,11 @@ check_dataset_integrity <- function() { invisible(TRUE) } -#' @importFrom crayon blue bold red -#' @importFrom dplyr %>% pull search_type_in_df <- function(x, type) { # try to find columns based on type found <- NULL - + + x <- as.data.frame(x, stringsAsFactors = FALSE) colnames(x) <- trimws(colnames(x)) # -- mo @@ -89,14 +130,14 @@ search_type_in_df <- function(x, type) { if (any(colnames(x) %like% "^(specimen date|specimen_date|spec_date)")) { # WHONET support found <- colnames(x)[colnames(x) %like% "^(specimen date|specimen_date|spec_date)"][1] - if (!any(class(x %>% pull(found)) %in% c("Date", "POSIXct"))) { - stop(red(paste0("ERROR: Found column `", bold(found), "` to be used as input for `col_", type, + if (!any(class(pull(x, found)) %in% c("Date", "POSIXct"))) { + stop(font_red(paste0("ERROR: Found column `", font_bold(found), "` to be used as input for `col_", type, "`, but this column contains no valid dates. Transform its values to valid dates first.")), call. = FALSE) } } else { for (i in seq_len(ncol(x))) { - if (any(class(x %>% pull(i)) %in% c("Date", "POSIXct"))) { + if (any(class(pull(x, i)) %in% c("Date", "POSIXct"))) { found <- colnames(x)[i] break } @@ -127,7 +168,7 @@ search_type_in_df <- function(x, type) { if (!is.null(found)) { # this column should contain logicals if (!is.logical(x[, found, drop = TRUE])) { - message(red(paste0("NOTE: Column `", bold(found), "` found as input for `col_", type, + message(font_red(paste0("NOTE: Column `", font_bold(found), "` found as input for `col_", type, "`, but this column does not contain 'logical' values (TRUE/FALSE) and was ignored."))) found <- NULL } @@ -135,11 +176,11 @@ search_type_in_df <- function(x, type) { } if (!is.null(found)) { - msg <- paste0("NOTE: Using column `", bold(found), "` as input for `col_", type, "`.") + msg <- paste0("NOTE: Using column `", font_bold(found), "` as input for `col_", type, "`.") if (type %in% c("keyantibiotics", "specimen")) { - msg <- paste(msg, "Use", bold(paste0("col_", type), "= FALSE"), "to prevent this.") + msg <- paste(msg, "Use", font_bold(paste0("col_", type), "= FALSE"), "to prevent this.") } - message(blue(msg)) + message(font_blue(msg)) } found } @@ -147,10 +188,11 @@ search_type_in_df <- function(x, type) { stopifnot_installed_package <- function(package) { # no "utils::installed.packages()" since it requires non-staged install since R 3.6.0 # https://developer.r-project.org/Blog/public/2019/02/14/staged-install/index.html - tryCatch(get(".packageName", envir = asNamespace(package)), - error = function(e) stop("package '", package, "' required but not installed", - ' - try to install it with: install.packages("', package, '")', - call. = FALSE)) + sapply(package, function(x) + tryCatch(get(".packageName", envir = asNamespace(x)), + error = function(e) stop("package '", x, "' required but not installed.", + "\nTry to install it with: install.packages(\"", x, "\")", + call. = FALSE))) return(invisible()) } @@ -206,3 +248,184 @@ dataset_UTF8_to_ASCII <- function(df) { } df } + + +# replace crayon::has_color +has_colour <- function() { + if (Sys.getenv("TERM") == "dumb") { + return(FALSE) + } + if (tolower(Sys.info()["sysname"]) == "windows") { + if (Sys.getenv("ConEmuANSI") == "ON" | Sys.getenv("CMDER_ROOT") != "") { + return(TRUE) + } else { + return(FALSE) + } + } + "COLORTERM" %in% names(Sys.getenv()) | grepl("^screen|^xterm|^vt100|color|ansi|cygwin|linux", + Sys.getenv("TERM"), + ignore.case = TRUE, + perl = TRUE) +} + + +# the crayon colours +try_colour <- function(..., before, after, collapse = " ") { + txt <- paste0(unlist(list(...)), collapse = collapse) + if (isTRUE(has_colour())) { + if (is.null(collapse)) { + paste0(before, txt, after, collapse = NULL) + } else { + paste0(before, txt, after, collapse = "") + } + } else { + txt + } +} +font_black <- function(..., collapse = " ") { + try_colour(..., before = "\033[38;5;232m", after = "\033[39m", collapse = collapse) +} +font_blue <- function(..., collapse = " ") { + try_colour(..., before = "\033[34m", after = "\033[39m", collapse = collapse) +} +font_green <- function(..., collapse = " ") { + try_colour(..., before = "\033[32m", after = "\033[39m", collapse = collapse) +} +font_magenta <- function(..., collapse = " ") { + try_colour(..., before = "\033[35m", after = "\033[39m", collapse = collapse) +} +font_red <- function(..., collapse = " ") { + try_colour(..., before = "\033[31m", after = "\033[39m", collapse = collapse) +} +font_silver <- function(..., collapse = " ") { + try_colour(..., before = "\033[90m", after = "\033[39m", collapse = collapse) +} +font_white <- function(..., collapse = " ") { + try_colour(..., before = "\033[37m", after = "\033[39m", collapse = collapse) +} +font_yellow <- function(..., collapse = " ") { + try_colour(..., before = "\033[33m", after = "\033[39m", collapse = collapse) +} +font_subtle <- function(..., collapse = " ") { + try_colour(..., before = "\033[38;5;246m", after = "\033[39m", collapse = collapse) +} +font_grey <- function(..., collapse = " ") { + try_colour(..., before = "\033[38;5;249m", after = "\033[39m", collapse = collapse) +} +font_green_bg <- function(..., collapse = " ") { + try_colour(..., before = "\033[42m", after = "\033[49m", collapse = collapse) +} +font_red_bg <- function(..., collapse = " ") { + try_colour(..., before = "\033[41m", after = "\033[49m", collapse = collapse) +} +font_yellow_bg <- function(..., collapse = " ") { + try_colour(..., before = "\033[43m", after = "\033[49m", collapse = collapse) +} +font_bold <- function(..., collapse = " ") { + try_colour(..., before = "\033[1m", after = "\033[22m", collapse = collapse) +} +font_italic <- function(..., collapse = " ") { + try_colour(..., before = "\033[3m", after = "\033[23m", collapse = collapse) +} +font_underline <- function(..., collapse = " ") { + try_colour(..., before = "\033[4m", after = "\033[24m", collapse = collapse) +} +font_stripstyle <- function(x) { + # from crayon:::ansi_regex + gsub("(?:(?:\\x{001b}\\[)|\\x{009b})(?:(?:[0-9]{1,3})?(?:(?:;[0-9]{0,3})*)?[A-M|f-m])|\\x{001b}[A-M]", "", x, perl = TRUE) +} + +progress_estimated <- function(n = 1, n_min = 0, ...) { + # initiate with: + # progress <- progressbar(n) + # on.exit(close(progress)) + # + # update with: + # progress$tick() + if (n >= n_min) { + pb <- utils::txtProgressBar(max = n, style = 3) + pb$tick <- function() { + pb$up(pb$getVal() + 1) + } + pb + } else { + pb <- list() + pb$tick <- function() { + invisible() + } + pb$kill <- function() { + invisible() + } + structure(pb, class = "txtProgressBar") + } +} + +# works exactly like round(), but rounds `round(44.55, 1)` as 44.6 instead of 44.5 +# and adds decimal zeroes until `digits` is reached when force_zero = TRUE +round2 <- function(x, digits = 0, force_zero = TRUE) { + x <- as.double(x) + # https://stackoverflow.com/a/12688836/4575331 + val <- (trunc((abs(x) * 10 ^ digits) + 0.5) / 10 ^ digits) * sign(x) + if (digits > 0 & force_zero == TRUE) { + values_trans <- val[val != as.integer(val) & !is.na(val)] + val[val != as.integer(val) & !is.na(val)] <- paste0(values_trans, + strrep("0", + max(0, + digits - nchar( + format( + as.double( + gsub(".*[.](.*)$", + "\\1", + values_trans)), + scientific = FALSE))))) + } + as.double(val) +} + + +# percentage from our other package: 'cleaner' +percentage <- function(x, digits = NULL, ...) { + + # getdecimalplaces() function + getdecimalplaces <- function(x, minimum = 0, maximum = 3) { + if (maximum < minimum) { + maximum <- minimum + } + if (minimum > maximum) { + minimum <- maximum + } + max_places <- max(unlist(lapply(strsplit(sub("0+$", "", + as.character(x * 100)), ".", fixed = TRUE), + function(y) ifelse(length(y) == 2, nchar(y[2]), 0))), na.rm = TRUE) + max(min(max_places, + maximum, na.rm = TRUE), + minimum, na.rm = TRUE) + } + + # format_percentage() function + format_percentage <- function(x, digits = NULL, ...) { + if (is.null(digits)) { + digits <- getdecimalplaces(x) + } + + # round right: percentage(0.4455) and format(as.percentage(0.4455), 1) should return "44.6%", not "44.5%" + x_formatted <- format(round2(as.double(x), digits = digits + 2) * 100, + scientific = FALSE, + digits = digits, + nsmall = digits, + ...) + x_formatted <- paste0(x_formatted, "%") + x_formatted[!grepl(pattern = "^[0-9.,e-]+$", x = x)] <- NA_character_ + x_formatted + } + + # the actual working part + x <- as.double(x) + if (is.null(digits)) { + # max one digit if undefined + digits <- getdecimalplaces(x, minimum = 0, maximum = 1) + } + format_percentage(structure(.Data = as.double(x), + class = c("percentage", "numeric")), + digits = digits, ...) +} diff --git a/R/aa_helper_functions_dplyr.R b/R/aa_helper_functions_dplyr.R new file mode 100644 index 000000000..32315def0 --- /dev/null +++ b/R/aa_helper_functions_dplyr.R @@ -0,0 +1,775 @@ +# ==================================================================== # +# TITLE # +# Antimicrobial Resistance (AMR) Analysis # +# # +# SOURCE # +# https://gitlab.com/msberends/AMR # +# # +# LICENCE # +# (c) 2018-2020 Berends MS, Luz CF et al. # +# # +# This R package is free software; you can freely use and distribute # +# it for both personal and commercial purposes under the terms of the # +# GNU General Public License version 2.0 (GNU GPL-2), as published by # +# the Free Software Foundation. # +# # +# We created this package for both routine data analysis and academic # +# research and it was publicly released in the hope that it will be # +# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. # +# Visit our website for more info: https://msberends.gitlab.io/AMR. # +# ==================================================================== # + +# ------------------------------------------------ +# THIS FILE WAS CREATED AUTOMATICALLY! +# Source file: data-raw/reproduction_of_poorman.R +# ------------------------------------------------ + +# Poorman: a package to replace all dplyr functions with base R so we can lose dependency on dplyr. +# These functions were downloaded from https://github.com/nathaneastwood/poorman, +# from this commit: https://github.com/nathaneastwood/poorman/tree/7d76d77f8f7bc663bf30fb5a161abb49801afa17 +# +# All code below was released under MIT license, that permits 'free of charge, to any person obtaining a +# copy of the software and associated documentation files (the "Software"), to deal in the Software +# without restriction, including without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +# is furnished to do so', given that a copyright notice is given in the software. +# +# Copyright notice as found on https://github.com/nathaneastwood/poorman/blob/master/LICENSE on 2 May 2020: +# YEAR: 2020 +# COPYRIGHT HOLDER: Nathan Eastwood + +arrange <- function(.data, ...) { + check_is_dataframe(.data) + if ("grouped_data" %in% class(.data)) { + arrange.grouped_data(.data, ...) + } else { + arrange.default(.data, ...) + } +} + +arrange.default <- function(.data, ...) { + rows <- eval.parent(substitute(with(.data, order(...)))) + .data[rows, , drop = FALSE] +} + +arrange.grouped_data <- function(.data, ...) { + apply_grouped_function(.data, "arrange", ...) +} +between <- function(x, left, right) { + if (!is.null(attr(x, "class")) && !inherits(x, c("Date", "POSIXct"))) { + warning("`between()` called on numeric vector with S3 class") + } + if (!is.double(x)) x <- as.numeric(x) + x >= as.numeric(left) & x <= as.numeric(right) +} +count <- function(x, ..., wt = NULL, sort = FALSE, name = NULL) { + groups <- get_groups(x) + if (!missing(...)) x <- group_by(x, ..., .add = TRUE) + wt <- deparse_var(wt) + res <- do.call(tally, list(x, wt, sort, name)) + if (length(groups) > 0L) res <- do.call(group_by, list(res, as.name(groups))) + res +} + +tally <- function(x, wt = NULL, sort = FALSE, name = NULL) { + name <- check_name(x, name) + wt <- deparse_var(wt) + res <- do.call(summarise, set_names(list(x, as.name(tally_n(x, wt))), c(".data", name))) + res <- ungroup(res) + if (isTRUE(sort)) res <- do.call(arrange, list(res, call("desc", as.name(name)))) + rownames(res) <- NULL + res +} + +add_count <- function(x, ..., wt = NULL, sort = FALSE, name = NULL) { + name <- check_name(x, name) + row_names <- rownames(x) + wt <- deparse_var(wt) + if (!missing(...)) x <- group_by(x, ..., .add = TRUE) + res <- do.call(add_tally, list(x, wt, sort, name)) + res[row_names, ] +} + +add_tally <- function(x, wt = NULL, sort = FALSE, name = NULL) { + wt <- deparse_var(wt) + n <- tally_n(x, wt) + name <- check_name(x, name) + res <- do.call(mutate, set_names(list(x, as.name(n)), c(".data", name))) + + if (isTRUE(sort)) { + do.call(arrange, list(res, call("desc", as.name(name)))) + } else { + res + } +} + +tally_n <- function(x, wt) { + if (is.null(wt) && "n" %in% colnames(x)) { + message("Using `n` as weighting variable") + wt <- "n" + } + context$.data <- x + on.exit(rm(list = ".data", envir = context)) + if (is.null(wt)) { + "n()" + } else { + paste0("sum(", wt, ", na.rm = TRUE)") + } +} + +check_name <- function(df, name) { + if (is.null(name)) { + if ("n" %in% colnames(df)) { + stop( + "Column 'n' is already present in output\n", + "* Use `name = \"new_name\"` to pick a new name" + ) + } + return("n") + } + + if (!is.character(name) || length(name) != 1) { + stop("`name` must be a single string") + } + + name +} +desc <- function(x) -xtfrm(x) +select_env <- new.env() + +peek_vars <- function() { + get(".col_names", envir = select_env) +} + +context <- new.env() + +n <- function() { + do.call(nrow, list(quote(.data)), envir = context) +} +filter <- function(.data, ...) { + check_is_dataframe(.data) + if ("grouped_data" %in% class(.data)) { + filter.grouped_data(.data, ...) + } else { + filter.default(.data, ...) + } +} + +filter.default <- function(.data, ...) { + conditions <- paste(deparse_dots(...), collapse = " & ") + context$.data <- .data + on.exit(rm(.data, envir = context)) + .data[do.call(with, list(.data, str2lang(unname(conditions)))), ] +} + +filter.grouped_data <- function(.data, ...) { + rows <- rownames(.data) + res <- apply_grouped_function(.data, "filter", ...) + res[rows[rows %in% rownames(res)], ] +} +group_by <- function(.data, ..., .add = FALSE) { + check_is_dataframe(.data) + pre_groups <- get_groups(.data) + groups <- deparse_dots(...) + if (isTRUE(.add)) groups <- unique(c(pre_groups, groups)) + unknown <- !(groups %in% colnames(.data)) + if (any(unknown)) stop("Invalid groups: ", groups[unknown]) + structure(.data, class = c("grouped_data", class(.data)), groups = groups) +} + +ungroup <- function(x, ...) { + check_is_dataframe(x) + rm_groups <- deparse_dots(...) + groups <- attr(x, "groups") + if (length(rm_groups) == 0L) rm_groups <- groups + attr(x, "groups") <- groups[!(groups %in% rm_groups)] + if (length(attr(x, "groups")) == 0L) { + attr(x, "groups") <- NULL + class(x) <- class(x)[!(class(x) %in% "grouped_data")] + } + x +} + +get_groups <- function(x) { + attr(x, "groups", exact = TRUE) +} + +has_groups <- function(x) { + groups <- get_groups(x) + if (is.null(groups)) FALSE else TRUE +} + +set_groups <- function(x, groups) { + attr(x, "groups") <- groups + x +} + +apply_grouped_function <- function(.data, fn, ...) { + groups <- get_groups(.data) + grouped <- split_into_groups(.data, groups) + res <- do.call(rbind, unname(lapply(grouped, fn, ...))) + if (any(groups %in% colnames(res))) { + class(res) <- c("grouped_data", class(res)) + attr(res, "groups") <- groups[groups %in% colnames(res)] + } + res +} + +split_into_groups <- function(.data, groups) { + class(.data) <- "data.frame" + group_factors <- lapply(groups, function(x, .data) as.factor(.data[, x]), .data) + res <- split(x = .data, f = group_factors) + res +} + +print.grouped_data <- function(x, ..., digits = NULL, quote = FALSE, right = TRUE, row.names = TRUE, max = NULL) { + class(x) <- "data.frame" + print(x, ..., digits = digits, quote = quote, right = right, row.names = row.names, max = max) + cat("\nGroups: ", paste(attr(x, "groups", exact = TRUE), collapse = ", "), "\n\n") +} +if_else <- function(condition, true, false, missing = NULL) { + if (!is.logical(condition)) stop("`condition` must be a logical vector.") + cls_true <- class(true) + cls_false <- class(false) + cls_missing <- class(missing) + if (!identical(cls_true, cls_false)) { + stop("The class of `true` <", class(true), "> is not the same as the class of `false` <", class(false), ">") + } + if (!is.null(missing) && !identical(cls_true, cls_missing)) { + stop("`missing` must be a ", cls_true, " vector, not a ", cls_missing, " vector.") + } + res <- ifelse(condition, true, false) + if (!is.null(missing)) res[is.na(res)] <- missing + attributes(res) <- attributes(true) + res +} + +inner_join <- function(x, y, by = NULL, suffix = c(".x", ".y")) { + join_worker(x = x, y = y, by = by, suffix = suffix, sort = FALSE) +} + +left_join <- function(x, y, by = NULL, suffix = c(".x", ".y")) { + join_worker(x = x, y = y, by = by, suffix = suffix, all.x = TRUE) +} + +right_join <- function(x, y, by = NULL, suffix = c(".x", ".y")) { + join_worker(x = x, y = y, by = by, suffix = suffix, all.y = TRUE) +} + +full_join <- function(x, y, by = NULL, suffix = c(".x", ".y")) { + join_worker(x = x, y = y, by = by, suffix = suffix, all = TRUE) +} + +join_worker <- function(x, y, by = NULL, suffix = c(".x", ".y"), ...) { + x[, ".join_id"] <- seq_len(nrow(x)) + if (is.null(by)) { + by <- intersect(names(x), names(y)) + join_message(by) + merged <- merge(x = x, y = y, by = by, suffixes = suffix, ...)[, union(names(x), names(y))] + } else if (is.null(names(by))) { + merged <- merge(x = x, y = y, by = by, suffixes = suffix, ...) + } else { + merged <- merge(x = x, y = y, by.x = names(by), by.y = by, suffixes = suffix, ...) + } + merged <- merged[order(merged[, ".join_id"]), colnames(merged) != ".join_id"] + rownames(merged) <- NULL + merged +} + +join_message <- function(by) { + if (length(by) > 1L) { + message("Joining, by = c(\"", paste0(by, collapse = "\", \""), "\")\n", sep = "") + } else { + message("Joining, by = \"", by, "\"\n", sep = "") + } +} + +anti_join <- function(x, y, by = NULL) { + filter_join_worker(x, y, by, type = "anti") +} + +semi_join <- function(x, y, by = NULL) { + filter_join_worker(x, y, by, type = "semi") +} + +# filter_join_worker <- function(x, y, by = NULL, type = c("anti", "semi")) { +# type <- match.arg(type, choices = c("anti", "semi"), several.ok = FALSE) +# if (is.null(by)) { +# by <- intersect(names(x), names(y)) +# join_message(by) +# } +# rows <- interaction(x[, by]) %in% interaction(y[, by]) +# if (type == "anti") rows <- !rows +# res <- x[rows, ] +# rownames(res) <- NULL +# res +# } +lag <- function (x, n = 1L, default = NA) { + if (inherits(x, "ts")) stop("`x` must be a vector, not a `ts` object, do you want `stats::lag()`?") + if (length(n) != 1L || !is.numeric(n) || n < 0L) stop("`n` must be a nonnegative integer scalar") + if (n == 0L) return(x) + tryCatch( + storage.mode(default) <- typeof(x), + warning = function(w) { + stop("Cannot convert `default` <", typeof(default), "> to `x` <", typeof(x), ">") + } + ) + xlen <- length(x) + n <- pmin(n, xlen) + res <- c(rep(default, n), x[seq_len(xlen - n)]) + attributes(res) <- attributes(x) + res +} + +lead <- function (x, n = 1L, default = NA) { + if (length(n) != 1L || !is.numeric(n) || n < 0L) stop("n must be a nonnegative integer scalar") + if (n == 0L) return(x) + tryCatch( + storage.mode(default) <- typeof(x), + warning = function(w) { + stop("Cannot convert `default` <", typeof(default), "> to `x` <", typeof(x), ">") + } + ) + xlen <- length(x) + n <- pmin(n, xlen) + res <- c(x[-seq_len(n)], rep(default, n)) + attributes(res) <- attributes(x) + res +} +mutate <- function(.data, ...) { + check_is_dataframe(.data) + if ("grouped_data" %in% class(.data)) { + mutate.grouped_data(.data, ...) + } else { + mutate.default(.data, ...) + } +} + +mutate.default <- function(.data, ...) { + conditions <- deparse_dots(...) + cond_names <- names(conditions) + unnamed <- which(nchar(cond_names) == 0L) + if (is.null(cond_names)) { + names(conditions) <- conditions + } else if (length(unnamed) > 0L) { + names(conditions)[unnamed] <- conditions[unnamed] + } + not_matched <- names(conditions)[!names(conditions) %in% names(.data)] + .data[, not_matched] <- NA + context$.data <- .data + on.exit(rm(.data, envir = context)) + for (i in seq_along(conditions)) { + .data[, names(conditions)[i]] <- do.call(with, list(.data, str2lang(unname(conditions)[i]))) + } + .data +} + +mutate.grouped_data <- function(.data, ...) { + rows <- rownames(.data) + res <- apply_grouped_function(.data, "mutate", ...) + res[rows, ] +} +n_distinct <- function(..., na.rm = FALSE) { + res <- c(...) + if (is.list(res)) return(nrow(unique(as.data.frame(res, stringsAsFactors = FALSE)))) + if (isTRUE(na.rm)) res <- res[!is.na(res)] + length(unique(res)) +} +`%>%` <- function(lhs, rhs) { + lhs <- substitute(lhs) + rhs <- substitute(rhs) + eval(as.call(c(rhs[[1L]], lhs, as.list(rhs[-1L]))), envir = parent.frame()) +} +pull <- function(.data, var = -1) { + var_deparse <- deparse_var(var) + col_names <- colnames(.data) + if (!(var_deparse %in% col_names) & grepl("^[[:digit:]]+L|[[:digit:]]", var_deparse)) { + var <- as.integer(gsub("L", "", var_deparse)) + var <- if_else(var < 1L, rev(col_names)[abs(var)], col_names[var]) + } else if (var_deparse %in% col_names) { + var <- var_deparse + } + .data[, var] +} +relocate <- function(.data, ..., .before = NULL, .after = NULL) { + check_is_dataframe(.data) + data_names <- colnames(.data) + col_pos <- select_positions(.data, ...) + + .before <- deparse_var(.before) + .after <- deparse_var(.after) + has_before <- !is.null(.before) + has_after <- !is.null(.after) + + if (has_before && has_after) { + stop("You must supply only one of `.before` and `.after`") + } else if (has_before) { + where <- min(match(.before, data_names)) + col_pos <- c(setdiff(col_pos, where), where) + } else if (has_after) { + where <- max(match(.after, data_names)) + col_pos <- c(where, setdiff(col_pos, where)) + } else { + where <- 1L + col_pos <- union(col_pos, where) + } + lhs <- setdiff(seq(1L, where - 1L), col_pos) + rhs <- setdiff(seq(where + 1L, ncol(.data)), col_pos) + col_pos <- unique(c(lhs, col_pos, rhs)) + col_pos <- col_pos[col_pos <= length(data_names)] + + res <- .data[col_pos] + if (has_groups(.data)) res <- set_groups(res, get_groups(.data)) + res +} +rename <- function(.data, ...) { + check_is_dataframe(.data) + new_names <- names(deparse_dots(...)) + if (length(new_names) == 0L) { + warning("You didn't give any new names") + return(.data) + } + col_pos <- select_positions(.data, ...) + old_names <- colnames(.data)[col_pos] + new_names_zero <- nchar(new_names) == 0L + if (any(new_names_zero)) { + warning("You didn't provide new names for: ", paste0("`", old_names[new_names_zero], collapse = ", "), "`") + new_names[new_names_zero] <- old_names[new_names_zero] + } + colnames(.data)[col_pos] <- new_names + .data +} +rownames_to_column <- function(.data, var = "rowname") { + check_is_dataframe(.data) + col_names <- colnames(.data) + if (var %in% col_names) stop("Column `", var, "` already exists in `.data`") + .data[, var] <- rownames(.data) + rownames(.data) <- NULL + .data[, c(var, setdiff(col_names, var))] +} + +select <- function(.data, ...) { + map <- names(deparse_dots(...)) + col_pos <- select_positions(.data, ..., group_pos = TRUE) + res <- .data[, col_pos, drop = FALSE] + to_map <- nchar(map) > 0L + colnames(res)[to_map] <- map[to_map] + if (has_groups(.data)) res <- set_groups(res, get_groups(.data)) + res +} +starts_with <- function(match, ignore.case = TRUE, vars = peek_vars()) { + grep(pattern = paste0("^", paste0(match, collapse = "|^")), x = vars, ignore.case = ignore.case) +} + +ends_with <- function(match, ignore.case = TRUE, vars = peek_vars()) { + grep(pattern = paste0(paste0(match, collapse = "$|"), "$"), x = vars, ignore.case = ignore.case) +} + +contains <- function(match, ignore.case = TRUE, vars = peek_vars()) { + matches <- lapply( + match, + function(x) { + if (isTRUE(ignore.case)) { + match_u <- toupper(x) + match_l <- tolower(x) + pos_u <- grep(pattern = match_u, x = toupper(vars), fixed = TRUE) + pos_l <- grep(pattern = match_l, x = tolower(vars), fixed = TRUE) + unique(c(pos_l, pos_u)) + } else { + grep(pattern = x, x = vars, fixed = TRUE) + } + } + ) + unique(matches) +} + +matches <- function(match, ignore.case = TRUE, perl = FALSE, vars = peek_vars()) { + grep(pattern = match, x = vars, ignore.case = ignore.case, perl = perl) +} + +num_range <- function(prefix, range, width = NULL, vars = peek_vars()) { + if (!is.null(width)) { + range <- sprintf(paste0("%0", width, "d"), range) + } + find <- paste0(prefix, range) + if (any(duplicated(vars))) { + stop("Column names must be unique") + } else { + x <- match(find, vars) + x[!is.na(x)] + } +} + +all_of <- function(x, vars = peek_vars()) { + x_ <- !x %in% vars + if (any(x_)) { + which_x_ <- which(x_) + if (length(which_x_) == 1L) { + stop("The column ", x[which_x_], " does not exist.") + } else { + stop("The columns ", paste(x[which_x_], collapse = ", "), " do not exist.") + } + } else { + which(vars %in% x) + } +} + +any_of <- function(x, vars = peek_vars()) { + which(vars %in% x) +} + +everything <- function(vars = peek_vars()) { + seq_along(vars) +} + +last_col <- function(offset = 0L, vars = peek_vars()) { + if (!is_wholenumber(offset)) stop("`offset` must be an integer") + n <- length(vars) + if (offset && n <= offset) { + stop("`offset` must be smaller than the number of `vars`") + } else if (n == 0) { + stop("Can't select last column when `vars` is empty") + } else { + n - offset + } +} +select_positions <- function(.data, ..., group_pos = FALSE) { + cols <- eval(substitute(alist(...))) + data_names <- colnames(.data) + select_env$.col_names <- data_names + on.exit(rm(list = ".col_names", envir = select_env)) + exec_env <- parent.frame(2L) + pos <- unlist(lapply(cols, eval_expr, exec_env = exec_env)) + if (isTRUE(group_pos)) { + groups <- get_groups(.data) + missing_groups <- !(groups %in% cols) + if (any(missing_groups)) { + message("Adding missing grouping variables: `", paste(groups[missing_groups], collapse = "`, `"), "`") + pos <- c(match(groups[missing_groups], data_names), pos) + } + } + unique(pos) +} + +eval_expr <- function(x, exec_env) { + type <- typeof(x) + switch( + type, + "integer" = x, + "double" = as.integer(x), + "character" = select_char(x), + "symbol" = select_symbol(x, exec_env = exec_env), + "language" = eval_call(x), + stop("Expressions of type <", typeof(x), "> cannot be evaluated for use when subsetting.") + ) +} + +select_char <- function(expr) { + pos <- match(expr, select_env$.col_names) + if (is.na(pos)) stop("Column `", expr, "` does not exist") + pos +} + +select_symbol <- function(expr, exec_env) { + res <- try(select_char(as.character(expr)), silent = TRUE) + if (inherits(res, "try-error")) { + res <- tryCatch( + select_char(eval(expr, envir = exec_env)), + error = function(e) stop("Column ", expr, " does not exist.") + ) + } + res +} + +eval_call <- function(x) { + type <- as.character(x[[1]]) + switch( + type, + `:` = select_seq(x), + `!` = select_negate(x), + `-` = select_minus(x), + `c` = select_c(x), + `(` = select_bracket(x), + select_context(x) + ) +} + +select_seq <- function(expr) { + x <- eval_expr(expr[[2]]) + y <- eval_expr(expr[[3]]) + x:y +} + +select_negate <- function(expr) { + x <- if (is_negated_colon(expr)) { + expr <- call(":", expr[[2]][[2]], expr[[2]][[3]][[2]]) + eval_expr(expr) + } else { + eval_expr(expr[[2]]) + } + x * -1L +} + +is_negated_colon <- function(expr) { + expr[[1]] == "!" && length(expr[[2]]) > 1L && expr[[2]][[1]] == ":" && expr[[2]][[3]][[1]] == "!" +} + +select_minus <- function(expr) { + x <- eval_expr(expr[[2]]) + x * -1L +} + +select_c <- function(expr) { + lst_expr <- as.list(expr) + lst_expr[[1]] <- NULL + unlist(lapply(lst_expr, eval_expr)) +} + +select_bracket <- function(expr) { + eval_expr(expr[[2]]) +} + +select_context <- function(expr) { + eval(expr, envir = context$.data) +} +slice <- function(.data, ...) { + check_is_dataframe(.data) + if ("grouped_data" %in% class(.data)) { + slice.grouped_data(.data, ...) + } else { + slice.default(.data, ...) + } +} + +slice.default <- function(.data, ...) { + rows <- c(...) + stopifnot(is.numeric(rows) | is.integer(rows)) + if (all(rows > 0L)) rows <- rows[rows <= nrow(.data)] + .data[rows, ] +} + +slice.grouped_data <- function(.data, ...) { + apply_grouped_function(.data, "slice", ...) +} +summarise <- function(.data, ...) { + check_is_dataframe(.data) + if ("grouped_data" %in% class(.data)) { + summarise.grouped_data(.data, ...) + } else { + summarise.default(.data, ...) + } +} + +summarise.default <- function(.data, ...) { + fns <- vapply(substitute(...()), deparse, NA_character_) + context$.data <- .data + on.exit(rm(.data, envir = context)) + if (has_groups(.data)) { + group <- unique(.data[, get_groups(.data), drop = FALSE]) + if (nrow(group) == 0L) return(NULL) + } + res <- lapply(fns, function(x) do.call(with, list(.data, str2lang(x)))) + res <- as.data.frame(res) + fn_names <- names(fns) + colnames(res) <- if (is.null(fn_names)) fns else fn_names + if (has_groups(.data)) res <- cbind(group, res) + res +} + +summarise.grouped_data <- function(.data, ...) { + groups <- get_groups(.data) + res <- apply_grouped_function(.data, "summarise", ...) + res <- res[do.call(order, lapply(groups, function(x) res[, x])), ] + rownames(res) <- NULL + res +} + +summarize <- summarise +summarize.default <- summarise.default +summarize.grouped_data <- summarise.grouped_data +transmute <- function(.data, ...) { + check_is_dataframe(.data) + if ("grouped_data" %in% class(.data)) { + transmute.grouped_data(.data, ...) + } else { + transmute.default(.data, ...) + } +} + +transmute.default <- function(.data, ...) { + conditions <- deparse_dots(...) + mutated <- mutate(.data, ...) + mutated[, names(conditions), drop = FALSE] +} + +transmute.grouped_data <- function(.data, ...) { + rows <- rownames(.data) + res <- apply_grouped_function(.data, "transmute", ...) + res[rows, ] +} +deparse_dots <- function(...) { + vapply(substitute(...()), deparse, NA_character_) +} + +deparse_var <- function(var) { + sub_var <- eval(substitute(substitute(var)), parent.frame()) + if (is.symbol(sub_var)) var <- as.character(sub_var) + var +} + +check_is_dataframe <- function(.data) { + parent_fn <- all.names(sys.call(-1L), max.names = 1L) + if (!is.data.frame(.data)) stop(parent_fn, " must be given a data.frame") + invisible() +} + +is_wholenumber <- function(x) { + x %% 1L == 0L +} + +set_names <- function(object = nm, nm) { + names(object) <- nm + object +} + +cume_dist <- function(x) { + rank(x, ties.method = "max", na.last = "keep") / sum(!is.na(x)) +} + +dense_rank <- function(x) { + match(x, sort(unique(x))) +} + +min_rank <- function(x) { + rank(x, ties.method = "min", na.last = "keep") +} + +ntile <- function (x = row_number(), n) { + if (!missing(x)) x <- row_number(x) + len <- length(x) - sum(is.na(x)) + n <- as.integer(floor(n)) + if (len == 0L) { + rep(NA_integer_, length(x)) + } else { + n_larger <- as.integer(len %% n) + n_smaller <- as.integer(n - n_larger) + size <- len / n + larger_size <- as.integer(ceiling(size)) + smaller_size <- as.integer(floor(size)) + larger_threshold <- larger_size * n_larger + bins <- if_else( + x <= larger_threshold, + (x + (larger_size - 1L)) / larger_size, + (x + (-larger_threshold + smaller_size - 1L)) / smaller_size + n_larger + ) + as.integer(floor(bins)) + } +} + +percent_rank <- function(x) { + (min_rank(x) - 1) / (sum(!is.na(x)) - 1) +} + +row_number <- function(x) { + if (missing(x)) seq_len(n()) else rank(x, ties.method = "first", na.last = "keep") +} diff --git a/R/ab.R b/R/ab.R index 0e5de7f96..55262b3c6 100755 --- a/R/ab.R +++ b/R/ab.R @@ -27,7 +27,6 @@ #' @param ... arguments passed on to internal functions #' @rdname as.ab #' @inheritSection WHOCC WHOCC -#' @importFrom dplyr %>% filter slice pull #' @details All entries in the [antibiotics] data set have three different identifiers: a human readable EARS-Net code (column `ab`, used by ECDC and WHONET), an ATC code (column `atc`, used by WHO), and a CID code (column `cid`, Compound ID, used by PubChem). The data set contains more than 5,000 official brand names from many different countries, as found in PubChem. #' #' Use the [ab_property()] functions to get properties based on the returned antibiotic ID, see Examples. @@ -409,6 +408,6 @@ c.ab <- function(x, ...) { #' @export pillar_shaft.ab <- function(x, ...) { out <- format(x) - out[is.na(x)] <- pillar::style_na("NA") + out[is.na(x)] <- font_red("NA") pillar::new_pillar_shaft_simple(out, align = "left", min_width = 4) } diff --git a/R/age.R b/R/age.R index bc62f4e8c..2337f9879 100755 --- a/R/age.R +++ b/R/age.R @@ -29,7 +29,6 @@ #' @param na.rm a logical to indicate whether missing values should be removed #' @return An [integer] (no decimals) if `exact = FALSE`, a [double] (with decimals) otherwise #' @seealso To split ages into groups, use the [age_groups()] function. -#' @importFrom dplyr if_else #' @inheritSection AMR Read more on our website! #' @export #' @examples @@ -54,7 +53,7 @@ age <- function(x, reference = Sys.Date(), exact = FALSE, na.rm = FALSE) { # from https://stackoverflow.com/a/25450756/4575331 years_gap <- reference$year - x$year - ages <- if_else(reference$mon < x$mon | (reference$mon == x$mon & reference$mday < x$mday), + ages <- ifelse(reference$mon < x$mon | (reference$mon == x$mon & reference$mday < x$mday), as.integer(years_gap - 1), as.integer(years_gap)) diff --git a/R/amr.R b/R/amr.R index 7b315d11c..1feb81cbc 100644 --- a/R/amr.R +++ b/R/amr.R @@ -60,6 +60,4 @@ #' #' @name AMR #' @rdname AMR -#' @importFrom microbenchmark microbenchmark -#' @importFrom knitr kable NULL diff --git a/R/atc_online.R b/R/atc_online.R index 703521889..e421696eb 100644 --- a/R/atc_online.R +++ b/R/atc_online.R @@ -56,7 +56,6 @@ #' - `"ml"` = milliliter (e.g. eyedrops) #' @export #' @rdname atc_online -#' @importFrom dplyr %>% #' @inheritSection AMR Read more on our website! #' @source #' @examples @@ -77,12 +76,10 @@ atc_online_property <- function(atc_code, administration = "O", url = "https://www.whocc.no/atc_ddd_index/?code=%s&showdescription=no") { + stopifnot_installed_package(c("curl", "rvest", "xml2")) + check_dataset_integrity() - - if (!all(c("curl", "rvest", "xml2") %in% rownames(utils::installed.packages()))) { - stop("Packages 'xml2', 'rvest' and 'curl' are required for this function") - } - + if (!all(atc_code %in% antibiotics)) { atc_code <- as.character(ab_atc(atc_code)) } diff --git a/R/availability.R b/R/availability.R index 85f791e22..c7c16cfb3 100644 --- a/R/availability.R +++ b/R/availability.R @@ -28,7 +28,6 @@ #' @details The function returns a [`data.frame`] with columns `"resistant"` and `"visual_resistance"`. The values in that columns are calculated with [resistance()]. #' @return [`data.frame`] with column names of `tbl` as row names #' @inheritSection AMR Read more on our website! -#' @importFrom cleaner percentage #' @export #' @examples #' availability(example_isolates) diff --git a/R/bug_drug_combinations.R b/R/bug_drug_combinations.R index d2c670fe1..5df13f477 100644 --- a/R/bug_drug_combinations.R +++ b/R/bug_drug_combinations.R @@ -32,7 +32,6 @@ #' @param ... arguments passed on to `FUN` #' @inheritParams rsi_df #' @inheritParams base::formatC -#' @importFrom dplyr %>% rename group_by select mutate filter summarise ungroup #' @importFrom tidyr pivot_longer #' @details The function [format()] calculates the resistance per bug-drug combination. Use `combine_IR = FALSE` (default) to test R vs. S+I and `combine_IR = TRUE` to test R+I vs. S. #' @@ -74,13 +73,15 @@ bug_drug_combinations <- function(x, stop("`col_mo` must be set.", call. = FALSE) } - x <- x %>% - as.data.frame(stringsAsFactors = FALSE) %>% - mutate(mo = x %>% - pull(col_mo) %>% - FUN(...)) %>% - group_by(mo) %>% - select_if(is.rsi) %>% + select_rsi <- function(.data) { + .data[, c(col_mo, names(which(sapply(.data, is.rsi))))] + } + + x <- x %>% as.data.frame(stringsAsFactors = FALSE) + x$mo <- FUN(x[, col_mo, drop = TRUE]) + + x <- x %>% + select_rsi() %>% pivot_longer(-mo, names_to = "ab") %>% group_by(mo, ab) %>% summarise(S = sum(value == "S", na.rm = TRUE), @@ -93,9 +94,7 @@ bug_drug_combinations <- function(x, structure(.Data = x, class = c("bug_drug_combinations", class(x))) } -#' @importFrom dplyr everything rename %>% ungroup group_by summarise mutate_all arrange everything lag #' @importFrom tidyr pivot_wider -#' @importFrom cleaner percentage #' @exportMethod format.bug_drug_combinations #' @export #' @rdname bug_drug_combinations @@ -110,10 +109,10 @@ format.bug_drug_combinations <- function(x, decimal.mark = getOption("OutDec"), big.mark = ifelse(decimal.mark == ",", ".", ","), ...) { - x <- x %>% filter(total >= minimum) + x <- x %>% subset(total >= minimum) if (remove_intrinsic_resistant == TRUE) { - x <- x %>% filter(R != total) + x <- x %>% subset(R != total) } if (combine_SI == TRUE | combine_IR == FALSE) { x$isolates <- x$R @@ -137,26 +136,46 @@ format.bug_drug_combinations <- function(x, ab_txt } + remove_NAs <- function(.data) { + as.data.frame(sapply(.data, function(x) ifelse(is.na(x), "", x), simplify = FALSE)) + } + + create_var <- function(.data, ...) { + dots <- list(...) + for (i in seq_len(length(dots))) { + .data[, names(dots)[i]] <- dots[[i]] + } + .data + } + y <- x %>% - mutate(ab = as.ab(ab), - ab_txt = give_ab_name(ab = ab, format = translate_ab, language = language)) %>% + create_var(ab = as.ab(x$ab), + ab_txt = give_ab_name(ab = x$ab, format = translate_ab, language = language)) %>% group_by(ab, ab_txt, mo) %>% summarise(isolates = sum(isolates, na.rm = TRUE), total = sum(total, na.rm = TRUE)) %>% - ungroup() %>% - mutate(txt = paste0(percentage(isolates / total, decimal.mark = decimal.mark, big.mark = big.mark), - " (", trimws(format(isolates, big.mark = big.mark)), "/", - trimws(format(total, big.mark = big.mark)), ")")) %>% + ungroup() + + y <- y %>% + create_var(txt = paste0(percentage(y$isolates / y$total, decimal.mark = decimal.mark, big.mark = big.mark), + " (", trimws(format(y$isolates, big.mark = big.mark)), "/", + trimws(format(y$total, big.mark = big.mark)), ")")) %>% select(ab, ab_txt, mo, txt) %>% arrange(mo) %>% pivot_wider(names_from = mo, values_from = txt) %>% - mutate_all(~ifelse(is.na(.), "", .)) %>% - mutate(ab_group = ab_group(ab, language = language), - ab_txt) %>% - select(ab_group, ab_txt, everything(), -ab) %>% - arrange(ab_group, ab_txt) %>% - mutate(ab_group = ifelse(ab_group != lag(ab_group) | is.na(lag(ab_group)), ab_group, "")) + remove_NAs() + select_ab_vars <- function(.data) { + .data[, c("ab_group", "ab_txt", colnames(.data)[!colnames(.data) %in% c("ab_group", "ab_txt", "ab")])] + } + + y <- y %>% + create_var(ab_group = ab_group(y$ab, language = language)) %>% + select_ab_vars() %>% + arrange(ab_group, ab_txt) + y <- y %>% + create_var(ab_group = ifelse(y$ab_group != lag(y$ab_group) | is.na(lag(y$ab_group)), y$ab_group, "")) + if (add_ab_group == FALSE) { y <- y %>% select(-ab_group) %>% rename("Drug" = ab_txt) colnames(y)[1] <- translate_AMR(colnames(y)[1], language = get_locale(), only_unknown = FALSE) @@ -170,8 +189,7 @@ format.bug_drug_combinations <- function(x, #' @exportMethod print.bug_drug_combinations #' @export -#' @importFrom crayon blue print.bug_drug_combinations <- function(x, ...) { print(as.data.frame(x, stringsAsFactors = FALSE)) - message(blue("NOTE: Use 'format()' on this result to get a publicable/printable format.")) + message(font_blue("NOTE: Use 'format()' on this result to get a publicable/printable format.")) } diff --git a/R/catalogue_of_life.R b/R/catalogue_of_life.R index cc3b18d30..3ee1976ff 100755 --- a/R/catalogue_of_life.R +++ b/R/catalogue_of_life.R @@ -83,13 +83,7 @@ NULL #' @return a [`list`], which prints in pretty format #' @inheritSection catalogue_of_life Catalogue of Life #' @inheritSection AMR Read more on our website! -#' @importFrom crayon bold underline -#' @importFrom dplyr filter #' @export -#' @examples -#' library(dplyr) -#' microorganisms %>% freq(kingdom) -#' microorganisms %>% group_by(kingdom) %>% freq(phylum, nmax = NULL) catalogue_of_life_version <- function() { check_dataset_integrity() @@ -118,11 +112,11 @@ catalogue_of_life_version <- function() { #' @noRd print.catalogue_of_life_version <- function(x, ...) { lst <- x - cat(paste0(bold("Included in this AMR package are:\n\n"), - underline(lst$catalogue_of_life$version), "\n", + cat(paste0(font_bold("Included in this AMR package are:\n\n"), + font_underline(lst$catalogue_of_life$version), "\n", " Available at: ", lst$catalogue_of_life$url, "\n", " Number of included species: ", format(lst$catalogue_of_life$n, big.mark = ","), "\n", - underline(paste0(lst$deutsche_sammlung_von_mikroorganismen_und_zellkulturen$version, " (", + font_underline(paste0(lst$deutsche_sammlung_von_mikroorganismen_und_zellkulturen$version, " (", lst$deutsche_sammlung_von_mikroorganismen_und_zellkulturen$yearmonth, ")")), "\n", " Available at: ", lst$deutsche_sammlung_von_mikroorganismen_und_zellkulturen$url, "\n", " Number of included species: ", format(lst$deutsche_sammlung_von_mikroorganismen_und_zellkulturen$n, big.mark = ","), "\n\n", diff --git a/R/count.R b/R/count.R index 20e96c16b..a8382546d 100755 --- a/R/count.R +++ b/R/count.R @@ -34,7 +34,7 @@ #' #' The function [n_rsi()] is an alias of [count_all()]. They can be used to count all available isolates, i.e. where all input antibiotics have an available result (S, I or R). Their use is equal to [n_distinct()]. Their function is equal to `count_susceptible(...) + count_resistant(...)`. #' -#' The function [count_df()] takes any variable from `data` that has an [`rsi`] class (created with [as.rsi()]) and counts the number of S's, I's and R's. The function [rsi_df()] works exactly like [count_df()], but adds the percentage of S, I and R. +#' The function [count_df()] takes any variable from `data` that has an [`rsi`] class (created with [as.rsi()]) and counts the number of S's, I's and R's. It also supports grouped variables. The function [rsi_df()] works exactly like [count_df()], but adds the percentage of S, I and R. #' @inheritSection proportion Combination therapy #' @seealso [`proportion_*`][proportion] to calculate microbial resistance and susceptibility. #' @return An [`integer`] diff --git a/R/deprecated.R b/R/deprecated.R index 58b69ad7f..eb6208084 100755 --- a/R/deprecated.R +++ b/R/deprecated.R @@ -27,13 +27,6 @@ #' @export #' @keywords internal #' @name AMR-deprecated -#' @rdname AMR-deprecated -p.symbol <- function(...) { - .Deprecated("p_symbol()", package = "AMR") - p_symbol(...) -} - -#' @rdname AMR-deprecated #' @export portion_R <- function(...) { .Deprecated("resistance()", package = "AMR") diff --git a/R/disk.R b/R/disk.R index aac7303a4..2ceed22cd 100644 --- a/R/disk.R +++ b/R/disk.R @@ -92,7 +92,6 @@ all_valid_disks <- function(x) { #' @rdname as.disk #' @export -#' @importFrom dplyr %>% is.disk <- function(x) { inherits(x, "disk") } @@ -123,7 +122,7 @@ print.disk <- function(x, ...) { #' @export pillar_shaft.disk <- function(x, ...) { out <- trimws(format(x)) - out[is.na(x)] <- pillar::style_na(NA) + out[is.na(x)] <- font_red(NA) pillar::new_pillar_shaft_simple(out, align = "right", min_width = 3) } diff --git a/R/eucast_rules.R b/R/eucast_rules.R index 8c277eb39..fe9bc96b3 100755 --- a/R/eucast_rules.R +++ b/R/eucast_rules.R @@ -141,9 +141,6 @@ EUCAST_VERSION_EXPERT_RULES <- "3.1, 2016" #' @aliases EUCAST #' @rdname eucast_rules #' @export -#' @importFrom dplyr %>% select pull mutate_at vars group_by summarise n -#' @importFrom crayon bold bgGreen bgYellow bgRed black green blue italic strip_style white red make_style -#' @importFrom utils menu #' @return The input of `x`, possibly with edited values of antibiotics. Or, if `verbose = TRUE`, a [`data.frame`] with all original and new values of the affected bug-drug combinations. #' @source #' - EUCAST Expert Rules. Version 2.0, 2012. \cr @@ -211,7 +208,7 @@ eucast_rules <- function(x, if ("rstudioapi" %in% rownames(utils::installed.packages())) { q_continue <- rstudioapi::showQuestion("Using verbose = TRUE with eucast_rules()", txt) } else { - q_continue <- menu(choices = c("OK", "Cancel"), graphics = TRUE, title = txt) + q_continue <- utils::menu(choices = c("OK", "Cancel"), graphics = FALSE, title = txt) } if (q_continue %in% c(FALSE, 2)) { message("Cancelled, returning original data") @@ -242,52 +239,50 @@ eucast_rules <- function(x, decimal.mark <- getOption("OutDec") big.mark <- ifelse(decimal.mark != ",", ",", ".") - formatnr <- function(x) { - trimws(format(x, big.mark = big.mark, decimal.mark = decimal.mark)) + formatnr <- function(x, big = big.mark, dec = decimal.mark) { + trimws(format(x, big.mark = big, decimal.mark = dec)) } - grey <- make_style("grey") - warned <- FALSE txt_error <- function() { - if (info == TRUE) cat("", bgRed(white(" ERROR ")), "\n\n") + if (info == TRUE) cat("", font_red_bg(font_white(" ERROR ")), "\n\n") } txt_warning <- function() { if (warned == FALSE) { - if (info == TRUE) cat("", bgYellow(black(" WARNING "))) + if (info == TRUE) cat("", font_yellow_bg(font_black(" WARNING "))) } warned <<- TRUE } txt_ok <- function(no_added, no_changed) { if (warned == FALSE) { if (no_added + no_changed == 0) { - cat(pillar::style_subtle(" (no changes)\n")) + cat(font_subtle(" (no changes)\n")) } else { # opening - cat(grey(" (")) + cat(font_grey(" (")) # additions if (no_added > 0) { if (no_added == 1) { - cat(green("1 value added")) + cat(font_green("1 value added")) } else { - cat(green(formatnr(no_added), "values added")) + cat(font_green(formatnr(no_added), "values added")) } } # separator if (no_added > 0 & no_changed > 0) { - cat(grey(", ")) + cat(font_grey(", ")) } # changes if (no_changed > 0) { if (no_changed == 1) { - cat(blue("1 value changed")) + cat(font_blue("1 value changed")) } else { - cat(blue(formatnr(no_changed), "values changed")) + cat(font_blue(formatnr(no_changed), "values changed")) } } # closing - cat(grey(")\n")) + cat(font_grey(")\n")) } warned <<- FALSE } @@ -450,8 +445,11 @@ eucast_rules <- function(x, x_original[rows, cols] <<- to, warning = function(w) { if (w$message %like% "invalid factor level") { - x_original <<- x_original %>% mutate_at(vars(cols), ~factor(x = as.character(.), levels = c(to, levels(.)))) - x <<- x %>% mutate_at(vars(cols), ~factor(x = as.character(.), levels = c(to, levels(.)))) + xyz <- sapply(cols, function(col) { + x_original[, col] <<- factor(x = as.character(pull(x_original, col)), levels = c(to, levels(pull(x_original, col)))) + x[, col] <<- factor(x = as.character(pull(x, col)), levels = c(to, levels(pull(x, col)))) + invisible() + }) x_original[rows, cols] <<- to warning('Value "', to, '" added to the factor levels of column(s) `', paste(cols, collapse = "`, `"), "` because this value was not an existing factor level.\nA better way is to use as.rsi() on beforehand on antimicrobial columns to guarantee the right structure.", call. = FALSE) txt_warning() @@ -493,9 +491,9 @@ eucast_rules <- function(x, mo_fullname = x[rows, "fullname"], old = as.rsi(as.character(old[, cols[i]]), warn = FALSE), new = as.rsi(as.character(x[rows, cols[i]])), - rule = strip_style(rule[1]), - rule_group = strip_style(rule[2]), - rule_name = strip_style(rule[3]), + rule = font_stripstyle(rule[1]), + rule_group = font_stripstyle(rule[2]), + rule_name = font_stripstyle(rule[3]), stringsAsFactors = FALSE) colnames(verbose_new) <- c("row", "col", "mo_fullname", "old", "new", "rule", "rule_group", "rule_name") verbose_new <- verbose_new %>% filter(old != new | is.na(old)) @@ -517,18 +515,16 @@ eucast_rules <- function(x, x_original <- x # join to microorganisms data set - suppressWarnings( - x <- x %>% - mutate_at(vars(col_mo), as.mo) %>% - left_join_microorganisms(by = col_mo, suffix = c("_oldcols", "")) %>% - mutate(gramstain = mo_gramstain(pull(., col_mo), language = "en"), - genus_species = paste(genus, species)) %>% - as.data.frame(stringsAsFactors = FALSE) - ) - + x <- as.data.frame(x, stringsAsFactors = FALSE) + x[, col_mo] <- as.mo(x[, col_mo, drop = TRUE]) + x <- x %>% + left_join_microorganisms(by = col_mo, suffix = c("_oldcols", "")) + x$gramstain <- mo_gramstain(x[, col_mo, drop = TRUE]) + x$genus_species <- paste(x$genus, x$species) + if (ab_missing(AMP) & !ab_missing(AMX)) { # ampicillin column is missing, but amoxicillin is available - message(blue(paste0("NOTE: Using column `", bold(AMX), "` as input for ampicillin (J01CA01) since many EUCAST rules depend on it."))) + message(font_blue(paste0("NOTE: Using column `", font_bold(AMX), "` as input for ampicillin (J01CA01) since many EUCAST rules depend on it."))) AMP <- AMX } @@ -642,8 +638,8 @@ eucast_rules <- function(x, if (info == TRUE & !rule_group_current %like% "other" & eucast_notification_shown == FALSE) { cat(paste0( - "\n----\nRules by the ", bold("European Committee on Antimicrobial Susceptibility Testing (EUCAST)"), - "\n", blue("http://eucast.org/"), "\n")) + "\n----\nRules by the ", font_bold("European Committee on Antimicrobial Susceptibility Testing (EUCAST)"), + "\n", font_blue("http://eucast.org/"), "\n")) eucast_notification_shown <- TRUE } @@ -652,25 +648,23 @@ eucast_rules <- function(x, # Print rule (group) ------------------------------------------------------ if (rule_group_current != rule_group_previous) { # is new rule group, one of Breakpoints, Expert Rules and Other - cat(bold( - case_when( - rule_group_current %like% "breakpoint" ~ - paste0("\nEUCAST Clinical Breakpoints (", - red(paste0("v", EUCAST_VERSION_BREAKPOINTS)), ")\n"), - rule_group_current %like% "expert" ~ + cat(font_bold( + ifelse( + rule_group_current %like% "breakpoint", + paste0("\nEUCAST Clinical Breakpoints (", + font_red(paste0("v", EUCAST_VERSION_BREAKPOINTS)), ")\n"), + ifelse( + rule_group_current %like% "expert", paste0("\nEUCAST Expert Rules, Intrinsic Resistance and Exceptional Phenotypes (", - red(paste0("v", EUCAST_VERSION_EXPERT_RULES)), ")\n"), - TRUE ~ - "\nOther rules by this AMR package\n" - ) - )) + font_red(paste0("v", EUCAST_VERSION_EXPERT_RULES)), ")\n"), + "\nOther rules by this AMR package\n")))) } # Print rule ------------------------------------------------------------- if (rule_current != rule_previous) { # is new rule within group, print its name if (rule_current %in% c(microorganisms$family, microorganisms$fullname)) { - cat(italic(rule_current)) + cat(font_italic(rule_current)) } else { cat(rule_current) } @@ -789,8 +783,8 @@ eucast_rules <- function(x, verbose_info <- verbose_info %>% arrange(row, rule_group, rule_name, col) - cat(paste0("\n", grey(strrep("-", options()$width - 1)), "\n")) - cat(bold(paste("EUCAST rules", paste0(wouldve, "affected"), + cat(paste0("\n", font_grey(strrep("-", options()$width - 1)), "\n")) + cat(font_bold(paste("EUCAST rules", paste0(wouldve, "affected"), formatnr(n_distinct(verbose_info$row)), "out of", formatnr(nrow(x_original)), "rows, making a total of", formatnr(nrow(verbose_info)), "edits\n"))) @@ -802,62 +796,59 @@ eucast_rules <- function(x, if (n_added == 0) { colour <- cat # is function } else { - colour <- green # is function + colour <- font_green # is function } cat(colour(paste0("=> ", wouldve, "added ", - bold(formatnr(verbose_info %>% + font_bold(formatnr(verbose_info %>% filter(is.na(old)) %>% nrow()), "test results"), "\n"))) if (n_added > 0) { - verbose_info %>% + added_summary <- verbose_info %>% filter(is.na(old)) %>% group_by(new) %>% - summarise(n = n()) %>% - mutate(plural = ifelse(n > 1, "s", ""), - txt = paste0(formatnr(n), " test result", plural, " added as ", new)) %>% - pull(txt) %>% - paste(" -", ., collapse = "\n") %>% - cat() + summarise(n = n()) + cat(paste(" -", + paste0(formatnr(added_summary$n), " test result", ifelse(added_summary$n > 1, "s", ""), + " added as ", added_summary$new), collapse = "\n")) } # print changed values ---- if (n_changed == 0) { colour <- cat # is function } else { - colour <- blue # is function + colour <- font_blue # is function } if (n_added + n_changed > 0) { cat("\n") } cat(colour(paste0("=> ", wouldve, "changed ", - bold(formatnr(verbose_info %>% + font_bold(formatnr(verbose_info %>% filter(!is.na(old)) %>% nrow()), "test results"), "\n"))) if (n_changed > 0) { - verbose_info %>% + changed_summary <- verbose_info %>% filter(!is.na(old)) %>% group_by(old, new) %>% - summarise(n = n()) %>% - mutate(plural = ifelse(n > 1, "s", ""), - txt = paste0(formatnr(n), " test result", plural, " changed from ", old, " to ", new)) %>% - pull(txt) %>% - paste(" -", ., collapse = "\n") %>% - cat() + summarise(n = n()) + cat(paste(" -", + paste0(formatnr(changed_summary$n), " test result", ifelse(changed_summary$n > 1, "s", ""), " changed from ", + changed_summary$old, " to ", changed_summary$new), collapse = "\n")) cat("\n") } - cat(paste0(grey(strrep("-", options()$width - 1)), "\n")) + cat(paste0(font_grey(strrep("-", options()$width - 1)), "\n")) if (verbose == FALSE & nrow(verbose_info) > 0) { - cat(paste("\nUse", bold("eucast_rules(..., verbose = TRUE)"), "(on your original data) to get a data.frame with all specified edits instead.\n\n")) + cat(paste("\nUse", font_bold("eucast_rules(..., verbose = TRUE)"), "(on your original data) to get a data.frame with all specified edits instead.\n\n")) } else if (verbose == TRUE) { - cat(paste0("\nUsed 'Verbose mode' (", bold("verbose = TRUE"), "), which returns a data.frame with all specified edits.\nUse ", bold("verbose = FALSE"), " to apply the rules on your data.\n\n")) + cat(paste0("\nUsed 'Verbose mode' (", font_bold("verbose = TRUE"), "), which returns a data.frame with all specified edits.\nUse ", font_bold("verbose = FALSE"), " to apply the rules on your data.\n\n")) } } # Return data set --------------------------------------------------------- if (verbose == TRUE) { + rownames(verbose_info) <- NULL verbose_info } else { x_original diff --git a/R/filter_ab_class.R b/R/filter_ab_class.R index b9e71622f..3c5248678 100644 --- a/R/filter_ab_class.R +++ b/R/filter_ab_class.R @@ -19,9 +19,9 @@ # Visit our website for more info: https://msberends.gitlab.io/AMR. # # ==================================================================== # -#' Filter isolates on result in antibiotic class +#' Filter isolates on result in antimicrobial class #' -#' Filter isolates on results in specific antibiotic variables based on their antibiotic class. This makes it easy to filter on isolates that were tested for e.g. any aminoglycoside. +#' Filter isolates on results in specific antimicrobial classes. This makes it easy to filter on isolates that were tested for e.g. any aminoglycoside. #' @inheritSection lifecycle Stable lifecycle #' @param x a data set #' @param ab_class an antimicrobial class, like `"carbapenems"`, as can be found in [`antibiotics$group`][antibiotics] @@ -30,10 +30,9 @@ #' @param ... parameters passed on to `filter_at` from the `dplyr` package #' @details The `group` column in [antibiotics] data set will be searched for `ab_class` (case-insensitive). If no results are found, the `atc_group1` and `atc_group2` columns will be searched. Next, `x` will be checked for column names with a value in any abbreviations, codes or official names found in the [antibiotics] data set. #' @rdname filter_ab_class -#' @importFrom dplyr filter_at %>% select vars any_vars all_vars -#' @importFrom crayon bold blue #' @export #' @examples +#' \dontrun{ #' library(dplyr) #' #' # filter on isolates that have any result for any aminoglycoside @@ -62,6 +61,7 @@ #' example_isolates %>% #' filter_aminoglycosides("R", "all") %>% #' filter_fluoroquinolones("R", "all") +#' } filter_ab_class <- function(x, ab_class, result = NULL, @@ -76,17 +76,23 @@ filter_ab_class <- function(x, } # make result = "SI" work too: result <- unlist(strsplit(result, "")) - + if (!all(result %in% c("S", "I", "R"))) { stop("`result` must be one or more of: S, I, R", call. = FALSE) } if (!all(scope %in% c("any", "all"))) { stop("`scope` must be one of: any, all", call. = FALSE) } - - vars_df <- colnames(x)[tolower(colnames(x)) %in% tolower(ab_class_vars(ab_class))] + + # get only columns with class ab, mic or disk - those are AMR results + vars_df <- colnames(x)[sapply(x, function(y) is.rsi(y) | is.mic(y) | is.disk(y))] + vars_df_ab <- suppressWarnings(as.ab(vars_df)) + # get the columns with a group names in the chosen ab class + vars_df <- vars_df[which(ab_group(vars_df_ab) %like% ab_class | + ab_atc_group1(vars_df_ab) %like% ab_class | + ab_atc_group2(vars_df_ab) %like% ab_class)] ab_group <- find_ab_group(ab_class) - + if (length(vars_df) > 0) { if (length(result) == 1) { operator <- " is " @@ -95,10 +101,10 @@ filter_ab_class <- function(x, } if (scope == "any") { scope_txt <- " or " - scope_fn <- any_vars + scope_fn <- any } else { scope_txt <- " and " - scope_fn <- all_vars + scope_fn <- all if (length(vars_df) > 1) { operator <- gsub("is", "are", operator) } @@ -108,14 +114,13 @@ filter_ab_class <- function(x, } else { scope <- "column " } - message(blue(paste0("Filtering on ", ab_group, ": ", scope, - paste(bold(paste0("`", vars_df, "`")), collapse = scope_txt), operator, toString(result)))) - x %>% - filter_at(vars(vars_df), - scope_fn(. %in% result), - ...) + message(font_blue(paste0("Filtering on ", ab_group, ": ", scope, + paste0(font_bold(paste0("`", vars_df, "`"), collapse = NULL), collapse = scope_txt), operator, toString(result)))) + x[as.logical(by(x, seq_len(nrow(x)), function(row) scope_fn(unlist(row[, vars_df]) %in% result, na.rm = TRUE))), , drop = FALSE] } else { - warning(paste0("no antibiotics of class ", ab_group, " found, leaving data unchanged"), call. = FALSE) + message(font_blue(paste0("NOTE: no antimicrobial agents of class ", ab_group, + " (such as ", find_ab_names(ab_group), + ") found, data left unchanged."))) x } } @@ -276,38 +281,6 @@ filter_tetracyclines <- function(x, ...) } -#' @importFrom dplyr %>% filter_at vars any_vars select -ab_class_vars <- function(ab_class) { - ab_class <- gsub("[^a-z0-9]+", ".*", ab_class) - ab_vars <- antibiotics %>% - filter(group %like% ab_class) %>% - select(ab:name, abbreviations, synonyms) %>% - unlist() %>% - as.matrix() %>% - as.character() %>% - paste(collapse = "|") %>% - strsplit("|", fixed = TRUE) %>% - unlist() %>% - unique() - ab_vars <- ab_vars[!ab_vars %in% c(NA, "", "NA") & nchar(ab_vars) > 2] - if (length(ab_vars) == 0) { - # try again, searching atc_group1 and atc_group2 columns - ab_vars <- antibiotics %>% - filter_at(vars(c("atc_group1", "atc_group2")), any_vars(. %like% ab_class)) %>% - select(ab:name, abbreviations, synonyms) %>% - unlist() %>% - as.matrix() %>% - as.character() %>% - paste(collapse = "|") %>% - strsplit("|", fixed = TRUE) %>% - unlist() %>% - unique() - ab_vars <- ab_vars[!ab_vars %in% c(NA, "", "NA") & nchar(ab_vars) > 2] - } - ab_vars -} - -#' @importFrom dplyr %>% filter pull find_ab_group <- function(ab_class) { ifelse(ab_class %in% c("aminoglycoside", "carbapenem", @@ -318,10 +291,19 @@ find_ab_group <- function(ab_class) { "tetracycline"), paste0(ab_class, "s"), antibiotics %>% - filter(ab %in% ab_class_vars(ab_class)) %>% + subset(group %like% ab_class | + atc_group1 %like% ab_class | + atc_group2 %like% ab_class) %>% pull(group) %>% unique() %>% tolower() %>% paste(collapse = "/") ) } + +find_ab_names <- function(ab_group) { + drugs <- antibiotics[which(antibiotics$group %like% ab_group), "name"] + paste0(ab_name(sample(drugs, size = min(4, length(drugs)), replace = FALSE), + tolower = TRUE, language = NULL), + collapse = ", ") +} diff --git a/R/first_isolate.R b/R/first_isolate.R index 1657b94fe..3d21899be 100755 --- a/R/first_isolate.R +++ b/R/first_isolate.R @@ -75,11 +75,8 @@ #' @rdname first_isolate #' @seealso [key_antibiotics()] #' @export -#' @importFrom dplyr arrange_at lag between row_number filter mutate arrange pull ungroup -#' @importFrom crayon blue bold silver -# @importFrom clean percentage #' @return A [`logical`] vector -#' @source Methodology of this function is based on: +#' @source Methodology of this function is strictly based on: #' #' **M39 Analysis and Presentation of Cumulative Antimicrobial Susceptibility Test Data, 4th Edition**, 2014, *Clinical and Laboratory Standards Institute (CLSI)*. . #' @inheritSection AMR Read more on our website! @@ -87,6 +84,7 @@ #' # `example_isolates` is a dataset available in the AMR package. #' # See ?example_isolates. #' +#' \dontrun{ #' library(dplyr) #' # Filter on first isolates: #' example_isolates %>% @@ -107,13 +105,11 @@ #' #' # Have a look at A and B. #' # B is more reliable because every isolate is counted only once. -#' # Gentamicin resitance in hospital D appears to be 3.7% higher than +#' # Gentamicin resistance in hospital D appears to be 3.7% higher than #' # when you (erroneously) would have used all isolates for analysis. #' #' #' ## OTHER EXAMPLES: -#' -#' \dontrun{ #' #' # Short-hand versions: #' example_isolates %>% @@ -151,10 +147,6 @@ first_isolate <- function(x, include_unknown = FALSE, ...) { - if (!is.data.frame(x)) { - stop("`x` must be a data.frame.", call. = FALSE) - } - dots <- unlist(list(...)) if (length(dots) != 0) { # backwards compatibility with old parameters @@ -167,24 +159,30 @@ first_isolate <- function(x, } } + if (!is.data.frame(x)) { + stop("`x` must be a data.frame.", call. = FALSE) + } + # remove data.table, grouping from tibbles, etc. + x <- as.data.frame(x, stringsAsFactors = FALSE) + # try to find columns based on type # -- mo if (is.null(col_mo)) { col_mo <- search_type_in_df(x = x, type = "mo") - } - if (is.null(col_mo)) { - stop("`col_mo` must be set.", call. = FALSE) + if (is.null(col_mo)) { + stop("`col_mo` must be set.", call. = FALSE) + } } # -- date if (is.null(col_date)) { col_date <- search_type_in_df(x = x, type = "date") + if (is.null(col_date)) { + stop("`col_date` must be set.", call. = FALSE) + } } - if (is.null(col_date)) { - stop("`col_date` must be set.", call. = FALSE) - } - # convert to Date (pipes/pull for supporting tibbles too) - dates <- x %>% pull(col_date) %>% as.Date() + # convert to Date + dates <- as.Date(x[, col_date, drop = TRUE]) dates[is.na(dates)] <- as.Date("1970-01-01") x[, col_date] <- dates @@ -192,15 +190,15 @@ first_isolate <- function(x, if (is.null(col_patient_id)) { if (all(c("First name", "Last name", "Sex") %in% colnames(x))) { # WHONET support - x <- x %>% mutate(patient_id = paste(`First name`, `Last name`, Sex)) + x$patient_id <- paste(x$`First name`, x$`Last name`, x$Sex) col_patient_id <- "patient_id" - message(blue(paste0("NOTE: Using combined columns `", bold("First name"), "`, `", bold("Last name"), "` and `", bold("Sex"), "` as input for `col_patient_id`"))) + message(font_blue(paste0("NOTE: Using combined columns `", font_bold("First name"), "`, `", font_bold("Last name"), "` and `", font_bold("Sex"), "` as input for `col_patient_id`"))) } else { col_patient_id <- search_type_in_df(x = x, type = "patient_id") } - } - if (is.null(col_patient_id)) { - stop("`col_patient_id` must be set.", call. = FALSE) + if (is.null(col_patient_id)) { + stop("`col_patient_id` must be set.", call. = FALSE) + } } # -- key antibiotics @@ -239,27 +237,19 @@ first_isolate <- function(x, check_columns_existance(col_icu) check_columns_existance(col_keyantibiotics) - # create new dataframe with original row index - x <- x %>% - mutate(newvar_row_index = seq_len(nrow(x)), - newvar_mo = x %>% pull(col_mo) %>% as.mo(), - newvar_genus_species = paste(mo_genus(newvar_mo), mo_species(newvar_mo)), - newvar_date = x %>% pull(col_date), - newvar_patient_id = x %>% pull(col_patient_id)) + # create original row index + x$newvar_row_index <- seq_len(nrow(x)) + x$newvar_mo <- x %>% pull(col_mo) %>% as.mo() + x$newvar_genus_species <- paste(mo_genus(x$newvar_mo), mo_species(x$newvar_mo)) + x$newvar_date <- x %>% pull(col_date) + x$newvar_patient_id <- x %>% pull(col_patient_id) if (is.null(col_testcode)) { testcodes_exclude <- NULL } # remove testcodes if (!is.null(testcodes_exclude) & info == TRUE) { - message(blue(paste0("[Criterion] Excluded test codes: ", toString(testcodes_exclude)))) - } - - if (is.null(col_icu)) { - icu_exclude <- FALSE - } else { - x <- x %>% - mutate(col_icu = x %>% pull(col_icu) %>% as.logical()) + message(font_black(paste0("[Criterion] Exclude test codes: ", toString(paste0("'", testcodes_exclude, "'"))))) } if (is.null(col_specimen)) { @@ -270,11 +260,11 @@ first_isolate <- function(x, if (!is.null(specimen_group)) { check_columns_existance(col_specimen, x) if (info == TRUE) { - message(blue(paste0("[Criterion] Excluded other than specimen group '", specimen_group, "'"))) + message(font_black(paste0("[Criterion] Exclude other than specimen group '", specimen_group, "'"))) } } if (!is.null(col_keyantibiotics)) { - x <- x %>% mutate(key_ab = x %>% pull(col_keyantibiotics)) + x$newvar_key_ab <- x[, col_keyantibiotics, drop = TRUE] } if (is.null(testcodes_exclude)) { @@ -283,87 +273,38 @@ first_isolate <- function(x, # arrange data to the right sorting if (is.null(specimen_group)) { - # not filtering on specimen - if (icu_exclude == FALSE) { - if (info == TRUE & !is.null(col_icu)) { - message(blue("[Criterion] Included isolates from ICU")) - } - x <- x %>% - arrange(newvar_patient_id, - newvar_genus_species, - newvar_date) + x <- x[order(x$newvar_patient_id, + x$newvar_genus_species, + x$newvar_date), ] + rownames(x) <- NULL row.start <- 1 row.end <- nrow(x) - } else { - if (info == TRUE) { - message(blue("[Criterion] Excluded isolates from ICU")) - } - x <- x %>% - arrange_at(c(col_icu, - "newvar_patient_id", - "newvar_genus_species", - "newvar_date")) - - suppressWarnings( - row.start <- which(x %>% pull(col_icu) == FALSE) %>% min(na.rm = TRUE) - ) - suppressWarnings( - row.end <- which(x %>% pull(col_icu) == FALSE) %>% max(na.rm = TRUE) - ) - } - } else { - # filtering on specimen and only analyse these row to save time - if (icu_exclude == FALSE) { - if (info == TRUE & !is.null(col_icu)) { - message(blue("[Criterion] Included isolates from ICU.\n")) - } - x <- x %>% - arrange_at(c(col_specimen, - "newvar_patient_id", - "newvar_genus_species", - "newvar_date")) + # filtering on specimen and only analyse these rows to save time + x <- x[order(pull(x, col_specimen), + x$newvar_patient_id, + x$newvar_genus_species, + x$newvar_date), ] + rownames(x) <- NULL suppressWarnings( row.start <- which(x %>% pull(col_specimen) == specimen_group) %>% min(na.rm = TRUE) ) suppressWarnings( row.end <- which(x %>% pull(col_specimen) == specimen_group) %>% max(na.rm = TRUE) ) - } else { - if (info == TRUE) { - message(blue("[Criterion] Excluded isolates from ICU")) - } - x <- x %>% - arrange_at(c(col_icu, - col_specimen, - "newvar_patient_id", - "newvar_genus_species", - "newvar_date")) - suppressWarnings( - row.start <- min(which(x %>% pull(col_specimen) == specimen_group - & x %>% pull(col_icu) == FALSE), - na.rm = TRUE) - ) - suppressWarnings( - row.end <- max(which(x %>% pull(col_specimen) == specimen_group & - x %>% pull(col_icu) == FALSE), - na.rm = TRUE) - ) - } - } # no isolates found if (abs(row.start) == Inf | abs(row.end) == Inf) { if (info == TRUE) { - message(paste("=> Found", bold("no isolates"))) + message(paste("=> Found", font_bold("no isolates"))) } return(rep(FALSE, nrow(x))) } # did find some isolates - add new index numbers of rows - x <- x %>% mutate(newvar_row_index_sorted = seq_len(nrow(.))) - + x$newvar_row_index_sorted <- seq_len(nrow(x)) + scope.size <- row.end - row.start + 1 identify_new_year <- function(x, episode_days) { @@ -389,123 +330,121 @@ first_isolate <- function(x, } # Analysis of first isolate ---- - all_first <- x %>% - mutate(other_pat_or_mo = if_else(newvar_patient_id == lag(newvar_patient_id) - & newvar_genus_species == lag(newvar_genus_species), - FALSE, - TRUE)) %>% - group_by(newvar_patient_id, - newvar_genus_species) %>% - mutate(more_than_episode_ago = identify_new_year(x = newvar_date, - episode_days = episode_days)) %>% - ungroup() + x$other_pat_or_mo <- if_else(x$newvar_patient_id == lag(x$newvar_patient_id) & + x$newvar_genus_species == lag(x$newvar_genus_species), + FALSE, + TRUE) + x$episode_group <- paste(x$newvar_patient_id, x$newvar_genus_species) + x$more_than_episode_ago <- unname(unlist(lapply(unique(x$episode_group), + function(g, + df = x, + days = episode_days) { + identify_new_year(x = df[which(df$episode_group == g), "newvar_date"], + episode_days = days) + }))) weighted.notice <- "" if (!is.null(col_keyantibiotics)) { weighted.notice <- "weighted " if (info == TRUE) { if (type == "keyantibiotics") { - message(blue(paste0("[Criterion] Inclusion based on key antibiotics, ", + message(font_black(paste0("[Criterion] Base inclusion on key antibiotics, ", ifelse(ignore_I == FALSE, "not ", ""), "ignoring I"))) } if (type == "points") { - message(blue(paste0("[Criterion] Inclusion based on key antibiotics, using points threshold of " + message(font_black(paste0("[Criterion] Base inclusion on key antibiotics, using points threshold of " , points_threshold))) } } type_param <- type - all_first <- all_first %>% - mutate(key_ab_lag = lag(key_ab)) %>% - mutate(key_ab_other = !key_antibiotics_equal(y = key_ab, - z = key_ab_lag, - type = type_param, - ignore_I = ignore_I, - points_threshold = points_threshold, - info = info)) %>% - mutate( - real_first_isolate = - if_else( - newvar_row_index_sorted %>% between(row.start, row.end) - & newvar_genus_species != "" - & (other_pat_or_mo | more_than_episode_ago | key_ab_other), - TRUE, - FALSE)) + x$other_key_ab <- !key_antibiotics_equal(y = x$newvar_key_ab, + z = lag(x$newvar_key_ab), + type = type_param, + ignore_I = ignore_I, + points_threshold = points_threshold, + info = info) + # with key antibiotics + x$newvar_first_isolate <- if_else(x$newvar_row_index_sorted >= row.start & + x$newvar_row_index_sorted <= row.end & + x$newvar_genus_species != "" & + (x$other_pat_or_mo | x$more_than_episode_ago | x$other_key_ab), + TRUE, + FALSE) } else { # no key antibiotics - all_first <- all_first %>% - mutate( - real_first_isolate = - if_else( - newvar_row_index_sorted %>% between(row.start, row.end) - & newvar_genus_species != "" - & (other_pat_or_mo | more_than_episode_ago), - TRUE, - FALSE)) - + x$newvar_first_isolate <- if_else(x$newvar_row_index_sorted >= row.start & + x$newvar_row_index_sorted <= row.end & + x$newvar_genus_species != "" & + (x$other_pat_or_mo | x$more_than_episode_ago), + TRUE, + FALSE) } # first one as TRUE - all_first[row.start, "real_first_isolate"] <- TRUE + x[row.start, "newvar_first_isolate"] <- TRUE # no tests that should be included, or ICU if (!is.null(col_testcode)) { - all_first[which(all_first[, col_testcode] %in% tolower(testcodes_exclude)), "real_first_isolate"] <- FALSE + x[which(x[, col_testcode] %in% tolower(testcodes_exclude)), "newvar_first_isolate"] <- FALSE } - if (icu_exclude == TRUE) { - all_first[which(all_first[, col_icu] == TRUE), "real_first_isolate"] <- FALSE + if (!is.null(col_icu)) { + if (icu_exclude == TRUE) { + message(font_black("[Criterion] Exclude isolates from ICU.\n")) + x[which(as.logical(x[, col_icu, drop = TRUE])), "newvar_first_isolate"] <- FALSE + } else { + message(font_black("[Criterion] Include isolates from ICU.\n")) + } } decimal.mark <- getOption("OutDec") big.mark <- ifelse(decimal.mark != ",", ",", ".") # handle empty microorganisms - if (any(all_first$newvar_mo == "UNKNOWN", na.rm = TRUE) & info == TRUE) { - message(blue(paste0("NOTE: ", ifelse(include_unknown == TRUE, "Included ", "Excluded "), - format(sum(all_first$newvar_mo == "UNKNOWN"), + if (any(x$newvar_mo == "UNKNOWN", na.rm = TRUE) & info == TRUE) { + message(font_blue(paste0("NOTE: ", ifelse(include_unknown == TRUE, "Included ", "Excluded "), + format(sum(x$newvar_mo == "UNKNOWN"), decimal.mark = decimal.mark, big.mark = big.mark), - " isolates with a microbial ID 'UNKNOWN' (column `", bold(col_mo), "`)"))) + " isolates with a microbial ID 'UNKNOWN' (column `", font_bold(col_mo), "`)"))) } - all_first[which(all_first$newvar_mo == "UNKNOWN"), "real_first_isolate"] <- include_unknown + x[which(x$newvar_mo == "UNKNOWN"), "newvar_first_isolate"] <- include_unknown # exclude all NAs - if (any(is.na(all_first$newvar_mo)) & info == TRUE) { - message(blue(paste0("NOTE: Excluded ", format(sum(is.na(all_first$newvar_mo)), + if (any(is.na(x$newvar_mo)) & info == TRUE) { + message(font_blue(paste0("NOTE: Excluded ", format(sum(is.na(x$newvar_mo)), decimal.mark = decimal.mark, big.mark = big.mark), - " isolates with a microbial ID 'NA' (column `", bold(col_mo), "`)"))) + " isolates with a microbial ID 'NA' (column `", font_bold(col_mo), "`)"))) } - all_first[which(is.na(all_first$newvar_mo)), "real_first_isolate"] <- FALSE + x[which(is.na(x$newvar_mo)), "newvar_first_isolate"] <- FALSE # arrange back according to original sorting again - all_first <- all_first %>% - arrange(newvar_row_index) %>% - pull(real_first_isolate) + x <- x[order(x$newvar_row_index), ] + rownames(x) <- NULL if (info == TRUE) { - n_found <- base::sum(all_first, na.rm = TRUE) + n_found <- base::sum(x$newvar_first_isolate, na.rm = TRUE) p_found_total <- percentage(n_found / nrow(x)) p_found_scope <- percentage(n_found / scope.size) # mark up number of found n_found <- base::format(n_found, big.mark = big.mark, decimal.mark = decimal.mark) if (p_found_total != p_found_scope) { msg_txt <- paste0("=> Found ", - bold(paste0(n_found, " first ", weighted.notice, "isolates")), + font_bold(paste0(n_found, " first ", weighted.notice, "isolates")), " (", p_found_scope, " within scope and ", p_found_total, " of total)") } else { msg_txt <- paste0("=> Found ", - bold(paste0(n_found, " first ", weighted.notice, "isolates")), + font_bold(paste0(n_found, " first ", weighted.notice, "isolates")), " (", p_found_total, " of total)") } - base::message(msg_txt) + message(font_black(msg_txt)) } - all_first + x$newvar_first_isolate } #' @rdname first_isolate -#' @importFrom dplyr filter #' @export filter_first_isolate <- function(x, col_date = NULL, @@ -520,7 +459,6 @@ filter_first_isolate <- function(x, } #' @rdname first_isolate -#' @importFrom dplyr %>% mutate filter #' @export filter_first_weighted_isolate <- function(x, col_date = NULL, diff --git a/R/freq.R b/R/freq.R index 97815ab15..c990da0bf 100755 --- a/R/freq.R +++ b/R/freq.R @@ -24,8 +24,7 @@ cleaner::freq #' @exportMethod freq.mo -#' @importFrom dplyr n_distinct -#' @importFrom cleaner freq.default percentage +#' @importFrom cleaner freq.default #' @export #' @noRd freq.mo <- function(x, ...) { diff --git a/R/ggplot_pca.R b/R/ggplot_pca.R index e3daccc47..f949cde7b 100755 --- a/R/ggplot_pca.R +++ b/R/ggplot_pca.R @@ -58,6 +58,7 @@ #' # `example_isolates` is a dataset available in the AMR package. #' # See ?example_isolates. #' +#' \dontrun{ #' # See ?pca for more info about Principal Component Analysis (PCA). #' library(dplyr) #' pca_model <- example_isolates %>% @@ -71,6 +72,7 @@ #' #' # new #' ggplot_pca(pca_model) +#' } ggplot_pca <- function(x, choices = 1:2, scale = TRUE, @@ -120,14 +122,9 @@ ggplot_pca <- function(x, pc.biplot = pc.biplot, ellipse_prob = ellipse_prob, labels_text_placement = labels_text_placement) - nobs.factor <- calculations$nobs.factor - d <- calculations$d - u <- calculations$u - v <- calculations$v choices <- calculations$choices df.u <- calculations$df.u df.v <- calculations$df.v - r <- calculations$r ell <- calculations$ell groups <- calculations$groups group_name <- calculations$group_name @@ -232,7 +229,6 @@ ggplot_pca <- function(x, g } -#' @importFrom dplyr bind_rows #' @importFrom stats qchisq var pca_calculations <- function(pca_model, groups = NULL, @@ -328,18 +324,25 @@ pca_calculations <- function(pca_model, if (!is.null(df.u$groups)) { theta <- c(seq(-pi, pi, length = 50), seq(pi, -pi, length = 50)) circle <- cbind(cos(theta), sin(theta)) - ell <- bind_rows( - sapply(unique(df.u$groups), function(g, df = df.u) { - x <- df[which(df$groups == g), , drop = FALSE] - if (nrow(x) <= 2) { - return(NULL) - } - sigma <- var(cbind(x$xvar, x$yvar)) - mu <- c(mean(x$xvar), mean(x$yvar)) - ed <- sqrt(qchisq(ellipse_prob, df = 2)) - data.frame(sweep(circle %*% chol(sigma) * ed, 2, mu, FUN = "+"), - groups = x$groups[1]) - })) + + df.groups <- lapply(unique(df.u$groups), function(g, df = df.u) { + x <- df[which(df$groups == g), , drop = FALSE] + if (nrow(x) <= 2) { + return(data.frame(X1 = numeric(0), + X2 = numeric(0), + groups = character(0))) + } + sigma <- var(cbind(x$xvar, x$yvar)) + mu <- c(mean(x$xvar), mean(x$yvar)) + ed <- sqrt(qchisq(ellipse_prob, df = 2)) + data.frame(sweep(circle %*% chol(sigma) * ed, + MARGIN = 2, + STATS = mu, + FUN = "+"), + groups = x$groups[1], + stringsAsFactors = FALSE) + }) + ell <- do.call(rbind, df.groups) if (NROW(ell) == 0) { ell <- NULL } else { @@ -349,14 +352,9 @@ pca_calculations <- function(pca_model, ell <- NULL } - list(nobs.factor = nobs.factor, - d = d, - u = u, - v = v, - choices = choices, + list(choices = choices, df.u = df.u, df.v = df.v, - r = r, ell = ell, groups = groups, group_name = group_name, diff --git a/R/ggplot_rsi.R b/R/ggplot_rsi.R index 408a6d774..d4c8b6278 100755 --- a/R/ggplot_rsi.R +++ b/R/ggplot_rsi.R @@ -134,30 +134,6 @@ #' title = "AMR of Anti-UTI Drugs Per Hospital", #' x.title = "Hospital", #' datalabels = FALSE) -#' -#' # genuine analysis: check 3 most prevalent microorganisms -#' example_isolates %>% -#' # create new bacterial ID's, with all CoNS under the same group (Becker et al.) -#' mutate(mo = as.mo(mo, Becker = TRUE)) %>% -#' # filter on top three bacterial ID's -#' filter(mo %in% top_freq(freq(.$mo), 3)) %>% -#' # filter on first isolates -#' filter_first_isolate() %>% -#' # get short MO names (like "E. coli") -#' mutate(bug = mo_shortname(mo, Becker = TRUE)) %>% -#' # select this short name and some antiseptic drugs -#' select(bug, CXM, GEN, CIP) %>% -#' # group by MO -#' group_by(bug) %>% -#' # plot the thing, putting MOs on the facet -#' ggplot_rsi(x = "antibiotic", -#' facet = "bug", -#' translate_ab = FALSE, -#' nrow = 1, -#' title = "AMR of Top Three Microorganisms In Blood Culture Isolates", -#' subtitle = expression(paste("Only First Isolates, CoNS grouped according to Becker ", -#' italic("et al."), " (2014)")), -#' x.title = "Antibiotic (EARS-Net code)") #' } ggplot_rsi <- function(data, position = NULL, @@ -339,7 +315,6 @@ facet_rsi <- function(facet = c("interpretation", "antibiotic"), nrow = NULL) { } #' @rdname ggplot_rsi -#' @importFrom cleaner percentage #' @export scale_y_percent <- function(breaks = seq(0, 1, 0.1), limits = NULL) { stopifnot_installed_package("ggplot2") @@ -388,8 +363,6 @@ theme_rsi <- function() { } #' @rdname ggplot_rsi -#' @importFrom dplyr mutate %>% group_by_at -#' @importFrom cleaner percentage #' @export labels_rsi_count <- function(position = NULL, x = "antibiotic", @@ -415,11 +388,15 @@ labels_rsi_count <- function(position = NULL, colour = datalabels.colour, lineheight = 0.75, data = function(x) { - rsi_df(data = x, + transformed <- rsi_df(data = x, translate_ab = translate_ab, combine_SI = combine_SI, - combine_IR = combine_IR) %>% - group_by_at(x_name) %>% - mutate(lbl = paste0("n=", isolates)) + combine_IR = combine_IR) + transformed$gr <- transformed[, x_name, drop = TRUE] + transformed %>% + group_by(gr) %>% + mutate(lbl = paste0("n=", isolates)) %>% + ungroup() %>% + select(-gr) }) } diff --git a/R/guess_ab_col.R b/R/guess_ab_col.R index cc7514ef6..f51c989d4 100755 --- a/R/guess_ab_col.R +++ b/R/guess_ab_col.R @@ -27,8 +27,6 @@ #' @param search_string a text to search `x` for, will be checked with [as.ab()] if this value is not a column in `x` #' @param verbose a logical to indicate whether additional info should be printed #' @details You can look for an antibiotic (trade) name or abbreviation and it will search `x` and the [antibiotics] data set for any column containing a name or code of that antibiotic. **Longer columns names take precendence over shorter column names.** -#' @importFrom dplyr %>% select filter_all any_vars -#' @importFrom crayon blue #' @return A column name of `x`, or `NULL` when no result is found. #' @export #' @inheritSection AMR Read more on our website! @@ -103,23 +101,20 @@ guess_ab_col <- function(x = NULL, search_string = NULL, verbose = FALSE) { return(NULL) } else { if (verbose == TRUE) { - message(blue(paste0("NOTE: Using column `", bold(ab_result), "` as input for `", search_string, + message(font_blue(paste0("NOTE: Using column `", font_bold(ab_result), "` as input for `", search_string, "` (", ab_name(search_string, language = "en", tolower = TRUE), ")."))) } return(ab_result) } } - -#' @importFrom crayon blue bold -#' @importFrom dplyr %>% mutate arrange pull get_column_abx <- function(x, soft_dependencies = NULL, hard_dependencies = NULL, verbose = FALSE, ...) { - message(blue("NOTE: Auto-guessing columns suitable for analysis..."), appendLF = FALSE) + message(font_blue("NOTE: Auto-guessing columns suitable for analysis..."), appendLF = FALSE) x <- as.data.frame(x, stringsAsFactors = FALSE) x_bak <- x @@ -173,15 +168,15 @@ get_column_abx <- function(x, x <- x[order(names(x), x)] # succeeded with aut-guessing - message(blue("OK.")) + message(font_blue("OK.")) for (i in seq_len(length(x))) { if (verbose == TRUE & !names(x[i]) %in% names(duplicates)) { - message(blue(paste0("NOTE: Using column `", bold(x[i]), "` as input for `", names(x)[i], + message(font_blue(paste0("NOTE: Using column `", font_bold(x[i]), "` as input for `", names(x)[i], "` (", ab_name(names(x)[i], tolower = TRUE), ")."))) } if (names(x[i]) %in% names(duplicates)) { - warning(red(paste0("Using column `", bold(x[i]), "` as input for `", names(x)[i], + warning(font_red(paste0("Using column `", font_bold(x[i]), "` as input for `", names(x)[i], "` (", ab_name(names(x)[i], tolower = TRUE), "), although it was matched for multiple antibiotics or columns.")), call. = FALSE, @@ -204,14 +199,11 @@ get_column_abx <- function(x, if (!all(soft_dependencies %in% names(x))) { # missing a soft dependency may lower the reliability missing <- soft_dependencies[!soft_dependencies %in% names(x)] - missing_txt <- data.frame(missing = missing, - missing_names = ab_name(missing, tolower = TRUE), - stringsAsFactors = FALSE) %>% - mutate(txt = paste0(bold(missing), " (", missing_names, ")")) %>% - arrange(missing_names) %>% - pull(txt) - message(blue("NOTE: Reliability will be improved if these antimicrobial results would be available too:", - paste(missing_txt, collapse = ", "))) + missing_txt <- paste(paste0(ab_name(missing, tolower = TRUE, language = NULL), + " (", font_bold(missing, collapse = NULL), ")"), + collapse = ", ") + message(font_blue("NOTE: Reliability would be improved if these antimicrobial results would be available too:", + missing_txt)) } } x diff --git a/R/join_microorganisms.R b/R/join_microorganisms.R index 81e2d674d..0b26e039b 100755 --- a/R/join_microorganisms.R +++ b/R/join_microorganisms.R @@ -19,7 +19,7 @@ # Visit our website for more info: https://msberends.gitlab.io/AMR. # # ==================================================================== # -#' Join a table with [microorganisms] +#' Join [microorganisms] to a data set #' #' Join the data set [microorganisms] easily to an existing table or character vector. #' @inheritSection lifecycle Stable lifecycle @@ -30,13 +30,16 @@ #' @param by a variable to join by - if left empty will search for a column with class [`mo`] (created with [as.mo()]) or will be `"mo"` if that column name exists in `x`, could otherwise be a column name of `x` with values that exist in `microorganisms$mo` (like `by = "bacteria_id"`), or another column in [microorganisms] (but then it should be named, like `by = c("my_genus_species" = "fullname")`) #' @param suffix if there are non-joined duplicate variables in `x` and `y`, these suffixes will be added to the output to disambiguate them. Should be a character vector of length 2. #' @param ... other parameters to pass on to [dplyr::join()] -#' @details **Note:** As opposed to the [dplyr::join()] functions of `dplyr`, [`character`] vectors are supported and at default existing columns will get a suffix `"2"` and the newly joined columns will not get a suffix. See [dplyr::join()] for more information. +#' @details **Note:** As opposed to the [join()] functions of `dplyr`, [`character`] vectors are supported and at default existing columns will get a suffix `"2"` and the newly joined columns will not get a suffix. +#' +#' These functions rely on [merge()], a base R function to do joins. #' @inheritSection AMR Read more on our website! #' @export #' @examples #' left_join_microorganisms(as.mo("K. pneumoniae")) #' left_join_microorganisms("B_KLBSL_PNE") #' +#' \dontrun{ #' library(dplyr) #' example_isolates %>% left_join_microorganisms() #' @@ -49,13 +52,14 @@ #' colnames(df) #' df_joined <- left_join_microorganisms(df, "bacteria") #' colnames(df_joined) +#' } inner_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) { check_dataset_integrity() checked <- joins_check_df(x, by) x <- checked$x by <- checked$by join <- suppressWarnings( - dplyr::inner_join(x = x, y = microorganisms, by = by, suffix = suffix, ...) + inner_join(x = x, y = microorganisms, by = by, suffix = suffix, ...) ) if (NROW(join) > NROW(x)) { warning("The newly joined tbl contains ", nrow(join) - nrow(x), " rows more that its original.") @@ -71,7 +75,7 @@ left_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) { x <- checked$x by <- checked$by join <- suppressWarnings( - dplyr::left_join(x = x, y = microorganisms, by = by, suffix = suffix, ...) + left_join(x = x, y = microorganisms, by = by, suffix = suffix, ...) ) if (NROW(join) > NROW(x)) { warning("The newly joined tbl contains ", nrow(join) - nrow(x), " rows more that its original.") @@ -87,7 +91,7 @@ right_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) { x <- checked$x by <- checked$by join <- suppressWarnings( - dplyr::right_join(x = x, y = microorganisms, by = by, suffix = suffix, ...) + right_join(x = x, y = microorganisms, by = by, suffix = suffix, ...) ) if (NROW(join) > NROW(x)) { warning("The newly joined tbl contains ", nrow(join) - nrow(x), " rows more that its original.") @@ -103,7 +107,7 @@ full_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) { x <- checked$x by <- checked$by join <- suppressWarnings( - dplyr::full_join(x = x, y = microorganisms, by = by, suffix = suffix, ...) + full_join(x = x, y = microorganisms, by = by, suffix = suffix, ...) ) if (NROW(join) > NROW(x)) { warning("The newly joined tbl contains ", nrow(join) - nrow(x), " rows more that its original.") @@ -119,7 +123,7 @@ semi_join_microorganisms <- function(x, by = NULL, ...) { x <- checked$x by <- checked$by suppressWarnings( - dplyr::semi_join(x = x, y = microorganisms, by = by, ...) + semi_join(x = x, y = microorganisms, by = by, ...) ) } @@ -131,7 +135,7 @@ anti_join_microorganisms <- function(x, by = NULL, ...) { x <- checked$x by <- checked$by suppressWarnings( - dplyr::anti_join(x = x, y = microorganisms, by = by, ...) + anti_join(x = x, y = microorganisms, by = by, ...) ) } diff --git a/R/key_antibiotics.R b/R/key_antibiotics.R index 250e94ef9..bb7b898da 100755 --- a/R/key_antibiotics.R +++ b/R/key_antibiotics.R @@ -31,7 +31,9 @@ #' @param GramNeg_1,GramNeg_2,GramNeg_3,GramNeg_4,GramNeg_5,GramNeg_6 column names of antibiotics for **Gram-negatives**, case-insensitive. At default, the columns containing these antibiotics will be guessed with [guess_ab_col()]. #' @param warnings give warning about missing antibiotic columns, they will anyway be ignored #' @param ... other parameters passed on to function -#' @details The function [key_antibiotics()] returns a character vector with 12 antibiotic results for every isolate. These isolates can then be compared using [key_antibiotics_equal()], to check if two isolates have generally the same antibiogram. Missing and invalid values are replaced with a dot (`"."`). The [first_isolate()] function only uses this function on the same microbial species from the same patient. Using this, an MRSA will be included after a susceptible *S. aureus* (MSSA) found within the same episode (see `episode` parameter of [first_isolate()]). Without key antibiotic comparison it would not. +#' @details The function [key_antibiotics()] returns a character vector with 12 antibiotic results for every isolate. These isolates can then be compared using [key_antibiotics_equal()], to check if two isolates have generally the same antibiogram. Missing and invalid values are replaced with a dot (`"."`) by [key_antibiotics()] and ignored by [key_antibiotics_equal()]. +#' +#' The [first_isolate()] function only uses this function on the same microbial species from the same patient. Using this, e.g. an MRSA will be included after a susceptible *S. aureus* (MSSA) is found within the same patient episode. Without key antibiotic comparison it would not. See [first_isolate()] for more info. #' #' At default, the antibiotics that are used for **Gram-positive bacteria** are: #' - Amoxicillin @@ -65,8 +67,6 @@ #' @inheritSection first_isolate Key antibiotics #' @rdname key_antibiotics #' @export -#' @importFrom dplyr %>% mutate if_else pull -#' @importFrom crayon blue bold #' @seealso [first_isolate()] #' @inheritSection AMR Read more on our website! #' @examples @@ -120,6 +120,15 @@ key_antibiotics <- function(x, GramNeg_6 = guess_ab_col(x, "meropenem"), warnings = TRUE, ...) { + + dots <- unlist(list(...)) + if (length(dots) != 0) { + # backwards compatibility with old parameters + dots.names <- dots %>% names() + if ("info" %in% dots.names) { + warnings <- dots[which(dots.names == "info")] + } + } # try to find columns based on type # -- mo @@ -134,7 +143,7 @@ key_antibiotics <- function(x, col.list <- c(universal_1, universal_2, universal_3, universal_4, universal_5, universal_6, GramPos_1, GramPos_2, GramPos_3, GramPos_4, GramPos_5, GramPos_6, GramNeg_1, GramNeg_2, GramNeg_3, GramNeg_4, GramNeg_5, GramNeg_6) - check_available_columns <- function(x, col.list, info = TRUE) { + check_available_columns <- function(x, col.list, warnings = TRUE) { # check columns col.list <- col.list[!is.na(col.list) & !is.null(col.list)] names(col.list) <- col.list @@ -152,7 +161,7 @@ key_antibiotics <- function(x, } } if (!all(col.list %in% colnames(x))) { - if (info == TRUE) { + if (warnings == TRUE) { warning("Some columns do not exist and will be ignored: ", col.list.bak[!(col.list %in% colnames(x))] %>% toString(), ".\nTHIS MAY STRONGLY INFLUENCE THE OUTCOME.", @@ -163,7 +172,7 @@ key_antibiotics <- function(x, col.list } - col.list <- check_available_columns(x = x, col.list = col.list, info = warnings) + col.list <- check_available_columns(x = x, col.list = col.list, warnings = warnings) universal_1 <- col.list[universal_1] universal_2 <- col.list[universal_2] universal_3 <- col.list[universal_3] @@ -205,37 +214,34 @@ key_antibiotics <- function(x, } # join to microorganisms data set - x <- x %>% - as.data.frame(stringsAsFactors = FALSE) %>% - mutate_at(vars(col_mo), as.mo) %>% - left_join_microorganisms(by = col_mo) %>% - mutate(key_ab = NA_character_, - gramstain = mo_gramstain(pull(., col_mo), language = NULL)) + x <- x %>% as.data.frame(stringsAsFactors = FALSE) + x[, col_mo] <- as.mo(x[, col_mo, drop = TRUE]) + x$gramstain <- mo_gramstain(x[, col_mo, drop = TRUE], language = NULL) + x$key_ab <- NA_character_ + # mutate_at(vars(col_mo), as.mo) %>% + # left_join_microorganisms(by = col_mo) %>% + # mutate(key_ab = NA_character_, + # gramstain = mo_gramstain(pull(., col_mo), language = NULL)) + # # Gram + - x <- x %>% mutate(key_ab = - if_else(gramstain == "Gram-positive", - tryCatch(apply(X = x[, gram_positive], - MARGIN = 1, - FUN = function(x) paste(x, collapse = "")), - error = function(e) paste0(rep(".", 12), collapse = "")), - key_ab)) + x$key_ab <- if_else(x$gramstain == "Gram-positive", + tryCatch(apply(X = x[, gram_positive], + MARGIN = 1, + FUN = function(x) paste(x, collapse = "")), + error = function(e) paste0(rep(".", 12), collapse = "")), + x$key_ab) # Gram - - x <- x %>% mutate(key_ab = - if_else(gramstain == "Gram-negative", - tryCatch(apply(X = x[, gram_negative], - MARGIN = 1, - FUN = function(x) paste(x, collapse = "")), - error = function(e) paste0(rep(".", 12), collapse = "")), - key_ab)) + x$key_ab <- if_else(x$gramstain == "Gram-negative", + tryCatch(apply(X = x[, gram_negative], + MARGIN = 1, + FUN = function(x) paste(x, collapse = "")), + error = function(e) paste0(rep(".", 12), collapse = "")), + x$key_ab) # format - key_abs <- x %>% - pull(key_ab) %>% - gsub("(NA|NULL)", ".", .) %>% - gsub("[^SIR]", ".", ., ignore.case = TRUE) %>% - toupper() + key_abs <- toupper(gsub("[^SIR]", ".", gsub("(NA|NULL)", ".", x$key_ab))) if (n_distinct(key_abs) == 1) { warning("No distinct key antibiotics determined.", call. = FALSE) @@ -245,7 +251,6 @@ key_antibiotics <- function(x, } -#' @importFrom dplyr %>% #' @rdname key_antibiotics #' @export key_antibiotics_equal <- function(y, @@ -271,12 +276,13 @@ key_antibiotics_equal <- function(y, if (info_needed == TRUE) { p <- progress_estimated(length(x)) + on.exit(close(p)) } for (i in seq_len(length(x))) { if (info_needed == TRUE) { - p$tick()$print() + p$tick() } if (is.na(x[i])) { diff --git a/R/like.R b/R/like.R index 45914000e..467a8c2de 100755 --- a/R/like.R +++ b/R/like.R @@ -30,10 +30,15 @@ #' @name like #' @rdname like #' @export -#' @details When running a regular expression fails, these functions try again with `base::grepl(..., perl = TRUE)`. +#' @details +#' The `%like%` function: +#' * Is case insensitive (use `%like_case%` for case-sensitive matching) +#' * Supports multiple patterns +#' * Checks if `pattern` is a regular expression and sets `fixed = TRUE` if not, to greatly improve speed +#' * Tries again with `perl = TRUE` if regex fails #' #' Using RStudio? This function can also be inserted from the Addins menu and can have its own Keyboard Shortcut like `Ctrl+Shift+L` or `Cmd+Shift+L` (see `Tools` > `Modify Keyboard Shortcuts...`). -#' @source Idea from the [`like` function from the `data.table` package](https://github.com/Rdatatable/data.table/blob/master/R/like.R), but made it case insensitive at default and let it support multiple patterns. Also, if the regex fails the first time, it tries again with `perl = TRUE`. +#' @source Idea from the [`like` function from the `data.table` package](https://github.com/Rdatatable/data.table/blob/master/R/like.R) #' @seealso [base::grep()] #' @inheritSection AMR Read more on our website! #' @examples @@ -51,19 +56,27 @@ #' a %like% b #' #> TRUE TRUE TRUE #' -#' # get frequencies of bacteria whose name start with 'Ent' or 'ent' +#' # get isolates whose name start with 'Ent' or 'ent' #' library(dplyr) #' example_isolates %>% -#' filter(mo_name(mo) %like% "^ent") %>% -#' freq(mo_genus(mo)) +#' filter(mo_name(mo) %like% "^ent") %>% +#' freq(mo) like <- function(x, pattern, ignore.case = TRUE) { + # set to fixed if no regex found + fixed <- all(!grepl("[$.^*?+}{|)(]", pattern)) + if (ignore.case == TRUE) { + # set here, otherwise if fixed = TRUE, this warning will be thrown: argument 'ignore.case = TRUE' will be ignored + x <- tolower(x) + pattern <- tolower(pattern) + } + if (length(pattern) > 1) { if (length(x) != length(pattern)) { if (length(x) == 1) { x <- rep(x, length(pattern)) } # return TRUE for every 'x' that matches any 'pattern', FALSE otherwise - res <- sapply(pattern, function(pttrn) base::grepl(pttrn, x, ignore.case = ignore.case)) + res <- sapply(pattern, function(pttrn) base::grepl(pttrn, x, ignore.case = FALSE, fixed = fixed)) res2 <- as.logical(rowSums(res)) # get only first item of every hit in pattern res2[duplicated(res)] <- FALSE @@ -74,9 +87,9 @@ like <- function(x, pattern, ignore.case = TRUE) { res <- vector(length = length(pattern)) for (i in seq_len(length(res))) { if (is.factor(x[i])) { - res[i] <- as.integer(x[i]) %in% base::grep(pattern[i], levels(x[i]), ignore.case = ignore.case) + res[i] <- as.integer(x[i]) %in% base::grep(pattern[i], levels(x[i]), ignore.case = FALSE, fixed = fixed) } else { - res[i] <- base::grepl(pattern[i], x[i], ignore.case = ignore.case) + res[i] <- base::grepl(pattern[i], x[i], ignore.case = FALSE, fixed = fixed) } } return(res) @@ -85,13 +98,15 @@ like <- function(x, pattern, ignore.case = TRUE) { # the regular way how grepl works; just one pattern against one or more x if (is.factor(x)) { - as.integer(x) %in% base::grep(pattern, levels(x), ignore.case = ignore.case) + as.integer(x) %in% base::grep(pattern, levels(x), ignore.case = FALSE, fixed = fixed) } else { - tryCatch(base::grepl(pattern, x, ignore.case = ignore.case), + tryCatch(base::grepl(pattern, x, ignore.case = FALSE, fixed = fixed), error = function(e) ifelse(grepl("Invalid regexp", e$message), # try with perl = TRUE: return(base::grepl(pattern = pattern, x = x, - ignore.case = ignore.case, perl = TRUE)), + ignore.case = FALSE, + fixed = fixed, + perl = TRUE)), # stop otherwise stop(e$message))) } diff --git a/R/mdro.R b/R/mdro.R index 00184bda0..635239842 100755 --- a/R/mdro.R +++ b/R/mdro.R @@ -61,9 +61,6 @@ #' Ordered [`factor`] with levels `Negative` < `Positive, unconfirmed` < `Positive`. The value `"Positive, unconfirmed"` means that, according to the guideline, it is not entirely sure if the isolate is multi-drug resistant and this should be confirmed with additional (e.g. molecular) tests #' @rdname mdro #' @aliases MDR XDR PDR BRMO 3MRGN 4MRGN -#' @importFrom dplyr %>% filter_at vars all_vars pull mutate_at -#' @importFrom crayon blue bold italic red -#' @importFrom cleaner percentage #' @export #' @inheritSection AMR Read more on our website! #' @source @@ -99,7 +96,7 @@ mdro <- function(x, if ("rstudioapi" %in% rownames(utils::installed.packages())) { q_continue <- rstudioapi::showQuestion("Using verbose = TRUE with mdro()", txt) } else { - q_continue <- menu(choices = c("OK", "Cancel"), graphics = TRUE, title = txt) + q_continue <- utils::menu(choices = c("OK", "Cancel"), graphics = FALSE, title = txt) } if (q_continue %in% c(FALSE, 2)) { message("Cancelled, returning original data") @@ -110,6 +107,9 @@ mdro <- function(x, if (!is.data.frame(x)) { stop("`x` must be a data frame.", call. = FALSE) } + # force regular data.frame, not a tibble or data.table + x <- as.data.frame(x, stringsAsFactors = FALSE) + if (!is.numeric(pct_required_classes)) { stop("`pct_required_classes` must be numeric.", call. = FALSE) } @@ -147,8 +147,8 @@ mdro <- function(x, col_mo <- search_type_in_df(x = x, type = "mo") } if (is.null(col_mo) & guideline$code == "tb") { - message(blue("NOTE: No column found as input for `col_mo`,", - bold("assuming all records contain", italic("Mycobacterium tuberculosis.\n")))) + message(font_blue("NOTE: No column found as input for `col_mo`,", + font_bold("assuming all records contain", font_italic("Mycobacterium tuberculosis.")))) x$mo <- as.mo("Mycobacterium tuberculosis") col_mo <- "mo" } @@ -418,7 +418,7 @@ mdro <- function(x, if (guideline$code == "tb" & length(abx_tb) == 0) { stop("No antimycobacterials found in data set.", call. = FALSE) } - + if (combine_SI == TRUE) { search_result <- "R" } else { @@ -427,15 +427,15 @@ mdro <- function(x, if (info == TRUE) { if (combine_SI == TRUE) { - cat(red("\nOnly results with 'R' are considered as resistance. Use `combine_SI = FALSE` to also consider 'I' as resistance.\n")) + cat(font_red("\nOnly results with 'R' are considered as resistance. Use `combine_SI = FALSE` to also consider 'I' as resistance.\n")) } else { - cat(red("\nResults with 'R' or 'I' are considered as resistance. Use `combine_SI = TRUE` to only consider 'R' as resistance.\n")) + cat(font_red("\nResults with 'R' or 'I' are considered as resistance. Use `combine_SI = TRUE` to only consider 'R' as resistance.\n")) } cat("\nDetermining multidrug-resistant organisms (MDRO), according to:\n", - bold("Guideline: "), italic(guideline$name), "\n", - bold("Version: "), guideline$version, "\n", - bold("Author: "), guideline$author, "\n", - bold("Source: "), guideline$source, "\n", + font_bold("Guideline: "), font_italic(guideline$name), "\n", + font_bold("Version: "), guideline$version, "\n", + font_bold("Author: "), guideline$author, "\n", + font_bold("Source: "), guideline$source, "\n", "\n", sep = "") } @@ -460,7 +460,7 @@ mdro <- function(x, cols <- cols[!ab_missing(cols)] cols <- cols[!is.na(cols)] if (length(rows) > 0 & length(cols) > 0) { - x <<- x %>% mutate_at(vars(cols), as.rsi) + x[, cols] <- as.data.frame(lapply(x[, cols, drop = FALSE], function(col) as.rsi(col))) x[rows, "columns_nonsusceptible"] <<- sapply(rows, function(row, group_vct = cols) { cols_nonsus <- sapply(x[row, group_vct, drop = FALSE], @@ -471,13 +471,14 @@ mdro <- function(x, }) if (any_all == "any") { - search_function <- dplyr::any_vars + search_function <- any } else if (any_all == "all") { - search_function <- dplyr::all_vars + search_function <- all } - row_filter <- x %>% - filter_at(vars(cols), search_function(. %in% search_result)) %>% - pull("row_number") + row_filter <- as.logical(by(x, + seq_len(nrow(x)), + function(row) search_function(unlist(row[, cols]) %in% search_result, na.rm = TRUE))) + row_filter <- x[row_filter, "row_number", drop = TRUE] rows <- rows[rows %in% row_filter] x[rows, "MDRO"] <<- to x[rows, "reason"] <<- paste0(any_all, " of the required antibiotics ", ifelse(any_all == "any", "is", "are"), " R") @@ -485,12 +486,12 @@ mdro <- function(x, } trans_tbl2 <- function(txt, rows, lst) { if (info == TRUE) { - message(blue(txt, "..."), appendLF = FALSE) + message(font_blue(txt, "..."), appendLF = FALSE) } if (length(rows) > 0) { # function specific for the CMI paper of 2012 (Magiorakos et al.) lst_vector <- unlist(lst)[!is.na(unlist(lst))] - x <<- x %>% mutate_at(vars(lst_vector), as.rsi) + x[, lst_vector] <- as.data.frame(lapply(x[, lst_vector, drop = FALSE], function(col) as.rsi(col))) x[rows, "classes_in_guideline"] <<- length(lst) x[rows, "classes_available"] <<- sapply(rows, function(row, group_tbl = lst) { @@ -513,28 +514,25 @@ mdro <- function(x, na.rm = TRUE) }) # for PDR; all agents are R (or I if combine_SI = FALSE) - x[filter_at(x[rows, ], - vars(lst_vector), - all_vars(. %in% search_result))$row_number, "classes_affected"] <<- 999 + row_filter <- as.logical(by(x[rows, ], + seq_len(nrow(x[rows, ])), + function(row) all(unlist(row[, lst_vector]) %in% search_result, na.rm = TRUE))) + x[row_filter, "classes_affected"] <<- 999 } if (info == TRUE) { - message(blue(" OK")) + message(font_blue(" OK")) } } - x <- x %>% - mutate_at(vars(col_mo), as.mo) %>% - # join to microorganisms data set - left_join_microorganisms(by = col_mo) %>% - # add unavailable to where genus is available - mutate(MDRO = ifelse(!is.na(genus), 1, NA_integer_), - row_number = seq_len(nrow(.)), - reason = paste0("not covered by ", toupper(guideline$code), " guideline"), - columns_nonsusceptible = "") %>% - # transform to data.frame so subsetting is possible with x[y, z] (might not be the case with tibble/data.table/...) - as.data.frame(stringsAsFactors = FALSE) - + x[, col_mo] <- as.mo(x[, col_mo, drop = TRUE]) + # join to microorganisms data set + x <- left_join_microorganisms(x, by = col_mo) + x$MDRO <- ifelse(!is.na(x$genus), 1, NA_integer_) + x$row_number <- seq_len(nrow(x)) + x$reason <- paste0("not covered by ", toupper(guideline$code), " guideline") + x$columns_nonsusceptible <- "" + if (guideline$code == "cmi2012") { # CMI, 2012 --------------------------------------------------------------- # Non-susceptible = R and I @@ -543,20 +541,20 @@ mdro <- function(x, # take amoxicillin if ampicillin is unavailable if (is.na(AMP) & !is.na(AMX)) { if (verbose == TRUE) { - message(blue("NOTE: Filling ampicillin (AMP) results with amoxicillin (AMX) results")) + message(font_blue("NOTE: Filling ampicillin (AMP) results with amoxicillin (AMX) results")) } AMP <- AMX } # take ceftriaxone if cefotaxime is unavailable and vice versa if (is.na(CRO) & !is.na(CTX)) { if (verbose == TRUE) { - message(blue("NOTE: Filling ceftriaxone (CRO) results with cefotaxime (CTX) results")) + message(font_blue("NOTE: Filling ceftriaxone (CRO) results with cefotaxime (CTX) results")) } CRO <- CTX } if (is.na(CTX) & !is.na(CRO)) { if (verbose == TRUE) { - message(blue("NOTE: Filling cefotaxime (CTX) results with ceftriaxone (CRO) results")) + message(font_blue("NOTE: Filling cefotaxime (CTX) results with ceftriaxone (CRO) results")) } CTX <- CRO } @@ -642,7 +640,7 @@ mdro <- function(x, which(x$genus == "Staphylococcus" & x$species == "aureus"), c(OXA, FOX), "any") - trans_tbl2(paste("Table 1 -", italic("Staphylococcus aureus")), + trans_tbl2(paste("Table 1 -", font_italic("Staphylococcus aureus")), which(x$genus == "Staphylococcus" & x$species == "aureus"), list(GEN, RIF, @@ -661,7 +659,7 @@ mdro <- function(x, FOS, QDA, c(TCY, DOX, MNO))) - trans_tbl2(paste("Table 2 -", italic("Enterococcus"), "spp."), + trans_tbl2(paste("Table 2 -", font_italic("Enterococcus"), "spp."), which(x$genus == "Enterococcus"), list(GEH, STH, @@ -674,7 +672,7 @@ mdro <- function(x, AMP, QDA, c(DOX, MNO))) - trans_tbl2(paste0("Table 3 - ", italic("Enterobacteriaceae")), + trans_tbl2(paste0("Table 3 - ", font_italic("Enterobacteriaceae")), # this new order was previously 'Enterobacteriales' and contained only the family 'Enterobacteriaceae': which(x$order == "Enterobacterales"), list(c(GEN, TOB, AMK, NET), @@ -695,7 +693,7 @@ mdro <- function(x, FOS, COL, c(TCY, DOX, MNO))) - trans_tbl2(paste("Table 4 -", italic("Pseudomonas aeruginosa")), + trans_tbl2(paste("Table 4 -", font_italic("Pseudomonas aeruginosa")), which(x$genus == "Pseudomonas" & x$species == "aeruginosa"), list(c(GEN, TOB, AMK, NET), c(IPM, MEM, DOR), @@ -705,7 +703,7 @@ mdro <- function(x, ATM, FOS, c(COL, PLB))) - trans_tbl2(paste("Table 5 -", italic("Acinetobacter"), "spp."), + trans_tbl2(paste("Table 5 -", font_italic("Acinetobacter"), "spp."), which(x$genus == "Acinetobacter"), list(c(GEN, TOB, AMK, NET), c(IPM, MEM, DOR), @@ -941,70 +939,73 @@ mdro <- function(x, "all") } - prepare_drug <- function(ab) { - # returns vector values of drug - # if `ab` is a column name, looks up the values in `x` - if (length(ab) == 1 & is.character(ab)) { - if (ab %in% colnames(x)) { - ab <- as.data.frame(x)[, ab] - } - } - ab <- as.character(as.rsi(ab)) - ab[is.na(ab)] <- "" - ab - } - drug_is_R <- function(ab) { - # returns logical vector - ab <- prepare_drug(ab) - if (length(ab) == 1) { - rep(ab, NROW(x)) == "R" - } else { - ab == "R" - } - } - drug_is_not_R <- function(ab) { - # returns logical vector - ab <- prepare_drug(ab) - if (length(ab) == 1) { - rep(ab, NROW(x)) != "R" - } else { - ab != "R" - } - } - if (guideline$code == "tb") { # Tuberculosis ------------------------------------------------------------ - x <- x %>% - mutate(mono_count = 0, - mono_count = ifelse(drug_is_R(INH), mono_count + 1, mono_count), - mono_count = ifelse(drug_is_R(RIF), mono_count + 1, mono_count), - mono_count = ifelse(drug_is_R(ETH), mono_count + 1, mono_count), - mono_count = ifelse(drug_is_R(PZA), mono_count + 1, mono_count), - mono_count = ifelse(drug_is_R(RIB), mono_count + 1, mono_count), - mono_count = ifelse(drug_is_R(RFP), mono_count + 1, mono_count), - # from here on logicals - mono = mono_count > 0, - poly = ifelse(mono_count > 1 & drug_is_not_R(RIF) & drug_is_not_R(INH), - TRUE, FALSE), - mdr = ifelse(drug_is_R(RIF) & drug_is_R(INH), - TRUE, FALSE), - xdr = ifelse(drug_is_R(LVX) | drug_is_R(MFX) | drug_is_R(GAT), - TRUE, FALSE), - second = ifelse(drug_is_R(CAP) | drug_is_R(KAN) | drug_is_R(AMK), - TRUE, FALSE), - xdr = ifelse(mdr & xdr & second, TRUE, FALSE)) %>% - mutate(MDRO = case_when(xdr ~ 5, - mdr ~ 4, - poly ~ 3, - mono ~ 2, - TRUE ~ 1), - # keep all real TB, make other species NA - MDRO = ifelse(x$fullname == "Mycobacterium tuberculosis", MDRO, NA_real_)) + prepare_drug <- function(ab) { + # returns vector values of drug + # if `ab` is a column name, looks up the values in `x` + if (length(ab) == 1 & is.character(ab)) { + if (ab %in% colnames(x)) { + ab <- x[, ab, drop = TRUE] + } + } + ab <- as.character(as.rsi(ab)) + ab[is.na(ab)] <- "" + ab + } + drug_is_R <- function(ab) { + # returns logical vector + ab <- prepare_drug(ab) + if (length(ab) == 0) { + rep(FALSE, NROW(x)) + } else if (length(ab) == 1) { + rep(ab, NROW(x)) == "R" + } else { + ab == "R" + } + } + drug_is_not_R <- function(ab) { + # returns logical vector + ab <- prepare_drug(ab) + if (length(ab) == 0) { + rep(TRUE, NROW(x)) + } else if (length(ab) == 1) { + rep(ab, NROW(x)) != "R" + } else { + ab != "R" + } + } + + x$mono_count <- 0 + x[drug_is_R(INH), "mono_count"] <- x[drug_is_R(INH), "mono_count"] + 1 + x[drug_is_R(RIF), "mono_count"] <- x[drug_is_R(RIF), "mono_count"] + 1 + x[drug_is_R(ETH), "mono_count"] <- x[drug_is_R(ETH), "mono_count"] + 1 + x[drug_is_R(PZA), "mono_count"] <- x[drug_is_R(PZA), "mono_count"] + 1 + x[drug_is_R(RIB), "mono_count"] <- x[drug_is_R(RIB), "mono_count"] + 1 + x[drug_is_R(RFP), "mono_count"] <- x[drug_is_R(RFP), "mono_count"] + 1 + + x$mono <- x$mono_count > 0 + x$poly <- x$mono_count > 1 & drug_is_not_R(RIF) & drug_is_not_R(INH) + x$mdr <- drug_is_R(RIF) & drug_is_R(INH) + x$xdr <- drug_is_R(LVX) | drug_is_R(MFX) | drug_is_R(GAT) + x$second <- drug_is_R(CAP) | drug_is_R(KAN) | drug_is_R(AMK) + x$xdr <- x$mdr & x$xdr & x$second + x$MDRO <- ifelse(x$xdr, 5, + ifelse(x$mdr, 4, + ifelse(x$poly, 3, + ifelse(x$mono, 2, + 1)))) + # keep all real TB, make other species NA + x$MDRO <- ifelse(x$fullname == "Mycobacterium tuberculosis", x$MDRO, NA_real_) } if (info == TRUE) { - cat(bold(paste0("=> Found ", sum(x$MDRO %in% c(2:5), na.rm = TRUE), " MDROs out of ", sum(!is.na(x$MDRO)), - " tested isolates (", percentage(sum(x$MDRO %in% c(2:5), na.rm = TRUE) / sum(!is.na(x$MDRO))), ")\n"))) + if (sum(!is.na(x$MDRO) == 0)) { + cat(font_bold(paste0("=> Found 0 MDROs since no isolates are covered by the guideline"))) + } else { + cat(font_bold(paste0("=> Found ", sum(x$MDRO %in% c(2:5), na.rm = TRUE), " MDROs out of ", sum(!is.na(x$MDRO)), + " isolates (", trimws(percentage(sum(x$MDRO %in% c(2:5), na.rm = TRUE) / sum(!is.na(x$MDRO)))), ")\n"))) + } } # some more info on negative results diff --git a/R/mic.R b/R/mic.R index df325aae2..79aace178 100755 --- a/R/mic.R +++ b/R/mic.R @@ -30,7 +30,6 @@ #' @return Ordered [`factor`] with new class [`mic`] #' @aliases mic #' @export -#' @importFrom dplyr %>% #' @seealso [as.rsi()] #' @inheritSection AMR Read more on our website! #' @examples @@ -52,7 +51,6 @@ #' #' plot(mic_data) #' barplot(mic_data) -#' freq(mic_data) as.mic <- function(x, na.rm = FALSE) { if (is.mic(x)) { x @@ -138,7 +136,6 @@ all_valid_mics <- function(x) { #' @rdname as.mic #' @export -#' @importFrom dplyr %>% is.mic <- function(x) { inherits(x, "mic") } @@ -175,7 +172,6 @@ droplevels.mic <- function(x, exclude = ifelse(anyNA(levels(x)), NULL, NA), ...) #' @exportMethod print.mic #' @export -#' @importFrom dplyr %>% tibble group_by summarise pull #' @noRd print.mic <- function(x, ...) { cat("Class 'mic'\n") @@ -184,7 +180,6 @@ print.mic <- function(x, ...) { #' @exportMethod summary.mic #' @export -#' @importFrom dplyr %>% #' @noRd summary.mic <- function(object, ...) { x <- object @@ -241,7 +236,7 @@ barplot.mic <- function(height, #' @export pillar_shaft.mic <- function(x, ...) { out <- trimws(format(x)) - out[is.na(x)] <- pillar::style_na(NA) + out[is.na(x)] <- font_red(NA) pillar::new_pillar_shaft_simple(out, align = "right", min_width = 4) } diff --git a/R/mo.R b/R/mo.R index 5ee7bc054..e1dcb5839 100755 --- a/R/mo.R +++ b/R/mo.R @@ -31,7 +31,7 @@ #' #' This excludes *Enterococci* at default (who are in group D), use `Lancefield = "all"` to also categorise all *Enterococci* as group D. #' @param allow_uncertain a number between `0` (or `"none"`) and `3` (or `"all"`), or `TRUE` (= `2`) or `FALSE` (= `0`) to indicate whether the input should be checked for less probable results, please see *Details* -#' @param reference_df a [`data.frame`] to use for extra reference when translating `x` to a valid [`mo`]. See [set_mo_source()] and [get_mo_source()] to automate the usage of your own codes (e.g. used in your analysis or organisation). +#' @param reference_df a [`data.frame`] to be used for extra reference when translating `x` to a valid [`mo`]. See [set_mo_source()] and [get_mo_source()] to automate the usage of your own codes (e.g. used in your analysis or organisation). #' @param ... other parameters passed on to functions #' @rdname as.mo #' @aliases mo @@ -111,7 +111,6 @@ #' #' The [mo_property()] functions (like [mo_genus()], [mo_gramstain()]) to get properties based on the returned code. #' @inheritSection AMR Read more on our website! -#' @importFrom dplyr %>% pull left_join #' @examples #' \donttest{ #' # These examples all return "B_STPHY_AURS", the ID of S. aureus: @@ -187,7 +186,7 @@ as.mo <- function(x, x[trimws2(x) %like% "^(no|not|kein|geen|niet|non|sem) [a-z]+"] <- "UNKNOWN" uncertainty_level <- translate_allow_uncertain(allow_uncertain) - + if (mo_source_isvalid(reference_df) & isFALSE(Becker) & isFALSE(Lancefield) @@ -214,7 +213,7 @@ as.mo <- function(x, pull("mo") ) - } else if (all(x %in% microorganismsDT$mo) + } else if (all(x %in% MO_lookup$mo) & isFALSE(Becker) & isFALSE(Lancefield)) { y <- x @@ -240,10 +239,6 @@ is.mo <- function(x) { inherits(x, "mo") } -#' @importFrom dplyr %>% pull left_join n_distinct filter distinct -#' @importFrom data.table data.table as.data.table setkey -#' @importFrom crayon magenta red blue silver italic -#' @importFrom cleaner percentage # param property a column name of microorganisms # param initial_search logical - is FALSE when coming from uncertain tries, which uses exec_as.mo internally too # param dyslexia_mode logical - also check for characters that resemble others @@ -258,26 +253,55 @@ exec_as.mo <- function(x, initial_search = TRUE, dyslexia_mode = FALSE, debug = FALSE, - reference_data_to_use = microorganismsDT) { + reference_data_to_use = MO_lookup) { check_dataset_integrity() + lookup <- function(needle, column = property, haystack = reference_data_to_use, n = 1, debug_mode = debug) { + # `column` can be NULL for all columns, or a selection + # returns a character (vector) - if `column` > length 1 then with columns as names + if (isTRUE(debug_mode)) { + cat(font_silver("looking up: ", substitute(needle), "\n", collapse = "")) + } + if (length(column) == 1) { + res <- haystack[which(eval(substitute(needle), envir = haystack, enclos = parent.frame())), column, drop = TRUE] + res <- as.character(res) + if (length(res) == 0) { + NA_character_ + } else { + res[seq_len(min(n, length(res)))] + } + } else { + if (is.null(column)) { + column <- names(haystack) + } + res <- haystack[which(eval(substitute(needle), envir = haystack, enclos = parent.frame())), , drop = FALSE] + res <- res[seq_len(min(n, nrow(res))), column, drop = TRUE] + if (NROW(res) == 0) { + res <- rep(NA_character_, length(column)) + } + res <- as.character(res) + names(res) <- column + res + } + } + # start off with replaced language-specific non-ASCII characters with ASCII characters x <- parse_and_convert(x) - + # WHONET: xxx = no growth x[tolower(as.character(paste0(x, ""))) %in% c("", "xxx", "na", "nan")] <- NA_character_ # Laboratory systems: remove entries like "no growth" etc x[trimws2(x) %like% "(no .*growth|keine? .*wachtstum|geen .*groei|no .*crecimientonon|sem .*crescimento|pas .*croissance)"] <- NA_character_ x[trimws2(x) %like% "^(no|not|kein|geen|niet|non|sem) [a-z]+"] <- "UNKNOWN" - + if (initial_search == TRUE) { options(mo_failures = NULL) options(mo_uncertainties = NULL) options(mo_renamed = NULL) } options(mo_renamed_last_run = NULL) - + uncertainties <- data.frame(uncertainty = integer(0), input = character(0), fullname = character(0), @@ -362,47 +386,41 @@ exec_as.mo <- function(x, ) } else if (all(x %in% reference_data_to_use$mo)) { - # existing mo codes when not looking for property "mo", like mo_genus("B_ESCHR_COL") - y <- reference_data_to_use[prevalence == 1][data.table(mo = x), - on = "mo", - ..property][[1]] - if (any(is.na(y))) { - y[is.na(y)] <- reference_data_to_use[prevalence == 2][data.table(mo = x[is.na(y)]), - on = "mo", - ..property][[1]] - } - if (any(is.na(y))) { - y[is.na(y)] <- reference_data_to_use[prevalence == 3][data.table(mo = x[is.na(y)]), - on = "mo", - ..property][[1]] - } - x <- y + x <- data.frame(mo = x, stringsAsFactors = FALSE) %>% + left_join_microorganisms(by = "mo") %>% + pull(property) } else if (all(tolower(x) %in% reference_data_to_use$fullname_lower)) { # we need special treatment for very prevalent full names, they are likely! # e.g. as.mo("Staphylococcus aureus") - x <- reference_data_to_use[data.table(fullname_lower = tolower(x)), - on = "fullname_lower", - ..property][[1]] + x <- data.frame(fullname_lower = tolower(x), stringsAsFactors = FALSE) %>% + left_join_MO_lookup(by = "fullname_lower") %>% + pull(property) + # x <- reference_data_to_use[data.table(fullname_lower = tolower(x)), + # on = "fullname_lower", + # ..property][[1]] } else if (all(toupper(x) %in% microorganisms.codes$code)) { # commonly used MO codes - y <- as.data.table(microorganisms.codes)[data.table(code = toupper(x)), - on = "code", ] - - x <- reference_data_to_use[data.table(mo = y[["mo"]]), - on = "mo", - ..property][[1]] + x <- data.frame(code = toupper(x), stringsAsFactors = FALSE) %>% + left_join(microorganisms.codes, by = "code") %>% + left_join_MO_lookup(by = "mo") %>% + pull(property) + # y <- as.data.table(microorganisms.codes)[data.table(code = toupper(x)), + # on = "code", ] + # + # x <- reference_data_to_use[data.table(mo = y[["mo"]]), + # on = "mo", + # ..property][[1]] } else if (all(x %in% microorganisms.translation$mo_old)) { # is an old mo code, used in previous versions of this package old_mo_warning <- TRUE - y <- as.data.table(microorganisms.translation)[data.table(mo_old = x), - on = "mo_old", "mo_new"][[1]] - y <- reference_data_to_use[data.table(mo = y), - on = "mo", - ..property][[1]] - x <- y + x <- data.frame(mo_old = toupper(x), stringsAsFactors = FALSE) %>% + left_join(microorganisms.translation, by = "mo_old") %>% + rename(mo = mo_new) %>% + left_join_MO_lookup(by = "mo") %>% + pull(property) } else if (!all(x %in% microorganisms[, property])) { @@ -433,7 +451,7 @@ exec_as.mo <- function(x, x <- gsub("(spp.?|subsp.?|subspecies|biovar|serovar|species)", "", x) x <- gsub("^([a-z]{2,4})(spe.?)$", "\\1", x) # when ending in SPE instead of SPP and preceded by 2-4 characters x <- strip_whitespace(x, dyslexia_mode) - + x_backup_without_spp <- x x_species <- paste(x, "species") # translate to English for supported languages of mo_property @@ -515,85 +533,62 @@ exec_as.mo <- function(x, x_withspaces_start_end <- paste0("^", x_withspaces, "$") if (isTRUE(debug)) { - cat(paste0(blue("x"), ' "', x, '"\n')) - cat(paste0(blue("x_species"), ' "', x_species, '"\n')) - cat(paste0(blue("x_withspaces_start_only"), ' "', x_withspaces_start_only, '"\n')) - cat(paste0(blue("x_withspaces_end_only"), ' "', x_withspaces_end_only, '"\n')) - cat(paste0(blue("x_withspaces_start_end"), ' "', x_withspaces_start_end, '"\n')) - cat(paste0(blue("x_backup"), ' "', x_backup, '"\n')) - cat(paste0(blue("x_backup_without_spp"), ' "', x_backup_without_spp, '"\n')) - cat(paste0(blue("x_trimmed"), ' "', x_trimmed, '"\n')) - cat(paste0(blue("x_trimmed_species"), ' "', x_trimmed_species, '"\n')) - cat(paste0(blue("x_trimmed_without_group"), ' "', x_trimmed_without_group, '"\n')) + cat(paste0(font_blue("x"), ' "', x, '"\n')) + cat(paste0(font_blue("x_species"), ' "', x_species, '"\n')) + cat(paste0(font_blue("x_withspaces_start_only"), ' "', x_withspaces_start_only, '"\n')) + cat(paste0(font_blue("x_withspaces_end_only"), ' "', x_withspaces_end_only, '"\n')) + cat(paste0(font_blue("x_withspaces_start_end"), ' "', x_withspaces_start_end, '"\n')) + cat(paste0(font_blue("x_backup"), ' "', x_backup, '"\n')) + cat(paste0(font_blue("x_backup_without_spp"), ' "', x_backup_without_spp, '"\n')) + cat(paste0(font_blue("x_trimmed"), ' "', x_trimmed, '"\n')) + cat(paste0(font_blue("x_trimmed_species"), ' "', x_trimmed_species, '"\n')) + cat(paste0(font_blue("x_trimmed_without_group"), ' "', x_trimmed_without_group, '"\n')) } if (initial_search == TRUE) { - progress <- progress_estimated(n = length(x), min_time = 3) - - # before we start, omit the ones that are obvious - MO codes and full names - skip_vect <- rep(FALSE, length(x)) - skip_vect[toupper(x_backup) %in% reference_data_to_use$mo] <- TRUE - skip_vect[tolower(x_backup) %in% reference_data_to_use$fullname_lower] <- TRUE - x[toupper(x_backup) %in% reference_data_to_use$mo] <- reference_data_to_use[data.table(mo = toupper(x_backup[toupper(x_backup) %in% reference_data_to_use$mo])), - on = "mo", - ..property][[1]] - x[tolower(x_backup) %in% reference_data_to_use$fullname_lower] <- reference_data_to_use[data.table(fullname_lower = tolower(x_backup[tolower(x_backup) %in% reference_data_to_use$fullname_lower])), - on = "fullname_lower", - ..property][[1]] + progress <- progress_estimated(n = length(x), n_min = 25) # start if n >= 25 + on.exit(close(progress)) } - + for (i in seq_len(length(x))) { if (initial_search == TRUE) { - progress$tick()$print() - if (isTRUE(skip_vect[i])) { - next - } + progress$tick() } - if (x_backup[i] %like_case% "\\(unknown [a-z]+\\)") { - x[i] <- "UNKNOWN" + if (x_backup[i] %like_case% "\\(unknown [a-z]+\\)" | tolower(x_backup_without_spp[i]) %in% c("other", "none", "unknown")) { + # empty and nonsense values, ignore without warning + x[i] <- lookup(mo == "UNKNOWN") next } - found <- reference_data_to_use[mo == toupper(x_backup[i]), - ..property][[1]] - # is a valid MO code - if (length(found) > 0) { + # valid MO code --- + found <- lookup(mo == toupper(x_backup[i])) + if (!is.na(found)) { x[i] <- found[1L] next } + # old mo code, used in previous versions of this package ---- if (x_backup[i] %in% microorganisms.translation$mo_old) { - # is an old mo code, used in previous versions of this package old_mo_warning <- TRUE - found <- reference_data_to_use[mo == microorganisms.translation[which(microorganisms.translation$mo_old == x_backup[i]), "mo_new"], - ..property][[1]] - if (length(found) > 0) { + found <- lookup(mo_old == toupper(x_backup[i]), column = "mo_new", haystack = microorganisms.translation) + found <- lookup(mo == found) + if (!is.na(found)) { + # get property x[i] <- found[1L] next } } - if (toupper(x_backup_untouched[i]) %in% microorganisms.codes$code) { - # is a WHONET code, like "HA-" - found <- microorganismsDT[mo == microorganisms.codes[which(microorganisms.codes$code == toupper(x_backup_untouched[i])), "mo"][1L], - ..property][[1]] - if (length(found) > 0) { - x[i] <- found[1L] - next - } - } - - found <- reference_data_to_use[fullname_lower %in% tolower(c(x_backup[i], x_backup_without_spp[i])), - ..property][[1]] + found <- lookup(fullname_lower %in% tolower(c(x_backup[i], x_backup_without_spp[i]))) # most probable: is exact match in fullname - if (length(found) > 0) { + if (!is.na(found)) { x[i] <- found[1L] next } - # exact SNOMED code + # exact SNOMED code ---- if (x_backup[i] %like% "^[0-9]+$") { snomed_found <- unlist(lapply(reference_data_to_use$snomed, function(s) if (x_backup[i] %in% s) { @@ -601,74 +596,60 @@ exec_as.mo <- function(x, } else { FALSE })) - found <- reference_data_to_use[snomed_found == TRUE, - ..property][[1]] - if (length(found) > 0) { - x[i] <- found[1L] - next - } - } - - # very probable: is G. species - found <- reference_data_to_use[g_species %in% gsub("[^a-z0-9/ \\-]+", "", - tolower(c(x_backup[i], x_backup_without_spp[i]))), - ..property][[1]] - if (length(found) > 0) { - x[i] <- found[1L] - next - } - - found <- reference_data_to_use[col_id == x_backup[i], - ..property][[1]] - # is a valid Catalogue of Life ID - if (NROW(found) > 0) { - x[i] <- found[1L] - next - } - - # WHONET and other common LIS codes - if (any(toupper(c(x_backup[i], x_backup_without_spp[i])) %in% microorganisms.codes$code)) { - mo_found <- microorganisms.codes[which(microorganisms.codes$code %in% toupper(c(x_backup[i], x_backup_without_spp[i]))), "mo"][1L] - if (length(mo_found) > 0) { - x[i] <- microorganismsDT[mo == mo_found, - ..property][[1]][1L] - next - } - } - - if (!is.null(reference_df)) { - # self-defined reference - if (x_backup[i] %in% reference_df[, 1]) { - ref_mo <- reference_df[reference_df[, 1] == x_backup[i], "mo"][[1L]] - if (ref_mo %in% microorganismsDT[, mo]) { - x[i] <- microorganismsDT[mo == ref_mo, - ..property][[1]][1L] + if (sum(snomed_found, na.rm = TRUE) > 0) { + found <- reference_data_to_use[snomed_found == TRUE, property][[1]] + if (!is.na(found)) { + x[i] <- found[1L] next - } else { - warning("Value '", x_backup[i], "' was found in reference_df, but '", ref_mo, "' is not a valid MO code.", call. = FALSE) } } } + # very probable: is G. species ---- + found <- lookup(g_species %in% gsub("[^a-z0-9/ \\-]+", "", + tolower(c(x_backup[i], x_backup_without_spp[i])))) + if (!is.na(found)) { + x[i] <- found[1L] + next + } + + # valid Catalogue of Life ID --- + found <- lookup(col_id == x_backup[i]) + if (!is.na(found)) { + x[i] <- found[1L] + next + } + + # WHONET and other common LIS codes ---- + found <- lookup(code %in% toupper(c(x_backup_untouched[i], x_backup[i], x_backup_without_spp[i])), + column = "mo", + haystack = microorganisms.codes) + if (!is.na(found)) { + x[i] <- lookup(mo == found) + next + } + + # user-defined reference ---- + if (!is.null(reference_df)) { + if (x_backup[i] %in% reference_df[, 1]) { + # already checked integrity of reference_df, all MOs are valid + ref_mo <- reference_df[reference_df[, 1] == x_backup[i], "mo"][[1L]] + x[i] <- lookup(mo == ref_mo) + next + } + } + # WHONET: xxx = no growth if (tolower(as.character(paste0(x_backup_without_spp[i], ""))) %in% c("", "xxx", "na", "nan")) { x[i] <- NA_character_ next } - if (tolower(x_backup_without_spp[i]) %in% c("other", "none", "unknown")) { - # empty and nonsense values, ignore without warning - x[i] <- microorganismsDT[mo == "UNKNOWN", - ..property][[1]] - next - } - # check for very small input, but ignore the O antigens of E. coli if (nchar(gsub("[^a-zA-Z]", "", x_trimmed[i])) < 3 - & !x_backup_without_spp[i] %like_case% "[Oo]?(26|103|104|104|111|121|145|157)") { + & !toupper(x_backup_without_spp[i]) %like_case% "O?(26|103|104|104|111|121|145|157)") { # fewer than 3 chars and not looked for species, add as failure - x[i] <- microorganismsDT[mo == "UNKNOWN", - ..property][[1]] + x[i] <- lookup(mo == "UNKNOWN") if (initial_search == TRUE) { failures <- c(failures, x_backup[i]) } @@ -682,187 +663,169 @@ exec_as.mo <- function(x, } # translate known trivial abbreviations to genus + species ---- - if (!is.na(x_trimmed[i])) { - if (toupper(x_backup_without_spp[i]) %in% c("MRSA", "MSSA", "VISA", "VRSA") - | x_backup_without_spp[i] %like_case% " (mrsa|mssa|visa|vrsa) ") { - x[i] <- microorganismsDT[mo == "B_STPHY_AURS", - ..property][[1]][1L] + if (toupper(x_backup_without_spp[i]) %in% c("MRSA", "MSSA", "VISA", "VRSA") + | x_backup_without_spp[i] %like_case% " (mrsa|mssa|visa|vrsa) ") { + x[i] <- lookup(fullname == "Staphylococcus aureus") + next + } + if (toupper(x_backup_without_spp[i]) %in% c("MRSE", "MSSE") + | x_backup_without_spp[i] %like_case% " (mrse|msse) ") { + x[i] <- lookup(fullname == "Staphylococcus epidermidis") + next + } + if (toupper(x_backup_without_spp[i]) == "VRE" + | x_backup_without_spp[i] %like_case% " vre " + | x_backup_without_spp[i] %like_case% "(enterococci|enterokok|enterococo)[a-z]*?$") { + x[i] <- lookup(genus == "Enterococcus") + next + } + # support for: + # - AIEC (Adherent-Invasive E. coli) + # - ATEC (Atypical Entero-pathogenic E. coli) + # - DAEC (Diffusely Adhering E. coli) + # - EAEC (Entero-Aggresive E. coli) + # - EHEC (Entero-Haemorrhagic E. coli) + # - EIEC (Entero-Invasive E. coli) + # - EPEC (Entero-Pathogenic E. coli) + # - ETEC (Entero-Toxigenic E. coli) + # - NMEC (Neonatal Meningitis‐causing E. coli) + # - STEC (Shiga-toxin producing E. coli) + # - UPEC (Uropathogenic E. coli) + if (toupper(x_backup_without_spp[i]) %in% c("AIEC", "ATEC", "DAEC", "EAEC", "EHEC", "EIEC", "EPEC", "ETEC", "NMEC", "STEC", "UPEC") + # also support O-antigens of E. coli: O26, O103, O104, O111, O121, O145, O157 + | x_backup_without_spp[i] %like_case% "o?(26|103|104|111|121|145|157)") { + x[i] <- lookup(fullname == "Escherichia coli") + next + } + if (toupper(x_backup_without_spp[i]) == "MRPA" + | x_backup_without_spp[i] %like_case% " mrpa ") { + # multi resistant P. aeruginosa + x[i] <- lookup(fullname == "Pseudomonas aeruginosa") + next + } + if (toupper(x_backup_without_spp[i]) == "CRSM") { + # co-trim resistant S. maltophilia + x[i] <- lookup(fullname == "Stenotrophomonas maltophilia") + next + } + if (toupper(x_backup_without_spp[i]) %in% c("PISP", "PRSP", "VISP", "VRSP") + | x_backup_without_spp[i] %like_case% " (pisp|prsp|visp|vrsp) ") { + # peni I, peni R, vanco I, vanco R: S. pneumoniae + x[i] <- lookup(fullname == "Streptococcus pneumoniae") + next + } + if (x_backup_without_spp[i] %like_case% "^g[abcdfghk]s$") { + # Streptococci, like GBS = Group B Streptococci (B_STRPT_GRPB) + x[i] <- lookup(mo == toupper(gsub("g([abcdfghk])s", + "B_STRPT_GRP\\1", + x_backup_without_spp[i]))) + next + } + if (x_backup_without_spp[i] %like_case% "(streptococ|streptokok).* [abcdfghk]$") { + # Streptococci in different languages, like "estreptococos grupo B" + x[i] <- lookup(mo == toupper(gsub(".*(streptococ|streptokok|estreptococ).* ([abcdfghk])$", + "B_STRPT_GRP\\2", + x_backup_without_spp[i]))) + next + } + if (x_backup_without_spp[i] %like_case% "group [abcdfghk] (streptococ|streptokok|estreptococ)") { + # Streptococci in different languages, like "Group A Streptococci" + x[i] <- lookup(mo == toupper(gsub(".*group ([abcdfghk]) (streptococ|streptokok|estreptococ).*", + "B_STRPT_GRP\\1", + x_backup_without_spp[i]))) + next + } + if (x_backup_without_spp[i] %like_case% "haemoly.*strept") { + # Haemolytic streptococci in different languages + x[i] <- lookup(mo == "B_STRPT_HAEM") + next + } + # CoNS/CoPS in different languages (support for German, Dutch, Spanish, Portuguese) ---- + if (x_backup_without_spp[i] %like_case% "[ck]oagulas[ea] negatie?[vf]" + | x_trimmed[i] %like_case% "[ck]oagulas[ea] negatie?[vf]" + | x_backup_without_spp[i] %like_case% "[ck]o?ns[^a-z]?$") { + # coerce S. coagulase negative + x[i] <- lookup(mo == "B_STPHY_CONS") + next + } + if (x_backup_without_spp[i] %like_case% "[ck]oagulas[ea] positie?[vf]" + | x_trimmed[i] %like_case% "[ck]oagulas[ea] positie?[vf]" + | x_backup_without_spp[i] %like_case% "[ck]o?ps[^a-z]?$") { + # coerce S. coagulase positive + x[i] <- lookup(mo == "B_STPHY_COPS") + next + } + # streptococcal groups: milleri and viridans + if (x_trimmed[i] %like_case% "strepto.* mil+er+i" + | x_backup_without_spp[i] %like_case% "strepto.* mil+er+i" + | x_backup_without_spp[i] %like_case% "mgs[^a-z]?$") { + # Milleri Group Streptococcus (MGS) + x[i] <- lookup(mo == "B_STRPT_MILL") + next + } + if (x_trimmed[i] %like_case% "strepto.* viridans" + | x_backup_without_spp[i] %like_case% "strepto.* viridans" + | x_backup_without_spp[i] %like_case% "vgs[^a-z]?$") { + # Viridans Group Streptococcus (VGS) + x[i] <- lookup(mo == "B_STRPT_VIRI") + next + } + if (x_backup_without_spp[i] %like_case% "gram[ -]?neg.*" + | x_backup_without_spp[i] %like_case% "negatie?[vf]" + | x_trimmed[i] %like_case% "gram[ -]?neg.*") { + # coerce Gram negatives + x[i] <- lookup(mo == "B_GRAMN") + next + } + if (x_backup_without_spp[i] %like_case% "gram[ -]?pos.*" + | x_backup_without_spp[i] %like_case% "positie?[vf]" + | x_trimmed[i] %like_case% "gram[ -]?pos.*") { + # coerce Gram positives + x[i] <- lookup(mo == "B_GRAMP") + next + } + if (x_backup_without_spp[i] %like_case% "mycoba[ck]teri.[nm]?$") { + # coerce mycobacteria in multiple languages + x[i] <- lookup(genus == "Mycobacterium") + next + } + + if (x_backup_without_spp[i] %like_case% "salmonella [a-z]+ ?.*") { + if (x_backup_without_spp[i] %like_case% "salmonella group") { + # Salmonella Group A to Z, just return S. species for now + x[i] <- lookup(genus == "Salmonella") next - } - if (toupper(x_backup_without_spp[i]) %in% c("MRSE", "MSSE") - | x_backup_without_spp[i] %like_case% " (mrse|msse) ") { - x[i] <- microorganismsDT[mo == "B_STPHY_EPDR", - ..property][[1]][1L] - next - } - if (toupper(x_backup_without_spp[i]) == "VRE" - | x_backup_without_spp[i] %like_case% " vre " - | x_backup_without_spp[i] %like_case% "(enterococci|enterokok|enterococo)[a-z]*?$") { - x[i] <- microorganismsDT[mo == "B_ENTRC", - ..property][[1]][1L] - next - } - # support for: - # - AIEC (Adherent-Invasive E. coli) - # - ATEC (Atypical Entero-pathogenic E. coli) - # - DAEC (Diffusely Adhering E. coli) - # - EAEC (Entero-Aggresive E. coli) - # - EHEC (Entero-Haemorrhagic E. coli) - # - EIEC (Entero-Invasive E. coli) - # - EPEC (Entero-Pathogenic E. coli) - # - ETEC (Entero-Toxigenic E. coli) - # - NMEC (Neonatal Meningitis‐causing E. coli) - # - STEC (Shiga-toxin producing E. coli) - # - UPEC (Uropathogenic E. coli) - if (toupper(x_backup_without_spp[i]) %in% c("AIEC", "ATEC", "DAEC", "EAEC", "EHEC", "EIEC", "EPEC", "ETEC", "NMEC", "STEC", "UPEC") - # also support O-antigens of E. coli: O26, O103, O104, O111, O121, O145, O157 - | x_backup_without_spp[i] %like_case% "o?(26|103|104|111|121|145|157)") { - x[i] <- microorganismsDT[mo == "B_ESCHR_COLI", - ..property][[1]][1L] - next - } - if (toupper(x_backup_without_spp[i]) == "MRPA" - | x_backup_without_spp[i] %like_case% " mrpa ") { - # multi resistant P. aeruginosa - x[i] <- microorganismsDT[mo == "B_PSDMN_ARGN", - ..property][[1]][1L] - next - } - if (toupper(x_backup_without_spp[i]) == "CRSM") { - # co-trim resistant S. maltophilia - x[i] <- microorganismsDT[mo == "B_STNTR_MLTP", - ..property][[1]][1L] - next - } - if (toupper(x_backup_without_spp[i]) %in% c("PISP", "PRSP", "VISP", "VRSP") - | x_backup_without_spp[i] %like_case% " (pisp|prsp|visp|vrsp) ") { - # peni I, peni R, vanco I, vanco R: S. pneumoniae - x[i] <- microorganismsDT[mo == "B_STRPT_PNMN", - ..property][[1]][1L] - next - } - if (x_backup_without_spp[i] %like_case% "^g[abcdfghk]s$") { - # Streptococci, like GBS = Group B Streptococci (B_STRPT_GRPB) - x[i] <- microorganismsDT[mo == toupper(gsub("g([abcdfghk])s", "B_STRPT_GRP\\1", x_backup_without_spp[i])), - ..property][[1]][1L] - next - } - if (x_backup_without_spp[i] %like_case% "(streptococ|streptokok).* [abcdfghk]$") { - # Streptococci in different languages, like "estreptococos grupo B" - x[i] <- microorganismsDT[mo == toupper(gsub(".*(streptococ|streptokok|estreptococ).* ([abcdfghk])$", "B_STRPT_GRP\\2", x_backup_without_spp[i])), - ..property][[1]][1L] - next - } - if (x_backup_without_spp[i] %like_case% "group [abcdfghk] (streptococ|streptokok|estreptococ)") { - # Streptococci in different languages, like "Group A Streptococci" - x[i] <- microorganismsDT[mo == toupper(gsub(".*group ([abcdfghk]) (streptococ|streptokok|estreptococ).*", "B_STRPT_GRP\\1", x_backup_without_spp[i])), - ..property][[1]][1L] - next - } - if (x_backup_without_spp[i] %like_case% "haemoly.*strept") { - # Haemolytic streptococci in different languages - x[i] <- microorganismsDT[mo == "B_STRPT_HAEM", - ..property][[1]][1L] - next - } - # CoNS/CoPS in different languages (support for German, Dutch, Spanish, Portuguese) ---- - if (x_backup_without_spp[i] %like_case% "[ck]oagulas[ea] negatie?[vf]" - | x_trimmed[i] %like_case% "[ck]oagulas[ea] negatie?[vf]" - | x_backup_without_spp[i] %like_case% "[ck]o?ns[^a-z]?$") { - # coerce S. coagulase negative - x[i] <- microorganismsDT[mo == "B_STPHY_CONS", - ..property][[1]][1L] - next - } - if (x_backup_without_spp[i] %like_case% "[ck]oagulas[ea] positie?[vf]" - | x_trimmed[i] %like_case% "[ck]oagulas[ea] positie?[vf]" - | x_backup_without_spp[i] %like_case% "[ck]o?ps[^a-z]?$") { - # coerce S. coagulase positive - x[i] <- microorganismsDT[mo == "B_STPHY_COPS", - ..property][[1]][1L] - next - } - # streptococcal groups: milleri and viridans - if (x_trimmed[i] %like_case% "strepto.* mil+er+i" - | x_backup_without_spp[i] %like_case% "strepto.* mil+er+i" - | x_backup_without_spp[i] %like_case% "mgs[^a-z]?$") { - # Milleri Group Streptococcus (MGS) - x[i] <- microorganismsDT[mo == "B_STRPT_MILL", - ..property][[1]][1L] - next - } - if (x_trimmed[i] %like_case% "strepto.* viridans" - | x_backup_without_spp[i] %like_case% "strepto.* viridans" - | x_backup_without_spp[i] %like_case% "vgs[^a-z]?$") { - # Viridans Group Streptococcus (VGS) - x[i] <- microorganismsDT[mo == "B_STRPT_VIRI", - ..property][[1]][1L] - next - } - if (x_backup_without_spp[i] %like_case% "gram[ -]?neg.*" - | x_backup_without_spp[i] %like_case% "negatie?[vf]" - | x_trimmed[i] %like_case% "gram[ -]?neg.*") { - # coerce Gram negatives - x[i] <- microorganismsDT[mo == "B_GRAMN", - ..property][[1]][1L] - next - } - if (x_backup_without_spp[i] %like_case% "gram[ -]?pos.*" - | x_backup_without_spp[i] %like_case% "positie?[vf]" - | x_trimmed[i] %like_case% "gram[ -]?pos.*") { - # coerce Gram positives - x[i] <- microorganismsDT[mo == "B_GRAMP", - ..property][[1]][1L] - next - } - if (x_backup_without_spp[i] %like_case% "mycoba[ck]teri.[nm]?$") { - # coerce Gram positives - x[i] <- microorganismsDT[mo == "B_MYCBC", - ..property][[1]][1L] - next - } - - if (x_backup_without_spp[i] %like_case% "salmonella [a-z]+ ?.*") { - if (x_backup_without_spp[i] %like_case% "salmonella group") { - # Salmonella Group A to Z, just return S. species for now - x[i] <- microorganismsDT[mo == "B_SLMNL", - ..property][[1]][1L] - next - } else if (grepl("[sS]almonella [A-Z][a-z]+ ?.*", x_backup[i], ignore.case = FALSE) & - !x_backup[i] %like% "t[iy](ph|f)[iy]") { - # Salmonella with capital letter species like "Salmonella Goettingen" - they're all S. enterica - # except for S. typhi, S. paratyphi, S. typhimurium - x[i] <- microorganismsDT[mo == "B_SLMNL_ENTR", - ..property][[1]][1L] - uncertainties <- rbind(uncertainties, - format_uncertainty_as_df(uncertainty_level = 1, - input = x_backup[i], - result_mo = "B_SLMNL_ENTR")) - next - } - } - - # trivial names known to the field: - if ("meningococcus" %like_case% x_trimmed[i]) { - # coerce Neisseria meningitidis - x[i] <- microorganismsDT[mo == "B_NESSR_MNNG", - ..property][[1]][1L] - next - } - if ("gonococcus" %like_case% x_trimmed[i]) { - # coerce Neisseria gonorrhoeae - x[i] <- microorganismsDT[mo == "B_NESSR_GNRR", - ..property][[1]][1L] - next - } - if ("pneumococcus" %like_case% x_trimmed[i]) { - # coerce Streptococcus penumoniae - x[i] <- microorganismsDT[mo == "B_STRPT_PNMN", - ..property][[1]][1L] + } else if (grepl("[sS]almonella [A-Z][a-z]+ ?.*", x_backup[i], ignore.case = FALSE) & + !x_backup[i] %like% "t[iy](ph|f)[iy]") { + # Salmonella with capital letter species like "Salmonella Goettingen" - they're all S. enterica + # except for S. typhi, S. paratyphi, S. typhimurium + x[i] <- lookup(fullname == "Salmonella enterica") + uncertainties <- rbind(uncertainties, + format_uncertainty_as_df(uncertainty_level = 1, + input = x_backup[i], + result_mo = lookup(fullname == "Salmonella enterica", "mo"))) next } } + # trivial names known to the field: + if ("meningococcus" %like_case% x_trimmed[i]) { + # coerce Neisseria meningitidis + x[i] <- lookup(fullname == "Neisseria meningitidis") + next + } + if ("gonococcus" %like_case% x_trimmed[i]) { + # coerce Neisseria gonorrhoeae + x[i] <- lookup(fullname == "Neisseria gonorrhoeae") + next + } + if ("pneumococcus" %like_case% x_trimmed[i]) { + # coerce Streptococcus penumoniae + x[i] <- lookup(fullname == "Streptococcus pneumoniae") + next + } + # } + # NOW RUN THROUGH DIFFERENT PREVALENCE LEVELS check_per_prevalence <- function(data_to_check, data.old_to_check, @@ -880,16 +843,16 @@ exec_as.mo <- function(x, # if only genus is available, return only genus if (all(!c(x[i], b.x_trimmed) %like_case% " ")) { - found <- data_to_check[fullname_lower %in% c(h.x_species, i.x_trimmed_species), - ..property][[1]] - if (length(found) > 0) { + found <- lookup(fullname_lower %in% c(h.x_species, i.x_trimmed_species), + haystack = data_to_check) + if (!is.na(found)) { x[i] <- found[1L] return(x[i]) } if (nchar(g.x_backup_without_spp) >= 6) { - found <- data_to_check[fullname_lower %like_case% paste0("^", unregex(g.x_backup_without_spp), "[a-z]+"), - ..property][[1]] - if (length(found) > 0) { + found <- lookup(fullname_lower %like_case% paste0("^", unregex(g.x_backup_without_spp), "[a-z]+"), + haystack = data_to_check) + if (!is.na(found)) { x[i] <- found[1L] return(x[i]) } @@ -899,8 +862,7 @@ exec_as.mo <- function(x, # allow no codes less than 4 characters long, was already checked for WHONET earlier if (nchar(g.x_backup_without_spp) < 4) { - x[i] <- microorganismsDT[mo == "UNKNOWN", - ..property][[1]] + x[i] <- lookup(mo == "UNKNOWN") if (initial_search == TRUE) { failures <- c(failures, a.x_backup) } @@ -908,43 +870,43 @@ exec_as.mo <- function(x, } # try probable: trimmed version of fullname ---- - found <- data_to_check[fullname_lower %in% tolower(g.x_backup_without_spp), - ..property][[1]] - if (length(found) > 0) { + found <- lookup(fullname_lower %in% tolower(g.x_backup_without_spp), + haystack = data_to_check) + if (!is.na(found)) { return(found[1L]) } # try any match keeping spaces ---- - found <- data_to_check[fullname_lower %like_case% d.x_withspaces_start_end, - ..property][[1]] - if (length(found) > 0 & nchar(g.x_backup_without_spp) >= 6) { + found <- lookup(fullname_lower %like_case% d.x_withspaces_start_end, + haystack = data_to_check) + if (!is.na(found) & nchar(g.x_backup_without_spp) >= 6) { return(found[1L]) } # try any match keeping spaces, not ending with $ ---- - found <- data_to_check[fullname_lower %like_case% paste0(trimws(e.x_withspaces_start_only), " "), - ..property][[1]] - if (length(found) > 0) { + found <- lookup(fullname_lower %like_case% paste0(trimws(e.x_withspaces_start_only), " "), + haystack = data_to_check) + if (!is.na(found)) { return(found[1L]) } - found <- data_to_check[fullname_lower %like_case% e.x_withspaces_start_only, - ..property][[1]] - if (length(found) > 0 & nchar(g.x_backup_without_spp) >= 6) { + found <- lookup(fullname_lower %like_case% e.x_withspaces_start_only, + haystack = data_to_check) + if (!is.na(found) & nchar(g.x_backup_without_spp) >= 6) { return(found[1L]) } # try any match keeping spaces, not start with ^ ---- - found <- data_to_check[fullname_lower %like_case% paste0(" ", trimws(f.x_withspaces_end_only)), - ..property][[1]] - if (length(found) > 0) { + found <- lookup(fullname_lower %like_case% paste0(" ", trimws(f.x_withspaces_end_only)), + haystack = data_to_check) + if (!is.na(found)) { return(found[1L]) } # try a trimmed version - found <- data_to_check[fullname_lower %like_case% b.x_trimmed - | fullname_lower %like_case% c.x_trimmed_without_group, - ..property][[1]] - if (length(found) > 0 & nchar(g.x_backup_without_spp) >= 6) { + found <- lookup(fullname_lower %like_case% b.x_trimmed | + fullname_lower %like_case% c.x_trimmed_without_group, + haystack = data_to_check) + if (!is.na(found) & nchar(g.x_backup_without_spp) >= 6) { return(found[1L]) } @@ -958,44 +920,43 @@ exec_as.mo <- function(x, g.x_backup_without_spp %>% substr(1, x_length / 2), ".* ", g.x_backup_without_spp %>% substr((x_length / 2) + 1, x_length)) - found <- data_to_check[fullname_lower %like_case% x_split, - ..property][[1]] - if (length(found) > 0) { + found <- lookup(fullname_lower %like_case% x_split, + haystack = data_to_check) + if (!is.na(found)) { return(found[1L]) } } # try fullname without start and without nchar limit of >= 6 ---- # like "K. pneu rhino" >> "Klebsiella pneumoniae (rhinoscleromatis)" = KLEPNERH - found <- data_to_check[fullname_lower %like_case% e.x_withspaces_start_only, - ..property][[1]] - if (length(found) > 0) { + found <- lookup(fullname_lower %like_case% e.x_withspaces_start_only, + haystack = data_to_check) + if (!is.na(found)) { return(found[1L]) } # MISCELLANEOUS ---- # look for old taxonomic names ---- - # wait until prevalence == 2 to run the old taxonomic results on both prevalence == 1 and prevalence == 2 - found <- data.old_to_check[fullname_lower == tolower(a.x_backup) - | fullname_lower %like_case% d.x_withspaces_start_end, ] - if (NROW(found) > 0) { - col_id_new <- found[1, col_id_new] + found <- lookup(fullname_lower %like_case% e.x_withspaces_start_only, + column = NULL, # all columns + haystack = data.old_to_check) + if (!all(is.na(found))) { + col_id_new <- found["col_id_new"] # when property is "ref" (which is the case in mo_ref, mo_authors and mo_year), return the old value, so: # mo_ref() of "Chlamydia psittaci" will be "Page, 1968" (with warning) # mo_ref() of "Chlamydophila psittaci" will be "Everett et al., 1999" if (property == "ref") { - x[i] <- found[1, ref] + x[i] <- found["ref"] } else { - x[i] <- microorganismsDT[col_id == found[1, col_id_new], - ..property][[1]] + x[i] <- lookup(col_id == found["col_id_new"], haystack = MO_lookup) } - options(mo_renamed_last_run = found[1, fullname]) - was_renamed(name_old = found[1, fullname], - name_new = microorganismsDT[col_id == found[1, col_id_new], fullname], - ref_old = found[1, ref], - ref_new = microorganismsDT[col_id == found[1, col_id_new], ref], - mo = microorganismsDT[col_id == found[1, col_id_new], mo]) + options(mo_renamed_last_run = found["fullname"]) + was_renamed(name_old = found["fullname"], + name_new = lookup(col_id == found["col_id_new"], "fullname", haystack = MO_lookup), + ref_old = found["ref"], + ref_new = lookup(col_id == found["col_id_new"], "ref", haystack = MO_lookup), + mo = lookup(col_id == found["col_id_new"], "mo", haystack = MO_lookup)) return(x[i]) } @@ -1024,28 +985,29 @@ exec_as.mo <- function(x, if (isTRUE(debug)) { message("Running '", d.x_withspaces_start_end, "' and '", e.x_withspaces_start_only, "'") } - found <- data.old_to_check[fullname_lower %like_case% d.x_withspaces_start_end - | fullname_lower %like_case% e.x_withspaces_start_only] - if (NROW(found) > 0 & nchar(g.x_backup_without_spp) >= 6) { + found <- lookup(fullname_lower %like_case% d.x_withspaces_start_end | + fullname_lower %like_case% e.x_withspaces_start_only, + column = NULL, # all columns + haystack = data.old_to_check) + if (!all(is.na(found)) & nchar(g.x_backup_without_spp) >= 6) { if (property == "ref") { # when property is "ref" (which is the case in mo_ref, mo_authors and mo_year), return the old value, so: # mo_ref("Chlamydia psittaci) = "Page, 1968" (with warning) # mo_ref("Chlamydophila psittaci) = "Everett et al., 1999" - x <- found[1, ref] + x <- found["ref"] } else { - x <- microorganismsDT[col_id == found[1, col_id_new], - ..property][[1]] + x <- lookup(col_id == found["col_id_new"], haystack = MO_lookup) } - was_renamed(name_old = found[1, fullname], - name_new = microorganismsDT[col_id == found[1, col_id_new], fullname], - ref_old = found[1, ref], - ref_new = microorganismsDT[col_id == found[1, col_id_new], ref], - mo = microorganismsDT[col_id == found[1, col_id_new], mo]) - options(mo_renamed_last_run = found[1, fullname]) + was_renamed(name_old = found["fullname"], + name_new = lookup(col_id == found["col_id_new"], "fullname", haystack = MO_lookup), + ref_old = found["ref"], + ref_new = lookup(col_id == found["col_id_new"], "ref", haystack = MO_lookup), + mo = lookup(col_id == found["col_id_new"], "mo", haystack = MO_lookup)) + options(mo_renamed_last_run = found["fullname"]) uncertainties <<- rbind(uncertainties, format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level, input = a.x_backup, - result_mo = microorganismsDT[col_id == found[1, col_id_new], mo])) + result_mo = lookup(col_id == found["col_id_new"], "mo", haystack = MO_lookup))) return(x) } @@ -1065,13 +1027,12 @@ exec_as.mo <- function(x, } if (!empty_result(found)) { found_result <- found - found <- reference_data_to_use[mo == found, - ..property][[1]] + found <- lookup(mo == found) uncertainties <<- rbind(uncertainties, format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level, input = a.x_backup, - result_mo = found_result[1L])) - return(found[1L]) + result_mo = found_result)) + return(found) } } @@ -1089,15 +1050,16 @@ exec_as.mo <- function(x, message("Running '", paste(b.x_trimmed, "species"), "'") } # not when input is like Genustext, because then Neospora would lead to Actinokineospora - found <- uncertain.reference_data_to_use[fullname_lower %like_case% paste(b.x_trimmed, "species"), - ..property][[1]] - if (length(found) > 0) { - x[i] <- found[1L] + found <- lookup(fullname_lower %like_case% paste(b.x_trimmed, "species"), + haystack = uncertain.reference_data_to_use) + if (!is.na(found)) { + found_result <- found + found <- lookup(mo == found) uncertainties <<- rbind(uncertainties, format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level, input = a.x_backup, - result_mo = found_result[1L])) - return(x) + result_mo = found_result)) + return(found) } } } @@ -1119,13 +1081,12 @@ exec_as.mo <- function(x, } if (!empty_result(found) & nchar(g.x_backup_without_spp) >= 6) { found_result <- found - found <- reference_data_to_use[mo == found, - ..property][[1]] + found <- lookup(mo == found) uncertainties <<- rbind(uncertainties, format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level, input = a.x_backup, - result_mo = found_result[1L])) - return(found[1L]) + result_mo = found_result)) + return(found) } # (5) inverse input ---- @@ -1144,13 +1105,12 @@ exec_as.mo <- function(x, } if (!empty_result(found) & nchar(g.x_backup_without_spp) >= 6) { found_result <- found - found <- reference_data_to_use[mo == found, - ..property][[1]] + found <- lookup(mo == found) uncertainties <<- rbind(uncertainties, format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level, input = a.x_backup, - result_mo = found_result[1L])) - return(found[1L]) + result_mo = found_result)) + return(found) } # (6) try to strip off half an element from end and check the remains ---- @@ -1176,13 +1136,12 @@ exec_as.mo <- function(x, } if (!empty_result(found)) { found_result <- found - found <- reference_data_to_use[mo == found, - ..property][[1]] + found <- lookup(mo == found) uncertainties <<- rbind(uncertainties, format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level, input = a.x_backup, - result_mo = found_result[1L])) - return(found[1L]) + result_mo = found_result)) + return(found) } } } @@ -1206,13 +1165,12 @@ exec_as.mo <- function(x, } if (!empty_result(found)) { found_result <- found - found <- reference_data_to_use[mo == found, - ..property][[1]] + found <- lookup(mo == found) uncertainties <<- rbind(uncertainties, format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level, input = a.x_backup, - result_mo = found_result[1L])) - return(found[1L]) + result_mo = found_result)) + return(found) } } } @@ -1224,24 +1182,22 @@ exec_as.mo <- function(x, if (b.x_trimmed %like_case% "yeast") { found <- "F_YEAST" found_result <- found - found <- microorganismsDT[mo == found, - ..property][[1]] + found <- lookup(mo == found) uncertainties <<- rbind(uncertainties, format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level, input = a.x_backup, - result_mo = found_result[1L])) - return(found[1L]) + result_mo = found_result)) + return(found) } if (b.x_trimmed %like_case% "(fungus|fungi)" & !b.x_trimmed %like_case% "fungiphrya") { found <- "F_FUNGUS" found_result <- found - found <- microorganismsDT[mo == found, - ..property][[1]] + found <- lookup(mo == found) uncertainties <<- rbind(uncertainties, format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level, input = a.x_backup, - result_mo = found_result[1L])) - return(found[1L]) + result_mo = found_result)) + return(found) } # (9) try to strip off one element from start and check the remains (only allow >= 2-part name outcome) ---- if (isTRUE(debug)) { @@ -1262,15 +1218,14 @@ exec_as.mo <- function(x, } if (!empty_result(found)) { found_result <- found - found <- reference_data_to_use[mo == found_result[1L], - ..property][[1]] + found <- lookup(mo == found) # uncertainty level 2 only if searched part contains a space (otherwise it will be found with lvl 3) if (x_strip_collapsed %like_case% " ") { uncertainties <<- rbind(uncertainties, format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level, input = a.x_backup, - result_mo = found_result[1L])) - return(found[1L]) + result_mo = found_result)) + return(found) } } } @@ -1300,13 +1255,12 @@ exec_as.mo <- function(x, } if (!empty_result(found)) { found_result <- found - found <- reference_data_to_use[mo == found, - ..property][[1]] + found <- lookup(mo == found) uncertainties <<- rbind(uncertainties, format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level, input = a.x_backup, - result_mo = found_result[1L])) - return(found[1L]) + result_mo = found_result)) + return(found) } } } @@ -1329,13 +1283,12 @@ exec_as.mo <- function(x, } if (!empty_result(found)) { found_result <- found - found <- reference_data_to_use[mo == found, - ..property][[1]] + found <- lookup(mo == found) uncertainties <<- rbind(uncertainties, format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level, input = a.x_backup, - result_mo = found_result[1L])) - return(found[1L]) + result_mo = found_result)) + return(found) } } } @@ -1347,58 +1300,51 @@ exec_as.mo <- function(x, if (isTRUE(debug)) { message("Running '", f.x_withspaces_end_only, "'") } - found <- reference_data_to_use[fullname_lower %like_case% f.x_withspaces_end_only] - if (nrow(found) > 0) { - found_result <- found[["mo"]] - if (!empty_result(found_result) & nchar(g.x_backup_without_spp) >= 6) { - found <- reference_data_to_use[mo == found_result[1L], - ..property][[1]] - uncertainties <<- rbind(uncertainties, - format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level, - input = a.x_backup, - result_mo = found_result[1L])) - return(found[1L]) - } + found <- lookup(fullname_lower %like_case% f.x_withspaces_end_only, column = "mo") + if (!is.na(found) & nchar(g.x_backup_without_spp) >= 6) { + found_result <- lookup(mo == found) + uncertainties <<- rbind(uncertainties, + format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level, + input = a.x_backup, + result_mo = found_result)) + return(found) } } + # didn't found in uncertain results too return(NA_character_) } # uncertain results - # wait until prevalence == 2 to run the uncertain results on both prevalence == 1 and prevalence == 2 - if (nrow(data_to_check) == nrow(microorganismsDT[prevalence == 2])) { - x[i] <- uncertain_fn(a.x_backup = a.x_backup, - b.x_trimmed = b.x_trimmed, - d.x_withspaces_start_end = d.x_withspaces_start_end, - e.x_withspaces_start_only = e.x_withspaces_start_only, - f.x_withspaces_end_only = f.x_withspaces_end_only, - g.x_backup_without_spp = g.x_backup_without_spp, - uncertain.reference_data_to_use = microorganismsDT[prevalence %in% c(1, 2)]) - if (!empty_result(x[i])) { - return(x[i]) - } - } else if (nrow(data_to_check) == nrow(microorganismsDT[prevalence == 3])) { - x[i] <- uncertain_fn(a.x_backup = a.x_backup, - b.x_trimmed = b.x_trimmed, - d.x_withspaces_start_end = d.x_withspaces_start_end, - e.x_withspaces_start_only = e.x_withspaces_start_only, - f.x_withspaces_end_only = f.x_withspaces_end_only, - g.x_backup_without_spp = g.x_backup_without_spp, - uncertain.reference_data_to_use = microorganismsDT[prevalence == 3]) - if (!empty_result(x[i])) { - return(x[i]) - } + x[i] <- uncertain_fn(a.x_backup = a.x_backup, + b.x_trimmed = b.x_trimmed, + d.x_withspaces_start_end = d.x_withspaces_start_end, + e.x_withspaces_start_only = e.x_withspaces_start_only, + f.x_withspaces_end_only = f.x_withspaces_end_only, + g.x_backup_without_spp = g.x_backup_without_spp, + uncertain.reference_data_to_use = MO_lookup[which(MO_lookup$prevalence %in% c(1, 2)), ]) + if (!empty_result(x[i])) { + return(x[i]) + } + x[i] <- uncertain_fn(a.x_backup = a.x_backup, + b.x_trimmed = b.x_trimmed, + d.x_withspaces_start_end = d.x_withspaces_start_end, + e.x_withspaces_start_only = e.x_withspaces_start_only, + f.x_withspaces_end_only = f.x_withspaces_end_only, + g.x_backup_without_spp = g.x_backup_without_spp, + uncertain.reference_data_to_use = MO_lookup[which(MO_lookup$prevalence == 3), ]) + if (!empty_result(x[i])) { + return(x[i]) } # didn't found any return(NA_character_) } - # FIRST TRY VERY PREVALENT IN HUMAN INFECTIONS ---- - x[i] <- check_per_prevalence(data_to_check = reference_data_to_use[prevalence == 1], - data.old_to_check = microorganisms.oldDT[prevalence == 1], + # CHECK ALL IN ONE GO ---- + x[i] <- check_per_prevalence(data_to_check = MO_lookup, + data.old_to_check = MO.old_lookup, a.x_backup = x_backup[i], b.x_trimmed = x_trimmed[i], c.x_trimmed_without_group = x_trimmed_without_group[i], @@ -1412,42 +1358,9 @@ exec_as.mo <- function(x, next } - # THEN TRY PREVALENT IN HUMAN INFECTIONS ---- - x[i] <- check_per_prevalence(data_to_check = reference_data_to_use[prevalence == 2], - data.old_to_check = microorganisms.oldDT[prevalence %in% c(2, 3)], # run all other old MOs the second time, - # otherwise e.g. mo_ref("Chlamydia psittaci") doesn't work correctly - a.x_backup = x_backup[i], - b.x_trimmed = x_trimmed[i], - c.x_trimmed_without_group = x_trimmed_without_group[i], - d.x_withspaces_start_end = x_withspaces_start_end[i], - e.x_withspaces_start_only = x_withspaces_start_only[i], - f.x_withspaces_end_only = x_withspaces_end_only[i], - g.x_backup_without_spp = x_backup_without_spp[i], - h.x_species = x_species[i], - i.x_trimmed_species = x_trimmed_species[i]) - if (!empty_result(x[i])) { - next - } - - # THEN UNPREVALENT IN HUMAN INFECTIONS ---- - x[i] <- check_per_prevalence(data_to_check = reference_data_to_use[prevalence == 3], - data.old_to_check = microorganisms.oldDT[prevalence == 999], - a.x_backup = x_backup[i], - b.x_trimmed = x_trimmed[i], - c.x_trimmed_without_group = x_trimmed_without_group[i], - d.x_withspaces_start_end = x_withspaces_start_end[i], - e.x_withspaces_start_only = x_withspaces_start_only[i], - f.x_withspaces_end_only = x_withspaces_end_only[i], - g.x_backup_without_spp = x_backup_without_spp[i], - h.x_species = x_species[i], - i.x_trimmed_species = x_trimmed_species[i]) - if (!empty_result(x[i])) { - next - } # no results found: make them UNKNOWN ---- - x[i] <- microorganismsDT[mo == "UNKNOWN", - ..property][[1]] + x[i] <- lookup(mo == "UNKNOWN") if (initial_search == TRUE) { failures <- c(failures, x_backup[i]) } @@ -1472,7 +1385,7 @@ exec_as.mo <- function(x, msg <- paste0(msg, ": ", paste('"', unique(failures), '"', sep = "", collapse = ", ")) } msg <- paste0(msg, ".\nUse mo_failures() to review ", plural[2], ". Edit the `allow_uncertain` parameter if needed (see ?as.mo).") - warning(red(paste0("\n", msg)), + warning(font_red(paste0("\n", msg)), call. = FALSE, immediate. = TRUE) # thus will always be shown, even if >= warnings } @@ -1486,7 +1399,7 @@ exec_as.mo <- function(x, } msg <- paste0("Result", plural[1], " of ", nr2char(NROW(uncertainties)), " value", plural[1], " ", plural[3], " guessed with uncertainty. Use mo_uncertainties() to review ", plural[2], ".") - warning(red(paste0("\n", msg)), + warning(font_red(paste0("\n", msg)), call. = FALSE, immediate. = TRUE) # thus will always be shown, even if >= warnings } @@ -1495,89 +1408,68 @@ exec_as.mo <- function(x, if (Becker == TRUE | Becker == "all") { # See Source. It's this figure: # https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4187637/figure/F3/ - MOs_staph <- microorganismsDT[genus == "Staphylococcus"] - setkey(MOs_staph, species) - CoNS <- MOs_staph[species %in% c("arlettae", "auricularis", "capitis", - "caprae", "carnosus", "chromogenes", "cohnii", "condimenti", - "devriesei", "epidermidis", "equorum", "felis", - "fleurettii", "gallinarum", "haemolyticus", - "hominis", "jettensis", "kloosii", "lentus", - "lugdunensis", "massiliensis", "microti", - "muscae", "nepalensis", "pasteuri", "petrasii", - "pettenkoferi", "piscifermentans", "rostri", - "saccharolyticus", "saprophyticus", "sciuri", - "stepanovicii", "simulans", "succinus", - "vitulinus", "warneri", "xylosus") - | (species == "schleiferi" & subspecies %in% c("schleiferi", "")), - ..property][[1]] - CoPS <- MOs_staph[species %in% c("simiae", "agnetis", - "delphini", "lutrae", - "hyicus", "intermedius", - "pseudintermedius", "pseudointermedius", - "schweitzeri", "argenteus") - | (species == "schleiferi" & subspecies == "coagulans"), - ..property][[1]] + MOs_staph <- MO_lookup[which(MO_lookup$genus == "Staphylococcus"), ] + CoNS <- MOs_staph[which(MOs_staph$species %in% c("arlettae", "auricularis", "capitis", + "caprae", "carnosus", "chromogenes", "cohnii", "condimenti", + "devriesei", "epidermidis", "equorum", "felis", + "fleurettii", "gallinarum", "haemolyticus", + "hominis", "jettensis", "kloosii", "lentus", + "lugdunensis", "massiliensis", "microti", + "muscae", "nepalensis", "pasteuri", "petrasii", + "pettenkoferi", "piscifermentans", "rostri", + "saccharolyticus", "saprophyticus", "sciuri", + "stepanovicii", "simulans", "succinus", + "vitulinus", "warneri", "xylosus") + | (MOs_staph$species == "schleiferi" & MOs_staph$subspecies %in% c("schleiferi", ""))), + property] + CoPS <- MOs_staph[which(MOs_staph$species %in% c("simiae", "agnetis", + "delphini", "lutrae", + "hyicus", "intermedius", + "pseudintermedius", "pseudointermedius", + "schweitzeri", "argenteus") + | (MOs_staph$species == "schleiferi" & MOs_staph$subspecies == "coagulans")), + property] # warn when species found that are not in Becker (2014, PMID 25278577) and Becker (2019, PMID 30872103) post_Becker <- c("argensis", "caeli", "cornubiensis", "edaphicus") - if (any(x %in% MOs_staph[species %in% post_Becker, - ..property][[1]])) { + if (any(x %in% MOs_staph[which(MOs_staph$species %in% post_Becker), property])) { - warning("Becker ", italic("et al."), " (2014, 2019) does not contain these species named after their publication: ", - italic(paste("S.", - sort(mo_species(unique(x[x %in% MOs_staph[species %in% post_Becker, - ..property][[1]]]))), - collapse = ", ")), + warning("Becker ", font_italic("et al."), " (2014, 2019) does not contain these species named after their publication: ", + font_italic(paste("S.", + sort(mo_species(unique(x[x %in% MOs_staph[which(MOs_staph$species %in% post_Becker), property]]))), + collapse = ", ")), ".", call. = FALSE, immediate. = TRUE) } - x[x %in% CoNS] <- microorganismsDT[mo == "B_STPHY_CONS", - ..property][[1]][1L] - x[x %in% CoPS] <- microorganismsDT[mo == "B_STPHY_COPS", - ..property][[1]][1L] + x[x %in% CoNS] <- lookup(mo == "B_STPHY_CONS") + x[x %in% CoPS] <- lookup(mo == "B_STPHY_COPS") if (Becker == "all") { - x[x %in% microorganismsDT[mo %like_case% "^B_STPHY_AURS", - ..property][[1]]] <- microorganismsDT[mo == "B_STPHY_COPS", - ..property][[1]][1L] + x[x %in% lookup(fullname %like_case% "^Staphylococcus aureus", n = Inf)] <- lookup(mo == "B_STPHY_COPS") } } # Lancefield ---- if (Lancefield == TRUE | Lancefield == "all") { # group A - S. pyogenes - x[x == microorganismsDT[mo == "B_STRPT_PYGN", - ..property][[1]][1L]] <- microorganismsDT[mo == "B_STRPT_GRPA", - ..property][[1]][1L] + x[x %in% lookup(genus == "Streptococcus" & species == "pyogenes", n = Inf)] <- lookup(fullname == "Streptococcus group A") # group B - S. agalactiae - x[x == microorganismsDT[mo == "B_STRPT_AGLC", - ..property][[1]][1L]] <- microorganismsDT[mo == "B_STRPT_GRPB", - ..property][[1]][1L] + x[x %in% lookup(genus == "Streptococcus" & species == "agalactiae", n = Inf)] <- lookup(fullname == "Streptococcus group B") # group C - S_groupC <- microorganismsDT %>% filter(genus == "Streptococcus", - species %in% c("equisimilis", "equi", - "zooepidemicus", "dysgalactiae")) %>% - pull(property) - x[x %in% S_groupC] <- microorganismsDT[mo == "B_STRPT_GRPC", - ..property][[1]][1L] + x[x %in% lookup(genus == "Streptococcus" & + species %in% c("equisimilis", "equi", "zooepidemicus", "dysgalactiae"), + n = Inf)] <- lookup(fullname == "Streptococcus group C") if (Lancefield == "all") { # all Enterococci - x[x %like% "^(Enterococcus|B_ENTRC)"] <- microorganismsDT[mo == "B_STRPT_GRPD", - ..property][[1]][1L] + x[x %in% lookup(genus == "Enterococcus", n = Inf)] <- lookup(fullname == "Streptococcus group D") } # group F - S. anginosus - x[x == microorganismsDT[mo == "B_STRPT_ANGN", - ..property][[1]][1L]] <- microorganismsDT[mo == "B_STRPT_GRPF", - ..property][[1]][1L] + x[x %in% lookup(genus == "Streptococcus" & species == "anginosus", n = Inf)] <- lookup(fullname == "Streptococcus group F") # group H - S. sanguinis - x[x == microorganismsDT[mo == "B_STRPT_SNGN", - ..property][[1]][1L]] <- microorganismsDT[mo == "B_STRPT_GRPH", - ..property][[1]][1L] + x[x %in% lookup(genus == "Streptococcus" & species == "sanguinis", n = Inf)] <- lookup(fullname == "Streptococcus group H") # group K - S. salivarius - x[x == microorganismsDT[mo == "B_STRPT_SLVR", - ..property][[1]][1L]] <- microorganismsDT[mo == "B_STRPT_GRPK", - ..property][[1]][1L] + x[x %in% lookup(genus == "Streptococcus" & species == "salivarius", n = Inf)] <- lookup(fullname == "Streptococcus group K") } # Wrap up ---------------------------------------------------------------- @@ -1595,12 +1487,8 @@ exec_as.mo <- function(x, df_input <- data.frame(input = as.character(x_input), stringsAsFactors = FALSE) - suppressWarnings( - x <- df_input %>% - left_join(df_found, - by = "input") %>% - pull(found) - ) + # super fast using base::match() which is a lot faster than base::merge() + x <- df_found$found[match(df_input$input, df_found$input)] if (property == "mo") { x <- to_class_mo(x) @@ -1621,7 +1509,6 @@ empty_result <- function(x) { all(x %in% c(NA, "UNKNOWN")) } -#' @importFrom crayon italic was_renamed <- function(name_old, name_new, ref_old = "", ref_new = "", mo = "") { newly_set <- data.frame(old_name = name_old, old_ref = ref_old, @@ -1645,14 +1532,14 @@ format_uncertainty_as_df <- function(uncertainty_level, df <- data.frame(uncertainty = uncertainty_level, input = input, fullname = getOption("mo_renamed_last_run"), - renamed_to = microorganismsDT[mo == result_mo, fullname][[1]], + renamed_to = MO_lookup[which(MO_lookup$mo == result_mo), "fullname"][1], mo = result_mo, stringsAsFactors = FALSE) options(mo_renamed_last_run = NULL) } else { df <- data.frame(uncertainty = uncertainty_level, input = input, - fullname = microorganismsDT[mo == result_mo, fullname][[1]], + fullname = MO_lookup[which(MO_lookup$mo == result_mo), "fullname"][1], renamed_to = NA_character_, mo = result_mo, stringsAsFactors = FALSE) @@ -1676,14 +1563,14 @@ print.mo <- function(x, ...) { pillar_shaft.mo <- function(x, ...) { out <- format(x) # grey out the kingdom (part until first "_") - out[!is.na(x)] <- gsub("^([A-Z]+_)(.*)", paste0(pillar::style_subtle("\\1"), "\\2"), out[!is.na(x)]) + out[!is.na(x)] <- gsub("^([A-Z]+_)(.*)", paste0(font_subtle("\\1"), "\\2"), out[!is.na(x)]) # and grey out every _ - out[!is.na(x)] <- gsub("_", pillar::style_subtle("_"), out[!is.na(x)]) + out[!is.na(x)] <- gsub("_", font_subtle("_"), out[!is.na(x)]) # markup NA and UNKNOWN - out[is.na(x)] <- pillar::style_na(" NA") - out[x == "UNKNOWN"] <- pillar::style_na(" UNKNOWN") - + out[is.na(x)] <- font_red(" NA") + out[x == "UNKNOWN"] <- font_red(" UNKNOWN") + # make it always fit exactly pillar::new_pillar_shaft_simple(out, align = "left", @@ -1693,14 +1580,13 @@ pillar_shaft.mo <- function(x, ...) { } #' @exportMethod summary.mo -#' @importFrom dplyr n_distinct -#' @importFrom cleaner freq top_freq #' @export #' @noRd summary.mo <- function(object, ...) { # unique and top 1-3 - x <- as.mo(object) - top_3 <- unname(top_freq(freq(x), 3)) + x <- as.mo(object) # force again, could be mo from older pkg version + top <- as.data.frame(table(x), responseName = "n", stringsAsFactors = FALSE) + top_3 <- top[order(-top$n), 1][1:3] c("Class" = "mo", "" = length(x[is.na(x)]), "Unique" = n_distinct(x[!is.na(x)]), @@ -1777,7 +1663,6 @@ mo_failures <- function() { } #' @rdname as.mo -#' @importFrom crayon italic #' @export mo_uncertainties <- function() { if (is.null(getOption("mo_uncertainties"))) { @@ -1788,35 +1673,33 @@ mo_uncertainties <- function() { } #' @exportMethod print.mo_uncertainties -#' @importFrom crayon green yellow red white black bgGreen bgYellow bgRed -#' @importFrom cleaner percentage #' @export #' @noRd print.mo_uncertainties <- function(x, ...) { if (NROW(x) == 0) { return(NULL) } - cat(paste0(bold(nr2char(nrow(x)), paste0("unique result", ifelse(nrow(x) > 1, "s", ""), " guessed with uncertainty:")), - "\n(1 = ", green("renamed/misspelled"), - ", 2 = ", yellow("uncertain"), - ", 3 = ", red("very uncertain"), ")\n")) + cat(paste0(font_bold(nr2char(nrow(x)), paste0("unique result", ifelse(nrow(x) > 1, "s", ""), " guessed with uncertainty:")), + "\n(1 = ", font_green("renamed/misspelled"), + ", 2 = ", font_yellow("uncertain"), + ", 3 = ", font_red("very uncertain"), ")\n")) msg <- "" for (i in seq_len(nrow(x))) { if (x[i, "uncertainty"] == 1) { - colour1 <- green - colour2 <- function(...) bgGreen(white(...)) + colour1 <- font_green + colour2 <- function(...) font_green_bg(font_white(...)) } else if (x[i, "uncertainty"] == 2) { - colour1 <- yellow - colour2 <- function(...) bgYellow(black(...)) + colour1 <- font_yellow + colour2 <- function(...) font_yellow_bg(font_black(...)) } else { - colour1 <- red - colour2 <- function(...) bgRed(white(...)) + colour1 <- font_red + colour2 <- function(...) font_red_bg(font_white(...)) } msg <- paste(msg, paste0(colour2(paste0(" [", x[i, "uncertainty"], "] ")), ' "', x[i, "input"], '" -> ', - colour1(paste0(italic(x[i, "fullname"]), - ifelse(!is.na(x[i, "renamed_to"]), paste(", renamed to", italic(x[i, "renamed_to"])), ""), + colour1(paste0(font_italic(x[i, "fullname"]), + ifelse(!is.na(x[i, "renamed_to"]), paste(", renamed to", font_italic(x[i, "renamed_to"])), ""), " (", x[i, "mo"], ", score: ", percentage(levenshtein_fraction(x[i, "input"], x[i, "fullname"]), digits = 1), ")"))), @@ -1826,7 +1709,6 @@ print.mo_uncertainties <- function(x, ...) { } #' @rdname as.mo -#' @importFrom dplyr distinct #' @export mo_renamed <- function() { items <- getOption("mo_renamed") @@ -1840,7 +1722,6 @@ mo_renamed <- function() { } #' @exportMethod print.mo_renamed -#' @importFrom crayon blue italic #' @export #' @noRd print.mo_renamed <- function(x, ...) { @@ -1848,13 +1729,13 @@ print.mo_renamed <- function(x, ...) { return(invisible()) } for (i in seq_len(nrow(x))) { - message(blue(paste0("NOTE: ", - italic(x$old_name[i]), ifelse(x$old_ref[i] %in% c("", NA), "", - paste0(" (", gsub("et al.", italic("et al."), x$old_ref[i]), ")")), - " was renamed ", - italic(x$new_name[i]), ifelse(x$new_ref[i] %in% c("", NA), "", - paste0(" (", gsub("et al.", italic("et al."), x$new_ref[i]), ")")), - " [", x$mo[i], "]"))) + message(font_blue(paste0("NOTE: ", + font_italic(x$old_name[i]), ifelse(x$old_ref[i] %in% c("", NA), "", + paste0(" (", gsub("et al.", font_italic("et al."), x$old_ref[i]), ")")), + " was renamed ", + font_italic(x$new_name[i]), ifelse(x$new_ref[i] %in% c("", NA), "", + paste0(" (", gsub("et al.", font_italic("et al."), x$new_ref[i]), ")")), + " [", x$mo[i], "]"))) } } @@ -1899,12 +1780,11 @@ load_mo_failures_uncertainties_renamed <- function(metadata) { options("mo_renamed" = metadata$renamed) } -#' @importFrom utils adist levenshtein_fraction <- function(input, output) { levenshtein <- double(length = length(input)) for (i in seq_len(length(input))) { # determine Levenshtein distance, but maximise to nchar of output - levenshtein[i] <- base::min(base::as.double(adist(input[i], output[i], ignore.case = TRUE)), + levenshtein[i] <- base::min(base::as.double(utils::adist(input[i], output[i], ignore.case = TRUE)), base::nchar(output[i])) } # self-made score between 0 and 1 (for % certainty, so 0 means huge distance, 1 means no distance) @@ -1938,3 +1818,10 @@ parse_and_convert <- function(x) { }, error = function(e) stop(e$message, call. = FALSE)) # this will also be thrown when running `as.mo(no_existing_object)` parsed } + +left_join_MO_lookup <- function(x, ...) { + left_join(x = x, y = MO_lookup, ...) +} +left_join_MO.old_lookup <- function(x, ...) { + left_join(x = x, y = MO.old_lookup, ...) +} diff --git a/R/mo_property.R b/R/mo_property.R index 1e4f29f13..c6409fdf0 100755 --- a/R/mo_property.R +++ b/R/mo_property.R @@ -358,30 +358,32 @@ mo_info <- function(x, language = get_locale(), ...) { } #' @rdname mo_property -#' @importFrom utils browseURL -#' @importFrom dplyr %>% left_join select mutate case_when #' @export mo_url <- function(x, open = FALSE, ...) { mo <- as.mo(x = x, ... = ...) mo_names <- mo_name(mo) metadata <- get_mo_failures_uncertainties_renamed() - + df <- data.frame(mo, stringsAsFactors = FALSE) %>% - left_join(select(microorganisms, mo, source, species_id), by = "mo") %>% - mutate(url = case_when(source == "CoL" ~ - paste0(gsub("{year}", catalogue_of_life$year, catalogue_of_life$url_CoL, fixed = TRUE), "details/species/id/", species_id), - source == "DSMZ" ~ - paste0(catalogue_of_life$url_DSMZ, "/", unlist(lapply(strsplit(mo_names, ""), function(x) x[1]))), - TRUE ~ - NA_character_)) - + left_join(select(microorganisms, mo, source, species_id), by = "mo") + df$url <- ifelse(df$source == "CoL", + paste0(gsub("{year}", + catalogue_of_life$year, + catalogue_of_life$url_CoL, + fixed = TRUE), + "details/species/id/", + df$species_id), + ifelse(df$source == "DSMZ", + paste0(catalogue_of_life$url_DSMZ, "/", unlist(lapply(strsplit(mo_names, ""), function(x) x[1]))), + NA_character_)) u <- df$url names(u) <- mo_names + if (open == TRUE) { if (length(u) > 1) { warning("only the first URL will be opened, as `browseURL()` only suports one string.") } - browseURL(u[1L]) + utils::browseURL(u[1L]) } load_mo_failures_uncertainties_renamed(metadata) @@ -390,7 +392,6 @@ mo_url <- function(x, open = FALSE, ...) { #' @rdname mo_property -#' @importFrom data.table data.table as.data.table setkey #' @export mo_property <- function(x, property = "fullname", language = get_locale(), ...) { if (length(property) != 1L) { @@ -419,7 +420,7 @@ mo_validate <- function(x, property, ...) { # try to catch an error when inputting an invalid parameter # so the 'call.' can be set to FALSE - tryCatch(x[1L] %in% microorganisms[1, property], + tryCatch(x[1L] %in% MO_lookup[1, property, drop = TRUE], error = function(e) stop(e$message, call. = FALSE)) if (is.mo(x) @@ -428,7 +429,7 @@ mo_validate <- function(x, property, ...) { # this will not reset mo_uncertainties and mo_failures # because it's already a valid MO x <- exec_as.mo(x, property = property, initial_search = FALSE, ...) - } else if (!all(x %in% pull(microorganisms, property)) + } else if (!all(x %in% MO_lookup[, property, drop = TRUE]) | Becker %in% c(TRUE, "all") | Lancefield %in% c(TRUE, "all")) { x <- exec_as.mo(x, property = property, ...) diff --git a/R/mo_source.R b/R/mo_source.R index 27ed21895..3909762f8 100644 --- a/R/mo_source.R +++ b/R/mo_source.R @@ -96,7 +96,6 @@ #' set_mo_source(NULL) #' # Removed mo_source file '~/.mo_source.rds'. #' ``` -#' @importFrom dplyr select everything #' @export #' @inheritSection AMR Read more on our website! set_mo_source <- function(path) { @@ -137,13 +136,13 @@ set_mo_source <- function(path) { try( df <- utils::read.table(header = TRUE, sep = ",", stringsAsFactors = FALSE), silent = TRUE) - if (!mo_source_isvalid(df)) { + if (!mo_source_isvalid(df, stop_on_error = FALSE)) { # try tab try( df <- utils::read.table(header = TRUE, sep = "\t", stringsAsFactors = FALSE), silent = TRUE) } - if (!mo_source_isvalid(df)) { + if (!mo_source_isvalid(df, stop_on_error = FALSE)) { # try pipe try( df <- utils::read.table(header = TRUE, sep = "|", stringsAsFactors = FALSE), @@ -151,9 +150,8 @@ set_mo_source <- function(path) { } } - if (!mo_source_isvalid(df)) { - stop("File must contain a column with self-defined values and a reference column `mo` with valid values from the `microorganisms` data set.") - } + # check integrity + mo_source_isvalid(df) df <- df %>% filter(!is.na(mo)) @@ -201,7 +199,7 @@ get_mo_source <- function() { } } -mo_source_isvalid <- function(x) { +mo_source_isvalid <- function(x, refer_to_name = "`reference_df`", stop_on_error = TRUE) { check_dataset_integrity() @@ -212,13 +210,41 @@ mo_source_isvalid <- function(x) { return(TRUE) } if (is.null(x)) { - return(TRUE) + if (stop_on_error == TRUE) { + stop(refer_to_name, " cannot be NULL.", call. = FALSE) + } else { + return(FALSE) + } } if (!is.data.frame(x)) { - return(FALSE) + if (stop_on_error == TRUE) { + stop(refer_to_name, " must be a data.frame.", call. = FALSE) + } else { + return(FALSE) + } } if (!"mo" %in% colnames(x)) { - return(FALSE) + if (stop_on_error == TRUE) { + stop(refer_to_name, " must contain a column 'mo'.", call. = FALSE) + } else { + return(FALSE) + } } - all(x$mo %in% c("", microorganisms$mo, microorganisms.translation$mo_old), na.rm = TRUE) + if (!all(x$mo %in% c("", microorganisms$mo, microorganisms.translation$mo_old), na.rm = TRUE)) { + if (stop_on_error == TRUE) { + invalid <- x[which(!x$mo %in% c("", microorganisms$mo, microorganisms.translation$mo_old)), , drop = FALSE] + if (nrow(invalid) > 1) { + plural <- "s" + } else { + plural <- "" + } + stop("Value", plural, " ", paste0("'", invalid[, 1, drop = TRUE], "'", collapse = ", "), + " found in ", tolower(refer_to_name), + ", but with invalid microorganism code", plural, " ", paste0("'", invalid$mo, "'", collapse = ", "), ".", + call. = FALSE) + } else { + return(FALSE) + } + } + TRUE } diff --git a/R/pca.R b/R/pca.R index e6afa9c06..1d656ac69 100755 --- a/R/pca.R +++ b/R/pca.R @@ -24,20 +24,19 @@ #' Performs a principal component analysis (PCA) based on a data set with automatic determination for afterwards plotting the groups and labels, and automatic filtering on only suitable (i.e. non-empty and numeric) variables. #' @inheritSection lifecycle Maturing lifecycle #' @param x a [data.frame] containing numeric columns -#' @param ... columns of `x` to be selected for PCA +#' @param ... columns of `x` to be selected for PCA, can be unquoted since it supports quasiquotation. #' @inheritParams stats::prcomp #' @details The [pca()] function takes a [data.frame] as input and performs the actual PCA with the \R function [prcomp()]. #' #' The result of the [pca()] function is a [prcomp] object, with an additional attribute `non_numeric_cols` which is a vector with the column names of all columns that do not contain numeric values. These are probably the groups and labels, and will be used by [ggplot_pca()]. #' @return An object of classes [pca] and [prcomp] #' @importFrom stats prcomp -#' @importFrom dplyr ungroup %>% filter_all all_vars -#' @importFrom rlang enquos eval_tidy #' @export #' @examples #' # `example_isolates` is a dataset available in the AMR package. #' # See ?example_isolates. #' +#' \dontrun{ #' # calculate the resistance per group first #' library(dplyr) #' resistance_data <- example_isolates %>% @@ -53,6 +52,7 @@ #' summary(pca_result) #' biplot(pca_result) #' ggplot_pca(pca_result) # a new and convenient plot function +#' } pca <- function(x, ..., retx = TRUE, @@ -70,47 +70,46 @@ pca <- function(x, x <- as.data.frame(x, stringsAsFactors = FALSE) x.bak <- x - user_exprs <- enquos(...) - - if (length(user_exprs) > 0) { + # defuse R expressions, this replaces rlang::enquos() + dots <- substitute(list(...)) + if (length(dots) > 1) { new_list <- list(0) - for (i in seq_len(length(user_exprs))) { - new_list[[i]] <- tryCatch(eval_tidy(user_exprs[[i]], data = x), + for (i in seq_len(length(dots) - 1)) { + new_list[[i]] <- tryCatch(eval(dots[[i + 1]], envir = x), error = function(e) stop(e$message, call. = FALSE)) if (length(new_list[[i]]) == 1) { - if (i == 1) { - # only for first item: - if (is.character(new_list[[i]]) & new_list[[i]] %in% colnames(x)) { - # this is to support: df %>% pca("mycol") - new_list[[i]] <- x[, new_list[[i]]] - } + if (is.character(new_list[[i]]) & new_list[[i]] %in% colnames(x)) { + # this is to support quoted variables: df %>% pca("mycol1", "mycol2") + new_list[[i]] <- x[, new_list[[i]]] } else { # remove item - it's a parameter like `center` new_list[[i]] <- NULL } } } + x <- as.data.frame(new_list, stringsAsFactors = FALSE) if (any(sapply(x, function(y) !is.numeric(y)))) { warning("Be sure to first calculate the resistance (or susceptibility) of variables with antimicrobial test results, since PCA works with numeric variables only. Please see Examples in ?pca.") } + # set column names - tryCatch(colnames(x) <- sapply(user_exprs, function(y) as_label(y)), + tryCatch(colnames(x) <- as.character(dots)[2:length(dots)], error = function(e) warning("column names could not be set")) + # keep only numeric columns x <- x[, sapply(x, function(y) is.numeric(y))] # bind the data set with the non-numeric columns x <- cbind(x.bak[, sapply(x.bak, function(y) !is.numeric(y) & !all(is.na(y))), drop = FALSE], x) } - x <- x %>% - ungroup() %>% # would otherwise select the grouping vars - filter_all(all_vars(!is.na(.))) + x <- ungroup(x) # would otherwise select the grouping vars + x <- x[rowSums(is.na(x)) == 0, ] # remove columns containing NAs pca_data <- x[, which(sapply(x, function(x) is.numeric(x)))] - message(blue(paste0("NOTE: Columns selected for PCA: ", paste0(bold(colnames(pca_data)), collapse = "/"), - ".\n Total observations available: ", nrow(pca_data), "."))) + message(font_blue(paste0("NOTE: Columns selected for PCA: ", paste0(font_bold(colnames(pca_data)), collapse = "/"), + ".\n Total observations available: ", nrow(pca_data), "."))) pca_model <- prcomp(pca_data, retx = retx, center = center, scale. = scale., tol = tol, rank. = rank.) attr(pca_model, "non_numeric_cols") <- x[, sapply(x, function(y) !is.numeric(y) & !all(is.na(y))), drop = FALSE] diff --git a/R/progress_estimated.R b/R/progress_estimated.R deleted file mode 100644 index 1944d1605..000000000 --- a/R/progress_estimated.R +++ /dev/null @@ -1,142 +0,0 @@ -# ==================================================================== # -# TITLE # -# Antimicrobial Resistance (AMR) Analysis # -# # -# SOURCE # -# https://gitlab.com/msberends/AMR # -# # -# LICENCE # -# (c) 2018-2020 Berends MS, Luz CF et al. # -# # -# This R package is free software; you can freely use and distribute # -# it for both personal and commercial purposes under the terms of the # -# GNU General Public License version 2.0 (GNU GPL-2), as published by # -# the Free Software Foundation. # -# # -# We created this package for both routine data analysis and academic # -# research and it was publicly released in the hope that it will be # -# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. # -# Visit our website for more info: https://msberends.gitlab.io/AMR. # -# ==================================================================== # - -# taken from https://github.com/tidyverse/dplyr/blob/f306d8da8f27c2e6abbd3c70f219fef7ca61fbb5/R/progress.R -# when it was still in the dplyr package - -progress_estimated <- function(n, min_time = 0) { - Progress$new(n, min_time = min_time) -} - -#' @importFrom R6 R6Class -Progress <- R6::R6Class("Progress", - public = list( - n = NULL, - i = 0, - init_time = NULL, - stopped = FALSE, - stop_time = NULL, - min_time = NULL, - last_update = NULL, - - initialize = function(n, min_time = 0, ...) { - self$n <- n - self$min_time <- min_time - self$begin() - }, - - begin = function() { - "Initialise timer. Call this before beginning timing." - self$i <- 0 - self$last_update <- self$init_time <- now() - self$stopped <- FALSE - self - }, - - pause = function(x) { - "Sleep for x seconds. Useful for testing." - Sys.sleep(x) - self - }, - - width = function() { - getOption("width") - nchar("|100% ~ 99.9 h remaining") - 2 - }, - - tick = function() { - "Process one element" - if (self$stopped) return(self) - - if (self$i == self$n) stop("No more ticks") - self$i <- self$i + 1 - self - }, - - stop = function() { - if (self$stopped) return(self) - - self$stopped <- TRUE - self$stop_time <- now() - self - }, - - print = function(...) { - if (!isTRUE(getOption("dplyr.show_progress")) || # user sepecifies no progress - !interactive() || # not an interactive session - !is.null(getOption("knitr.in.progress"))) { # dplyr used within knitr document - return(invisible(self)) - } - - now_ <- now() - if (now_ - self$init_time < self$min_time || now_ - self$last_update < 0.05) { - return(invisible(self)) - } - self$last_update <- now_ - - if (self$stopped) { - overall <- show_time(self$stop_time - self$init_time) - if (self$i == self$n) { - cat_line("Completed after ", overall) - cat("\n") - } else { - cat_line("Killed after ", overall) - cat("\n") - } - return(invisible(self)) - } - - avg <- (now() - self$init_time) / self$i - time_left <- (self$n - self$i) * avg - nbars <- trunc(self$i / self$n * self$width()) - - cat_line( - "|", str_rep("=", nbars), str_rep(" ", self$width() - nbars), "|", - format(round(self$i / self$n * 100), width = 3), "% ", - "~", show_time(time_left), " remaining" - ) - - invisible(self) - } - ) -) - -cat_line <- function(...) { - msg <- paste(..., sep = "", collapse = "") - gap <- max(c(0, getOption("width") - nchar(msg, "width"))) - cat("\r", msg, rep.int(" ", gap), sep = "") - utils::flush.console() -} - -str_rep <- function(x, i) { - paste(rep.int(x, i), collapse = "") -} - -show_time <- function(x) { - if (x < 60) { - paste(round(x), "s") - } else if (x < 60 * 60) { - paste(round(x / 60), "m") - } else { - paste(round(x / (60 * 60)), "h") - } -} - -now <- function() proc.time()[[3]] diff --git a/R/proportion.R b/R/proportion.R index b8887e9c4..31cb22c9f 100755 --- a/R/proportion.R +++ b/R/proportion.R @@ -21,7 +21,7 @@ #' Calculate microbial resistance #' -#' @description These functions can be used to calculate the (co-)resistance or susceptibility of microbial isolates (i.e. percentage of S, SI, I, IR or R). All functions support quasiquotation with pipes, can be used in `summarise()`][dplyr::summarise()] and also support grouped variables, please see *Examples*. +#' @description These functions can be used to calculate the (co-)resistance or susceptibility of microbial isolates (i.e. percentage of S, SI, I, IR or R). All functions support quasiquotation with pipes, can be used in [summarise()] from the `dplyr` package and also supports grouped variables, please see *Examples*. #' #' [resistance()] should be used to calculate resistance, [susceptibility()] should be used to calculate susceptibility.\cr #' @inheritSection lifecycle Stable lifecycle @@ -42,7 +42,7 @@ #' #' These functions are not meant to count isolates, but to calculate the proportion of resistance/susceptibility. Use the `count()`][AMR::count()] functions to count isolates. The function [susceptibility()] is essentially equal to `count_susceptible() / count_all()`. *Low counts can influence the outcome - the `proportion` functions may camouflage this, since they only return the proportion (albeit being dependent on the `minimum` parameter).* #' -#' The function [proportion_df()] takes any variable from `data` that has an [`rsi`] class (created with [as.rsi()]) and calculates the proportions R, I and S. The function [rsi_df()] works exactly like [proportion_df()], but adds the number of isolates. +#' The function [proportion_df()] takes any variable from `data` that has an [`rsi`] class (created with [as.rsi()]) and calculates the proportions R, I and S. It also supports grouped variables. The function [rsi_df()] works exactly like [proportion_df()], but adds the number of isolates. #' @section Combination therapy: #' When using more than one variable for `...` (= combination therapy)), use `only_all_tested` to only count isolates that are tested for all antibiotics/variables that you test them for. See this example for two antibiotics, Drug A and Drug B, about how [susceptibility()] works to calculate the %SI: #' @@ -99,6 +99,7 @@ #' proportion_IR(example_isolates$AMX) #' proportion_R(example_isolates$AMX) #' +#' \dontrun{ #' library(dplyr) #' example_isolates %>% #' group_by(hospital_id) %>% @@ -135,7 +136,6 @@ #' summarise(numerator = count_susceptible(AMC, GEN, only_all_tested = TRUE), #' denominator = count_all(AMC, GEN, only_all_tested = TRUE), #' proportion = susceptibility(AMC, GEN, only_all_tested = TRUE)) - #' #' #' example_isolates %>% @@ -158,9 +158,6 @@ #' group_by(hospital_id) %>% #' proportion_df(translate = FALSE) #' -#' -#' \dontrun{ -#' #' # calculate current empiric combination therapy of Helicobacter gastritis: #' my_table %>% #' filter(first_isolate == TRUE, @@ -265,7 +262,6 @@ proportion_S <- function(..., } #' @rdname proportion -#' @importFrom dplyr %>% select_if bind_rows summarise_if mutate group_vars select everything #' @export proportion_df <- function(data, translate_ab = "name", diff --git a/R/resistance_predict.R b/R/resistance_predict.R index 48af6e3e7..1812102ed 100755 --- a/R/resistance_predict.R +++ b/R/resistance_predict.R @@ -59,8 +59,6 @@ #' @rdname resistance_predict #' @export #' @importFrom stats predict glm lm -#' @importFrom dplyr %>% pull mutate mutate_at n group_by_at summarise filter filter_at all_vars n_distinct arrange case_when n_groups transmute ungroup -#' @importFrom tidyr pivot_wider #' @inheritSection AMR Read more on our website! #' @examples #' x <- resistance_predict(example_isolates, @@ -70,22 +68,22 @@ #' plot(x) #' ggplot_rsi_predict(x) #' -#' # use dplyr so you can actually read it: -#' library(dplyr) -#' x <- example_isolates %>% -#' filter_first_isolate() %>% -#' filter(mo_genus(mo) == "Staphylococcus") %>% -#' resistance_predict("PEN", model = "binomial") -#' plot(x) -#' -#' -#' # get the model from the object -#' mymodel <- attributes(x)$model -#' summary(mymodel) +#' # using dplyr: +#' if (!require("dplyr")) { +#' library(dplyr) +#' x <- example_isolates %>% +#' filter_first_isolate() %>% +#' filter(mo_genus(mo) == "Staphylococcus") %>% +#' resistance_predict("PEN", model = "binomial") +#' plot(x) #' +#' # get the model from the object +#' mymodel <- attributes(x)$model +#' summary(mymodel) +#' } #' #' # create nice plots with ggplot2 yourself -#' if (!require(ggplot2)) { +#' if (!require(ggplot2) & !require("dplyr")) { #' #' data <- example_isolates %>% #' filter(mo == as.mo("E. coli")) %>% @@ -160,11 +158,9 @@ resistance_predict <- function(x, stop("Column ", col_date, " not found.") } - if (n_groups(x) > 1) { - # no grouped tibbles please, mutate will throw errors - x <- base::as.data.frame(x, stringsAsFactors = FALSE) - } - + # no grouped tibbles, mutate will throw errors + x <- as.data.frame(x, stringsAsFactors = FALSE) + year <- function(x) { # don't depend on lubridate or so, would be overkill for only this function if (all(grepl("^[0-9]{4}$", x))) { @@ -174,42 +170,54 @@ resistance_predict <- function(x, } } - df <- x %>% - mutate_at(col_ab, as.rsi) %>% - mutate_at(col_ab, droplevels) + df <- x + df[, col_ab] <- droplevels(as.rsi(df[, col_ab, drop = TRUE])) if (I_as_S == TRUE) { - df <- df %>% - mutate_at(col_ab, ~gsub("I", "S", .)) + # then I as S + df[, col_ab] <- gsub("I", "S", df[, col_ab, drop = TRUE]) } else { # then I as R - df <- df %>% - mutate_at(col_ab, ~gsub("I", "R", .)) + df[, col_ab] <- gsub("I", "R", df[, col_ab, drop = TRUE]) } - df <- df %>% - filter_at(col_ab, all_vars(!is.na(.))) %>% - mutate(year = year(pull(., col_date))) %>% - group_by_at(c("year", col_ab)) %>% - summarise(n()) + df[, col_ab] <- ifelse(is.na(df[, col_ab, drop = TRUE]), 0, df[, col_ab, drop = TRUE]) + + # remove rows with NAs + df <- subset(df, !is.na(df[, col_ab, drop = TRUE])) + df$year <- year(df[, col_date, drop = TRUE]) + df <- as.data.frame(rbind(table(df[, c("year", col_ab)])), stringsAsFactors = FALSE) + df$year <- as.integer(rownames(df)) + rownames(df) <- NULL - if (df %>% pull(col_ab) %>% n_distinct(na.rm = TRUE) < 2) { - stop("No variety in antimicrobial interpretations - all isolates are '", - df %>% pull(col_ab) %>% unique(), "'.", - call. = FALSE) - } - - colnames(df) <- c("year", "antibiotic", "observations") - - df <- df %>% - filter(!is.na(antibiotic)) %>% - pivot_wider(names_from = antibiotic, - values_from = observations, - values_fill = list(observations = 0)) %>% - filter((R + S) >= minimum) - df_matrix <- df %>% - ungroup() %>% - select(R, S) %>% - as.matrix() + # df <- df %>% + # filter_at(col_ab, all_vars(!is.na(.))) %>% + # mutate(year = year(pull(., col_date))) %>% + # group_by_at(c("year", col_ab)) %>% + # summarise(n()) + # if (df %>% pull(col_ab) %>% n_distinct(na.rm = TRUE) < 2) { + # stop("No variety in antimicrobial interpretations - all isolates are '", + # df %>% pull(col_ab) %>% unique(), "'.", + # call. = FALSE) + # } + # + # colnames(df) <- c("year", "antibiotic", "observations") + + df <- subset(df, sum(df$R + df$S, na.rm = TRUE) >= minimum) + + # return(df) + # + # df <- df %>% + # filter(!is.na(antibiotic)) %>% + # pivot_wider(names_from = antibiotic, + # values_from = observations, + # values_fill = list(observations = 0)) %>% + # filter((R + S) >= minimum) + # df_matrix <- df %>% + # ungroup() %>% + # select(R, S) %>% + # as.matrix() + df_matrix <- as.matrix(df[, c("R", "S"), drop = FALSE]) + if (NROW(df) == 0) { stop("There are no observations.") } @@ -272,49 +280,39 @@ resistance_predict <- function(x, # prepare the output dataframe df_prediction <- data.frame(year = unlist(years), value = prediction, - stringsAsFactors = FALSE) %>% - - mutate(se_min = value - se, - se_max = value + se) + se_min = prediction - se, + se_max = prediction + se, + stringsAsFactors = FALSE) if (model == "poisson") { - df_prediction <- df_prediction %>% - mutate(value = value %>% - format(scientific = FALSE) %>% - as.integer(), - se_min = as.integer(se_min), - se_max = as.integer(se_max)) + df_prediction$value <- as.integer(format(df_prediction$value, scientific = FALSE)) + df_prediction$se_min <- as.integer(df_prediction$se_min) + df_prediction$se_max <- as.integer(df_prediction$se_max) + } else { - df_prediction <- df_prediction %>% - # se_max not above 1 - mutate(se_max = ifelse(se_max > 1, 1, se_max)) + # se_max not above 1 + df_prediction$se_max <- ifelse(df_prediction$se_max > 1, 1, df_prediction$se_max) } - df_prediction <- df_prediction %>% - # se_min not below 0 - mutate(se_min = ifelse(se_min < 0, 0, se_min)) + # se_min not below 0 + df_prediction$se_min <- ifelse(df_prediction$se_min < 0, 0, df_prediction$se_min) - df_observations <- df %>% - ungroup() %>% - transmute(year, - observations = R + S, - observed = R / (R + S)) + df_observations <- data.frame(year = df$year, + observations = df$R + df$S, + observed = df$R / (df$R + df$S), + stringsAsFactors = FALSE) df_prediction <- df_prediction %>% - left_join(df_observations, by = "year") %>% - mutate(estimated = value) + left_join(df_observations, by = "year") + df_prediction$estimated <- df_prediction$value if (preserve_measurements == TRUE) { # replace estimated data by observed data - df_prediction <- df_prediction %>% - mutate(value = ifelse(!is.na(observed), observed, value), - se_min = ifelse(!is.na(observed), NA, se_min), - se_max = ifelse(!is.na(observed), NA, se_max)) + df_prediction$value <- ifelse(!is.na(df_prediction$observed), df_prediction$observed, df_prediction$value) + df_prediction$se_min <- ifelse(!is.na(df_prediction$observed), NA, df_prediction$se_min) + df_prediction$se_max <- ifelse(!is.na(df_prediction$observed), NA, df_prediction$se_max) } - df_prediction <- df_prediction %>% - mutate(value = case_when(value > 1 ~ 1, - value < 0 ~ 0, - TRUE ~ value)) %>% - arrange(year) + df_prediction$value <- ifelse(df_prediction$value > 1, 1, ifelse(df_prediction$value < 0, 0, df_prediction$value)) + df_prediction <- df_prediction[order(df_prediction$year), ] structure( .Data = df_prediction, @@ -332,7 +330,6 @@ rsi_predict <- resistance_predict #' @exportMethod plot.mic #' @export -#' @importFrom dplyr filter #' @importFrom graphics plot axis arrows points #' @rdname resistance_predict plot.resistance_predict <- function(x, main = paste("Resistance Prediction of", x_name), ...) { @@ -366,14 +363,13 @@ plot.resistance_predict <- function(x, main = paste("Resistance Prediction of", length = 0.05, angle = 90, code = 3, lwd = 1.5) # overlay grey points for prediction - points(x = filter(x, is.na(observations))$year, - y = filter(x, is.na(observations))$value, + points(x = subset(x, is.na(observations))$year, + y = subset(x, is.na(observations))$value, pch = 19, col = "grey40") } #' @rdname resistance_predict -#' @importFrom dplyr filter #' @export ggplot_rsi_predict <- function(x, main = paste("Resistance Prediction of", x_name), @@ -392,7 +388,7 @@ ggplot_rsi_predict <- function(x, } p <- ggplot2::ggplot(x, ggplot2::aes(x = year, y = value)) + - ggplot2::geom_point(data = filter(x, !is.na(observations)), + ggplot2::geom_point(data = subset(x, !is.na(observations)), size = 2) + scale_y_percent(limits = c(0, 1)) + ggplot2::labs(title = main, @@ -408,7 +404,7 @@ ggplot_rsi_predict <- function(x, } p <- p + # overlay grey points for prediction - ggplot2::geom_point(data = filter(x, is.na(observations)), + ggplot2::geom_point(data = subset(x, is.na(observations)), size = 2, colour = "grey40") p diff --git a/R/rsi.R b/R/rsi.R index 1715b6ca8..3355b4c89 100755 --- a/R/rsi.R +++ b/R/rsi.R @@ -29,12 +29,15 @@ #' @param ab any (vector of) text that can be coerced to a valid antimicrobial code with [as.ab()] #' @param uti (Urinary Tract Infection) A vector with [logical]s (`TRUE` or `FALSE`) to specify whether a UTI specific interpretation from the guideline should be chosen. For using [as.rsi()] on a [data.frame], this can also be a column containing [logical]s or when left blank, the data set will be search for a 'specimen' and rows containing 'urin' in that column will be regarded isolates from a UTI. See *Examples*. #' @inheritParams first_isolate -#' @param guideline defaults to the latest included EUCAST guideline, run `unique(rsi_translation$guideline)` for all options +#' @param guideline defaults to the latest included EUCAST guideline, see Details for all options #' @param threshold maximum fraction of invalid antimicrobial interpretations of `x`, please see *Examples* #' @param ... parameters passed on to methods -#' @details Run `unique(rsi_translation$guideline)` for a list of all supported guidelines. The repository of this package contains [this machine readable version](https://gitlab.com/msberends/AMR/blob/master/data-raw/rsi_translation.txt) of these guidelines. +#' @details +#' When using [as.rsi()] on untransformed data, the data will be cleaned to only contain values S, I and R. When using the function on data with class [`mic`] (using [as.mic()]) or class [`disk`] (using [as.disk()]), the data will be interpreted based on the guideline set with the `guideline` parameter. #' -#' These guidelines are machine readable, since [](https://gitlab.com/msberends/AMR/blob/master/data-raw/rsi_translation.txt). +#' Supported guidelines to be used as input for the `guideline` parameter are: `r paste0('"', sort(unique(AMR::rsi_translation$guideline)), '"', collapse = ", ")`. Simply using `"CLSI"` or `"EUCAST"` for input will automatically select the latest version of that guideline. +#' +#' The repository of this package [contains a machine readable version](https://gitlab.com/msberends/AMR/blob/master/data-raw/rsi_translation.txt) of all guidelines. This is a CSV file consisting of `r format(nrow(AMR::rsi_translation), big.mark = ",")` rows and `r ncol(AMR::rsi_translation)` columns. This file is machine readable, since it contains one row for every unique combination of the test method (MIC or disk diffusion), the antimicrobial agent and the microorganism. This **allows for easy implementation of these rules in laboratory information systems (LIS)**. #' #' After using [as.rsi()], you can use [eucast_rules()] to (1) apply inferred susceptibility and resistance based on results of other antimicrobials and (2) apply intrinsic resistance based on taxonomic properties of a microorganism. #' @@ -113,7 +116,6 @@ #' is.rsi(rsi_data) #' plot(rsi_data) # for percentages #' barplot(rsi_data) # for frequencies -#' freq(rsi_data) # frequency table with informative header #' #' library(dplyr) #' example_isolates %>% @@ -216,7 +218,7 @@ as.rsi.mic <- function(x, mo, ab = deparse(substitute(x)), guideline = "EUCAST", mo_coerced <- suppressWarnings(as.mo(mo)) guideline_coerced <- get_guideline(guideline) if (is.na(ab_coerced)) { - message(red(paste0("Unknown drug: `", bold(ab), "`. Rename this column to a drug name or code, and check the output with as.ab()."))) + message(font_red(paste0("Unknown drug: `", font_bold(ab), "`. Rename this column to a drug name or code, and check the output with as.ab()."))) return(as.rsi(rep(NA, length(x)))) } if (length(mo_coerced) == 1) { @@ -226,16 +228,16 @@ as.rsi.mic <- function(x, mo, ab = deparse(substitute(x)), guideline = "EUCAST", uti <- rep(uti, length(x)) } - message(blue(paste0("=> Interpreting MIC values of `", bold(ab), "` (", + message(font_blue(paste0("=> Interpreting MIC values of `", font_bold(ab), "` (", ifelse(ab_coerced != ab, paste0(ab_coerced, ", "), ""), - ab_name(ab_coerced, tolower = TRUE), ") using guideline ", bold(guideline_coerced), " ... ")), + ab_name(ab_coerced, tolower = TRUE), ") using guideline ", font_bold(guideline_coerced), " ... ")), appendLF = FALSE) result <- exec_as.rsi(method = "mic", x = x, mo = mo_coerced, ab = ab_coerced, guideline = guideline_coerced, - uti = uti) # exec_as.rsi will return message(blue(" OK.")) + uti = uti) # exec_as.rsi will return message(font_blue(" OK.")) result } @@ -253,7 +255,7 @@ as.rsi.disk <- function(x, mo, ab = deparse(substitute(x)), guideline = "EUCAST" mo_coerced <- suppressWarnings(as.mo(mo)) guideline_coerced <- get_guideline(guideline) if (is.na(ab_coerced)) { - message(red(paste0("Unknown drug: `", bold(ab), "`. Rename this column to a drug name or code, and check the output with as.ab()."))) + message(font_red(paste0("Unknown drug: `", font_bold(ab), "`. Rename this column to a drug name or code, and check the output with as.ab()."))) return(as.rsi(rep(NA, length(x)))) } if (length(mo_coerced) == 1) { @@ -263,21 +265,20 @@ as.rsi.disk <- function(x, mo, ab = deparse(substitute(x)), guideline = "EUCAST" uti <- rep(uti, length(x)) } - message(blue(paste0("=> Interpreting disk zones of `", bold(ab), "` (", + message(font_blue(paste0("=> Interpreting disk zones of `", font_bold(ab), "` (", ifelse(ab_coerced != ab, paste0(ab_coerced, ", "), ""), - ab_name(ab_coerced, tolower = TRUE), ") using guideline ", bold(guideline_coerced), " ... ")), + ab_name(ab_coerced, tolower = TRUE), ") using guideline ", font_bold(guideline_coerced), " ... ")), appendLF = FALSE) result <- exec_as.rsi(method = "disk", x = x, mo = mo_coerced, ab = ab_coerced, guideline = guideline_coerced, - uti = uti) # exec_as.rsi will return message(blue(" OK.")) + uti = uti) # exec_as.rsi will return message(font_blue(" OK.")) result } #' @rdname as.rsi -#' @importFrom crayon red blue bold #' @export as.rsi.data.frame <- function(x, col_mo = NULL, guideline = "EUCAST", uti = NULL, ...) { # try to find columns based on type @@ -316,9 +317,9 @@ as.rsi.data.frame <- function(x, col_mo = NULL, guideline = "EUCAST", uti = NULL } else { plural <- c("", "s", "a ") } - message(blue(paste0("NOTE: Assuming value", plural[1], " ", + message(font_blue(paste0("NOTE: Assuming value", plural[1], " ", paste(paste0('"', values, '"'), collapse = ", "), - " in column `", bold(col_specimen), + " in column `", font_bold(col_specimen), "` reflect", plural[2], " ", plural[3], "urinary tract infection", plural[1], ".\n Use `as.rsi(uti = FALSE)` to prevent this."))) } else { # no data about UTI's found @@ -336,12 +337,12 @@ as.rsi.data.frame <- function(x, col_mo = NULL, guideline = "EUCAST", uti = NULL # not even a valid AB code return(FALSE) } else if (!check & all_valid_mics(y)) { - message(blue(paste0("NOTE: Assuming column `", ab, "` (", + message(font_blue(paste0("NOTE: Assuming column `", ab, "` (", ifelse(ab_coerced != ab, paste0(ab_coerced, ", "), ""), ab_name(ab_coerced, tolower = TRUE), ") contains MIC values."))) return(TRUE) } else if (!check & all_valid_disks(y)) { - message(blue(paste0("NOTE: Assuming column `", ab, "` (", + message(font_blue(paste0("NOTE: Assuming column `", ab, "` (", ifelse(ab_coerced != ab, paste0(ab_coerced, ", "), ""), ab_name(ab_coerced, tolower = TRUE), ") contains disk zones."))) return(TRUE) @@ -380,16 +381,10 @@ as.rsi.data.frame <- function(x, col_mo = NULL, guideline = "EUCAST", uti = NULL x } -#' @importFrom dplyr %>% filter pull get_guideline <- function(guideline) { guideline_param <- toupper(guideline) if (guideline_param %in% c("CLSI", "EUCAST")) { - guideline_param <- rsi_translation %>% - filter(guideline %like% guideline_param) %>% - pull(guideline) %>% - sort() %>% - rev() %>% - .[1] + guideline_param <- rev(sort(subset(rsi_translation, guideline %like% guideline_param)$guideline))[1L] } if (!guideline_param %like% " ") { # like 'EUCAST2020', should be 'EUCAST 2020' @@ -406,8 +401,6 @@ get_guideline <- function(guideline) { } -#' @importFrom dplyr %>% case_when desc arrange filter n_distinct -#' @importFrom crayon green red bold exec_as.rsi <- function(method, x, mo, ab, guideline, uti) { if (method == "mic") { x <- as.mic(x) # when as.rsi.mic is called directly @@ -427,14 +420,14 @@ exec_as.rsi <- function(method, x, mo, ab, guideline, uti) { guideline_coerced <- get_guideline(guideline) if (guideline_coerced != guideline) { - message(blue(paste0("Note: Using guideline ", bold(guideline_coerced), " as input for `guideline`."))) + message(font_blue(paste0("Note: Using guideline ", font_bold(guideline_coerced), " as input for `guideline`."))) } new_rsi <- rep(NA_character_, length(x)) ab_param <- ab trans <- rsi_translation %>% - filter(guideline == guideline_coerced & method == method_param & ab == ab_param) %>% - mutate(lookup = paste(mo, ab)) + subset(guideline == guideline_coerced & method == method_param & ab == ab_param) + trans$lookup <- paste(trans$mo, trans$ab) lookup_mo <- paste(mo, ab) lookup_genus <- paste(mo_genus, ab) @@ -445,15 +438,15 @@ exec_as.rsi <- function(method, x, mo, ab, guideline, uti) { lookup_other <- paste(mo_other, ab) if (all(trans$uti == TRUE, na.rm = TRUE) & all(uti == FALSE)) { - message(red("WARNING.")) - warning("Interpretation of ", bold(ab_name(ab, tolower = TRUE)), " for some microorganisms is only available for (uncomplicated) urinary tract infections (UTI).\n Use parameter 'uti' to set which isolates are from urine. See ?as.rsi.", call. = FALSE) + message(font_red("WARNING.")) + warning("Interpretation of ", font_bold(ab_name(ab, tolower = TRUE)), " for some microorganisms is only available for (uncomplicated) urinary tract infections (UTI).\n Use parameter 'uti' to set which isolates are from urine. See ?as.rsi.", call. = FALSE) warned <- TRUE } for (i in seq_len(length(x))) { get_record <- trans %>% # no UTI for now - filter(lookup %in% c(lookup_mo[i], + subset(lookup %in% c(lookup_mo[i], lookup_genus[i], lookup_family[i], lookup_order[i], @@ -465,14 +458,13 @@ exec_as.rsi <- function(method, x, mo, ab, guideline, uti) { get_record <- get_record %>% # be as specific as possible (i.e. prefer species over genus): # desc(uti) = TRUE on top and FALSE on bottom - arrange(desc(uti), desc(nchar(mo))) %>% # 'uti' is a column in rsi_translation - .[1L, ] + arrange(desc(uti), desc(nchar(mo))) # 'uti' is a column in rsi_translation } else { get_record <- get_record %>% filter(uti == FALSE) %>% # 'uti' is a column in rsi_translation - arrange(desc(nchar(mo))) %>% - .[1L, ] + arrange(desc(nchar(mo))) } + get_record <- get_record[1L, ] if (NROW(get_record) > 0) { if (is.na(x[i])) { @@ -481,20 +473,20 @@ exec_as.rsi <- function(method, x, mo, ab, guideline, uti) { mic_input <- x[i] mic_S <- as.mic(get_record$breakpoint_S) mic_R <- as.mic(get_record$breakpoint_R) - new_rsi[i] <- case_when(isTRUE(which(levels(mic_input) == mic_input) <= which(levels(mic_S) == mic_S)) ~ "S", - isTRUE(which(levels(mic_input) == mic_input) >= which(levels(mic_R) == mic_R)) ~ "R", - !is.na(get_record$breakpoint_S) & !is.na(get_record$breakpoint_R) ~ "I", - TRUE ~ NA_character_) + new_rsi[i] <- ifelse(isTRUE(which(levels(mic_input) == mic_input) <= which(levels(mic_S) == mic_S)), "S", + ifelse(isTRUE(which(levels(mic_input) == mic_input) >= which(levels(mic_R) == mic_R)), "R", + ifelse(!is.na(get_record$breakpoint_S) & !is.na(get_record$breakpoint_R), "I", + NA_character_))) } else if (method == "disk") { - new_rsi[i] <- case_when(isTRUE(as.double(x[i]) >= as.double(get_record$breakpoint_S)) ~ "S", - isTRUE(as.double(x[i]) <= as.double(get_record$breakpoint_R)) ~ "R", - !is.na(get_record$breakpoint_S) & !is.na(get_record$breakpoint_R) ~ "I", - TRUE ~ NA_character_) + new_rsi[i] <- ifelse(isTRUE(as.double(x[i]) >= as.double(get_record$breakpoint_S)), "S", + ifelse(isTRUE(as.double(x[i]) <= as.double(get_record$breakpoint_R)), "R", + ifelse(!is.na(get_record$breakpoint_S) & !is.na(get_record$breakpoint_R), "I", + NA_character_))) } } } if (warned == FALSE) { - message(green("OK.")) + message(font_green("OK.")) } structure(.Data = factor(new_rsi, levels = c("S", "I", "R"), ordered = TRUE), class = c("rsi", "ordered", "factor")) @@ -537,7 +529,6 @@ is.rsi.eligible <- function(x, threshold = 0.05) { #' @exportMethod print.rsi #' @export -#' @importFrom dplyr %>% #' @noRd print.rsi <- function(x, ...) { cat("Class 'rsi'\n") @@ -570,7 +561,6 @@ summary.rsi <- function(object, ...) { #' @exportMethod plot.rsi #' @export -#' @importFrom dplyr %>% group_by summarise filter mutate if_else n_distinct #' @importFrom graphics plot text #' @noRd plot.rsi <- function(x, @@ -626,7 +616,6 @@ plot.rsi <- function(x, #' @exportMethod barplot.rsi #' @export -#' @importFrom dplyr %>% group_by summarise #' @importFrom graphics barplot axis par #' @noRd barplot.rsi <- function(height, @@ -660,14 +649,13 @@ barplot.rsi <- function(height, } #' @importFrom pillar pillar_shaft -#' @importFrom crayon bgGreen bgYellow bgRed black white #' @export pillar_shaft.rsi <- function(x, ...) { out <- trimws(format(x)) - out[is.na(x)] <- pillar::style_subtle(" NA") - out[x == "S"] <- bgGreen(white(" S ")) - out[x == "I"] <- bgYellow(black(" I ")) - out[x == "R"] <- bgRed(white(" R ")) + out[is.na(x)] <- font_subtle(" NA") + out[x == "S"] <- font_green_bg(font_white(" S ")) + out[x == "I"] <- font_yellow_bg(font_black(" I ")) + out[x == "R"] <- font_red_bg(font_white(" R ")) pillar::new_pillar_shaft_simple(out, align = "left", width = 3) } diff --git a/R/rsi_calc.R b/R/rsi_calc.R index 896ec5ff7..2347638de 100755 --- a/R/rsi_calc.R +++ b/R/rsi_calc.R @@ -19,27 +19,13 @@ # Visit our website for more info: https://msberends.gitlab.io/AMR. # # ==================================================================== # -#' @importFrom rlang enquos as_label dots2vars <- function(...) { # this function is to give more informative output about # variable names in count_* and proportion_* functions - paste( - unlist( - lapply(enquos(...), - function(x) { - l <- as_label(x) - if (l != ".") { - l - } else { - character(0) - } - }) - ), - collapse = ", ") + dots <- substitute(list(...)) + paste(as.character(dots)[2:length(dots)], collapse = ", ") } -#' @importFrom dplyr %>% pull all_vars any_vars filter_all funs mutate_all -#' @importFrom cleaner percentage rsi_calc <- function(..., ab_result, minimum = 0, @@ -72,10 +58,10 @@ rsi_calc <- function(..., dots <- dots[dots != "."] if (length(dots) == 0 | all(dots == "df")) { # for complete data.frames, like example_isolates %>% select(amcl, gent) %>% proportion_S() - # and the old rsi function, that has "df" as name of the first parameter + # and the old rsi function, which has "df" as name of the first parameter x <- dots_df } else { - x <- dots_df[, dots] + x <- dots_df[, dots[dots %in% colnames(dots_df)]] } } else if (ndots == 1) { # only 1 variable passed (can also be data.frame), like: proportion_S(example_isolates$amcl) and example_isolates$amcl %>% proportion_S() @@ -85,8 +71,8 @@ rsi_calc <- function(..., x <- NULL try(x <- as.data.frame(dots), silent = TRUE) if (is.null(x)) { - # support for: with(example_isolates, proportion_S(amcl, gent)) - x <- as.data.frame(rlang::list2(...)) + # support for example_isolates %>% group_by(hospital_id) %>% summarise(amox = susceptibility(GEN, AMX)) + x <- as.data.frame(list(...)) } } @@ -113,7 +99,7 @@ rsi_calc <- function(..., # this will give a warning for invalid results, of all input columns (so only 1 warning) rsi_integrity_check <- as.rsi(rsi_integrity_check) } - + if (only_all_tested == TRUE) { # THE NUMBER OF ISOLATES WHERE *ALL* ABx ARE S/I/R x <- apply(X = x %>% mutate_all(as.integer), @@ -128,8 +114,8 @@ rsi_calc <- function(..., other_values_filter <- base::apply(x, 1, function(y) { base::all(y %in% other_values) & base::any(is.na(y)) }) - numerator <- x %>% filter_all(any_vars(. %in% ab_result)) %>% nrow() - denominator <- x %>% filter(!other_values_filter) %>% nrow() + numerator <- sum(as.logical(by(x, seq_len(nrow(x)), function(row) any(unlist(row) %in% ab_result, na.rm = TRUE)))) + denominator <- nrow(x[!other_values_filter, ]) } } else { # x is not a data.frame @@ -167,9 +153,7 @@ rsi_calc <- function(..., } } -#' @importFrom dplyr %>% summarise_if mutate select everything bind_rows arrange -#' @importFrom tidyr pivot_longer -rsi_calc_df <- function(type, # "proportion" or "count" +rsi_calc_df <- function(type, # "proportion", "count" or "both" data, translate_ab = "name", language = get_locale(), @@ -199,63 +183,106 @@ rsi_calc_df <- function(type, # "proportion" or "count" if (as.character(translate_ab) %in% c("TRUE", "official")) { translate_ab <- "name" } + + # select only groups and antibiotics + if (has_groups(data)) { + data_has_groups <- TRUE + groups <- setdiff(names(get_groups(data)), ".rows") # get_groups is from poorman.R + data <- data[, c(groups, colnames(data)[sapply(data, is.rsi)]), drop = FALSE] + } else { + data_has_groups <- FALSE + data <- data[, colnames(data)[sapply(data, is.rsi)], drop = FALSE] + } - get_summaryfunction <- function(int, type) { - # look for proportion_S, count_S, etc: - int_fn <- get(paste0(type, "_", int), envir = asNamespace("AMR")) - - suppressWarnings( - if (type == "proportion") { - summ <- summarise_if(.tbl = data, - .predicate = is.rsi, - .funs = int_fn, - minimum = minimum, - as_percent = as_percent) - } else if (type == "count") { - summ <- summarise_if(.tbl = data, - .predicate = is.rsi, - .funs = int_fn) + data <- as.data.frame(data, stringsAsFactors = FALSE) + if (isTRUE(combine_SI) | isTRUE(combine_IR)) { + for (i in seq_len(ncol(data))) { + if (is.rsi(data[, i, drop = TRUE])) { + data[, i] <- as.character(data[, i, drop = TRUE]) + if (isTRUE(combine_SI)) { + data[, i] <- gsub("(I|S)", "SI", data[, i, drop = TRUE]) + } else if (isTRUE(combine_IR)) { + data[, i] <- gsub("(I|R)", "IR", data[, i, drop = TRUE]) + } } - ) - summ %>% - mutate(interpretation = int) %>% - select(interpretation, everything()) + } } - resS <- get_summaryfunction("S", type) - resI <- get_summaryfunction("I", type) - resR <- get_summaryfunction("R", type) - resSI <- get_summaryfunction("SI", type) - resIR <- get_summaryfunction("IR", type) - data.groups <- group_vars(data) + sum_it <- function(.data) { + out <- data.frame(antibiotic = character(0), + interpretation = character(0), + value = double(0), + isolates <- integer(0), + stringsAsFactors = FALSE) + if (data_has_groups) { + group_values <- unique(.data[, which(colnames(.data) %in% groups), drop = FALSE]) + rownames(group_values) <- NULL + .data <- .data[, which(!colnames(.data) %in% groups), drop = FALSE] + } + for (i in seq_len(ncol(.data))) { + col_results <- as.data.frame(as.matrix(table(.data[, i, drop = TRUE]))) + col_results$interpretation <- rownames(col_results) + col_results$isolates <- col_results[, 1, drop = TRUE] + if (nrow(col_results) > 0) { + if (sum(col_results$isolates, na.rm = TRUE) >= minimum) { + col_results$value <- col_results$isolates / sum(col_results$isolates, na.rm = TRUE) + } else { + col_results$value <- rep(NA_real_, NROW(col_results)) + } + out_new <- data.frame(antibiotic = ab_property(colnames(.data)[i], property = translate_ab, language = language), + interpretation = col_results$interpretation, + value = col_results$value, + isolates = col_results$isolates, + stringsAsFactors = FALSE) + if (data_has_groups) { + out_new <- cbind(group_values, out_new) + } + out <- rbind(out, out_new) + } + } + out + } - if (isFALSE(combine_SI) & isFALSE(combine_IR)) { - res <- bind_rows(resS, resI, resR) %>% - mutate(interpretation = factor(interpretation, - levels = c("S", "I", "R"), - ordered = TRUE)) - + # support dplyr groups + apply_group <- function(.data, fn, groups, ...) { + grouped <- split(x = .data, f = lapply(groups, function(x, .data) as.factor(.data[, x]), .data)) + res <- do.call(rbind, unname(lapply(grouped, fn, ...))) + if (any(groups %in% colnames(res))) { + class(res) <- c("grouped_data", class(res)) + attr(res, "groups") <- groups[groups %in% colnames(res)] + } + res + } + + if (data_has_groups) { + out <- apply_group(data, "sum_it", groups) + } else { + out <- sum_it(data) + } + + # apply factors for right sorting in interpretation + if (isTRUE(combine_SI)) { + out$interpretation <- factor(out$interpretation, levels = c("SI", "R"), ordered = TRUE) } else if (isTRUE(combine_IR)) { - res <- bind_rows(resS, resIR) %>% - mutate(interpretation = factor(interpretation, - levels = c("S", "IR"), - ordered = TRUE)) - - } else if (isTRUE(combine_SI)) { - res <- bind_rows(resSI, resR) %>% - mutate(interpretation = factor(interpretation, - levels = c("SI", "R"), - ordered = TRUE)) + out$interpretation <- factor(out$interpretation, levels = c("S", "IR"), ordered = TRUE) + } else { + out$interpretation <- as.rsi(out$interpretation) } - res <- res %>% - pivot_longer(-c(interpretation, data.groups), names_to = "antibiotic") %>% - select(antibiotic, everything()) %>% - arrange(antibiotic, interpretation) - - if (!translate_ab == FALSE) { - res <- res %>% mutate(antibiotic = ab_property(antibiotic, property = translate_ab, language = language)) + if (data_has_groups) { + # ordering by the groups and two more: "antibiotic" and "interpretation" + out <- out[do.call("order", out[, seq_len(length(groups) + 2)]), ] + } else { + out <- out[order(out$antibiotic, out$interpretation), ] } - as.data.frame(res, stringsAsFactors = FALSE) + if (type == "proportion") { + out <- subset(out, select = -c(isolates)) + } else if (type == "count") { + out$value <- out$isolates + out <- subset(out, select = -c(isolates)) + } + + rownames(out) <- NULL + out } diff --git a/R/rsi_df.R b/R/rsi_df.R index 5e38d7822..e2af8191f 100644 --- a/R/rsi_df.R +++ b/R/rsi_df.R @@ -29,28 +29,14 @@ rsi_df <- function(data, combine_SI = TRUE, combine_IR = FALSE) { - proportions <- rsi_calc_df(type = "proportion", - data = data, - translate_ab = translate_ab, - language = language, - minimum = minimum, - as_percent = as_percent, - combine_SI = combine_SI, - combine_IR = combine_IR, - combine_SI_missing = missing(combine_SI)) - - counts <- rsi_calc_df(type = "count", - data = data, - translate_ab = FALSE, - language = "en", - minimum = minimum, - as_percent = as_percent, - combine_SI = combine_SI, - combine_IR = combine_IR, - combine_SI_missing = missing(combine_SI)) - - data.frame(proportions, - isolates = counts$value, - stringsAsFactors = FALSE) + rsi_calc_df(type = "both", + data = data, + translate_ab = translate_ab, + language = language, + minimum = minimum, + as_percent = as_percent, + combine_SI = combine_SI, + combine_IR = combine_IR, + combine_SI_missing = missing(combine_SI)) } diff --git a/R/sysdata.rda b/R/sysdata.rda index 3f958c2fa..644ed2681 100644 Binary files a/R/sysdata.rda and b/R/sysdata.rda differ diff --git a/R/vctrs.R b/R/tidyverse.R similarity index 85% rename from R/vctrs.R rename to R/tidyverse.R index a6d4132bd..ba6e65eb9 100644 --- a/R/vctrs.R +++ b/R/tidyverse.R @@ -19,15 +19,35 @@ # Visit our website for more info: https://msberends.gitlab.io/AMR. # # ==================================================================== # -#' `vctrs` methods +#' Methods for tidyverse #' #' These methods are needed to support methods used by the tidyverse, like joining and transforming data, with new classes that come with this package. #' @inheritSection lifecycle Stable lifecycle #' @inheritSection AMR Read more on our website! #' @keywords internal -#' @name AMR-vctrs +#' @name AMR-tidyverse NULL +#' @rdname AMR-tidyverse +#' @exportMethod scale_type.mo +#' @export +scale_type.mo <- function(x) { + # fix for: + # "Don't know how to automatically pick scale for object of type mo. Defaulting to continuous." + # "Error: Discrete value supplied to continuous scale" + "discrete" +} + +#' @rdname AMR-tidyverse +#' @exportMethod scale_type.ab +#' @export +scale_type.ab <- function(x) { + # fix for: + # "Don't know how to automatically pick scale for object of type mo. Defaulting to continuous." + # "Error: Discrete value supplied to continuous scale" + "discrete" +} + # Class mo ---------------------------------------------------------------- @@ -46,7 +66,7 @@ vec_ptype_full.mo <- function(x, ...) { "mo" } -#' @rdname AMR-vctrs +#' @rdname AMR-tidyverse #' @export vec_ptype2.mo <- function(x, y, ...) { UseMethod("vec_ptype2.mo", y) @@ -65,13 +85,14 @@ vec_ptype2.mo.character <- function(x, y, ...) { } #' @method vec_ptype2.character mo +#' @exportMethod vec_ptype2.character.mo #' @importFrom vctrs vec_ptype2.character #' @export vec_ptype2.character.mo <- function(x, y, ...) { y } -#' @rdname AMR-vctrs +#' @rdname AMR-tidyverse #' @export vec_cast.mo <- function(x, to, ...) { UseMethod("vec_cast.mo") @@ -96,12 +117,11 @@ vec_cast.mo.default <- function(x, to, ...) { vec_default_cast(x, to) } -# @method vec_cast.character mo +#' @method vec_cast.character mo #' @exportMethod vec_cast.character.mo -#' @importFrom vctrs vec_cast +#' @importFrom vctrs vec_cast vec_cast.character #' @export vec_cast.character.mo <- function(x, to, ...) { - # purrr::map_chr(x, stringr::str_c, collapse = " ") unclass(x) } @@ -123,7 +143,7 @@ vec_ptype_full.ab <- function(x, ...) { "ab" } -#' @rdname AMR-vctrs +#' @rdname AMR-tidyverse #' @export vec_ptype2.ab <- function(x, y, ...) { UseMethod("vec_ptype2.ab", y) @@ -142,13 +162,14 @@ vec_ptype2.ab.character <- function(x, y, ...) { } #' @method vec_ptype2.character ab +#' @exportMethod vec_ptype2.character.ab #' @importFrom vctrs vec_ptype2.character #' @export vec_ptype2.character.ab <- function(x, y, ...) { y } -#' @rdname AMR-vctrs +#' @rdname AMR-tidyverse #' @export vec_cast.ab <- function(x, to, ...) { UseMethod("vec_cast.ab") @@ -173,12 +194,11 @@ vec_cast.ab.default <- function(x, to, ...) { vec_default_cast(x, to) } -# @method vec_cast.character ab +#' @method vec_cast.character ab #' @exportMethod vec_cast.character.ab -#' @importFrom vctrs vec_cast +#' @importFrom vctrs vec_cast vec_cast.character #' @export vec_cast.character.ab <- function(x, to, ...) { - # purrr::map_chr(x, stringr::str_c, collapse = " ") unclass(x) } diff --git a/R/translate.R b/R/translate.R index d0dcca120..8a369f6ab 100755 --- a/R/translate.R +++ b/R/translate.R @@ -21,15 +21,15 @@ #' Translate strings from AMR package #' -#' For language-dependent output of AMR functions, like [mo_name()], [mo_type()] and [ab_name()]. +#' For language-dependent output of AMR functions, like [mo_name()], [mo_gramstain()], [mo_type()] and [ab_name()]. #' @inheritSection lifecycle Stable lifecycle #' @details Strings will be translated to foreign languages if they are defined in a local translation file. Additions to this file can be suggested at our repository. The file can be found here: . #' -#' Currently supported languages can be found if running: `unique(AMR:::translations_file$lang)`. +#' Currently supported languages are (besides English): `r paste(sort(gsub(";.*", "", ISOcodes::ISO_639_2[which(ISOcodes::ISO_639_2$Alpha_2 %in% unique(AMR:::translations_file$lang)), "Name"])), collapse = ", ")`. Not all these languages currently have translations available for all antimicrobial agents and colloquial microorganism names. #' #' Please suggest your own translations [by creating a new issue on our repository](https://gitlab.com/msberends/AMR/issues/new?issue[title]=Translation\%20suggestion). #' -#' This file will be read by all functions where a translated output can be desired, like all [mo_property()] functions ([mo_fullname()], [mo_type()], etc.). +#' This file will be read by all functions where a translated output can be desired, like all [mo_property()] functions ([mo_name()], [mo_gramstain()], [mo_type()], etc.). #' #' The system language will be used at default, if that language is supported. The system language can be overwritten with `Sys.setenv(AMR_locale = yourlanguage)`. #' @inheritSection AMR Read more on our website! @@ -68,7 +68,7 @@ get_locale <- function() { if (!is.null(getOption("AMR_locale", default = NULL))) { return(getOption("AMR_locale")) } - + lang <- Sys.getlocale("LC_COLLATE") # Check the locale settings for a start with one of these languages: @@ -82,13 +82,13 @@ get_locale <- function() { "de" } else if (grepl("^(Dutch|Nederlands|nl_|NL_)", lang, ignore.case = FALSE)) { "nl" - } else if (grepl("^(Spanish|Espa.ol|es_|ES_)", lang, ignore.case = FALSE)) { + } else if (grepl("^(Spanish|Espa.+ol|es_|ES_)", lang, ignore.case = FALSE)) { "es" } else if (grepl("^(Italian|Italiano|it_|IT_)", lang, ignore.case = FALSE)) { "it" - } else if (grepl("^(French|Fran.ais|fr_|FR_)", lang, ignore.case = FALSE)) { + } else if (grepl("^(French|Fran.+ais|fr_|FR_)", lang, ignore.case = FALSE)) { "fr" - } else if (grepl("^(Portuguese|Portugu.s|pt_|PT_)", lang, ignore.case = FALSE)) { + } else if (grepl("^(Portuguese|Portugu.+s|pt_|PT_)", lang, ignore.case = FALSE)) { "pt" } else { # other language -> set to English @@ -97,9 +97,8 @@ get_locale <- function() { } # translate strings based on inst/translations.tsv -#' @importFrom dplyr %>% filter translate_AMR <- function(from, language = get_locale(), only_unknown = FALSE) { - + if (is.null(language)) { return(from) } @@ -115,9 +114,9 @@ translate_AMR <- function(from, language = get_locale(), only_unknown = FALSE) { call. = FALSE) } - df_trans <- df_trans %>% filter(lang == language) + df_trans <- df_trans %>% subset(lang == language) if (only_unknown == TRUE) { - df_trans <- df_trans %>% filter(pattern %like% "unknown") + df_trans <- df_trans %>% subset(pattern %like% "unknown") } # default case sensitive if value if 'ignore.case' is missing: diff --git a/R/zzz.R b/R/zzz.R index 27090fb94..19062ce3e 100755 --- a/R/zzz.R +++ b/R/zzz.R @@ -19,18 +19,16 @@ # Visit our website for more info: https://msberends.gitlab.io/AMR. # # ==================================================================== # -#' @importFrom data.table as.data.table setkey .onLoad <- function(libname, pkgname) { # get new functions not available in older versions of R backports::import(pkgname) - # register data - assign(x = "microorganismsDT", - value = make_DT(), + assign(x = "MO_lookup", + value = create_MO_lookup(), envir = asNamespace("AMR")) - assign(x = "microorganisms.oldDT", - value = make_oldDT(), + assign(x = "MO.old_lookup", + value = create_MO.old_lookup(), envir = asNamespace("AMR")) assign(x = "mo_codes_v0.5.0", @@ -41,52 +39,43 @@ # maybe add survey later: "https://www.surveymonkey.com/r/AMR_for_R" -#' @importFrom data.table as.data.table setkey -#' @importFrom dplyr %>% mutate case_when -make_DT <- function() { - microorganismsDT <- AMR::microorganisms %>% - mutate(kingdom_index = case_when(kingdom == "Bacteria" ~ 1, - kingdom == "Fungi" ~ 2, - kingdom == "Protozoa" ~ 3, - kingdom == "Archaea" ~ 4, - TRUE ~ 99), - # for fullname_lower: keep only dots, letters, - # numbers, slashes, spaces and dashes - fullname_lower = gsub("[^.a-z0-9/ \\-]+", "", - # use this paste instead of `fullname` to - # work with Viridans Group Streptococci, etc. - tolower(trimws(ifelse(genus == "", - fullname, - paste(genus, species, subspecies))))), - # add a column with only "e coli" like combinations - g_species = gsub("^([a-z])[a-z]+ ([a-z]+) ?.*", "\\1 \\2", fullname_lower)) %>% - as.data.table() +create_MO_lookup <- function() { + MO_lookup <- AMR::microorganisms + + MO_lookup$kingdom_index <- 99 + MO_lookup[which(MO_lookup$kingdom == "Bacteria" | MO_lookup$mo == "UNKNOWN"), "kingdom_index"] <- 1 + MO_lookup[which(MO_lookup$kingdom == "Fungi"), "kingdom_index"] <- 2 + MO_lookup[which(MO_lookup$kingdom == "Protozoa"), "kingdom_index"] <- 3 + MO_lookup[which(MO_lookup$kingdom == "Archaea"), "kingdom_index"] <- 4 + + # use this paste instead of `fullname` to + # work with Viridans Group Streptococci, etc. + MO_lookup$fullname_lower <- tolower(trimws(paste(MO_lookup$genus, + MO_lookup$species, + MO_lookup$subspecies))) + MO_lookup[MO_lookup$genus == "" | grepl("^[(]unknown ", MO_lookup$fullname), "fullname_lower"] <- tolower(trimws(MO_lookup[MO_lookup$genus == "" | grepl("^[(]unknown ", MO_lookup$fullname), + "fullname"])) + MO_lookup$fullname_lower <- gsub("[^.a-z0-9/ \\-]+", "",MO_lookup$fullname_lower) + + # add a column with only "e coli" like combinations + MO_lookup$g_species <- gsub("^([a-z])[a-z]+ ([a-z]+) ?.*", "\\1 \\2", MO_lookup$fullname_lower) # so arrange data on prevalence first, then kingdom, then full name - setkey(microorganismsDT, - prevalence, - kingdom_index, - fullname_lower) - microorganismsDT + MO_lookup[order(MO_lookup$prevalence, MO_lookup$kingdom_index, MO_lookup$fullname_lower),] } -#' @importFrom data.table as.data.table setkey -#' @importFrom dplyr %>% mutate -make_oldDT <- function() { - microorganisms.oldDT <- AMR::microorganisms.old %>% - mutate( - # for fullname_lower: keep only dots, letters, - # numbers, slashes, spaces and dashes - fullname_lower = gsub("[^.a-z0-9/ \\-]+", "", tolower(fullname)), - # add a column with only "e coli" like combinations - g_species = gsub("^([a-z])[a-z]+ ([a-z]+) ?.*", "\\1 \\2", fullname_lower)) %>% - as.data.table() +create_MO.old_lookup <- function() { + MO.old_lookup <- AMR::microorganisms.old + + # use this paste instead of `fullname` to + # work with Viridans Group Streptococci, etc. + MO.old_lookup$fullname_lower <- gsub("[^.a-z0-9/ \\-]+", "", tolower(trimws(MO.old_lookup$fullname))) + + # add a column with only "e coli" like combinations + MO.old_lookup$g_species <- gsub("^([a-z])[a-z]+ ([a-z]+) ?.*", "\\1 \\2", MO.old_lookup$fullname_lower) # so arrange data on prevalence first, then full name - setkey(microorganisms.oldDT, - prevalence, - fullname) - microorganisms.oldDT + MO.old_lookup[order(MO.old_lookup$prevalence, MO.old_lookup$fullname_lower),] } make_trans_tbl <- function() { diff --git a/README.md b/README.md index 9006a3f0f..d17b51112 100755 --- a/README.md +++ b/README.md @@ -3,38 +3,14 @@ # `AMR` (for R) -### Not a developer? Then please visit our website [https://msberends.gitlab.io/AMR](https://msberends.gitlab.io/AMR) to read about this package. +This is the development source of the `AMR` package for R. Not a developer? Then please visit our website [https://msberends.gitlab.io/AMR](https://msberends.gitlab.io/AMR) to read about this package. -**It contains documentation about all of the included functions and also a comprehensive tutorial about how to conduct AMR analysis.** +*NOTE: this source code is on GitLab (https://gitlab.com/msberends/AMR) and GitHub (https://github.com/msberends/AMR).* -## Development source - -*NOTE: the original source code is on GitLab (https://gitlab.com/msberends/AMR). There is a mirror repository on GitHub (https://github.com/msberends/AMR). As the mirror process is automated by GitLab, both repositories always contain the latest changes.* - -This is the **development source** of `AMR`, a free and open-source [R package](https://www.r-project.org) to simplify the analysis and prediction of Antimicrobial Resistance (AMR) and to work with microbial and antimicrobial properties by using evidence-based methods. - -## Authors -Matthijs S. Berends1,2, -Christian F. Luz1, -Alex W. Friedrich1, -Bhanu N.M. Sinha1, -Casper J. Albers3, -Corinna Glasner1 - -1 Department of Medical Microbiology, University of Groningen, University Medical Center Groningen, Groningen, the Netherlands - [rug.nl](http://www.rug.nl) [umcg.nl](http://www.umcg.nl)
-2 Certe Medical Diagnostics & Advice, Groningen, the Netherlands - [certe.nl](http://www.certe.nl)
-3 Heymans Institute for Psychological Research, University of Groningen, Groningen, the Netherlands - [rug.nl](http://www.rug.nl)
- - - - - - - -## How to get this package +### How to get this package Please see [our website](https://msberends.gitlab.io/AMR/#get-this-package). -## Copyright +### Copyright This R package is licensed under the [GNU General Public License (GPL) v2.0](https://gitlab.com/msberends/AMR/blob/master/LICENSE). In a nutshell, this means that this package: diff --git a/_pkgdown.yml b/_pkgdown.yml index eec943c5a..37c5c0861 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -162,10 +162,9 @@ reference: 'like' function can be useful: `if (x %like% y) {...}`. contents: - "`get_locale`" - - "`extended-functions`" - "`like`" - "`reexports`" - - "`AMR-vctrs`" + - "`AMR-tidyverse`" - title: Deprecated functions desc: > These functions are deprecated, meaning that they will still diff --git a/data-raw/country_analysis.R b/data-raw/country_analysis.R index 941c434f4..4d37c19e7 100644 --- a/data-raw/country_analysis.R +++ b/data-raw/country_analysis.R @@ -52,7 +52,7 @@ unique_ip <- unique(data$ipaddress) ip_tbl <- GET_df(unique_ip[1]) p <- AMR:::progress_estimated(n = length(unique_ip) - 1, min_time = 0) for (i in 2:length(unique_ip)) { - p$tick()$print() + p$tick() ip_tbl <- ip_tbl %>% bind_rows(GET_df(unique_ip[i])) } diff --git a/data-raw/eucast_rules.tsv b/data-raw/eucast_rules.tsv index 37bee0b9e..21ed2e1a5 100644 --- a/data-raw/eucast_rules.tsv +++ b/data-raw/eucast_rules.tsv @@ -128,24 +128,24 @@ genus_species is Kingella kingae TCY R DOX R Kingella kingae Breakpoints genus_species is Burkholderia pseudomallei TCY S DOX S Burkholderia pseudomallei Breakpoints genus_species is Burkholderia pseudomallei TCY I DOX I Burkholderia pseudomallei Breakpoints genus_species is Burkholderia pseudomallei TCY R DOX R Burkholderia pseudomallei Breakpoints -order is Enterobacterales PEN, glycopeptides, FUS, macrolides, LIN, streptogramins, RIF, DAP, LNZ R Table 01: Intrinsic resistance in Enterobacteriaceae Expert Rules -fullname like ^Citrobacter (koseri|amalonaticus|sedlakii|farmeri|rodentium) aminopenicillins, TIC R Table 01: Intrinsic resistance in Enterobacteriaceae Expert Rules -fullname like ^Citrobacter (freundii|braakii|murliniae|werkmanii|youngae) aminopenicillins, AMC, CZO, FOX R Table 01: Intrinsic resistance in Enterobacteriaceae Expert Rules -genus_species is Enterobacter cloacae aminopenicillins, AMC, CZO, FOX R Table 01: Intrinsic resistance in Enterobacteriaceae Expert Rules -genus_species is Enterobacter aerogenes aminopenicillins, AMC, CZO, FOX R Table 01: Intrinsic resistance in Enterobacteriaceae Expert Rules -genus_species is Escherichia hermanni aminopenicillins, TIC R Table 01: Intrinsic resistance in Enterobacteriaceae Expert Rules -genus_species is Hafnia alvei aminopenicillins, AMC, CZO, FOX R Table 01: Intrinsic resistance in Enterobacteriaceae Expert Rules -genus is Klebsiella aminopenicillins, TIC R Table 01: Intrinsic resistance in Enterobacteriaceae Expert Rules -genus_species is Morganella morganii aminopenicillins, AMC, CZO, tetracyclines, polymyxins, NIT R Table 01: Intrinsic resistance in Enterobacteriaceae Expert Rules -genus_species is Proteus mirabilis tetracyclines, TGC, polymyxins, NIT R Table 01: Intrinsic resistance in Enterobacteriaceae Expert Rules -genus_species is Proteus penneri aminopenicillins, CZO, CXM, tetracyclines, TGC, polymyxins, NIT R Table 01: Intrinsic resistance in Enterobacteriaceae Expert Rules -genus_species is Proteus vulgaris aminopenicillins, CZO, CXM, tetracyclines, TGC, polymyxins, NIT R Table 01: Intrinsic resistance in Enterobacteriaceae Expert Rules -genus_species is Providencia rettgeri aminopenicillins, AMC, CZO, CXM, tetracyclines, TGC, polymyxins, NIT R Table 01: Intrinsic resistance in Enterobacteriaceae Expert Rules -genus_species is Providencia stuartii aminopenicillins, AMC, CZO, CXM, tetracyclines, TGC, polymyxins, NIT R Table 01: Intrinsic resistance in Enterobacteriaceae Expert Rules -genus is Raoultella aminopenicillins, TIC R Table 01: Intrinsic resistance in Enterobacteriaceae Expert Rules -genus_species is Serratia marcescens aminopenicillins, AMC, CZO, FOX, CXM, DOX, TCY, polymyxins, NIT R Table 01: Intrinsic resistance in Enterobacteriaceae Expert Rules -genus_species is Yersinia enterocolitica aminopenicillins, AMC, TIC, CZO, FOX R Table 01: Intrinsic resistance in Enterobacteriaceae Expert Rules -genus_species is Yersinia pseudotuberculosis PLB, COL R Table 01: Intrinsic resistance in Enterobacteriaceae Expert Rules +order is Enterobacterales PEN, glycopeptides, FUS, macrolides, LIN, streptogramins, RIF, DAP, LNZ R Table 01: Intrinsic resistance in Enterobacterales (at the time: Enterobacteriaceae) Expert Rules +fullname like ^Citrobacter (koseri|amalonaticus|sedlakii|farmeri|rodentium) aminopenicillins, TIC R Table 01: Intrinsic resistance in Enterobacterales (at the time: Enterobacteriaceae) Expert Rules +fullname like ^Citrobacter (freundii|braakii|murliniae|werkmanii|youngae) aminopenicillins, AMC, CZO, FOX R Table 01: Intrinsic resistance in Enterobacterales (at the time: Enterobacteriaceae) Expert Rules +genus_species is Enterobacter cloacae aminopenicillins, AMC, CZO, FOX R Table 01: Intrinsic resistance in Enterobacterales (at the time: Enterobacteriaceae) Expert Rules +genus_species is Enterobacter aerogenes aminopenicillins, AMC, CZO, FOX R Table 01: Intrinsic resistance in Enterobacterales (at the time: Enterobacteriaceae) Expert Rules +genus_species is Escherichia hermanni aminopenicillins, TIC R Table 01: Intrinsic resistance in Enterobacterales (at the time: Enterobacteriaceae) Expert Rules +genus_species is Hafnia alvei aminopenicillins, AMC, CZO, FOX R Table 01: Intrinsic resistance in Enterobacterales (at the time: Enterobacteriaceae) Expert Rules +genus is Klebsiella aminopenicillins, TIC R Table 01: Intrinsic resistance in Enterobacterales (at the time: Enterobacteriaceae) Expert Rules +genus_species is Morganella morganii aminopenicillins, AMC, CZO, tetracyclines, polymyxins, NIT R Table 01: Intrinsic resistance in Enterobacterales (at the time: Enterobacteriaceae) Expert Rules +genus_species is Proteus mirabilis tetracyclines, TGC, polymyxins, NIT R Table 01: Intrinsic resistance in Enterobacterales (at the time: Enterobacteriaceae) Expert Rules +genus_species is Proteus penneri aminopenicillins, CZO, CXM, tetracyclines, TGC, polymyxins, NIT R Table 01: Intrinsic resistance in Enterobacterales (at the time: Enterobacteriaceae) Expert Rules +genus_species is Proteus vulgaris aminopenicillins, CZO, CXM, tetracyclines, TGC, polymyxins, NIT R Table 01: Intrinsic resistance in Enterobacterales (at the time: Enterobacteriaceae) Expert Rules +genus_species is Providencia rettgeri aminopenicillins, AMC, CZO, CXM, tetracyclines, TGC, polymyxins, NIT R Table 01: Intrinsic resistance in Enterobacterales (at the time: Enterobacteriaceae) Expert Rules +genus_species is Providencia stuartii aminopenicillins, AMC, CZO, CXM, tetracyclines, TGC, polymyxins, NIT R Table 01: Intrinsic resistance in Enterobacterales (at the time: Enterobacteriaceae) Expert Rules +genus is Raoultella aminopenicillins, TIC R Table 01: Intrinsic resistance in Enterobacterales (at the time: Enterobacteriaceae) Expert Rules +genus_species is Serratia marcescens aminopenicillins, AMC, CZO, FOX, CXM, DOX, TCY, polymyxins, NIT R Table 01: Intrinsic resistance in Enterobacterales (at the time: Enterobacteriaceae) Expert Rules +genus_species is Yersinia enterocolitica aminopenicillins, AMC, TIC, CZO, FOX R Table 01: Intrinsic resistance in Enterobacterales (at the time: Enterobacteriaceae) Expert Rules +genus_species is Yersinia pseudotuberculosis PLB, COL R Table 01: Intrinsic resistance in Enterobacterales (at the time: Enterobacteriaceae) Expert Rules genus one_of Achromobacter, Acinetobacter, Alcaligenes, Bordatella, Burkholderia, Elizabethkingia, Flavobacterium, Ochrobactrum, Pseudomonas, Stenotrophomonas PEN, FOX, CXM, glycopeptides, FUS, macrolides, LIN, streptogramins, RIF, DAP, LNZ R Table 02: Intrinsic resistance in non-fermentative Gram-negative bacteria Expert Rules genus_species is Acinetobacter baumannii aminopenicillins, AMC, CZO, CTX, CRO, ATM, ETP, TMP, FOS, DOX, TCY R Table 02: Intrinsic resistance in non-fermentative Gram-negative bacteria Expert Rules genus_species is Acinetobacter pittii aminopenicillins, AMC, CZO, CTX, CRO, ATM, ETP, TMP, FOS, DOX, TCY R Table 02: Intrinsic resistance in non-fermentative Gram-negative bacteria Expert Rules diff --git a/data-raw/internals.R b/data-raw/internals.R index a247a0253..f3493fa50 100644 --- a/data-raw/internals.R +++ b/data-raw/internals.R @@ -70,15 +70,14 @@ rm(translations_file) rm(microorganisms.translation) # Save to raw data to repository ---- -library(dplyr, warn.conflicts = FALSE, quietly = TRUE) usethis::ui_done(paste0("Saving raw data to {usethis::ui_value('/data-raw/')}")) devtools::load_all(quiet = TRUE) # give official names to ABs and MOs -write.table(rsi_translation %>% mutate(ab = ab_name(ab), mo = mo_name(mo)), +write.table(dplyr::mutate(rsi_translation, ab = ab_name(ab), mo = mo_name(mo)), "data-raw/rsi_translation.txt", sep = "\t", na = "", row.names = FALSE) -write.table(microorganisms %>% mutate_if(~!is.numeric(.), as.character), +write.table(dplyr::mutate_if(microorganisms, ~!is.numeric(.), as.character), "data-raw/microorganisms.txt", sep = "\t", na = "", row.names = FALSE) -write.table(antibiotics %>% mutate_if(~!is.numeric(.), as.character), +write.table(dplyr::mutate_if(antibiotics, ~!is.numeric(.), as.character), "data-raw/antibiotics.txt", sep = "\t", na = "", row.names = FALSE) -write.table(antivirals %>% mutate_all(as.character), +write.table(dplyr::mutate_all(antivirals, as.character), "data-raw/antivirals.txt", sep = "\t", na = "", row.names = FALSE) diff --git a/R/extended.R b/data-raw/poorman_prepend.R similarity index 56% rename from R/extended.R rename to data-raw/poorman_prepend.R index ccb0ef593..afa3a68f1 100644 --- a/R/extended.R +++ b/data-raw/poorman_prepend.R @@ -19,30 +19,22 @@ # Visit our website for more info: https://msberends.gitlab.io/AMR. # # ==================================================================== # -#' Extended functions -#' -#' These functions are extensions of functions in other packages. -#' @inheritSection lifecycle Stable lifecycle -#' @inheritSection AMR Read more on our website! -#' @export -#' @keywords internal -#' @name extended-functions -#' @rdname extended-functions -#' @exportMethod scale_type.mo -#' @export -scale_type.mo <- function(x) { - # fix for: - # "Don't know how to automatically pick scale for object of type mo. Defaulting to continuous." - # "Error: Discrete value supplied to continuous scale" - "discrete" -} +# ------------------------------------------------ +# THIS FILE WAS CREATED AUTOMATICALLY! +# Source file: data-raw/reproduction_of_poorman.R +# ------------------------------------------------ + +# Poorman: a package to replace all dplyr functions with base R so we can lose dependency on dplyr. +# These functions were downloaded from https://github.com/nathaneastwood/poorman, +# from this commit: https://github.com/nathaneastwood/poorman/tree/{commit} +# +# All code below was released under MIT license, that permits 'free of charge, to any person obtaining a +# copy of the software and associated documentation files (the "Software"), to deal in the Software +# without restriction, including without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +# is furnished to do so', given that a copyright notice is given in the software. +# +# Copyright notice as found on https://github.com/nathaneastwood/poorman/blob/master/LICENSE on 2 May 2020: +# YEAR: 2020 +# COPYRIGHT HOLDER: Nathan Eastwood -#' @rdname extended-functions -#' @exportMethod scale_type.ab -#' @export -scale_type.ab <- function(x) { - # fix for: - # "Don't know how to automatically pick scale for object of type mo. Defaulting to continuous." - # "Error: Discrete value supplied to continuous scale" - "discrete" -} diff --git a/data-raw/reproduction_of_poorman.R b/data-raw/reproduction_of_poorman.R new file mode 100644 index 000000000..f204d53dd --- /dev/null +++ b/data-raw/reproduction_of_poorman.R @@ -0,0 +1,38 @@ +# get complete filenames of all R files in the GitHub repository of nathaneastwood/poorman + +commit <- "7d76d77f8f7bc663bf30fb5a161abb49801afa17" + +files <- xml2::read_html(paste0("https://github.com/nathaneastwood/poorman/tree/", commit, "/R")) %>% + rvest::html_nodes("table") %>% + rvest::html_table() +files <- files[[1]][,"Name"] + +# remove files with only pkg specific code +files <- files[!files %in% c("zzz.R", "init.R")] +files <- paste0("https://raw.githubusercontent.com/nathaneastwood/poorman/", commit, "/R/", + files[grepl("[.]R$", files)]) + +# add our prepend file, containing info about the source of the data +files <- c("data-raw/poorman_prepend.R", files) + +# read all contents to a character vector +contents <- character(0) +sapply(files, function(file) { + contents <<- c(contents, readLines(file)) + invisible() +}) + +# remove lines starting with "#'" and NULL and write to file +contents <- contents[!grepl("^(#'|NULL|\"_PACKAGE)", contents)] + +# now make it independent on UseMethod, since we will not export these functions +contents <- gsub('UseMethod[(]"(.*?)"[)]', + 'if ("grouped_data" %in% class(.data)) {||| \\1.grouped_data(.data, ...)||| } else {||| \\1.default(.data, ...)||| }', + paste(contents, collapse = "|||"), + perl = TRUE) %>% + # add commit to intro part + gsub("{commit}", commit, ., fixed = TRUE) %>% + strsplit(split = "|||", fixed = TRUE) %>% + unlist() + +writeLines(contents, "R/aa_helper_functions_dplyr.R") diff --git a/docs/404.html b/docs/404.html index e5e247070..29dfcd1f6 100644 --- a/docs/404.html +++ b/docs/404.html @@ -81,7 +81,7 @@ AMR (for R) - 1.1.0.9003 + 1.1.0.9004 diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index 0e47b0325..d0b110e59 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -81,7 +81,7 @@ AMR (for R) - 1.1.0.9003 + 1.1.0.9004 diff --git a/docs/articles/index.html b/docs/articles/index.html index cf817da61..f367bf215 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -81,7 +81,7 @@ AMR (for R) - 1.1.0.9003 + 1.1.0.9004 diff --git a/docs/authors.html b/docs/authors.html index 0d136c182..ee6d3c466 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -81,7 +81,7 @@ AMR (for R) - 1.1.0.9003 + 1.1.0.9004 diff --git a/docs/index.html b/docs/index.html index d5bdb5d97..6e830105a 100644 --- a/docs/index.html +++ b/docs/index.html @@ -43,7 +43,7 @@ AMR (for R) - 1.1.0.9003 + 1.1.0.9004 diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index ea06d7a75..62d7365aa 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -10,7 +10,7 @@ articles: WHONET: WHONET.html benchmarks: benchmarks.html resistance_predict: resistance_predict.html -last_built: 2020-05-01T07:13Z +last_built: 2020-05-16T11:03Z urls: reference: https://msberends.gitlab.io/AMR/reference article: https://msberends.gitlab.io/AMR/articles diff --git a/docs/reference/AMR-deprecated.html b/docs/reference/AMR-deprecated.html index 5fc2f1f38..b0a4bb286 100644 --- a/docs/reference/AMR-deprecated.html +++ b/docs/reference/AMR-deprecated.html @@ -82,7 +82,7 @@ AMR (for R) - 1.1.0 + 1.1.0.9004 @@ -235,9 +235,7 @@

These functions are so-called 'Deprecated'. They will be removed in a future release. Using the functions will give a warning with the name of the function it has been replaced by (if there is one).

-
p.symbol(...)
-
-portion_R(...)
+    
portion_R(...)
 
 portion_IR(...)
 
@@ -277,7 +275,7 @@ The lifecycle of this function is retired
 
 
-

Site built with pkgdown 1.5.0.

+

Site built with pkgdown 1.5.1.

diff --git a/docs/reference/AMR-tidyverse.html b/docs/reference/AMR-tidyverse.html new file mode 100644 index 000000000..88cecd947 --- /dev/null +++ b/docs/reference/AMR-tidyverse.html @@ -0,0 +1,291 @@ + + + + + + + + +Methods for tidyverse — AMR-tidyverse • AMR (for R) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + + +
+ +
+
+ + +
+

These methods are needed to support methods used by the tidyverse, like joining and transforming data, with new classes that come with this package.

+
+ +
scale_type.mo(x)
+
+scale_type.ab(x)
+
+vec_ptype2.mo(x, y, ...)
+
+vec_cast.mo(x, to, ...)
+
+vec_ptype2.ab(x, y, ...)
+
+vec_cast.ab(x, to, ...)
+ + +

Stable lifecycle

+ + + +


+The lifecycle of this function is stable. In a stable function, we are largely happy with the unlying code, and major changes are unlikely. This means that the unlying code will generally evolve by adding new arguments; we will avoid removing arguments or changing the meaning of existing arguments.

+

If the unlying code needs breaking changes, they will occur gradually. To begin with, the function or argument will be deprecated; it will continue to work but will emit an message informing you of the change. Next, typically after at least one newly released version on CRAN, the message will be transformed to an error.

+

Read more on our website!

+ + + +

On our website https://msberends.gitlab.io/AMR you can find a comprehensive tutorial about how to conduct AMR analysis, the complete documentation of all functions (which reads a lot easier than here in R) and an example analysis using WHONET data.

+ +
+ +
+ + + +
+ + + + + + + + diff --git a/docs/reference/antibiotics.html b/docs/reference/antibiotics.html index dc5fc3973..8e3f313c3 100644 --- a/docs/reference/antibiotics.html +++ b/docs/reference/antibiotics.html @@ -82,7 +82,7 @@ AMR (for R) - 1.1.0.9000 + 1.1.0.9004 diff --git a/docs/reference/as.mic.html b/docs/reference/as.mic.html index acecca177..2457654e3 100644 --- a/docs/reference/as.mic.html +++ b/docs/reference/as.mic.html @@ -82,7 +82,7 @@ AMR (for R) - 1.1.0 + 1.1.0.9004 @@ -291,9 +291,8 @@ The lifecycle of this function is stableab = "AMX", guideline = "EUCAST") -plot(mic_data) -barplot(mic_data) -freq(mic_data)
+plot(mic_data) +barplot(mic_data)
@@ -275,7 +275,7 @@ reference_df -

a data.frame to use for extra reference when translating x to a valid mo. See set_mo_source() and get_mo_source() to automate the usage of your own codes (e.g. used in your analysis or organisation).

+

a data.frame to be used for extra reference when translating x to a valid mo. See set_mo_source() and get_mo_source() to automate the usage of your own codes (e.g. used in your analysis or organisation).

... @@ -299,10 +299,10 @@ B_KLBSL_PNMN_RHNS Klebsiella pneumoniae rhinoscleromatis | | | | | | | | - | | | ---> subspecies, a 4-5 letter acronym - | | ----> species, a 4-5 letter acronym - | ----> genus, a 5-7 letter acronym - ----> taxonomic kingdom: A (Archaea), AN (Animalia), B (Bacteria), + | | | ---&gt; subspecies, a 4-5 letter acronym + | | ----&gt; species, a 4-5 letter acronym + | ----&gt; genus, a 5-7 letter acronym + ----&gt; taxonomic kingdom: A (Archaea), AN (Animalia), B (Bacteria), C (Chromista), F (Fungi), P (Protozoa) @@ -455,7 +455,7 @@ This package contains the complete taxonomic tree of almost all microorganisms (
-

Site built with pkgdown 1.5.0.

+

Site built with pkgdown 1.5.1.

diff --git a/docs/reference/as.rsi.html b/docs/reference/as.rsi.html index 3a502a148..7effc3c7d 100644 --- a/docs/reference/as.rsi.html +++ b/docs/reference/as.rsi.html @@ -82,7 +82,7 @@ AMR (for R) - 1.1.0 + 1.1.0.9004 @@ -285,7 +285,7 @@ guideline -

defaults to the latest included EUCAST guideline, run unique(rsi_translation$guideline) for all options

+

defaults to the latest included EUCAST guideline, see Details for all options

uti @@ -306,8 +306,9 @@

Ordered factor with new class rsi

Details

-

Run unique(rsi_translation$guideline) for a list of all supported guidelines. The repository of this package contains this machine readable version of these guidelines.

-

These guidelines are machine readable, since .

+

When using as.rsi() on untransformed data, the data will be cleaned to only contain values S, I and R. When using the function on data with class mic (using as.mic()) or class disk (using as.disk()), the data will be interpreted based on the guideline set with the guideline parameter.

+

Supported guidelines to be used as input for the guideline parameter are: "CLSI 2010", "CLSI 2011", "CLSI 2012", "CLSI 2013", "CLSI 2014", "CLSI 2015", "CLSI 2016", "CLSI 2017", "CLSI 2018", "CLSI 2019", "EUCAST 2011", "EUCAST 2012", "EUCAST 2013", "EUCAST 2014", "EUCAST 2015", "EUCAST 2016", "EUCAST 2017", "EUCAST 2018", "EUCAST 2019", "EUCAST 2020". Simply using "CLSI" or "EUCAST" for input will automatically select the latest version of that guideline.

+

The repository of this package contains a machine readable version of all guidelines. This is a CSV file consisting of 18,964 rows and 10 columns. This file is machine readable, since it contains one row for every unique combination of the test method (MIC or disk diffusion), the antimicrobial agent and the microorganism. This allows for easy implementation of these rules in laboratory information systems (LIS).

After using as.rsi(), you can use eucast_rules() to (1) apply inferred susceptibility and resistance based on results of other antimicrobials and (2) apply intrinsic resistance based on taxonomic properties of a microorganism.

The function is.rsi.eligible() returns TRUE when a columns contains at most 5% invalid antimicrobial interpretations (not S and/or I and/or R), and FALSE otherwise. The threshold of 5% can be set with the threshold parameter.

Interpretation of R and S/I

@@ -396,9 +397,8 @@ The lifecycle of this function is stablersi_data <- as.rsi(c(rep("S", 474), rep("I", 36), rep("R", 370))) is.rsi(rsi_data) -plot(rsi_data) # for percentages +plot(rsi_data) # for percentages barplot(rsi_data) # for frequencies -freq(rsi_data) # frequency table with informative header library(dplyr) example_isolates %>% @@ -430,7 +430,7 @@ The lifecycle of this function is stable
-

Site built with pkgdown 1.5.0.

+

Site built with pkgdown 1.5.1.

diff --git a/docs/reference/catalogue_of_life_version.html b/docs/reference/catalogue_of_life_version.html index 6fdd0bf93..8711449d9 100644 --- a/docs/reference/catalogue_of_life_version.html +++ b/docs/reference/catalogue_of_life_version.html @@ -82,7 +82,7 @@ AMR (for R) - 1.1.0 + 1.1.0.9004 @@ -260,10 +260,6 @@ This package contains the complete taxonomic tree of almost all microorganisms ( -

Examples

-
library(dplyr)
-microorganisms %>% freq(kingdom)
-microorganisms %>% group_by(kingdom) %>% freq(phylum, nmax = NULL)
-

Site built with pkgdown 1.5.0.

+

Site built with pkgdown 1.5.1.

diff --git a/docs/reference/count.html b/docs/reference/count.html index 74ec5cd96..ddf12e802 100644 --- a/docs/reference/count.html +++ b/docs/reference/count.html @@ -83,7 +83,7 @@ count_resistant() should be used to count resistant isolates, count_susceptible( AMR (for R) - 1.1.0 + 1.1.0.9004 @@ -304,7 +304,7 @@ count_resistant() should be used to count resistant isolates, count_susceptible(

These functions are meant to count isolates. Use the resistance()/susceptibility() functions to calculate microbial resistance/susceptibility.

The function count_resistant() is equal to the function count_R(). The function count_susceptible() is equal to the function count_SI().

The function n_rsi() is an alias of count_all(). They can be used to count all available isolates, i.e. where all input antibiotics have an available result (S, I or R). Their use is equal to n_distinct(). Their function is equal to count_susceptible(...) + count_resistant(...).

-

The function count_df() takes any variable from data that has an rsi class (created with as.rsi()) and counts the number of S's, I's and R's. The function rsi_df() works exactly like count_df(), but adds the percentage of S, I and R.

+

The function count_df() takes any variable from data that has an rsi class (created with as.rsi()) and counts the number of S's, I's and R's. It also supports grouped variables. The function rsi_df() works exactly like count_df(), but adds the percentage of S, I and R.

Stable lifecycle

@@ -338,21 +338,21 @@ A microorganism is categorised as Susceptible, Increased exposure when -------- -------- ---------- ----------- ---------- ----------- S or I S or I X X X X R S or I X X X X - <NA> S or I X X - - + &lt;NA&gt; S or I X X - - S or I R X X X X R R - X - X - <NA> R - - - - - S or I <NA> X X - - - R <NA> - - - - - <NA> <NA> - - - - + &lt;NA&gt; R - - - - + S or I &lt;NA&gt; X X - - + R &lt;NA&gt; - - - - + &lt;NA&gt; &lt;NA&gt; - - - - --------------------------------------------------------------------

Please note that, in combination therapies, for only_all_tested = TRUE applies that:

    count_S()    +   count_I()    +   count_R()    = count_all()
   proportion_S() + proportion_I() + proportion_R() = 1
-

and that, in combination therapies, for only_all_tested = FALSE applies that:

    count_S()    +   count_I()    +   count_R()    >= count_all()
-  proportion_S() + proportion_I() + proportion_R() >= 1
+

and that, in combination therapies, for only_all_tested = FALSE applies that:

    count_S()    +   count_I()    +   count_R()    &gt;= count_all()
+  proportion_S() + proportion_I() + proportion_R() &gt;= 1
 

Using only_all_tested has no impact when only using one antibiotic as input.

@@ -399,7 +399,7 @@ A microorganism is categorised as Susceptible, Increased exposure when S = count_S(CIP), n1 = count_all(CIP), # the actual total; sum of all three n2 = n_rsi(CIP), # same - analogous to n_distinct - total = n()) # NOT the number of tested isolates! + total = n()) # NOT the number of tested isolates! # Count co-resistance between amoxicillin/clav acid and gentamicin, # so we can see that combination therapy does a lot more than mono therapy. @@ -437,7 +437,7 @@ A microorganism is categorised as Susceptible, Increased exposure when
-

Site built with pkgdown 1.5.0.

+

Site built with pkgdown 1.5.1.

diff --git a/docs/reference/filter_ab_class.html b/docs/reference/filter_ab_class.html index 4825616e5..344b1ac22 100644 --- a/docs/reference/filter_ab_class.html +++ b/docs/reference/filter_ab_class.html @@ -6,7 +6,7 @@ -Filter isolates on result in antibiotic class — filter_ab_class • AMR (for R) +Filter isolates on result in antimicrobial class — filter_ab_class • AMR (for R) @@ -48,8 +48,8 @@ - - + + @@ -82,7 +82,7 @@ AMR (for R) - 1.1.0 + 1.1.0.9004 @@ -226,13 +226,13 @@
-

Filter isolates on results in specific antibiotic variables based on their antibiotic class. This makes it easy to filter on isolates that were tested for e.g. any aminoglycoside.

+

Filter isolates on results in specific antimicrobial classes. This makes it easy to filter on isolates that were tested for e.g. any aminoglycoside.

filter_ab_class(x, ab_class, result = NULL, scope = "any", ...)
@@ -298,7 +298,8 @@ The lifecycle of this function is stableIf the unlying code needs breaking changes, they will occur gradually. To begin with, the function or argument will be deprecated; it will continue to work but will emit an message informing you of the change. Next, typically after at least one newly released version on CRAN, the message will be transformed to an error.

Examples

-
library(dplyr)
+    
if (FALSE) {
+library(dplyr)
 
 # filter on isolates that have any result for any aminoglycoside
 example_isolates %>% filter_aminoglycosides()
@@ -325,7 +326,8 @@ The lifecycle of this function is stable# all aminoglycosides and all fluoroquinolones
 example_isolates %>%
   filter_aminoglycosides("R", "all") %>%
-  filter_fluoroquinolones("R", "all")
+ filter_fluoroquinolones("R", "all") +}
@@ -352,7 +352,7 @@

Source

-

Methodology of this function is based on:

+

Methodology of this function is strictly based on:

M39 Analysis and Presentation of Cumulative Antimicrobial Susceptibility Test Data, 4th Edition, 2014, Clinical and Laboratory Standards Institute (CLSI). https://clsi.org/standards/products/microbiology/documents/m39/.

Value

@@ -405,6 +405,7 @@ The lifecycle of this function is stable# `example_isolates` is a dataset available in the AMR package. # See ?example_isolates. +if (FALSE) { library(dplyr) # Filter on first isolates: example_isolates %>% @@ -425,14 +426,12 @@ The lifecycle of this function is stable# Have a look at A and B. # B is more reliable because every isolate is counted only once. -# Gentamicin resitance in hospital D appears to be 3.7% higher than +# Gentamicin resistance in hospital D appears to be 3.7% higher than # when you (erroneously) would have used all isolates for analysis. ## OTHER EXAMPLES: -if (FALSE) { - # Short-hand versions: example_isolates %>% filter_first_isolate() diff --git a/docs/reference/ggplot_pca.html b/docs/reference/ggplot_pca.html index 1aac97824..c5048f487 100644 --- a/docs/reference/ggplot_pca.html +++ b/docs/reference/ggplot_pca.html @@ -82,7 +82,7 @@ AMR (for R) - 1.1.0 + 1.1.0.9004
@@ -382,6 +382,7 @@ The lifecycle of this function is maturing<
# `example_isolates` is a dataset available in the AMR package.
 # See ?example_isolates.
 
+if (FALSE) {
 # See ?pca for more info about Principal Component Analysis (PCA).
 library(dplyr)
 pca_model <- example_isolates %>%
@@ -394,7 +395,8 @@ The lifecycle of this function is maturing<
 biplot(pca_model)
 
 # new 
-ggplot_pca(pca_model)
+ggplot_pca(pca_model) +}
-

Site built with pkgdown 1.5.0.

+

Site built with pkgdown 1.5.1.

diff --git a/docs/reference/ggplot_rsi.html b/docs/reference/ggplot_rsi.html index 906294d3e..f15eaccd1 100644 --- a/docs/reference/ggplot_rsi.html +++ b/docs/reference/ggplot_rsi.html @@ -82,7 +82,7 @@ AMR (for R) - 1.1.0 + 1.1.0.9004 @@ -484,30 +484,6 @@ The lifecycle of this function is maturing< title = "AMR of Anti-UTI Drugs Per Hospital", x.title = "Hospital", datalabels = FALSE) - -# genuine analysis: check 3 most prevalent microorganisms -example_isolates %>% - # create new bacterial ID's, with all CoNS under the same group (Becker et al.) - mutate(mo = as.mo(mo, Becker = TRUE)) %>% - # filter on top three bacterial ID's - filter(mo %in% top_freq(freq(.$mo), 3)) %>% - # filter on first isolates - filter_first_isolate() %>% - # get short MO names (like "E. coli") - mutate(bug = mo_shortname(mo, Becker = TRUE)) %>% - # select this short name and some antiseptic drugs - select(bug, CXM, GEN, CIP) %>% - # group by MO - group_by(bug) %>% - # plot the thing, putting MOs on the facet - ggplot_rsi(x = "antibiotic", - facet = "bug", - translate_ab = FALSE, - nrow = 1, - title = "AMR of Top Three Microorganisms In Blood Culture Isolates", - subtitle = expression(paste("Only First Isolates, CoNS grouped according to Becker ", - italic("et al."), " (2014)")), - x.title = "Antibiotic (EARS-Net code)") }
-

Site built with pkgdown 1.5.0.

+

Site built with pkgdown 1.5.1.

diff --git a/docs/reference/index.html b/docs/reference/index.html index f64c1dd66..4abe82b92 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -81,7 +81,7 @@ AMR (for R) - 1.1.0.9003 + 1.1.0.9004 @@ -350,7 +350,7 @@

inner_join_microorganisms() left_join_microorganisms() right_join_microorganisms() full_join_microorganisms() semi_join_microorganisms() anti_join_microorganisms()

-

Join a table with microorganisms

+

Join microorganisms to a data set

@@ -429,7 +429,7 @@

filter_ab_class() filter_aminoglycosides() filter_carbapenems() filter_cephalosporins() filter_1st_cephalosporins() filter_2nd_cephalosporins() filter_3rd_cephalosporins() filter_4th_cephalosporins() filter_5th_cephalosporins() filter_fluoroquinolones() filter_glycopeptides() filter_macrolides() filter_tetracyclines()

-

Filter isolates on result in antibiotic class

+

Filter isolates on result in antimicrobial class

@@ -585,12 +585,6 @@

Translate strings from AMR package

- -

scale_type.mo() scale_type.ab()

- -

Extended functions

- -

like() `%like%` `%like_case%`

@@ -604,9 +598,9 @@ -

vec_ptype2.mo() vec_cast.mo() vec_ptype2.ab() vec_cast.ab()

+

scale_type.mo() scale_type.ab() vec_ptype2.mo() vec_cast.mo() vec_ptype2.ab() vec_cast.ab()

-

vctrs methods

+

Methods for tidyverse

@@ -623,7 +617,7 @@ -

p.symbol() portion_R() portion_IR() portion_I() portion_SI() portion_S() portion_df()

+

portion_R() portion_IR() portion_I() portion_SI() portion_S() portion_df()

Deprecated functions

diff --git a/docs/reference/join.html b/docs/reference/join.html index 6aa3e6a95..62a86c375 100644 --- a/docs/reference/join.html +++ b/docs/reference/join.html @@ -6,7 +6,7 @@ -Join a table with <a href='microorganisms.html'>microorganisms</a> — join • AMR (for R) +Join <a href='microorganisms.html'>microorganisms</a> to a data set — join • AMR (for R) @@ -48,7 +48,7 @@ - + @@ -82,7 +82,7 @@ AMR (for R) - 1.1.0 + 1.1.0.9004 @@ -226,7 +226,7 @@
@@ -264,13 +264,14 @@ ... -

other parameters to pass on to dplyr::join()

+

other parameters to pass on to dplyr::join()

Details

-

Note: As opposed to the dplyr::join() functions of dplyr, character vectors are supported and at default existing columns will get a suffix "2" and the newly joined columns will not get a suffix. See dplyr::join() for more information.

+

Note: As opposed to the join() functions of dplyr, character vectors are supported and at default existing columns will get a suffix "2" and the newly joined columns will not get a suffix.

+

These functions rely on merge(), a base R function to do joins.

Stable lifecycle

@@ -288,6 +289,7 @@ The lifecycle of this function is stableleft_join_microorganisms(as.mo("K. pneumoniae")) left_join_microorganisms("B_KLBSL_PNE") +if (FALSE) { library(dplyr) example_isolates %>% left_join_microorganisms() @@ -299,7 +301,8 @@ The lifecycle of this function is stablestringsAsFactors = FALSE) colnames(df) df_joined <- left_join_microorganisms(df, "bacteria") -colnames(df_joined) +colnames(df_joined) +}
@@ -324,7 +324,8 @@

Details

-

The function key_antibiotics() returns a character vector with 12 antibiotic results for every isolate. These isolates can then be compared using key_antibiotics_equal(), to check if two isolates have generally the same antibiogram. Missing and invalid values are replaced with a dot ("."). The first_isolate() function only uses this function on the same microbial species from the same patient. Using this, an MRSA will be included after a susceptible S. aureus (MSSA) found within the same episode (see episode parameter of first_isolate()). Without key antibiotic comparison it would not.

+

The function key_antibiotics() returns a character vector with 12 antibiotic results for every isolate. These isolates can then be compared using key_antibiotics_equal(), to check if two isolates have generally the same antibiogram. Missing and invalid values are replaced with a dot (".") by key_antibiotics() and ignored by key_antibiotics_equal().

+

The first_isolate() function only uses this function on the same microbial species from the same patient. Using this, e.g. an MRSA will be included after a susceptible S. aureus (MSSA) is found within the same patient episode. Without key antibiotic comparison it would not. See first_isolate() for more info.

At default, the antibiotics that are used for Gram-positive bacteria are:

  • Amoxicillin

  • Amoxicillin/clavulanic acid

  • @@ -427,7 +428,7 @@ The lifecycle of this function is stable
    -

    Site built with pkgdown 1.5.0.

    +

    Site built with pkgdown 1.5.1.

    diff --git a/docs/reference/like.html b/docs/reference/like.html index 1088b1a90..642fe0c14 100644 --- a/docs/reference/like.html +++ b/docs/reference/like.html @@ -82,7 +82,7 @@ AMR (for R) - 1.1.0 + 1.1.0.9004
@@ -260,13 +260,19 @@

Source

-

Idea from the like function from the data.table package, but made it case insensitive at default and let it support multiple patterns. Also, if the regex fails the first time, it tries again with perl = TRUE.

+

Idea from the like function from the data.table package

Value

A logical vector

Details

-

When running a regular expression fails, these functions try again with base::grepl(..., perl = TRUE).

+

The %like% function:

    +
  • Is case insensitive (use %like_case% for case-sensitive matching)

  • +
  • Supports multiple patterns

  • +
  • Checks if pattern is a regular expression and sets fixed = TRUE if not, to greatly improve speed

  • +
  • Tries again with perl = TRUE if regex fails

  • +
+

Using RStudio? This function can also be inserted from the Addins menu and can have its own Keyboard Shortcut like Ctrl+Shift+L or Cmd+Shift+L (see Tools > Modify Keyboard Shortcuts...).

Stable lifecycle

@@ -299,11 +305,11 @@ The lifecycle of this function is stablea %like% b #> TRUE TRUE TRUE -# get frequencies of bacteria whose name start with 'Ent' or 'ent' +# get isolates whose name start with 'Ent' or 'ent' library(dplyr) example_isolates %>% filter(mo_name(mo) %like% "^ent") %>% - freq(mo_genus(mo)) + freq(mo) @@ -467,7 +467,7 @@ A microorganism is categorised as Susceptible, Increased exposure when
-

Site built with pkgdown 1.5.0.

+

Site built with pkgdown 1.5.1.

diff --git a/docs/reference/microorganisms.html b/docs/reference/microorganisms.html index 5f5ee7369..6bdf0b8de 100644 --- a/docs/reference/microorganisms.html +++ b/docs/reference/microorganisms.html @@ -82,7 +82,7 @@ AMR (for R) - 1.1.0.9000 + 1.1.0.9004 diff --git a/docs/reference/mo_property.html b/docs/reference/mo_property.html index 34450164e..3aff8b4d4 100644 --- a/docs/reference/mo_property.html +++ b/docs/reference/mo_property.html @@ -82,7 +82,7 @@ AMR (for R) - 1.1.0.9000 + 1.1.0.9004 diff --git a/docs/reference/pca.html b/docs/reference/pca.html index cd304523d..81a70a4fa 100644 --- a/docs/reference/pca.html +++ b/docs/reference/pca.html @@ -82,7 +82,7 @@ AMR (for R) - 1.1.0 + 1.1.0.9004 @@ -254,7 +254,7 @@ ... -

columns of x to be selected for PCA

+

columns of x to be selected for PCA, can be unquoted since it supports quasiquotation.

retx @@ -315,6 +315,7 @@ The lifecycle of this function is maturing<
# `example_isolates` is a dataset available in the AMR package.
 # See ?example_isolates.
 
+if (FALSE) {
 # calculate the resistance per group first
 library(dplyr)
 resistance_data <- example_isolates %>%
@@ -329,7 +330,8 @@ The lifecycle of this function is maturing<
 pca_result
 summary(pca_result)
 biplot(pca_result)
-ggplot_pca(pca_result) # a new and convenient plot function
+ggplot_pca(pca_result) # a new and convenient plot function +}
-

Site built with pkgdown 1.5.0.

+

Site built with pkgdown 1.5.1.

diff --git a/docs/reference/proportion.html b/docs/reference/proportion.html index cecfbc74b..d122e804a 100644 --- a/docs/reference/proportion.html +++ b/docs/reference/proportion.html @@ -49,7 +49,7 @@ - @@ -83,7 +83,7 @@ resistance() should be used to calculate resistance, susceptibility() should be AMR (for R) - 1.1.0 + 1.1.0.9004 @@ -233,7 +233,7 @@ resistance() should be used to calculate resistance, susceptibility() should be
-

These functions can be used to calculate the (co-)resistance or susceptibility of microbial isolates (i.e. percentage of S, SI, I, IR or R). All functions support quasiquotation with pipes, can be used in summarise()][dplyr::summarise()] and also support grouped variables, please see Examples.

+

These functions can be used to calculate the (co-)resistance or susceptibility of microbial isolates (i.e. percentage of S, SI, I, IR or R). All functions support quasiquotation with pipes, can be used in summarise() from the dplyr package and also supports grouped variables, please see Examples.

resistance() should be used to calculate resistance, susceptibility() should be used to calculate susceptibility.

@@ -323,7 +323,7 @@ resistance() should be used to calculate resistance, susceptibility() should be

The function resistance() is equal to the function proportion_R(). The function susceptibility() is equal to the function proportion_SI().

Remember that you should filter your table to let it contain only first isolates! This is needed to exclude duplicates and to reduce selection bias. Use first_isolate() to determine them in your data set.

These functions are not meant to count isolates, but to calculate the proportion of resistance/susceptibility. Use the count()][AMR::count()] functions to count isolates. The function susceptibility() is essentially equal to count_susceptible() / count_all(). Low counts can influence the outcome - the proportion functions may camouflage this, since they only return the proportion (albeit being dependent on the minimum parameter).

-

The function proportion_df() takes any variable from data that has an rsi class (created with as.rsi()) and calculates the proportions R, I and S. The function rsi_df() works exactly like proportion_df(), but adds the number of isolates.

+

The function proportion_df() takes any variable from data that has an rsi class (created with as.rsi()) and calculates the proportions R, I and S. It also supports grouped variables. The function rsi_df() works exactly like proportion_df(), but adds the number of isolates.

Combination therapy

@@ -336,21 +336,21 @@ resistance() should be used to calculate resistance, susceptibility() should be -------- -------- ---------- ----------- ---------- ----------- S or I S or I X X X X R S or I X X X X - <NA> S or I X X - - + &lt;NA&gt; S or I X X - - S or I R X X X X R R - X - X - <NA> R - - - - - S or I <NA> X X - - - R <NA> - - - - - <NA> <NA> - - - - + &lt;NA&gt; R - - - - + S or I &lt;NA&gt; X X - - + R &lt;NA&gt; - - - - + &lt;NA&gt; &lt;NA&gt; - - - - --------------------------------------------------------------------

Please note that, in combination therapies, for only_all_tested = TRUE applies that:

    count_S()    +   count_I()    +   count_R()    = count_all()
   proportion_S() + proportion_I() + proportion_R() = 1
-

and that, in combination therapies, for only_all_tested = FALSE applies that:

    count_S()    +   count_I()    +   count_R()    >= count_all()
-  proportion_S() + proportion_I() + proportion_R() >= 1
+

and that, in combination therapies, for only_all_tested = FALSE applies that:

    count_S()    +   count_I()    +   count_R()    &gt;= count_all()
+  proportion_S() + proportion_I() + proportion_R() &gt;= 1
 

Using only_all_tested has no impact when only using one antibiotic as input.

@@ -398,6 +398,7 @@ A microorganism is categorised as Susceptible, Increased exposure when proportion_IR(example_isolates$AMX) proportion_R(example_isolates$AMX) +if (FALSE) { library(dplyr) example_isolates %>% group_by(hospital_id) %>% @@ -410,7 +411,7 @@ A microorganism is categorised as Susceptible, Increased exposure when SI = susceptibility(CIP, as_percent = TRUE), n1 = count_all(CIP), # the actual total; sum of all three n2 = n_rsi(CIP), # same - analogous to n_distinct - total = n()) # NOT the number of tested isolates! + total = n()) # NOT the number of tested isolates! # Calculate co-resistance between amoxicillin/clav acid and gentamicin, # so we can see that combination therapy does a lot more than mono therapy: @@ -455,9 +456,6 @@ A microorganism is categorised as Susceptible, Increased exposure when group_by(hospital_id) %>% proportion_df(translate = FALSE) - -if (FALSE) { - # calculate current empiric combination therapy of Helicobacter gastritis: my_table %>% filter(first_isolate == TRUE, @@ -480,7 +478,7 @@ A microorganism is categorised as Susceptible, Increased exposure when
-

Site built with pkgdown 1.5.0.

+

Site built with pkgdown 1.5.1.

diff --git a/docs/reference/reexports.html b/docs/reference/reexports.html index 5134f7e28..c4c938b24 100644 --- a/docs/reference/reexports.html +++ b/docs/reference/reexports.html @@ -87,7 +87,7 @@ below to see their documentation. AMR (for R) - 1.1.0 + 1.1.0.9004 @@ -263,7 +263,7 @@ below to see their documentation.

-

Site built with pkgdown 1.5.0.

+

Site built with pkgdown 1.5.1.

diff --git a/docs/reference/resistance_predict.html b/docs/reference/resistance_predict.html index 9d7a39b0d..509a01123 100644 --- a/docs/reference/resistance_predict.html +++ b/docs/reference/resistance_predict.html @@ -82,7 +82,7 @@ AMR (for R) - 1.1.0 + 1.1.0.9004 @@ -266,7 +266,7 @@ ) # S3 method for resistance_predict -plot(x, main = paste("Resistance Prediction of", x_name), ...) +plot(x, main = paste("Resistance Prediction of", x_name), ...) ggplot_rsi_predict( x, @@ -392,25 +392,25 @@ A microorganism is categorised as Susceptible, Increased exposure when col_ab = "AMX", year_min = 2010, model = "binomial") -plot(x) +plot(x) ggplot_rsi_predict(x) -# use dplyr so you can actually read it: -library(dplyr) -x <- example_isolates %>% - filter_first_isolate() %>% - filter(mo_genus(mo) == "Staphylococcus") %>% - resistance_predict("PEN", model = "binomial") -plot(x) - - -# get the model from the object -mymodel <- attributes(x)$model -summary(mymodel) +# using dplyr: +if (!require("dplyr")) { + library(dplyr) + x <- example_isolates %>% + filter_first_isolate() %>% + filter(mo_genus(mo) == "Staphylococcus") %>% + resistance_predict("PEN", model = "binomial") + plot(x) + # get the model from the object + mymodel <- attributes(x)$model + summary(mymodel) +} # create nice plots with ggplot2 yourself -if (!require(ggplot2)) { +if (!require(ggplot2) & !require("dplyr")) { data <- example_isolates %>% filter(mo == as.mo("E. coli")) %>% @@ -451,7 +451,7 @@ A microorganism is categorised as Susceptible, Increased exposure when
-

Site built with pkgdown 1.5.0.

+

Site built with pkgdown 1.5.1.

diff --git a/docs/reference/translate.html b/docs/reference/translate.html index 9bb47ad30..e97a9da7d 100644 --- a/docs/reference/translate.html +++ b/docs/reference/translate.html @@ -49,7 +49,7 @@ - + @@ -82,7 +82,7 @@ AMR (for R) - 1.1.0 + 1.1.0.9004 @@ -232,7 +232,7 @@
-

For language-dependent output of AMR functions, like mo_name(), mo_type() and ab_name().

+

For language-dependent output of AMR functions, like mo_name(), mo_gramstain(), mo_type() and ab_name().

get_locale()
@@ -241,9 +241,9 @@

Details

Strings will be translated to foreign languages if they are defined in a local translation file. Additions to this file can be suggested at our repository. The file can be found here: https://gitlab.com/msberends/AMR/blob/master/data-raw/translations.tsv.

-

Currently supported languages can be found if running: unique(AMR:::translations_file$lang).

+

Currently supported languages are (besides English): Dutch, French, German, Italian, Portuguese, Spanish. Not all these languages currently have translations available for all antimicrobial agents and colloquial microorganism names.

Please suggest your own translations by creating a new issue on our repository.

-

This file will be read by all functions where a translated output can be desired, like all mo_property() functions (mo_fullname(), mo_type(), etc.).

+

This file will be read by all functions where a translated output can be desired, like all mo_property() functions (mo_name(), mo_gramstain(), mo_type(), etc.).

The system language will be used at default, if that language is supported. The system language can be overwritten with Sys.setenv(AMR_locale = yourlanguage).

Stable lifecycle

@@ -301,7 +301,7 @@ The lifecycle of this function is stable
-

Site built with pkgdown 1.5.0.

+

Site built with pkgdown 1.5.1.

diff --git a/docs/sitemap.xml b/docs/sitemap.xml index f065ffbe7..1e5963487 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -7,7 +7,7 @@ https://msberends.gitlab.io/AMR/reference/AMR-deprecated.html - https://msberends.gitlab.io/AMR/reference/AMR-vctrs.html + https://msberends.gitlab.io/AMR/reference/AMR-tidyverse.html https://msberends.gitlab.io/AMR/reference/AMR.html @@ -72,9 +72,6 @@ https://msberends.gitlab.io/AMR/reference/example_isolates_unclean.html - - https://msberends.gitlab.io/AMR/reference/extended-functions.html - https://msberends.gitlab.io/AMR/reference/filter_ab_class.html diff --git a/man/AMR-deprecated.Rd b/man/AMR-deprecated.Rd index eaa582d5c..7c1720665 100644 --- a/man/AMR-deprecated.Rd +++ b/man/AMR-deprecated.Rd @@ -2,7 +2,6 @@ % Please edit documentation in R/deprecated.R \name{AMR-deprecated} \alias{AMR-deprecated} -\alias{p.symbol} \alias{portion_R} \alias{portion_IR} \alias{portion_I} @@ -11,8 +10,6 @@ \alias{portion_df} \title{Deprecated functions} \usage{ -p.symbol(...) - portion_R(...) portion_IR(...) diff --git a/man/AMR-vctrs.Rd b/man/AMR-tidyverse.Rd similarity index 89% rename from man/AMR-vctrs.Rd rename to man/AMR-tidyverse.Rd index 15f8416f2..4ec9886b8 100644 --- a/man/AMR-vctrs.Rd +++ b/man/AMR-tidyverse.Rd @@ -1,13 +1,19 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/vctrs.R -\name{AMR-vctrs} -\alias{AMR-vctrs} +% Please edit documentation in R/tidyverse.R +\name{AMR-tidyverse} +\alias{AMR-tidyverse} +\alias{scale_type.mo} +\alias{scale_type.ab} \alias{vec_ptype2.mo} \alias{vec_cast.mo} \alias{vec_ptype2.ab} \alias{vec_cast.ab} -\title{\code{vctrs} methods} +\title{Methods for tidyverse} \usage{ +scale_type.mo(x) + +scale_type.ab(x) + vec_ptype2.mo(x, y, ...) vec_cast.mo(x, to, ...) diff --git a/man/as.mic.Rd b/man/as.mic.Rd index 340875726..d0dc3f3ad 100755 --- a/man/as.mic.Rd +++ b/man/as.mic.Rd @@ -56,7 +56,6 @@ as.rsi(x = as.mic(4), plot(mic_data) barplot(mic_data) -freq(mic_data) } \seealso{ \code{\link[=as.rsi]{as.rsi()}} diff --git a/man/as.mo.Rd b/man/as.mo.Rd index 5607e4f19..b6b60956e 100644 --- a/man/as.mo.Rd +++ b/man/as.mo.Rd @@ -39,7 +39,7 @@ This excludes \emph{Enterococci} at default (who are in group D), use \code{Lanc \item{allow_uncertain}{a number between \code{0} (or \code{"none"}) and \code{3} (or \code{"all"}), or \code{TRUE} (= \code{2}) or \code{FALSE} (= \code{0}) to indicate whether the input should be checked for less probable results, please see \emph{Details}} -\item{reference_df}{a \code{\link{data.frame}} to use for extra reference when translating \code{x} to a valid \code{\link{mo}}. See \code{\link[=set_mo_source]{set_mo_source()}} and \code{\link[=get_mo_source]{get_mo_source()}} to automate the usage of your own codes (e.g. used in your analysis or organisation).} +\item{reference_df}{a \code{\link{data.frame}} to be used for extra reference when translating \code{x} to a valid \code{\link{mo}}. See \code{\link[=set_mo_source]{set_mo_source()}} and \code{\link[=get_mo_source]{get_mo_source()}} to automate the usage of your own codes (e.g. used in your analysis or organisation).} \item{...}{other parameters passed on to functions} } diff --git a/man/as.rsi.Rd b/man/as.rsi.Rd index f1c02c0c0..3b12605ba 100755 --- a/man/as.rsi.Rd +++ b/man/as.rsi.Rd @@ -45,7 +45,7 @@ is.rsi.eligible(x, threshold = 0.05) \item{ab}{any (vector of) text that can be coerced to a valid antimicrobial code with \code{\link[=as.ab]{as.ab()}}} -\item{guideline}{defaults to the latest included EUCAST guideline, run \code{unique(rsi_translation$guideline)} for all options} +\item{guideline}{defaults to the latest included EUCAST guideline, see Details for all options} \item{uti}{(Urinary Tract Infection) A vector with \link{logical}s (\code{TRUE} or \code{FALSE}) to specify whether a UTI specific interpretation from the guideline should be chosen. For using \code{\link[=as.rsi]{as.rsi()}} on a \link{data.frame}, this can also be a column containing \link{logical}s or when left blank, the data set will be search for a 'specimen' and rows containing 'urin' in that column will be regarded isolates from a UTI. See \emph{Examples}.} @@ -60,9 +60,11 @@ Ordered factor with new class \code{\link{rsi}} Interpret MIC values and disk diffusion diameters according to EUCAST or CLSI, or clean up existing R/SI values. This transforms the input to a new class \code{\link{rsi}}, which is an ordered factor with levels \verb{S < I < R}. Invalid antimicrobial interpretations will be translated as \code{NA} with a warning. } \details{ -Run \code{unique(rsi_translation$guideline)} for a list of all supported guidelines. The repository of this package contains \href{https://gitlab.com/msberends/AMR/blob/master/data-raw/rsi_translation.txt}{this machine readable version} of these guidelines. +When using \code{\link[=as.rsi]{as.rsi()}} on untransformed data, the data will be cleaned to only contain values S, I and R. When using the function on data with class \code{\link{mic}} (using \code{\link[=as.mic]{as.mic()}}) or class \code{\link{disk}} (using \code{\link[=as.disk]{as.disk()}}), the data will be interpreted based on the guideline set with the \code{guideline} parameter. -These guidelines are machine readable, since \href{https://gitlab.com/msberends/AMR/blob/master/data-raw/rsi_translation.txt}{}. +Supported guidelines to be used as input for the \code{guideline} parameter are: "CLSI 2010", "CLSI 2011", "CLSI 2012", "CLSI 2013", "CLSI 2014", "CLSI 2015", "CLSI 2016", "CLSI 2017", "CLSI 2018", "CLSI 2019", "EUCAST 2011", "EUCAST 2012", "EUCAST 2013", "EUCAST 2014", "EUCAST 2015", "EUCAST 2016", "EUCAST 2017", "EUCAST 2018", "EUCAST 2019", "EUCAST 2020". Simply using \code{"CLSI"} or \code{"EUCAST"} for input will automatically select the latest version of that guideline. + +The repository of this package \href{https://gitlab.com/msberends/AMR/blob/master/data-raw/rsi_translation.txt}{contains a machine readable version} of all guidelines. This is a CSV file consisting of 18,964 rows and 10 columns. This file is machine readable, since it contains one row for every unique combination of the test method (MIC or disk diffusion), the antimicrobial agent and the microorganism. This \strong{allows for easy implementation of these rules in laboratory information systems (LIS)}. After using \code{\link[=as.rsi]{as.rsi()}}, you can use \code{\link[=eucast_rules]{eucast_rules()}} to (1) apply inferred susceptibility and resistance based on results of other antimicrobials and (2) apply intrinsic resistance based on taxonomic properties of a microorganism. @@ -154,7 +156,6 @@ rsi_data <- as.rsi(c(rep("S", 474), rep("I", 36), rep("R", 370))) is.rsi(rsi_data) plot(rsi_data) # for percentages barplot(rsi_data) # for frequencies -freq(rsi_data) # frequency table with informative header library(dplyr) example_isolates \%>\% diff --git a/man/catalogue_of_life_version.Rd b/man/catalogue_of_life_version.Rd index ef1892261..57207c409 100644 --- a/man/catalogue_of_life_version.Rd +++ b/man/catalogue_of_life_version.Rd @@ -28,11 +28,6 @@ This package contains the complete taxonomic tree of almost all microorganisms ( On our website \url{https://msberends.gitlab.io/AMR} you can find \href{https://msberends.gitlab.io/AMR/articles/AMR.html}{a comprehensive tutorial} about how to conduct AMR analysis, the \href{https://msberends.gitlab.io/AMR/reference}{complete documentation of all functions} (which reads a lot easier than here in R) and \href{https://msberends.gitlab.io/AMR/articles/WHONET.html}{an example analysis using WHONET data}. } -\examples{ -library(dplyr) -microorganisms \%>\% freq(kingdom) -microorganisms \%>\% group_by(kingdom) \%>\% freq(phylum, nmax = NULL) -} \seealso{ \link{microorganisms} } diff --git a/man/count.Rd b/man/count.Rd index d7445cae1..0e6aff56d 100644 --- a/man/count.Rd +++ b/man/count.Rd @@ -70,7 +70,7 @@ The function \code{\link[=count_resistant]{count_resistant()}} is equal to the f The function \code{\link[=n_rsi]{n_rsi()}} is an alias of \code{\link[=count_all]{count_all()}}. They can be used to count all available isolates, i.e. where all input antibiotics have an available result (S, I or R). Their use is equal to \code{\link[=n_distinct]{n_distinct()}}. Their function is equal to \code{count_susceptible(...) + count_resistant(...)}. -The function \code{\link[=count_df]{count_df()}} takes any variable from \code{data} that has an \code{\link{rsi}} class (created with \code{\link[=as.rsi]{as.rsi()}}) and counts the number of S's, I's and R's. The function \code{\link[=rsi_df]{rsi_df()}} works exactly like \code{\link[=count_df]{count_df()}}, but adds the percentage of S, I and R. +The function \code{\link[=count_df]{count_df()}} takes any variable from \code{data} that has an \code{\link{rsi}} class (created with \code{\link[=as.rsi]{as.rsi()}}) and counts the number of S's, I's and R's. It also supports grouped variables. The function \code{\link[=rsi_df]{rsi_df()}} works exactly like \code{\link[=count_df]{count_df()}}, but adds the percentage of S, I and R. } \section{Stable lifecycle}{ diff --git a/man/extended-functions.Rd b/man/extended-functions.Rd deleted file mode 100644 index 56a0778c6..000000000 --- a/man/extended-functions.Rd +++ /dev/null @@ -1,29 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/extended.R -\name{extended-functions} -\alias{extended-functions} -\alias{scale_type.mo} -\alias{scale_type.ab} -\title{Extended functions} -\usage{ -scale_type.mo(x) - -scale_type.ab(x) -} -\description{ -These functions are extensions of functions in other packages. -} -\section{Stable lifecycle}{ - -\if{html}{\figure{lifecycle_stable.svg}{options: style=margin-bottom:5px} \cr} -The \link[AMR:lifecycle]{lifecycle} of this function is \strong{stable}. In a stable function, we are largely happy with the unlying code, and major changes are unlikely. This means that the unlying code will generally evolve by adding new arguments; we will avoid removing arguments or changing the meaning of existing arguments. - -If the unlying code needs breaking changes, they will occur gradually. To begin with, the function or argument will be deprecated; it will continue to work but will emit an message informing you of the change. Next, typically after at least one newly released version on CRAN, the message will be transformed to an error. -} - -\section{Read more on our website!}{ - -On our website \url{https://msberends.gitlab.io/AMR} you can find \href{https://msberends.gitlab.io/AMR/articles/AMR.html}{a comprehensive tutorial} about how to conduct AMR analysis, the \href{https://msberends.gitlab.io/AMR/reference}{complete documentation of all functions} (which reads a lot easier than here in R) and \href{https://msberends.gitlab.io/AMR/articles/WHONET.html}{an example analysis using WHONET data}. -} - -\keyword{internal} diff --git a/man/filter_ab_class.Rd b/man/filter_ab_class.Rd index 16b7554c4..207392cff 100644 --- a/man/filter_ab_class.Rd +++ b/man/filter_ab_class.Rd @@ -14,7 +14,7 @@ \alias{filter_glycopeptides} \alias{filter_macrolides} \alias{filter_tetracyclines} -\title{Filter isolates on result in antibiotic class} +\title{Filter isolates on result in antimicrobial class} \usage{ filter_ab_class(x, ab_class, result = NULL, scope = "any", ...) @@ -54,7 +54,7 @@ filter_tetracyclines(x, result = NULL, scope = "any", ...) \item{...}{parameters passed on to \code{filter_at} from the \code{dplyr} package} } \description{ -Filter isolates on results in specific antibiotic variables based on their antibiotic class. This makes it easy to filter on isolates that were tested for e.g. any aminoglycoside. +Filter isolates on results in specific antimicrobial classes. This makes it easy to filter on isolates that were tested for e.g. any aminoglycoside. } \details{ The \code{group} column in \link{antibiotics} data set will be searched for \code{ab_class} (case-insensitive). If no results are found, the \code{atc_group1} and \code{atc_group2} columns will be searched. Next, \code{x} will be checked for column names with a value in any abbreviations, codes or official names found in the \link{antibiotics} data set. @@ -68,6 +68,7 @@ If the unlying code needs breaking changes, they will occur gradually. To begin } \examples{ +\dontrun{ library(dplyr) # filter on isolates that have any result for any aminoglycoside @@ -97,3 +98,4 @@ example_isolates \%>\% filter_aminoglycosides("R", "all") \%>\% filter_fluoroquinolones("R", "all") } +} diff --git a/man/first_isolate.Rd b/man/first_isolate.Rd index b77c52620..d6eab0d25 100755 --- a/man/first_isolate.Rd +++ b/man/first_isolate.Rd @@ -6,7 +6,7 @@ \alias{filter_first_weighted_isolate} \title{Determine first (weighted) isolates} \source{ -Methodology of this function is based on: +Methodology of this function is strictly based on: \strong{M39 Analysis and Presentation of Cumulative Antimicrobial Susceptibility Test Data, 4th Edition}, 2014, \emph{Clinical and Laboratory Standards Institute (CLSI)}. \url{https://clsi.org/standards/products/microbiology/documents/m39/}. } @@ -142,6 +142,7 @@ On our website \url{https://msberends.gitlab.io/AMR} you can find \href{https:// # `example_isolates` is a dataset available in the AMR package. # See ?example_isolates. +\dontrun{ library(dplyr) # Filter on first isolates: example_isolates \%>\% @@ -162,14 +163,12 @@ B <- example_isolates \%>\% # Have a look at A and B. # B is more reliable because every isolate is counted only once. -# Gentamicin resitance in hospital D appears to be 3.7\% higher than +# Gentamicin resistance in hospital D appears to be 3.7\% higher than # when you (erroneously) would have used all isolates for analysis. ## OTHER EXAMPLES: -\dontrun{ - # Short-hand versions: example_isolates \%>\% filter_first_isolate() diff --git a/man/ggplot_pca.Rd b/man/ggplot_pca.Rd index 9911ea373..dd81ddd89 100644 --- a/man/ggplot_pca.Rd +++ b/man/ggplot_pca.Rd @@ -114,6 +114,7 @@ The \link[AMR:lifecycle]{lifecycle} of this function is \strong{maturing}. The u # `example_isolates` is a dataset available in the AMR package. # See ?example_isolates. +\dontrun{ # See ?pca for more info about Principal Component Analysis (PCA). library(dplyr) pca_model <- example_isolates \%>\% @@ -128,3 +129,4 @@ biplot(pca_model) # new ggplot_pca(pca_model) } +} diff --git a/man/ggplot_rsi.Rd b/man/ggplot_rsi.Rd index f3b157823..e1b907f8a 100644 --- a/man/ggplot_rsi.Rd +++ b/man/ggplot_rsi.Rd @@ -219,29 +219,5 @@ example_isolates \%>\% title = "AMR of Anti-UTI Drugs Per Hospital", x.title = "Hospital", datalabels = FALSE) - -# genuine analysis: check 3 most prevalent microorganisms -example_isolates \%>\% - # create new bacterial ID's, with all CoNS under the same group (Becker et al.) - mutate(mo = as.mo(mo, Becker = TRUE)) \%>\% - # filter on top three bacterial ID's - filter(mo \%in\% top_freq(freq(.$mo), 3)) \%>\% - # filter on first isolates - filter_first_isolate() \%>\% - # get short MO names (like "E. coli") - mutate(bug = mo_shortname(mo, Becker = TRUE)) \%>\% - # select this short name and some antiseptic drugs - select(bug, CXM, GEN, CIP) \%>\% - # group by MO - group_by(bug) \%>\% - # plot the thing, putting MOs on the facet - ggplot_rsi(x = "antibiotic", - facet = "bug", - translate_ab = FALSE, - nrow = 1, - title = "AMR of Top Three Microorganisms In Blood Culture Isolates", - subtitle = expression(paste("Only First Isolates, CoNS grouped according to Becker ", - italic("et al."), " (2014)")), - x.title = "Antibiotic (EARS-Net code)") } } diff --git a/man/join.Rd b/man/join.Rd index 4d6914c70..e53fe763b 100755 --- a/man/join.Rd +++ b/man/join.Rd @@ -9,7 +9,7 @@ \alias{full_join_microorganisms} \alias{semi_join_microorganisms} \alias{anti_join_microorganisms} -\title{Join a table with \link{microorganisms}} +\title{Join \link{microorganisms} to a data set} \usage{ inner_join_microorganisms(x, by = NULL, suffix = c("2", ""), ...) @@ -36,7 +36,9 @@ anti_join_microorganisms(x, by = NULL, ...) Join the data set \link{microorganisms} easily to an existing table or character vector. } \details{ -\strong{Note:} As opposed to the \code{\link[dplyr:join]{dplyr::join()}} functions of \code{dplyr}, \code{\link{character}} vectors are supported and at default existing columns will get a suffix \code{"2"} and the newly joined columns will not get a suffix. See \code{\link[dplyr:join]{dplyr::join()}} for more information. +\strong{Note:} As opposed to the \code{\link[=join]{join()}} functions of \code{dplyr}, \code{\link{character}} vectors are supported and at default existing columns will get a suffix \code{"2"} and the newly joined columns will not get a suffix. + +These functions rely on \code{\link[=merge]{merge()}}, a base R function to do joins. } \section{Stable lifecycle}{ @@ -55,6 +57,7 @@ On our website \url{https://msberends.gitlab.io/AMR} you can find \href{https:// left_join_microorganisms(as.mo("K. pneumoniae")) left_join_microorganisms("B_KLBSL_PNE") +\dontrun{ library(dplyr) example_isolates \%>\% left_join_microorganisms() @@ -68,3 +71,4 @@ colnames(df) df_joined <- left_join_microorganisms(df, "bacteria") colnames(df_joined) } +} diff --git a/man/key_antibiotics.Rd b/man/key_antibiotics.Rd index 07fce8de0..8a2b53524 100755 --- a/man/key_antibiotics.Rd +++ b/man/key_antibiotics.Rd @@ -68,7 +68,9 @@ key_antibiotics_equal( These function can be used to determine first isolates (see \code{\link[=first_isolate]{first_isolate()}}). Using key antibiotics to determine first isolates is more reliable than without key antibiotics. These selected isolates will then be called first \emph{weighted} isolates. } \details{ -The function \code{\link[=key_antibiotics]{key_antibiotics()}} returns a character vector with 12 antibiotic results for every isolate. These isolates can then be compared using \code{\link[=key_antibiotics_equal]{key_antibiotics_equal()}}, to check if two isolates have generally the same antibiogram. Missing and invalid values are replaced with a dot (\code{"."}). The \code{\link[=first_isolate]{first_isolate()}} function only uses this function on the same microbial species from the same patient. Using this, an MRSA will be included after a susceptible \emph{S. aureus} (MSSA) found within the same episode (see \code{episode} parameter of \code{\link[=first_isolate]{first_isolate()}}). Without key antibiotic comparison it would not. +The function \code{\link[=key_antibiotics]{key_antibiotics()}} returns a character vector with 12 antibiotic results for every isolate. These isolates can then be compared using \code{\link[=key_antibiotics_equal]{key_antibiotics_equal()}}, to check if two isolates have generally the same antibiogram. Missing and invalid values are replaced with a dot (\code{"."}) by \code{\link[=key_antibiotics]{key_antibiotics()}} and ignored by \code{\link[=key_antibiotics_equal]{key_antibiotics_equal()}}. + +The \code{\link[=first_isolate]{first_isolate()}} function only uses this function on the same microbial species from the same patient. Using this, e.g. an MRSA will be included after a susceptible \emph{S. aureus} (MSSA) is found within the same patient episode. Without key antibiotic comparison it would not. See \code{\link[=first_isolate]{first_isolate()}} for more info. At default, the antibiotics that are used for \strong{Gram-positive bacteria} are: \itemize{ diff --git a/man/like.Rd b/man/like.Rd index d27cf7f94..818ca122e 100755 --- a/man/like.Rd +++ b/man/like.Rd @@ -6,7 +6,7 @@ \alias{\%like_case\%} \title{Pattern Matching} \source{ -Idea from the \href{https://github.com/Rdatatable/data.table/blob/master/R/like.R}{\code{like} function from the \code{data.table} package}, but made it case insensitive at default and let it support multiple patterns. Also, if the regex fails the first time, it tries again with \code{perl = TRUE}. +Idea from the \href{https://github.com/Rdatatable/data.table/blob/master/R/like.R}{\code{like} function from the \code{data.table} package} } \usage{ like(x, pattern, ignore.case = TRUE) @@ -29,7 +29,13 @@ A \code{\link{logical}} vector Convenient wrapper around \code{\link[=grep]{grep()}} to match a pattern: \code{x \%like\% pattern}. It always returns a \code{\link{logical}} vector and is always case-insensitive (use \code{x \%like_case\% pattern} for case-sensitive matching). Also, \code{pattern} can be as long as \code{x} to compare items of each index in both vectors, or they both can have the same length to iterate over all cases. } \details{ -When running a regular expression fails, these functions try again with \code{base::grepl(..., perl = TRUE)}. +The \verb{\%like\%} function: +\itemize{ +\item Is case insensitive (use \verb{\%like_case\%} for case-sensitive matching) +\item Supports multiple patterns +\item Checks if \code{pattern} is a regular expression and sets \code{fixed = TRUE} if not, to greatly improve speed +\item Tries again with \code{perl = TRUE} if regex fails +} Using RStudio? This function can also be inserted from the Addins menu and can have its own Keyboard Shortcut like \code{Ctrl+Shift+L} or \code{Cmd+Shift+L} (see \code{Tools} > \verb{Modify Keyboard Shortcuts...}). } @@ -61,11 +67,11 @@ b <- c( "case", "diff", "yet") a \%like\% b #> TRUE TRUE TRUE -# get frequencies of bacteria whose name start with 'Ent' or 'ent' +# get isolates whose name start with 'Ent' or 'ent' library(dplyr) example_isolates \%>\% - filter(mo_name(mo) \%like\% "^ent") \%>\% - freq(mo_genus(mo)) + filter(mo_name(mo) \%like\% "^ent") \%>\% + freq(mo) } \seealso{ \code{\link[base:grep]{base::grep()}} diff --git a/man/pca.Rd b/man/pca.Rd index a0142337b..e0f926d96 100644 --- a/man/pca.Rd +++ b/man/pca.Rd @@ -17,7 +17,7 @@ pca( \arguments{ \item{x}{a \link{data.frame} containing numeric columns} -\item{...}{columns of \code{x} to be selected for PCA} +\item{...}{columns of \code{x} to be selected for PCA, can be unquoted since it supports quasiquotation.} \item{retx}{a logical value indicating whether the rotated variables should be returned.} @@ -69,6 +69,7 @@ The \link[AMR:lifecycle]{lifecycle} of this function is \strong{maturing}. The u # `example_isolates` is a dataset available in the AMR package. # See ?example_isolates. +\dontrun{ # calculate the resistance per group first library(dplyr) resistance_data <- example_isolates \%>\% @@ -85,3 +86,4 @@ summary(pca_result) biplot(pca_result) ggplot_pca(pca_result) # a new and convenient plot function } +} diff --git a/man/proportion.Rd b/man/proportion.Rd index 63cb766d7..4deadbb62 100644 --- a/man/proportion.Rd +++ b/man/proportion.Rd @@ -74,7 +74,7 @@ rsi_df( A \code{\link{double}} or, when \code{as_percent = TRUE}, a \code{\link{character}}. } \description{ -These functions can be used to calculate the (co-)resistance or susceptibility of microbial isolates (i.e. percentage of S, SI, I, IR or R). All functions support quasiquotation with pipes, can be used in \code{summarise()}][dplyr::summarise()] and also support grouped variables, please see \emph{Examples}. +These functions can be used to calculate the (co-)resistance or susceptibility of microbial isolates (i.e. percentage of S, SI, I, IR or R). All functions support quasiquotation with pipes, can be used in \code{\link[=summarise]{summarise()}} from the \code{dplyr} package and also supports grouped variables, please see \emph{Examples}. \code{\link[=resistance]{resistance()}} should be used to calculate resistance, \code{\link[=susceptibility]{susceptibility()}} should be used to calculate susceptibility.\cr } @@ -85,7 +85,7 @@ The function \code{\link[=resistance]{resistance()}} is equal to the function \c These functions are not meant to count isolates, but to calculate the proportion of resistance/susceptibility. Use the \code{count()}][AMR::count()] functions to count isolates. The function \code{\link[=susceptibility]{susceptibility()}} is essentially equal to \code{count_susceptible() / count_all()}. \emph{Low counts can influence the outcome - the \code{proportion} functions may camouflage this, since they only return the proportion (albeit being dependent on the \code{minimum} parameter).} -The function \code{\link[=proportion_df]{proportion_df()}} takes any variable from \code{data} that has an \code{\link{rsi}} class (created with \code{\link[=as.rsi]{as.rsi()}}) and calculates the proportions R, I and S. The function \code{\link[=rsi_df]{rsi_df()}} works exactly like \code{\link[=proportion_df]{proportion_df()}}, but adds the number of isolates. +The function \code{\link[=proportion_df]{proportion_df()}} takes any variable from \code{data} that has an \code{\link{rsi}} class (created with \code{\link[=as.rsi]{as.rsi()}}) and calculates the proportions R, I and S. It also supports grouped variables. The function \code{\link[=rsi_df]{rsi_df()}} works exactly like \code{\link[=proportion_df]{proportion_df()}}, but adds the number of isolates. } \section{Combination therapy}{ @@ -160,6 +160,7 @@ proportion_I(example_isolates$AMX) proportion_IR(example_isolates$AMX) proportion_R(example_isolates$AMX) +\dontrun{ library(dplyr) example_isolates \%>\% group_by(hospital_id) \%>\% @@ -217,9 +218,6 @@ example_isolates \%>\% group_by(hospital_id) \%>\% proportion_df(translate = FALSE) - -\dontrun{ - # calculate current empiric combination therapy of Helicobacter gastritis: my_table \%>\% filter(first_isolate == TRUE, diff --git a/man/resistance_predict.Rd b/man/resistance_predict.Rd index 3356f1b38..6b4d159e6 100644 --- a/man/resistance_predict.Rd +++ b/man/resistance_predict.Rd @@ -134,22 +134,22 @@ x <- resistance_predict(example_isolates, plot(x) ggplot_rsi_predict(x) -# use dplyr so you can actually read it: -library(dplyr) -x <- example_isolates \%>\% - filter_first_isolate() \%>\% - filter(mo_genus(mo) == "Staphylococcus") \%>\% - resistance_predict("PEN", model = "binomial") -plot(x) - - -# get the model from the object -mymodel <- attributes(x)$model -summary(mymodel) +# using dplyr: +if (!require("dplyr")) { + library(dplyr) + x <- example_isolates \%>\% + filter_first_isolate() \%>\% + filter(mo_genus(mo) == "Staphylococcus") \%>\% + resistance_predict("PEN", model = "binomial") + plot(x) + # get the model from the object + mymodel <- attributes(x)$model + summary(mymodel) +} # create nice plots with ggplot2 yourself -if (!require(ggplot2)) { +if (!require(ggplot2) & !require("dplyr")) { data <- example_isolates \%>\% filter(mo == as.mo("E. coli")) \%>\% diff --git a/man/translate.Rd b/man/translate.Rd index 69db4fefd..d8199d5a6 100644 --- a/man/translate.Rd +++ b/man/translate.Rd @@ -8,16 +8,16 @@ get_locale() } \description{ -For language-dependent output of AMR functions, like \code{\link[=mo_name]{mo_name()}}, \code{\link[=mo_type]{mo_type()}} and \code{\link[=ab_name]{ab_name()}}. +For language-dependent output of AMR functions, like \code{\link[=mo_name]{mo_name()}}, \code{\link[=mo_gramstain]{mo_gramstain()}}, \code{\link[=mo_type]{mo_type()}} and \code{\link[=ab_name]{ab_name()}}. } \details{ Strings will be translated to foreign languages if they are defined in a local translation file. Additions to this file can be suggested at our repository. The file can be found here: \url{https://gitlab.com/msberends/AMR/blob/master/data-raw/translations.tsv}. -Currently supported languages can be found if running: \code{unique(AMR:::translations_file$lang)}. +Currently supported languages are (besides English): Dutch, French, German, Italian, Portuguese, Spanish. Not all these languages currently have translations available for all antimicrobial agents and colloquial microorganism names. Please suggest your own translations \href{https://gitlab.com/msberends/AMR/issues/new?issue[title]=Translation\%20suggestion}{by creating a new issue on our repository}. -This file will be read by all functions where a translated output can be desired, like all \code{\link[=mo_property]{mo_property()}} functions (\code{\link[=mo_fullname]{mo_fullname()}}, \code{\link[=mo_type]{mo_type()}}, etc.). +This file will be read by all functions where a translated output can be desired, like all \code{\link[=mo_property]{mo_property()}} functions (\code{\link[=mo_name]{mo_name()}}, \code{\link[=mo_gramstain]{mo_gramstain()}}, \code{\link[=mo_type]{mo_type()}}, etc.). The system language will be used at default, if that language is supported. The system language can be overwritten with \code{Sys.setenv(AMR_locale = yourlanguage)}. } diff --git a/tests/testthat/test-count.R b/tests/testthat/test-count.R index 615fb8137..fab02430a 100644 --- a/tests/testthat/test-count.R +++ b/tests/testthat/test-count.R @@ -54,7 +54,7 @@ test_that("counts work", { combination = count_susceptible(CIP, GEN)) %>% pull(combination), c(253, 465, 192, 558)) - + # count_df expect_equal( example_isolates %>% select(AMX) %>% count_df() %>% pull(value), diff --git a/tests/testthat/test-data.R b/tests/testthat/test-data.R index 201e2e9f2..b2b79772a 100644 --- a/tests/testthat/test-data.R +++ b/tests/testthat/test-data.R @@ -51,17 +51,17 @@ test_that("data sets are valid", { }) test_that("creation of data sets is valid", { - DT <- make_DT() - expect_lt(nrow(DT[prevalence == 1]), nrow(DT[prevalence == 2])) - expect_lt(nrow(DT[prevalence == 2]), nrow(DT[prevalence == 3])) + df <- create_MO_lookup() + expect_lt(nrow(df[which(df$prevalence == 1), ]), nrow(df[which(df$prevalence == 2), ])) + expect_lt(nrow(df[which(df$prevalence == 2), ]), nrow(df[which(df$prevalence == 3), ])) expect_true(all(c("mo", "fullname", "kingdom", "phylum", "class", "order", "family", "genus", "species", "subspecies", "rank", "col_id", "species_id", "source", "ref", "prevalence", - "kingdom_index", "fullname_lower", "g_species") %in% colnames(DT))) + "kingdom_index", "fullname_lower", "g_species") %in% colnames(df))) - oldDT <- make_oldDT() + olddf <- create_MO.old_lookup() expect_true(all(c("col_id", "col_id_new", "fullname", "ref", "prevalence", - "fullname_lower", "g_species") %in% colnames(oldDT))) + "fullname_lower", "g_species") %in% colnames(olddf))) old <- make_trans_tbl() expect_gt(length(old), 0) diff --git a/tests/testthat/test-deprecated.R b/tests/testthat/test-deprecated.R index 290c1d060..c771e1981 100644 --- a/tests/testthat/test-deprecated.R +++ b/tests/testthat/test-deprecated.R @@ -22,9 +22,6 @@ context("deprecated.R") test_that("deprecated functions work", { - expect_identical(suppressWarnings(p.symbol(seq(0, 1, 0.001))), - p_symbol(seq(0, 1, 0.001))) - expect_equal(suppressWarnings(portion_S(example_isolates$AMX)), proportion_S(example_isolates$AMX)) expect_equal(suppressWarnings(portion_SI(example_isolates$AMX)), proportion_SI(example_isolates$AMX)) expect_equal(suppressWarnings(portion_I(example_isolates$AMX)), proportion_I(example_isolates$AMX)) diff --git a/tests/testthat/test-eucast_rules.R b/tests/testthat/test-eucast_rules.R index 8f8af659f..cfd875c00 100755 --- a/tests/testthat/test-eucast_rules.R +++ b/tests/testthat/test-eucast_rules.R @@ -24,24 +24,24 @@ context("eucast_rules.R") test_that("EUCAST rules work", { skip_on_cran() - + # thoroughly check input table expect_equal(colnames(eucast_rules_file), c("if_mo_property", "like.is.one_of", "this_value", "and_these_antibiotics", "have_these_values", "then_change_these_antibiotics", "to_value", "reference.rule", "reference.rule_group")) - + expect_error(suppressWarnings(eucast_rules(example_isolates, col_mo = "Non-existing"))) expect_error(eucast_rules(x = "text")) expect_error(eucast_rules(data.frame(a = "test"))) expect_error(eucast_rules(data.frame(mo = "test"), rules = "invalid rules set")) expect_warning(eucast_rules(data.frame(mo = "Escherichia coli", vancomycin = "S", stringsAsFactors = TRUE))) - + expect_identical(colnames(example_isolates), colnames(suppressWarnings(eucast_rules(example_isolates)))) - + a <- data.frame(mo = c("Klebsiella pneumoniae", "Pseudomonas aeruginosa", "Enterobacter aerogenes"), @@ -54,7 +54,7 @@ test_that("EUCAST rules work", { stringsAsFactors = FALSE) expect_identical(suppressWarnings(eucast_rules(a, "mo", info = FALSE)), b) expect_identical(suppressWarnings(eucast_rules(a, "mo", info = TRUE)), b) - + a <- data.frame(mo = c("Staphylococcus aureus", "Streptococcus group A"), COL = "-", # Colistin @@ -64,7 +64,7 @@ test_that("EUCAST rules work", { COL = "R", # Colistin stringsAsFactors = FALSE) expect_equal(suppressWarnings(eucast_rules(a, "mo", info = FALSE)), b) - + # piperacillin must be R in Enterobacteriaceae when tica is R library(dplyr) expect_equal(suppressWarnings( @@ -78,25 +78,17 @@ test_that("EUCAST rules work", { unique() %>% as.character()), "R") - + # Azithromicin and Clarythromycin must be equal to Erythromycin - a <- suppressWarnings( - example_isolates %>% - transmute(mo, - ERY, - AZM = as.rsi("R"), - CLR = as.rsi("R")) %>% - eucast_rules(col_mo = "mo") %>% - pull(CLR)) - b <- suppressWarnings( - example_isolates %>% - select(mo, ERY) %>% - eucast_rules(col_mo = "mo") %>% - pull(ERY)) - + a <- eucast_rules(data.frame(mo = example_isolates$mo, + ERY = example_isolates$ERY, + AZM = as.rsi("R"), + CLR = as.rsi("R"), + stringsAsFactors = FALSE))$CLR + b <- example_isolates$ERY expect_identical(a[!is.na(b)], b[!is.na(b)]) - + # amox is inferred by benzylpenicillin in Kingella kingae expect_equal( suppressWarnings( @@ -108,11 +100,11 @@ test_that("EUCAST rules work", { , info = FALSE))$AMX ), "S") - + # also test norf expect_output(suppressWarnings(eucast_rules(example_isolates %>% mutate(NOR = "S", NAL = "S"), info = TRUE))) - + # check verbose output expect_output(suppressWarnings(eucast_rules(example_isolates, verbose = TRUE, info = TRUE))) - + }) diff --git a/tests/testthat/test-filter_ab_class.R b/tests/testthat/test-filter_ab_class.R index e09e114ba..84dc20d8e 100644 --- a/tests/testthat/test-filter_ab_class.R +++ b/tests/testthat/test-filter_ab_class.R @@ -22,7 +22,8 @@ context("filter_ab_class.R") test_that("ATC-group filtering works", { - library(dplyr) + skip_on_cran() + expect_gt(example_isolates %>% filter_ab_class("carbapenem") %>% nrow(), 0) expect_gt(example_isolates %>% filter_aminoglycosides() %>% nrow(), 0) expect_gt(example_isolates %>% filter_carbapenems() %>% nrow(), 0) @@ -40,5 +41,5 @@ test_that("ATC-group filtering works", { expect_error(example_isolates %>% filter_carbapenems(result = "test")) expect_error(example_isolates %>% filter_carbapenems(scope = "test")) - expect_warning(example_isolates %>% select(1:3) %>% filter_carbapenems()) + expect_message(example_isolates %>% select(1:3) %>% filter_carbapenems()) }) diff --git a/tests/testthat/test-first_isolate.R b/tests/testthat/test-first_isolate.R index 73c5cf6a7..eff0ff19a 100755 --- a/tests/testthat/test-first_isolate.R +++ b/tests/testthat/test-first_isolate.R @@ -37,34 +37,23 @@ test_that("first isolates work", { 1317) # first weighted isolates + ex_iso_with_keyab <- example_isolates + ex_iso_with_keyab$keyab <- key_antibiotics(example_isolates, warnings = FALSE) expect_equal( suppressWarnings( sum( - first_isolate(x = example_isolates %>% mutate(keyab = key_antibiotics(.)), + first_isolate(x = ex_iso_with_keyab, # let syntax determine arguments automatically type = "keyantibiotics", info = TRUE), na.rm = TRUE)), 1413) - # should be same for tibbles - expect_equal( - suppressWarnings( - sum( - first_isolate(x = example_isolates %>% dplyr::as_tibble() %>% mutate(keyab = key_antibiotics(.)), - # let syntax determine these automatically: - # col_date = "date", - # col_patient_id = "patient_id", - # col_mo = "mo", - # col_keyantibiotics = "keyab", - type = "keyantibiotics", - info = TRUE), - na.rm = TRUE)), - 1413) + # when not ignoring I expect_equal( suppressWarnings( sum( - first_isolate(x = example_isolates %>% mutate(keyab = key_antibiotics(.)), + first_isolate(x = ex_iso_with_keyab, col_date = "date", col_patient_id = "patient_id", col_mo = "mo", @@ -78,7 +67,7 @@ test_that("first isolates work", { expect_equal( suppressWarnings( sum( - first_isolate(x = example_isolates %>% mutate(keyab = key_antibiotics(.)), + first_isolate(x = ex_iso_with_keyab, col_date = "date", col_patient_id = "patient_id", col_mo = "mo", @@ -99,16 +88,16 @@ test_that("first isolates work", { info = TRUE, icu_exclude = TRUE), na.rm = TRUE), - 906) + 891) # set 1500 random observations to be of specimen type 'Urine' random_rows <- sample(x = 1:2000, size = 1500, replace = FALSE) + x <- example_isolates + x$specimen <- "Other" + x[random_rows, "specimen"] <- "Urine" expect_lt( sum( - first_isolate(x = mutate(example_isolates, - specimen = if_else(row_number() %in% random_rows, - "Urine", - "Other")), + first_isolate(x = x, col_date = "date", col_patient_id = "patient_id", col_mo = "mo", @@ -120,10 +109,7 @@ test_that("first isolates work", { # same, but now exclude ICU expect_lt( sum( - first_isolate(x = mutate(example_isolates, - specimen = if_else(row_number() %in% random_rows, - "Urine", - "Other")), + first_isolate(x = x, col_date = "date", col_patient_id = "patient_id", col_mo = "mo", @@ -136,17 +122,19 @@ test_that("first isolates work", { 1501) # "No isolates found" - expect_message(example_isolates %>% - mutate(specimen = "test") %>% - mutate(first = first_isolate(., "date", "patient_id", - col_mo = "mo", - col_specimen = "specimen", - filter_specimen = "something_unexisting", - info = TRUE))) + test_iso <- example_isolates + test_iso$specimen <- "test" + expect_message(first_isolate(test_iso, + "date", + "patient_id", + col_mo = "mo", + col_specimen = "specimen", + filter_specimen = "something_unexisting", + info = TRUE)) # printing of exclusion message - expect_message(example_isolates %>% - first_isolate(col_date = "date", + expect_message(first_isolate(example_isolates, + col_date = "date", col_mo = "mo", col_patient_id = "patient_id", col_testcode = "gender", @@ -190,23 +178,15 @@ test_that("first isolates work", { 1322) # unknown MOs - expect_equal(example_isolates %>% - mutate(mo = ifelse(mo == "B_ESCHR_COLI", "UNKNOWN", mo)) %>% - mutate(first = first_isolate(., include_unknown = FALSE)) %>% - .$first %>% - sum(), + test_unknown <- example_isolates + test_unknown$mo <- ifelse(test_unknown$mo == "B_ESCHR_COLI", "UNKNOWN", test_unknown$mo) + expect_equal(sum(first_isolate(test_unknown, include_unknown = FALSE)), 1062) - expect_equal(example_isolates %>% - mutate(mo = ifelse(mo == "B_ESCHR_COLI", "UNKNOWN", mo)) %>% - mutate(first = first_isolate(., include_unknown = TRUE)) %>% - .$first %>% - sum(), + expect_equal(sum(first_isolate(test_unknown, include_unknown = TRUE)), 1529) - expect_equal(example_isolates %>% - mutate(mo = ifelse(mo == "B_ESCHR_COLI", NA, mo)) %>% - mutate(first = first_isolate(.)) %>% - .$first %>% - sum(), + + test_unknown$mo <- ifelse(test_unknown$mo == "UNKNOWN", NA, test_unknown$mo) + expect_equal(sum(first_isolate(test_unknown)), 1062) }) diff --git a/tests/testthat/test-mdro.R b/tests/testthat/test-mdro.R index 324f05ac6..1b9c2b68d 100755 --- a/tests/testthat/test-mdro.R +++ b/tests/testthat/test-mdro.R @@ -25,8 +25,6 @@ test_that("mdro works", { skip_on_cran() - library(dplyr) - expect_error(suppressWarnings(mdro(example_isolates, country = "invalid", col_mo = "mo", info = TRUE))) expect_error(suppressWarnings(mdro(example_isolates, country = "fr", info = TRUE))) expect_error(mdro(example_isolates, guideline = c("BRMO", "MRGN"), info = TRUE)) @@ -100,7 +98,7 @@ test_that("mdro works", { expect_equal( # select only rifampicine, mo will be determined automatically (as M. tuberculosis), # number of mono-resistant strains should be equal to number of rifampicine-resistant strains - example_isolates %>% select(RIF) %>% mdr_tb() %>% freq() %>% pull(count) %>% .[2], + freq(mdr_tb(example_isolates[, "RIF", drop = FALSE]))$count[2], count_R(example_isolates$RIF)) sample_rsi <- function() { @@ -109,69 +107,113 @@ test_that("mdro works", { prob = c(0.5, 0.1, 0.4), replace = TRUE) } - expect_gt( - #suppressWarnings( - data.frame(rifampicin = sample_rsi(), - inh = sample_rsi(), - gatifloxacin = sample_rsi(), - eth = sample_rsi(), - pza = sample_rsi(), - MFX = sample_rsi(), - KAN = sample_rsi()) %>% - mdr_tb() %>% - n_distinct() - #) - , - 2) + x <- data.frame(rifampicin = sample_rsi(), + inh = sample_rsi(), + gatifloxacin = sample_rsi(), + eth = sample_rsi(), + pza = sample_rsi(), + MFX = sample_rsi(), + KAN = sample_rsi()) + expect_gt(n_distinct(mdr_tb(x)), 2) # check the guideline by Magiorakos et al. (2012), the default guideline - stau <- tribble( - ~mo, ~GEN, ~RIF, ~CPT, ~OXA, ~CIP, ~MFX, ~SXT, ~FUS, ~VAN, ~TEC, ~TLV, ~TGC, ~CLI, ~DAP, ~ERY, ~LNZ, ~CHL, ~FOS, ~QDA, ~TCY, ~DOX, ~MNO, - "S. aureus", "R", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", - "S. aureus", "R", "R", "R", "R", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", - "S. aureus", "S", "S", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", - "S. aureus", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R" - ) + stau <- data.frame(mo = c("S. aureus", "S. aureus", "S. aureus", "S. aureus"), + GEN = c("R", "R", "S", "R"), + RIF = c("S", "R", "S", "R"), + CPT = c("S", "R", "R", "R"), + OXA = c("S", "R", "R", "R"), + CIP = c("S", "S", "R", "R"), + MFX = c("S", "S", "R", "R"), + SXT = c("S", "S", "R", "R"), + FUS = c("S", "S", "R", "R"), + VAN = c("S", "S", "R", "R"), + TEC = c("S", "S", "R", "R"), + TLV = c("S", "S", "R", "R"), + TGC = c("S", "S", "R", "R"), + CLI = c("S", "S", "R", "R"), + DAP = c("S", "S", "R", "R"), + ERY = c("S", "S", "R", "R"), + LNZ = c("S", "S", "R", "R"), + CHL = c("S", "S", "R", "R"), + FOS = c("S", "S", "R", "R"), + QDA = c("S", "S", "R", "R"), + TCY = c("S", "S", "R", "R"), + DOX = c("S", "S", "R", "R"), + MNO = c("S", "S", "R", "R"), + stringsAsFactors = FALSE) expect_equal(as.integer(mdro(stau)), c(1:4)) expect_s3_class(mdro(stau, verbose = TRUE), "data.frame") - ente <- tribble( - ~mo, ~GEH, ~STH, ~IPM, ~MEM, ~DOR, ~CIP, ~LVX, ~MFX, ~VAN, ~TEC, ~TGC, ~DAP, ~LNZ, ~AMP, ~QDA, ~DOX, ~MNO, - "Enterococcus", "R", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", - "Enterococcus", "R", "R", "R", "R", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", - "Enterococcus", "S", "S", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", - "Enterococcus", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R" - ) + ente <- data.frame(mo = c("Enterococcus", "Enterococcus", "Enterococcus", "Enterococcus"), + GEH = c("R", "R", "S", "R"), + STH = c("S", "R", "S", "R"), + IPM = c("S", "R", "R", "R"), + MEM = c("S", "R", "R", "R"), + DOR = c("S", "S", "R", "R"), + CIP = c("S", "S", "R", "R"), + LVX = c("S", "S", "R", "R"), + MFX = c("S", "S", "R", "R"), + VAN = c("S", "S", "R", "R"), + TEC = c("S", "S", "R", "R"), + TGC = c("S", "S", "R", "R"), + DAP = c("S", "S", "R", "R"), + LNZ = c("S", "S", "R", "R"), + AMP = c("S", "S", "R", "R"), + QDA = c("S", "S", "R", "R"), + DOX = c("S", "S", "R", "R"), + MNO = c("S", "S", "R", "R"), + stringsAsFactors = FALSE) expect_equal(as.integer(mdro(ente)), c(1:4)) expect_s3_class(mdro(ente, verbose = TRUE), "data.frame") - entero <- tribble( - ~mo, ~GEN, ~TOB, ~AMK, ~NET, ~CPT, ~TCC, ~TZP, ~ETP, ~IPM, ~MEM, ~DOR, ~CZO, ~CXM, ~CTX, ~CAZ, ~FEP, ~FOX, ~CTT, ~CIP, ~SXT, ~TGC, ~ATM, ~AMP, ~AMC, ~SAM, ~CHL, ~FOS, ~COL, ~TCY, ~DOX, ~MNO, - "E. coli", "R", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", - "E. coli", "R", "R", "R", "R", "R", "R", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", - "E. coli", "S", "S", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", - "E. coli", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R" - ) + entero <- data.frame(mo = c("E. coli", "E. coli", "E. coli", "E. coli"), + GEN = c("R", "R", "S", "R"), TOB = c("S", "R", "S", "R"), + AMK = c("S", "R", "R", "R"), NET = c("S", "R", "R", "R"), + CPT = c("S", "R", "R", "R"), TCC = c("S", "R", "R", "R"), + TZP = c("S", "S", "R", "R"), ETP = c("S", "S", "R", "R"), + IPM = c("S", "S", "R", "R"), MEM = c("S", "S", "R", "R"), + DOR = c("S", "S", "R", "R"), CZO = c("S", "S", "R", "R"), + CXM = c("S", "S", "R", "R"), CTX = c("S", "S", "R", "R"), + CAZ = c("S", "S", "R", "R"), FEP = c("S", "S", "R", "R"), + FOX = c("S", "S", "R", "R"), CTT = c("S", "S", "R", "R"), + CIP = c("S", "S", "R", "R"), SXT = c("S", "S", "R", "R"), + TGC = c("S", "S", "R", "R"), ATM = c("S", "S", "R", "R"), + AMP = c("S", "S", "R", "R"), AMC = c("S", "S", "R", "R"), + SAM = c("S", "S", "R", "R"), CHL = c("S", "S", "R", "R"), + FOS = c("S", "S", "R", "R"), COL = c("S", "S", "R", "R"), + TCY = c("S", "S", "R", "R"), DOX = c("S", "S", "R", "R"), + MNO = c("S", "S", "R", "R"), + stringsAsFactors = FALSE) expect_equal(as.integer(mdro(entero)), c(1:4)) expect_s3_class(mdro(entero, verbose = TRUE), "data.frame") - pseud <- tribble( - ~mo, ~GEN, ~TOB, ~AMK, ~NET, ~IPM, ~MEM, ~DOR, ~CAZ, ~FEP, ~CIP, ~LVX, ~TCC, ~TZP, ~ATM, ~FOS, ~COL, ~PLB, - "P. aeruginosa", "R", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", - "P. aeruginosa", "R", "S", "S", "S", "R", "S", "S", "S", "R", "S", "S", "S", "S", "S", "S", "S", "S", - "P. aeruginosa", "S", "S", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", - "P. aeruginosa", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R" - ) + pseud <- data.frame(mo = c("P. aeruginosa", "P. aeruginosa", "P. aeruginosa", "P. aeruginosa"), + GEN = c("R", "R", "S", "R"), TOB = c("S", "S", "S", "R"), + AMK = c("S", "S", "R", "R"), NET = c("S", "S", "R", "R"), + IPM = c("S", "R", "R", "R"), MEM = c("S", "S", "R", "R"), + DOR = c("S", "S", "R", "R"), CAZ = c("S", "S", "R", "R"), + FEP = c("S", "R", "R", "R"), CIP = c("S", "S", "R", "R"), + LVX = c("S", "S", "R", "R"), TCC = c("S", "S", "R", "R"), + TZP = c("S", "S", "R", "R"), ATM = c("S", "S", "R", "R"), + FOS = c("S", "S", "R", "R"), COL = c("S", "S", "R", "R"), + PLB = c("S", "S", "R", "R"), + stringsAsFactors = FALSE) expect_equal(as.integer(mdro(pseud)), c(1:4)) expect_s3_class(mdro(pseud, verbose = TRUE), "data.frame") - acin <- tribble( - ~mo, ~GEN, ~TOB, ~AMK, ~NET, ~IPM, ~MEM, ~DOR, ~CIP, ~LVX, ~TZP, ~TCC, ~CTX, ~CRO, ~CAZ, ~FEP, ~SXT, ~SAM, ~COL, ~PLB, ~TCY, ~DOX, ~MNO, - "A. baumannii", "R", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", - "A. baumannii", "R", "R", "R", "R", "S", "R", "S", "S", "S", "S", "S", "S", "S", "S", "R", "S", "S", "S", "S", "S", "S", "S", - "A. baumannii", "S", "S", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", - "A. baumannii", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R" - ) + acin <- data.frame(mo = c("A. baumannii", "A. baumannii", "A. baumannii", "A. baumannii"), + GEN = c("R", "R", "S", "R"), TOB = c("S", "R", "S", "R"), + AMK = c("S", "R", "R", "R"), NET = c("S", "R", "R", "R"), + IPM = c("S", "S", "R", "R"), MEM = c("S", "R", "R", "R"), + DOR = c("S", "S", "R", "R"), CIP = c("S", "S", "R", "R"), + LVX = c("S", "S", "R", "R"), TZP = c("S", "S", "R", "R"), + TCC = c("S", "S", "R", "R"), CTX = c("S", "S", "R", "R"), + CRO = c("S", "S", "R", "R"), CAZ = c("S", "S", "R", "R"), + FEP = c("S", "R", "R", "R"), SXT = c("S", "S", "R", "R"), + SAM = c("S", "S", "R", "R"), COL = c("S", "S", "R", "R"), + PLB = c("S", "S", "R", "R"), TCY = c("S", "S", "R", "R"), + DOX = c("S", "S", "R", "R"), MNO = c("S", "S", "R", "R"), + stringsAsFactors = FALSE) expect_equal(as.integer(mdro(acin)), c(1:4)) expect_s3_class(mdro(acin, verbose = TRUE), "data.frame") diff --git a/tests/testthat/test-mo.R b/tests/testthat/test-mo.R index 1e37f0dcc..05574a150 100644 --- a/tests/testthat/test-mo.R +++ b/tests/testthat/test-mo.R @@ -25,7 +25,6 @@ test_that("as.mo works", { skip_on_cran() - library(dplyr) MOs <- microorganisms %>% filter(!is.na(mo), nchar(mo) > 3) expect_identical(as.character(MOs$mo), as.character(as.mo(MOs$mo))) @@ -64,7 +63,7 @@ test_that("as.mo works", { expect_equal(as.character(as.mo("MRSE")), "B_STPHY_EPDR") expect_equal(as.character(as.mo("VRE")), "B_ENTRC") - expect_equal(as.character(as.mo("MRPA")), "B_PSDMN_ARGN") + expect_equal(as.character(as.mo("MRPA")), "B_PSDMN_AERG") expect_equal(as.character(as.mo("PISP")), "B_STRPT_PNMN") expect_equal(as.character(as.mo("PRSP")), "B_STRPT_PNMN") expect_equal(as.character(as.mo("VISP")), "B_STRPT_PNMN") diff --git a/tests/testthat/test-mo_property.R b/tests/testthat/test-mo_property.R index d2095fcd6..239252d47 100644 --- a/tests/testthat/test-mo_property.R +++ b/tests/testthat/test-mo_property.R @@ -100,14 +100,12 @@ test_that("mo_property works", { expect_warning(mo_name(c("B_ESCHR_COL", "B_STPHY_AUR"))) # outcome of mo_fullname must always return the fullname from the data set - library(dplyr) - x <- microorganisms %>% - transmute(mo, - # fullname from the original data: - f1 = fullname, - # newly created fullname based on MO code: - f2 = mo_fullname(mo, language = "en")) %>% - filter(f1 != f2) - expect_equal(nrow(x), 0) - + x <- data.frame(mo = microorganisms$mo, + # fullname from the original data: + f1 = microorganisms$fullname, + # newly created fullname based on MO code: + f2 = mo_fullname(microorganisms$mo, language = "en"), + stringsAsFactors = FALSE) + expect_equal(nrow(subset(x, f1 != f2)), 0) + }) diff --git a/tests/testthat/test-pca.R b/tests/testthat/test-pca.R index 9826da0bb..63fb271c5 100644 --- a/tests/testthat/test-pca.R +++ b/tests/testthat/test-pca.R @@ -22,13 +22,20 @@ context("pca.R") test_that("PCA works", { - library(dplyr) - resistance_data <- example_isolates %>% - filter(mo %in% as.mo(c("E. coli", "K. pneumoniae", "S. aureus"))) %>% - select(mo, AMC, CXM, CTX, TOB, TMP) %>% - group_by(order = mo_order(mo), # group on anything, like order - genus = mo_genus(mo)) %>% # and genus as we do here - summarise_if(is.rsi, resistance, minimum = 0) + resistance_data <- structure(list(order = c("Bacillales", "Enterobacterales", "Enterobacterales"), + genus = c("Staphylococcus", "Escherichia", "Klebsiella"), + AMC = c(0.00425, 0.13062, 0.10344), + CXM = c(0.00425, 0.05376, 0.10344), + CTX = c(0.00000, 0.02396, 0.05172), + TOB = c(0.02325, 0.02597, 0.10344), + TMP = c(0.08387, 0.39141, 0.18367)), + class = c("grouped_df", "tbl_df", "tbl", "data.frame"), + row.names = c(NA, -3L), + groups = structure(list(order = c("Bacillales", "Enterobacterales"), + .rows = list(1L, 2:3)), + row.names = c(NA, -2L), + class = c("tbl_df", "tbl", "data.frame"), + .drop = TRUE)) pca_model <- pca(resistance_data) diff --git a/vignettes/AMR.Rmd b/vignettes/AMR.Rmd index 57a13206d..698b8a42d 100755 --- a/vignettes/AMR.Rmd +++ b/vignettes/AMR.Rmd @@ -32,7 +32,7 @@ Conducting antimicrobial resistance analysis unfortunately requires in-depth kno * Good questions (always start with these!) * A thorough understanding of (clinical) epidemiology, to understand the clinical and epidemiological relevance and possible bias of results -* A thorough understanding of (clinical) microbiology/infectious diseases, to understand which microorganisms are causal to which infections and the implications of pharmaceutical treatment +* A thorough understanding of (clinical) microbiology/infectious diseases, to understand which microorganisms are causal to which infections and the implications of pharmaceutical treatment, as well as understanding intrinsic and acquired microbial resistance * Experience with data analysis with microbiological tests and their results, to understand the determination and limitations of MIC values and their interpretations to RSI values * Availability of the biological taxonomy of microorganisms and probably normalisation factors for pharmaceuticals, such as defined daily doses (DDD) * Available (inter-)national guidelines, and profound methods to apply them @@ -48,11 +48,12 @@ For this tutorial, we will create fake demonstration data to work with. You can skip to [Cleaning the data](#cleaning-the-data) if you already have your own data ready. If you start your analysis, try to make the structure of your data generally look like this: ```{r example table, echo = FALSE, results = 'asis'} -knitr::kable(dplyr::tibble(date = Sys.Date(), - patient_id = c("abcd", "abcd", "efgh"), - mo = "Escherichia coli", - AMX = c("S", "S", "R"), - CIP = c("S", "R", "S")), +knitr::kable(data.frame(date = Sys.Date(), + patient_id = c("abcd", "abcd", "efgh"), + mo = "Escherichia coli", + AMX = c("S", "S", "R"), + CIP = c("S", "R", "S"), + stringsAsFactors = FALSE), align = "c") ``` @@ -61,13 +62,18 @@ As with many uses in R, we need some additional packages for AMR analysis. Our p Our `AMR` package depends on these packages and even extends their use and functions. -```{r lib packages, message = FALSE} +```{r lib packages, eval = FALSE} library(dplyr) library(ggplot2) library(AMR) # (if not yet installed, install with:) -# install.packages(c("tidyverse", "AMR")) +# install.packages(c("dplyr", "ggplot2", "AMR")) +``` + +```{r lib packages 2, echo = FALSE, results = 'asis'} +library(AMR) +library(dplyr) ``` # Creation of data diff --git a/vignettes/benchmarks.Rmd b/vignettes/benchmarks.Rmd index a7bcf21b1..f9f3c5d25 100755 --- a/vignettes/benchmarks.Rmd +++ b/vignettes/benchmarks.Rmd @@ -29,28 +29,22 @@ One of the most important features of this package is the complete microbial tax Using the `microbenchmark` package, we can review the calculation performance of this function. Its function `microbenchmark()` runs different input expressions independently of each other and measures their time-to-result. ```{r, message = FALSE, echo = FALSE} -library(dplyr) library(ggplot2) ggplot.bm <- function(df, title = NULL) { - p <- df %>% - group_by(expr) %>% - summarise(t = median(time) / 1e+06) %>% - arrange(t) %>% - mutate(expr = factor(as.character(expr), levels = rev(as.character(expr))), - t_round = round(t, 1)) + s <- summary(df)[order(summary(df)$median), ] suppressWarnings( print( - p %>% - ggplot(aes(x = expr, y = t)) + - geom_linerange(aes(ymin = 0, ymax = t), colour = "#555555") + - geom_text(aes(label = t_round, hjust = -0.5), size = 3) + + s %>% + ggplot(aes(x = expr, y = median)) + + geom_linerange(aes(ymin = 0, ymax = median), colour = "#555555") + + geom_text(aes(label = round(s$median, 0), hjust = -0.5), size = 3) + geom_point(size = 3, colour = "#555555") + coord_flip() + scale_y_log10(breaks = c(1, 2, 5, 10, 20, 50, 100, 200, 500, 1000, 2000, 5000), - limits = c(1, max(p$t) * 2)) + + limits = c(1, max(s$median) * 2)) + labs(x = "Expression", y = "Median time in milliseconds (log scale)", title = title) ) ) @@ -58,7 +52,7 @@ ggplot.bm <- function(df, title = NULL) { ``` ```{r, message = FALSE} -library(microbenchmark) +microbenchmark <- microbenchmark::microbenchmark library(AMR) ``` @@ -105,7 +99,7 @@ M.semesiae <- microbenchmark(as.mo("metsem"), print(M.semesiae, unit = "ms", signif = 4) ``` -That takes `r round(mean(M.semesiae$time, na.rm = TRUE) / mean(S.aureus$time, na.rm = TRUE), 1)` times as much time on average. We can conclude that looking up arbitrary codes of less prevalent microorganisms is the worst way to go, in terms of calculation performance. Full names (like *Methanosarcina semesiae*) are always very fast and only take some thousands of seconds to coerce - they are the most probable input from most data sets. +Looking up arbitrary codes of less prevalent microorganisms costs the most time. Full names (like *Methanosarcina semesiae*) are always very fast and only take some thousands of seconds to coerce - they are the most probable input from most data sets. In the figure below, we compare *Escherichia coli* (which is very common) with *Prevotella brevis* (which is moderately common) and with *Methanosarcina semesiae* (which is uncommon): @@ -115,20 +109,22 @@ boxplot(microbenchmark( as.mo("Meth. semesiae"), as.mo("Prev. brevis"), as.mo("Esc. coli"), - times = 10), + times = 100), horizontal = TRUE, las = 1, unit = "s", log = TRUE, xlab = "", ylab = "Time in seconds (log)", main = "Benchmarks per prevalence") ``` -Uncommon microorganisms take a lot more time than common microorganisms. To relieve this pitfall and further improve performance, two important calculations take almost no time at all: **repetitive results** and **already precalculated results**. +Uncommon microorganisms take some more time than common microorganisms. To further improve performance, two important calculations take almost no time at all: **repetitive results** and **already precalculated results**. ### Repetitive results Repetitive results are unique values that are present more than once. Unique values will only be calculated once by `as.mo()`. We will use `mo_name()` for this test - a helper function that returns the full microbial name (genus, species and possibly subspecies) which uses `as.mo()` internally. -```{r, message = FALSE} +```{r, message = FALSE, eval = FALSE} library(dplyr) +``` +```{r, message = FALSE} # take all MO codes from the example_isolates data set x <- example_isolates$mo %>% # keep only the unique ones @@ -148,11 +144,11 @@ n_distinct(x) # now let's see: run_it <- microbenchmark(mo_name(x), - times = 100) + times = 10) print(run_it, unit = "ms", signif = 3) ``` -So transforming 500,000 values (!!) of `r n_distinct(x)` unique values only takes `r round(median(run_it$time, na.rm = TRUE) / 1e9, 2)` seconds (`r as.integer(median(run_it$time, na.rm = TRUE) / 1e6)` ms). You only lose time on your unique input values. +So transforming 500,000 values (!!) of `r n_distinct(x)` unique values only takes `r round(median(run_it$time, na.rm = TRUE) / 1e9, 2)` seconds. You only lose time on your unique input values. ### Precalculated results